From 6a0cb9818f4ef46539127b816d0badf13ca3dd51 Mon Sep 17 00:00:00 2001 From: Tim Steiner <tsteiner2@unl.edu> Date: Fri, 29 Oct 2010 21:28:05 +0000 Subject: [PATCH] The unl migration tool will now ignore pages/files if they are duplicates of pages/files it has already seen. git-svn-id: file:///tmp/wdn_thm_drupal/trunk@279 20a16fea-79d4-4915-8869-1ea9d5ebf173 --- sites/all/modules/unl/unl_migration.php | 33 ++++++++++++++++--------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/sites/all/modules/unl/unl_migration.php b/sites/all/modules/unl/unl_migration.php index aaf65715..abb76427 100644 --- a/sites/all/modules/unl/unl_migration.php +++ b/sites/all/modules/unl/unl_migration.php @@ -72,18 +72,19 @@ class Unl_Migration_Tool private $_curl; - private $_siteMap = array(); - private $_processedPages = array(); - private $_content = array(); - private $_createdContent = array(); - private $_lastModifications = array(); - private $_hrefTransform = array(); - private $_hrefTransformFiles = array(); - private $_menu = array(); - private $_nodeMap = array(); - private $_pageTitles = array(); - private $_log = array(); - private $_blocks = array(); + private $_siteMap = array(); + private $_processedPages = array(); + private $_processedPageHashes = array(); + private $_content = array(); + private $_createdContent = array(); + private $_lastModifications = array(); + private $_hrefTransform = array(); + private $_hrefTransformFiles = array(); + private $_menu = array(); + private $_nodeMap = array(); + private $_pageTitles = array(); + private $_log = array(); + private $_blocks = array(); /** * Keep track of the state of the migration progress so that we can resume later @@ -397,6 +398,14 @@ class Unl_Migration_Tool $this->_log('The file at ' . $fullPath . ' was empty! Ignoring.'); return; } + + $pageHash = hash('md5', $data['content']); + if (($matchingPath = array_search($pageHash, $this->_processedPageHashes)) !== FALSE) { + $this->_log("The file found at $fullPath was a duplicate of the file at {$this->_baseUrl}$matchingPath ! Ignoring."); + return; + } + $this->_processedPageHashes[$path] = $pageHash; + if ($data['lastModified']) { $this->_lastModifications[$path] = $data['lastModified']; } -- GitLab