From 4fdaa2da3ffe694b63b1d4f70d48f5ebc57fbd44 Mon Sep 17 00:00:00 2001 From: Tim Steiner <tsteiner2@unl.edu> Date: Fri, 2 Sep 2011 21:41:08 +0000 Subject: [PATCH] [gh-194] Merging from testing into staging git-svn-id: file:///tmp/wdn_thm_drupal/branches/drupal-7.x/staging@1019 20a16fea-79d4-4915-8869-1ea9d5ebf173 --- sites/all/modules/unl/unl_migration.php | 44 ++++++++++++++++++------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/sites/all/modules/unl/unl_migration.php b/sites/all/modules/unl/unl_migration.php index 9cfa2fc9..a8364610 100644 --- a/sites/all/modules/unl/unl_migration.php +++ b/sites/all/modules/unl/unl_migration.php @@ -485,10 +485,11 @@ class Unl_Migration_Tool $this->_blocks['contact_info'] = $this->_get_instance_editable_content($html, 'contactinfo'); $this->_blocks['optional_footer'] = $this->_get_instance_editable_content($html, 'optionalfooter'); $this->_blocks['footer_content'] = $this->_get_instance_editable_content($html, 'footercontent'); - // @TODO replace these with str_replace calls - $this->_blocks['related_links'] = trim(strtr($this->_blocks['related_links'], array('<h3>Related Links</h3>' => ''))); - $this->_blocks['contact_info'] = trim(strtr($this->_blocks['contact_info'], array('<h3>Contacting Us</h3>' => ''))); - $this->_blocks['contact_info'] = trim(strtr($this->_blocks['contact_info'], array('<h3>Contact Us</h3>' => ''))); + + // Filter out the existing headers. 
+ $this->_blocks['related_links'] = preg_replace('/\s*<h3>\s*Related Links\s*<\/h3>\s*/', '', $this->_blocks['related_links']); + $this->_blocks['contact_info'] = preg_replace('/\s*<h3>\s*Contacting Us\s*<\/h3>\s*/', '', $this->_blocks['contact_info']); + $this->_blocks['contact_info'] = preg_replace('/\s*<h3>\s*Contact Us\s*<\/h3>\s*/', '', $this->_blocks['contact_info']); } private function _create_blocks() { @@ -599,11 +600,6 @@ class Unl_Migration_Tool } $html = $data['content']; - if (preg_match('/charset=(.*);?/', $data['contentType'], $matches)) { - $charset = $matches[1]; - $html = iconv($charset, 'UTF-8', $html); - } - $maincontentarea = $this->_get_instance_editable_content($html, 'maincontentarea'); if (!$maincontentarea) { $maincontentarea = $this->_get_old_main_content_area($html); @@ -616,10 +612,12 @@ class Unl_Migration_Tool if (!$maincontentarea) { // its possible the body tag has attributes. Check for this and filter them out. - $maincontentarea = $this->_get_text_between_tokens($html, '<body', '</body>'); + $maincontentarea = $this->_get_text_between_tokens($html, '<body', '</body>', FALSE); // As long as we find a closing bracket before the next opening bracket, its probably safe to assume the body tag is intact. if (strpos($maincontentarea, '>') < strpos($maincontentarea, '<')) { $maincontentarea = trim(substr($maincontentarea, strpos($maincontentarea, '>') + 1)); + // Tidy the output here, otherwise tidy would see HTML starting in the middle of a <body key="val"> tag. + $maincontentarea = $this->_tidy_html_fragment($maincontentarea); // Otherwise, ignore it all. (Will be an issue if the body has no other tags, but how likely is this?) } else { $maincontentarea = ''; @@ -942,6 +940,12 @@ class Unl_Migration_Tool $data['lastModified'] = strtotime($headers['Last-Modified']); } } + + // Convert non-UTF-8 data to UTF-8. 
+ if (preg_match('/charset=["\']?([^;"\'\s]+)/i', $data['contentType'], $matches)) { + $charset = $matches[1]; + $data['content'] = iconv($charset, 'UTF-8', $data['content']); + } return $data; } @@ -1100,7 +1104,7 @@ class Unl_Migration_Tool return $content; } - private function _get_text_between_tokens($text, $start_token, $end_token) { + private function _get_text_between_tokens($text, $start_token, $end_token, $tidy_output = TRUE) { $content_start = strpos($text, $start_token); $content_end = strpos($text, $end_token, $content_start); $content = substr($text, @@ -1109,12 +1113,30 @@ class Unl_Migration_Tool $content = trim($content); if ($content && $content_start !== FALSE && $content_end !== FALSE) { + if ($tidy_output) { + $content = $this->_tidy_html_fragment($content); + } return $content; } return FALSE; } + private function _tidy_html_fragment($html) { + $config = array( + 'doctype' => 'transitional', + 'indent' => TRUE, + 'output-xhtml' => TRUE, + 'show-body-only' => TRUE, + 'wrap' => 0, + ); + $tidy = new Tidy(); + $tidy->parseString($html, $config, 'utf8'); + $tidy->cleanRepair(); + + return (string) $tidy; + } + static public function save_to_disk(Unl_Migration_Tool $instance) { $migration_storage_file = drupal_tempnam(file_directory_temp(), 'unl_migration_'); -- GitLab