Skip to content
Snippets Groups Projects
Commit 245d3969 authored by Eric Rasmussen's avatar Eric Rasmussen
Browse files

Merge pull request #521 from tsteiner2/liferay-migration

Liferay migration (Closes [gh-471], [gh-504], [gh-505], [gh-506])
parents 844f6500 047afc99
No related branches found
No related tags found
No related merge requests found
...@@ -48,6 +48,12 @@ function unl_migration($form, &$form_state) ...@@ -48,6 +48,12 @@ function unl_migration($form, &$form_state)
'#title' => t('Ignore Duplicate Pages/Files'), '#title' => t('Ignore Duplicate Pages/Files'),
'#description' => t("This may be needed if your site has an unlimited number of dynamicly generated paths."), '#description' => t("This may be needed if your site has an unlimited number of dynamicly generated paths."),
); );
$form['root']['use_liferay_code'] = array(
'#type' => 'checkbox',
'#title' => t('Use Liferay Detection'),
'#description' => t("Normally, this won't interfere with non-liferay sites. If you have a /web directory, you should turn this off."),
'#default_value' => 1,
);
$form['submit'] = array( $form['submit'] = array(
'#type' => 'submit', '#type' => 'submit',
...@@ -64,7 +70,8 @@ function unl_migration_submit($form, &$form_state) { ...@@ -64,7 +70,8 @@ function unl_migration_submit($form, &$form_state) {
$form_state['values']['frontier_path'], $form_state['values']['frontier_path'],
$form_state['values']['frontier_user'], $form_state['values']['frontier_user'],
$form_state['values']['frontier_pass'], $form_state['values']['frontier_pass'],
$form_state['values']['ignore_duplicates'] $form_state['values']['ignore_duplicates'],
$form_state['values']['use_liferay_code']
); );
$operations = array( $operations = array(
...@@ -157,6 +164,7 @@ class Unl_Migration_Tool ...@@ -157,6 +164,7 @@ class Unl_Migration_Tool
private $_frontierIndexFiles = array('low_bandwidth.shtml', 'index.shtml', 'index.html', 'index.htm', 'default.shtml'); private $_frontierIndexFiles = array('low_bandwidth.shtml', 'index.shtml', 'index.html', 'index.htm', 'default.shtml');
private $_frontierFilesScanned = array(); private $_frontierFilesScanned = array();
private $_ignoreDuplicates = FALSE; private $_ignoreDuplicates = FALSE;
private $_useLiferayCode = TRUE;
/** /**
* Keep track of the state of the migration progress so that we can resume later * Keep track of the state of the migration progress so that we can resume later
...@@ -171,7 +179,7 @@ class Unl_Migration_Tool ...@@ -171,7 +179,7 @@ class Unl_Migration_Tool
private $_start_time; private $_start_time;
public function __construct($baseUrl, $frontierPath, $frontierUser, $frontierPass, $ignoreDuplicates) public function __construct($baseUrl, $frontierPath, $frontierUser, $frontierPass, $ignoreDuplicates, $useLiferayCode = FALSE)
{ {
header('Content-type: text/plain'); header('Content-type: text/plain');
...@@ -198,6 +206,7 @@ class Unl_Migration_Tool ...@@ -198,6 +206,7 @@ class Unl_Migration_Tool
$this->_frontierPass = $frontierPass; $this->_frontierPass = $frontierPass;
$this->_ignoreDuplicates = (bool) $ignoreDuplicates; $this->_ignoreDuplicates = (bool) $ignoreDuplicates;
$this->_useLiferayCode = (bool) $useLiferayCode;
$this->_baseUrl = $baseUrl; $this->_baseUrl = $baseUrl;
$this->_addSitePath(''); $this->_addSitePath('');
...@@ -234,7 +243,7 @@ class Unl_Migration_Tool ...@@ -234,7 +243,7 @@ class Unl_Migration_Tool
if ($this->_state == self::STATE_PROCESSING_PAGES) { if ($this->_state == self::STATE_PROCESSING_PAGES) {
// Process all of the pages on the site (Takes a while) // Process all of the pages on the site (Takes a while)
do { do {
set_time_limit(30); set_time_limit(max(30, $time_limit));
$pagesToProcess = $this->_getPagesToProcess(); $pagesToProcess = $this->_getPagesToProcess();
foreach ($pagesToProcess as $pageToProcess) { foreach ($pagesToProcess as $pageToProcess) {
...@@ -264,6 +273,7 @@ class Unl_Migration_Tool ...@@ -264,6 +273,7 @@ class Unl_Migration_Tool
$this->_state = self::STATE_CREATING_NODES; $this->_state = self::STATE_CREATING_NODES;
} }
if ($this->_state == self::STATE_CREATING_NODES) { if ($this->_state == self::STATE_CREATING_NODES) {
// Update links and then create new page nodes. (Takes a while) // Update links and then create new page nodes. (Takes a while)
foreach ($this->_content as $path => $content) { foreach ($this->_content as $path => $content) {
...@@ -273,10 +283,12 @@ class Unl_Migration_Tool ...@@ -273,10 +283,12 @@ class Unl_Migration_Tool
if (time() - $this->_start_time > $time_limit) { if (time() - $this->_start_time > $time_limit) {
return FALSE; return FALSE;
} }
set_time_limit(30); set_time_limit(max(30, $time_limit));
$hrefTransform = isset($this->_hrefTransform[$path]) ? $this->_hrefTransform[$path] : array(); $hrefTransforms = isset($this->_hrefTransform[$path]) ? $this->_hrefTransform[$path] : array();
$content = strtr($content, $hrefTransform); foreach ($hrefTransforms as $hrefTransformFrom => $hrefTransformTo) {
$content = str_replace(htmlspecialchars($hrefTransformFrom), htmlspecialchars($hrefTransformTo), $content);
}
$pageTitle = $this->_pageTitles[$path]; $pageTitle = $this->_pageTitles[$path];
$this->_createPage($pageTitle, $content, $path, '' == $path); $this->_createPage($pageTitle, $content, $path, '' == $path);
...@@ -306,6 +318,10 @@ class Unl_Migration_Tool ...@@ -306,6 +318,10 @@ class Unl_Migration_Tool
if (($fragmentStart = strrpos($path, '#')) !== FALSE) { if (($fragmentStart = strrpos($path, '#')) !== FALSE) {
$path = substr($path, 0, $fragmentStart); $path = substr($path, 0, $fragmentStart);
} }
$path = trim($path, '/');
if (array_search(strtolower($path), array_map('strtolower', $this->_siteMap)) !== FALSE) {
return;
}
$this->_siteMap[hash('SHA256', $path)] = $path; $this->_siteMap[hash('SHA256', $path)] = $path;
} }
...@@ -414,6 +430,8 @@ class Unl_Migration_Tool ...@@ -414,6 +430,8 @@ class Unl_Migration_Tool
if (!$path) { if (!$path) {
$path = ''; $path = '';
} }
$path = trim($path, '/');
if ($fragmentPos = strrpos($path, '#') !== FALSE) { if ($fragmentPos = strrpos($path, '#') !== FALSE) {
$item['options']['fragment'] = substr($path, $fragmentPos + 1); $item['options']['fragment'] = substr($path, $fragmentPos + 1);
$path = substr($path, 0, $fragmentPos); $path = substr($path, 0, $fragmentPos);
...@@ -421,7 +439,7 @@ class Unl_Migration_Tool ...@@ -421,7 +439,7 @@ class Unl_Migration_Tool
if (substr($path, -1) == '/') { if (substr($path, -1) == '/') {
$path = substr($path, 0, -1); $path = substr($path, 0, -1);
} }
$nodeId = array_search($path, $this->_nodeMap, TRUE); $nodeId = array_search(strtolower($path), array_map('strtolower', $this->_nodeMap), TRUE);
if ($nodeId) { if ($nodeId) {
$item['link_path'] = 'node/' . $nodeId; $item['link_path'] = 'node/' . $nodeId;
} }
...@@ -458,6 +476,8 @@ class Unl_Migration_Tool ...@@ -458,6 +476,8 @@ class Unl_Migration_Tool
if (!$path) { if (!$path) {
$path = ''; $path = '';
} }
$path = trim($path, '/');
if (($fragmentPos = strrpos($path, '#')) !== FALSE) { if (($fragmentPos = strrpos($path, '#')) !== FALSE) {
$item['options']['fragment'] = substr($path, $fragmentPos + 1); $item['options']['fragment'] = substr($path, $fragmentPos + 1);
$path = substr($path, 0, $fragmentPos); $path = substr($path, 0, $fragmentPos);
...@@ -465,7 +485,7 @@ class Unl_Migration_Tool ...@@ -465,7 +485,7 @@ class Unl_Migration_Tool
if (substr($path, -1) == '/') { if (substr($path, -1) == '/') {
$path = substr($path, 0, -1); $path = substr($path, 0, -1);
} }
$nodeId = array_search($path, $this->_nodeMap, TRUE); $nodeId = array_search(strtolower($path), array_map('strtolower', $this->_nodeMap), TRUE);
if ($nodeId) { if ($nodeId) {
$item['link_path'] = 'node/' . $nodeId; $item['link_path'] = 'node/' . $nodeId;
} }
...@@ -596,7 +616,7 @@ class Unl_Migration_Tool ...@@ -596,7 +616,7 @@ class Unl_Migration_Tool
$link_node = $link_nodes->item($i); $link_node = $link_nodes->item($i);
$this->_breadcrumbs[] = array( $this->_breadcrumbs[] = array(
'text' => trim($link_node->textContent), 'text' => trim($link_node->textContent),
'href' => $this->_makeLinkAbsolute($link_node->getAttribute('href', '')) 'href' => $this->_makeLinkAbsolute($link_node->getAttribute('href'), ''),
); );
} }
} }
...@@ -635,26 +655,48 @@ class Unl_Migration_Tool ...@@ -635,26 +655,48 @@ class Unl_Migration_Tool
if (isset($data['lastModified'])) { if (isset($data['lastModified'])) {
$this->_lastModifications[$path] = $data['lastModified']; $this->_lastModifications[$path] = $data['lastModified'];
} }
$cleanPath = $path;
$pathParts = parse_url($path);
// If the path contains a query, we'll have to change it.
if (array_key_exists('query', $pathParts)) {
$matches = array();
if (array_key_exists('Content-Disposition', $data['headers']) &&
preg_match('/filename="(.*)"/', $data['headers']['Content-Disposition'], $matches)) {
$cleanPath = $pathParts['path'] . '/' . $matches[1];
} else {
$cleanPath = $pathParts['path'] . '/' . $pathParts['query'];
}
$cleanPath = strtr($cleanPath, array('%2f' => '/', '%2F' => '/'));
}
if (strpos($data['contentType'], 'html') === FALSE) { if (strpos($data['contentType'], 'html') === FALSE) {
if (!$data['contentType']) { if (!$data['contentType']) {
$this->_log('The file type at ' . $fullPath . ' was not specified. Ignoring.', WATCHDOG_ERROR); $this->_log('The file type at ' . $fullPath . ' was not specified. Ignoring.', WATCHDOG_ERROR);
return; return;
} }
@drupal_mkdir('public://' . urldecode(dirname($path)), NULL, TRUE);
@drupal_mkdir('public://' . urldecode(dirname($cleanPath)), NULL, TRUE);
if (!mb_check_encoding($path, 'UTF-8')) { if (!mb_check_encoding($path, 'UTF-8')) {
$path = iconv('ISO-8859-1', 'UTF-8', $path); $path = iconv('ISO-8859-1', 'UTF-8', $path);
} }
try { try {
$file = file_save_data($data['content'], 'public://' . urldecode($path), FILE_EXISTS_REPLACE); $file = file_save_data($data['content'], 'public://' . urldecode($cleanPath), FILE_EXISTS_REPLACE);
} catch (Exception $e) { } catch (Exception $e) {
$this->_log('Could not migrate file "' . $path . '"! File name too long?', WATCHDOG_ERROR); $this->_log('Could not migrate file "' . $path . '"! File name too long?', WATCHDOG_ERROR);
} }
$this->_hrefTransformFiles[$path] = $this->_makeRelativeUrl(file_create_url('public://' . $path)); $this->_hrefTransformFiles[$path] = $this->_makeRelativeUrl(file_create_url('public://' . $cleanPath));
return; return;
} }
$html = $data['content']; $html = $data['content'];
$maincontentarea = $this->_get_liferay_content_area($html);
if (!$maincontentarea) {
$maincontentarea = $this->_get_instance_editable_content($html, 'maincontentarea'); $maincontentarea = $this->_get_instance_editable_content($html, 'maincontentarea');
}
if (!$maincontentarea) { if (!$maincontentarea) {
$maincontentarea = $this->_get_old_main_content_area($html); $maincontentarea = $this->_get_old_main_content_area($html);
} }
...@@ -753,8 +795,8 @@ class Unl_Migration_Tool ...@@ -753,8 +795,8 @@ class Unl_Migration_Tool
$this->_processLinks($linkNode->getAttribute('src'), $path, $page_base); $this->_processLinks($linkNode->getAttribute('src'), $path, $page_base);
} }
$this->_content[$path] = $maincontentarea; $this->_content[$cleanPath] = $maincontentarea;
$this->_pageTitles[$path] = $pageTitle; $this->_pageTitles[$cleanPath] = $pageTitle;
// Scan the page for the parent breadcrumb // Scan the page for the parent breadcrumb
$breadcrumbs = $dom->getElementById('breadcrumbs'); $breadcrumbs = $dom->getElementById('breadcrumbs');
...@@ -772,13 +814,15 @@ class Unl_Migration_Tool ...@@ -772,13 +814,15 @@ class Unl_Migration_Tool
if ($pageParentLink == $path) { if ($pageParentLink == $path) {
$pageParentLink = ''; $pageParentLink = '';
} }
$this->_pageParentLinks[$path] = $pageParentLink; $this->_pageParentLinks[$cleanPath] = $pageParentLink;
}
} }
if ($cleanPath != $path) {
$this->_hrefTransformFiles[$path] = $cleanPath;
} }
} }
private function _processLinks($originalHref, $path, $page_base = NULL, $tag = NULL) private function _processLinks($originalHref, $path, $page_base = NULL, $tag = NULL) {
{
if (substr($originalHref, 0, 1) == '#') { if (substr($originalHref, 0, 1) == '#') {
return; return;
} }
...@@ -788,19 +832,73 @@ class Unl_Migration_Tool ...@@ -788,19 +832,73 @@ class Unl_Migration_Tool
} }
$href = $this->_makeLinkAbsolute($originalHref, $page_base); $href = $this->_makeLinkAbsolute($originalHref, $page_base);
$href = $this->_translateLiferayWeb($href);
if (substr($href, 0, strlen($this->_baseUrl)) == $this->_baseUrl) { if (substr($href, 0, strlen($this->_baseUrl)) == $this->_baseUrl) {
$newPath = substr($href, strlen($this->_baseUrl)); $newPath = substr($href, strlen($this->_baseUrl));
if ($newPath === FALSE) { if ($newPath === FALSE) {
$newPath = ''; $newPath = '';
} }
$this->_addSitePath($newPath);
} else {
$newPath = $href;
}
if ($tag) { if ($tag) {
$this->_hrefTransform[$tag][$originalHref] = $newPath; $this->_hrefTransform[$tag][$originalHref] = $newPath;
} else { } else {
$this->_hrefTransform[$path][$originalHref] = $newPath; $this->_hrefTransform[$path][$originalHref] = $newPath;
} }
$this->_addSitePath($newPath);
} }
/**
 * Provided an absolute URL, handles translating Liferay /web/site/some/path
 * paths to <scheme>://site.unl.edu/some/path (the original scheme, query and
 * fragment are preserved; the site name is lower-cased to form the host).
 *
 * The URL is returned untouched when:
 *  - Liferay detection is turned off,
 *  - the URL does not start with the migration's base URL,
 *  - the URL cannot be parsed or has no path,
 *  - the path is not of the form /web/<site>/..., or
 *  - <site> is listed as a known exception for the URL's host.
 *
 * @param string $url
 * @return string
 */
private function _translateLiferayWeb($url) {
    if (!$this->_useLiferayCode) {
        return $url;
    }

    if (substr($url, 0, strlen($this->_baseUrl)) != $this->_baseUrl) {
        return $url;
    }

    $urlParts = parse_url($url);
    // parse_url() returns FALSE on seriously malformed URLs and omits the
    // 'path' key for bare-host URLs (e.g. "http://example.unl.edu"); neither
    // can be a /web/<site> URL, so bail out instead of raising notices.
    if (!is_array($urlParts) || !isset($urlParts['scheme'], $urlParts['host'], $urlParts['path'])) {
        return $url;
    }

    $pathParts = explode('/', ltrim($urlParts['path'], '/'));

    // Only /web/<site>[/...] is translated. An empty <site> (e.g. "/web/")
    // would otherwise produce the invalid host ".unl.edu".
    if (count($pathParts) < 2 || $pathParts[0] !== 'web' || $pathParts[1] === '') {
        return $url;
    }

    // Per-host first-level names under /web that must NOT be translated
    // into a <name>.unl.edu host.
    $exceptions = array(
        'cropwatch.unl.edu' => array('corn', 'drybeans', 'forages', 'organic', 'potato', 'sorghum', 'soybeans', 'wheat', 'bioenergy', 'insect', 'economics', 'ssm', 'soils', 'tillage', 'weed', 'varietytest', 'biotechnology', 'farmresearch', 'cropwatch-youth', 'militaryresources', 'gaps', 'sugarbeets'),
        '4h.unl.edu' => array('extension-4-h-horse', '4hcamps', '4hcurriclum'),
        'animalscience.unl.edu' => array('fernando-lab', 'anscgenomics', 'rprb-lab', 'ruminutrition-lab'),
        'beef.unl.edu' => array('cattleproduction'),
        'biochem.unl.edu' => array('barycki', 'bailey', 'becker', 'adamec', 'wilson', 'biochem-fatttlab', 'simpson'),
        'bse.unl.edu' => array('p2guidelines'),
        'edmedia.unl.edu' => array('techtraining'),
        'food.unl.edu' => array('localfoods', 'allergy', 'fnh', 'preservation', 'fpc', 'safety', 'meatproducts', 'youth'),
        'ianrhome.unl.edu' => array('ianrinternational'),
        'water.unl.edu' => array('crops', 'cropswater', 'drinkingwater', 'drought', 'wildlife', 'hydrology', 'lakes', 'landscapes', 'landscapewater', 'laweconomics', 'manure', 'propertydesign', 'research', 'sewage', 'students', 'watershed', 'wells', 'wetlands'),
        'westcentral.unl.edu' => array('wcentomology', 'wcacreage'),
    );

    $host = $urlParts['host'];
    if (isset($exceptions[$host]) && in_array($pathParts[1], $exceptions[$host], TRUE)) {
        return $url;
    }

    // Rebuild the URL on the site's own host, dropping the leading
    // "web/<site>" segments from the path.
    $translated = $urlParts['scheme'] . '://' . strtolower($pathParts[1]) . '.unl.edu';
    $translated .= '/' . implode('/', array_slice($pathParts, 2));
    $translated .= isset($urlParts['query']) ? '?' . $urlParts['query'] : '';
    $translated .= isset($urlParts['fragment']) ? '#' . $urlParts['fragment'] : '';

    return $translated;
}
private function _makeLinkAbsolute($href, $path) private function _makeLinkAbsolute($href, $path)
...@@ -835,6 +933,9 @@ class Unl_Migration_Tool ...@@ -835,6 +933,9 @@ class Unl_Migration_Tool
} else if (isset($parts['path']) && substr($parts['path'], 0, 1) == '/') { } else if (isset($parts['path']) && substr($parts['path'], 0, 1) == '/') {
$baseParts = parse_url($this->_baseUrl); $baseParts = parse_url($this->_baseUrl);
$absoluteUrl = $baseParts['scheme'] . '://' . $baseParts['host'] . $parts['path']; $absoluteUrl = $baseParts['scheme'] . '://' . $baseParts['host'] . $parts['path'];
if (isset($parts['query'])) {
$absoluteUrl .= '?' . $parts['query'];
}
if (isset($parts['fragment'])) { if (isset($parts['fragment'])) {
$absoluteUrl .= '#' . $parts['fragment']; $absoluteUrl .= '#' . $parts['fragment'];
} }
...@@ -1033,6 +1134,7 @@ class Unl_Migration_Tool ...@@ -1033,6 +1134,7 @@ class Unl_Migration_Tool
$data = array( $data = array(
'content' => $content, 'content' => $content,
'contentType' => $meta['content_type'], 'contentType' => $meta['content_type'],
'headers' => $headers,
); );
if ($this->_frontierPath) { if ($this->_frontierPath) {
...@@ -1047,8 +1149,10 @@ class Unl_Migration_Tool ...@@ -1047,8 +1149,10 @@ class Unl_Migration_Tool
// Convert non-UTF-8 data to UTF-8. // Convert non-UTF-8 data to UTF-8.
if (preg_match('/charset=(.*);?/', $data['contentType'], $matches)) { if (preg_match('/charset=(.*);?/', $data['contentType'], $matches)) {
$charset = $matches[1]; $charset = $matches[1];
if ($charset != 'UTF-8') {
$data['content'] = iconv($charset, 'UTF-8', $data['content']); $data['content'] = iconv($charset, 'UTF-8', $data['content']);
} }
}
return $data; return $data;
} }
...@@ -1207,6 +1311,18 @@ class Unl_Migration_Tool ...@@ -1207,6 +1311,18 @@ class Unl_Migration_Tool
return $content; return $content;
} }
/**
 * Extracts the main content region of a Liferay-rendered page.
 *
 * The region is whatever lies between the right-column opening marker and
 * the "hrefFm" form tag; extraction is delegated to
 * _get_text_between_tokens().
 *
 * @param string $html Full HTML of the page.
 * @return mixed FALSE when Liferay detection is disabled, otherwise whatever
 *   _get_text_between_tokens() returns for the two markers.
 */
private function _get_liferay_content_area($html) {
    $start_token = "<!-- End of shared left start of right -->\n<div class=\"three_col right\">";
    $end_token = '<form action="" method="post" name="hrefFm">';

    return $this->_useLiferayCode
        ? $this->_get_text_between_tokens($html, $start_token, $end_token)
        : FALSE;
}
private function _get_text_between_tokens($text, $start_token, $end_token, $tidy_output = TRUE) { private function _get_text_between_tokens($text, $start_token, $end_token, $tidy_output = TRUE) {
$content_start = strpos($text, $start_token); $content_start = strpos($text, $start_token);
$content_end = strpos($text, $end_token, $content_start); $content_end = strpos($text, $end_token, $content_start);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment