Skip to content
Snippets Groups Projects
Commit 287b2130 authored by Kevin Abel
Browse files

Switch to using cheerio for DOM parsing.

It is much faster than the jsdom + jquery combo.
parent bffd9d34
No related branches found
No related tags found
No related merge requests found
......@@ -8,8 +8,7 @@
"author": "UNL Web Developer Network <wdn@unl.edu> (http://wdn.unl.edu/)",
"license": "BSD-3-Clause",
"dependencies": {
"jquery": "^2.1.4",
"jsdom": "^7.1.1",
"cheerio": "^0.19.0",
"negotiator": "^0.6.0",
"request": "^2.67.0"
},
......
......@@ -3,12 +3,7 @@ var http = require('http');
var request = require('request');
var Neg = require('negotiator');
var url = require('url');
var jsdom = require("jsdom").jsdom;
require("jsdom").defaultDocumentFeatures = {
FetchExternalResources: false,
ProcessExternalResources: false
};
var cheerio = require('cheerio');
var port = process.env.npm_package_config_port || 8080;
var allowedDomainsConf = process.env.npm_package_config_allowedDomains;
......@@ -51,26 +46,23 @@ function sendHtml(res, html, req) {
res.end(html);
}
function parseNavigation(html, baseUrl, callback) {
try {
var doc = jsdom(html);
var global = doc.defaultView;
var $ = require('jquery')(global);
function parseNavigation(html, baseUrl) {
var $ = cheerio.load(html);
// check for a document provided URL base
var baseElement;
baseElement = $('head > base')[0];
baseElement = $('head > base').first();
if (baseElement) {
baseUrl = url.resolve(baseUrl, baseElement.getAttribute('href'));
if (baseElement.length) {
baseUrl = url.resolve(baseUrl, baseElement.attr('href'));
}
// find the navigation list element
var navListElement;
navListElement = $('#navigation > ul, #navigation > * > ul')[0];
navListElement = $('#navigation > ul, #navigation > * > ul').first();
if (!navListElement) {
callback(true, false);
if (!navListElement.length) {
throw 'No navigation list found.';
}
var elements, element, realUrl;
......@@ -82,15 +74,12 @@ function parseNavigation(html, baseUrl, callback) {
// resolve the elements' href attributes using the baseUrl
elements = $('[href]', navListElement);
elements.each(function() {
element = this;
realUrl = url.resolve(baseUrl, element.getAttribute('href'));
element.setAttribute('href', realUrl);
element = $(this);
realUrl = url.resolve(baseUrl, element.attr('href'));
element.attr('href', realUrl);
});
callback(false, navListElement.outerHTML);
} catch (err) {
callback('Bad navigation HTML found. ' + err, false);
}
return navListElement.html();
}
function getGz (req) {
......@@ -150,16 +139,16 @@ http.createServer(function handler(req, res) {
}
var startTime = Date.now();
var output;
parseNavigation(body, clientReq.uri.href, function(err, output) {
console.log('processed navigation in %dms (%dms)', Date.now() - startTime, clientRes.elapsedTime);
if (err) {
send404(res, err, req);
return;
}
try {
output = parseNavigation(body, clientReq.uri.href);
console.log('processed navigation for "%s" in %dms (%dms)', clientReq.uri.href, Date.now() - startTime, clientRes.elapsedTime);
sendHtml(res, output, req);
});
} catch (parseError) {
send404(res, 'Bad navigation HTML found. ' + parseError, req);
}
});
}).listen(port);
console.log('Navigation Proxy listening on port ' + port);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment