test_lib.js
const js_beautify = require('js-beautify')
// https://stackoverflow.com/questions/25753368/performant-parsing-of-html-pages-with-node-js-and-xpath/25971812#25971812
// Not using because too broken.
// https://github.com/hieuvp/xpath-html/issues/10
//const xpath = require("xpath-html");
const parse5 = require('parse5');
const xmlserializer = require('xmlserializer');
const xmldom = require('xmldom').DOMParser;
const xpath = require('xpath');
const assert = require('assert');
function xpath_html(html, xpathStr) {
const document = parse5.parse(html);
const xhtml = xmlserializer.serializeToString(document);
const doc = new xmldom().parseFromString(xhtml);
const select = xpath.useNamespaces({"x": "http://www.w3.org/1999/xhtml"});
return select(xpathStr, doc);
}
function assert_xpath(xpath_expr, html, options={}) {
const xpath_matches = xpath_html(html, xpath_expr);
if (!('count' in options)) {
options.count = 1;
}
if (!('stdout' in options)) {
options.stdout = true;
}
if (!('message' in options)) {
options.message = '';
}
if (xpath_matches.length !== options.count) {
let count_str
if (options.count === 1) {
count_str = ''
} else {
count_str = ` count=${options.count}`
}
console.error(`assert_xpath${options.stdout ? '_stdout' : ''}${count_str}: ` + options.message);
console.error('xpath: ' + xpath_expr);
console.error('html:');
console.error(js_beautify.html(html));
assert.strictEqual(xpath_matches.length, options.count);
}
}
// xpath to match the parent div of a given header.
function xpath_header(n, id, insideH, opts={}) {
if (insideH) {
insideH = '//' + insideH
} else {
insideH = ''
}
const { hasToc } = opts
// The horror:
// https://stackoverflow.com/questions/1604471/how-can-i-find-an-element-by-css-class-with-xpath
let ret = `//x:div[(@class='h' or contains(@class, 'h '))`
if (id) {
ret += ` and @id='${id}'`
}
if (n <= 6) {
ret += ` and .//x:h${n}${insideH}`
} else {
ret += ` and .//x:h6[@data-level="${n}"]`
}
if (hasToc !== undefined) {
if (hasToc) {
ret += ` and @data-has-toc="1"`
} else {
ret += ` and not(@data-has-toc)`
}
}
ret += ']'
return ret
}
// xpath to match the split/nosplit link inside of a header.
function xpath_header_split(n, id, href, marker) {
let href_xpath
if (href === undefined) {
href_xpath = ''
} else {
href_xpath = `@href='${href}' and `
}
return `${xpath_header(n, id)}//x:a[${href_xpath}@class='${marker}']`;
}
// xpath to match the parent link inside of a header.
function xpath_header_parent(n, id, href, title) {
return `${xpath_header(n, id)}//x:a[@href='${href}' and text()=' ${title}']`;
}
module.exports = {
assert_xpath,
xpath_header,
xpath_header_split,
xpath_header_parent,
}