Commit 8e200e68, authored by Sean Roberts and committed by GitHub

Merge pull request #154 from hypothesis/pdf-anchoring-test

Add PDF anchoring unit tests
parents 8f7925a4 e0da5628
'use strict';
var RenderingStates = require('../../pdfjs-rendering-states');
/**
* @typedef {Object} Options
* @property {Element} container - The container into which the fake PDF viewer
* should render the content
* @property {string[]} content - Array of strings containing the text for each
* page
*/
/**
 * Test double for the `PDFViewerApplication` object exposed by PDF.js.
 *
 * Only the parts needed by the anchoring tests are provided: a `pdfViewer`
 * object carrying `pagesCount`, `getPageView` and `getPageTextContent`.
 * No page elements are created by the constructor; `_pages` starts out empty.
 *
 * @param {Options} options
 */
function FakePDFViewerApplication(options) {
  var app = this;

  // Throw if `pageIndex` is negative or not below the number of page texts.
  function assertPageIndex(pageIndex) {
    if (pageIndex < 0 || pageIndex >= options.content.length) {
      throw new Error('Invalid page index ' + pageIndex.toString());
    }
  }

  // Report the state of one page: its root element and, when that element
  // contains a `.textLayer` child, the details of the text layer.
  function getPageView(pageIndex) {
    assertPageIndex(pageIndex);

    // Read `_pages` on every call, since `setCurrentPage` replaces the array.
    var pageEl = app._pages[pageIndex];
    var layerEl = pageEl.querySelector('.textLayer');

    // A page without a text layer is reported as not rendered yet.
    var textLayer = null;
    var renderingState = RenderingStates.INITIAL;
    if (layerEl) {
      textLayer = {textLayerDiv: layerEl, renderingDone: true};
      renderingState = RenderingStates.FINISHED;
    }

    return {
      div: pageEl,
      textLayer: textLayer,
      renderingState: renderingState,
    };
  }

  // Resolve with the text items of one page. One item is produced per line
  // of the page's text; how real PDF.js splits text into items varies with
  // the PDF and the PDF.js version.
  function getPageTextContent(pageIndex) {
    assertPageIndex(pageIndex);

    var lines = options.content[pageIndex].split(/\n/);
    var items = lines.map(function (line) {
      return {str: line};
    });
    return Promise.resolve({items: items});
  }

  this._checkBounds = assertPageIndex;
  this._content = options.content;
  this._container = options.container;
  this._pages = [];
  this.pdfViewer = {
    pagesCount: options.content.length,
    getPageView: getPageView,
    getPageTextContent: getPageTextContent,
  };
}
/**
 * Detach every page element currently tracked by the fake viewer from the DOM.
 *
 * The `_pages` list itself is left as it is.
 */
FakePDFViewerApplication.prototype.dispose = function () {
  var pageEls = this._pages;
  for (var i = 0; i < pageEls.length; i += 1) {
    pageEls[i].remove();
  }
};
/**
 * Build the element tree for one page, intended to mirror the structure
 * that PDF.js produces.
 *
 * Every page is a `div.page`. A rendered page additionally contains a
 * `div.textLayer` holding one `div` per line of `content`.
 *
 * @param {string} content - The text content for the page
 * @param {boolean} rendered - True to include the text layer, false to
 *   return only the empty placeholder element
 * @return {Element} - The root Element for the page
 */
function createPage(content, rendered) {
  var pageEl = document.createElement('div');
  pageEl.classList.add('page');

  if (rendered) {
    var layerEl = document.createElement('div');
    layerEl.classList.add('textLayer');

    var lines = content.split(/\n/);
    for (var i = 0; i < lines.length; i += 1) {
      var lineEl = document.createElement('div');
      lineEl.textContent = lines[i];
      layerEl.appendChild(lineEl);
    }

    pageEl.appendChild(layerEl);
  }

  return pageEl;
}
/**
 * Choose which page counts as the one visible in the viewport.
 *
 * All page elements are rebuilt: the page at `index` is created with a text
 * layer, every other page as an empty placeholder. The container is emptied
 * and the new elements are appended to it in page order.
 *
 * @param {number} index - Index of the page to mark as rendered; an
 *   out-of-range value is rejected by the bounds check
 */
FakePDFViewerApplication.prototype.setCurrentPage = function (index) {
  this._checkBounds(index);

  var container = this._container;
  var pageEls = this._content.map(function (pageText, pageIndex) {
    var isVisible = pageIndex === index;
    return createPage(pageText, isVisible);
  });

  // Discard whatever was rendered before swapping in the new elements.
  container.innerHTML = '';
  this._pages = pageEls;
  for (var i = 0; i < pageEls.length; i += 1) {
    container.appendChild(pageEls[i]);
  }
};
module.exports = FakePDFViewerApplication;
'use strict';
var TextQuoteAnchor = require('dom-anchor-text-quote');
var FakePDFViewerApplication = require('./fake-pdf-viewer-application');
var pdfAnchoring = require('../pdf');
/**
 * Locate `text` inside `container` and return it as a DOM Range.
 *
 * The lookup is done by building a `TextQuoteAnchor` for the text and
 * converting that anchor to a range.
 *
 * @param {Element} container
 * @param {string} text
 * @return {Range}
 */
function findText(container, text) {
  var quoteAnchor = new TextQuoteAnchor(container, text);
  return quoteAnchor.toRange();
}
var fixtures = {
  // One entry per page of the "PDF". Each page is written as a list of its
  // lines, which are joined with '\n' to form the page text.
  pdfContent: [
    [
      'Pride And Prejudice And Zombies',
      'By Jane Austin and Seth Grahame-Smith ',
    ].join('\n'),

    [
      'IT IS A TRUTH universally acknowledged that a zombie in possession of',
      'brains must be in want of more brains. Never was this truth more plain',
      'than during the recent attacks at Netherfield Park, in which a household',
      'of eighteen was slaughtered and consumed by a horde of the living dead.',
    ].join('\n'),

    [
      '"My dear Mr. Bennet," said his lady to him one day, "have you heard that',
      'Netherfield Park is occupied again?" ',
    ].join('\n'),
  ],
};
describe('PDF anchoring', function () {
  // Text selected by the tests. It occurs on the second and third fixture
  // pages; the tests select it on the third one.
  var QUOTE = 'Netherfield Park';
  // Index of the fixture page that is rendered before selecting QUOTE.
  var QUOTE_PAGE = 2;

  var container;
  var viewer;

  // Render the page holding the quote and return a Range covering the quote.
  function selectQuote() {
    viewer.setCurrentPage(QUOTE_PAGE);
    return findText(container, QUOTE);
  }

  beforeEach(function () {
    // Anchoring caches the rendered text of each page, so drop that cache to
    // give every test the same starting state.
    pdfAnchoring.purgeCache();

    container = document.createElement('div');
    document.body.appendChild(container);

    viewer = new FakePDFViewerApplication({
      container: container,
      content: fixtures.pdfContent,
    });
    window.PDFViewerApplication = viewer;
    viewer.setCurrentPage(0);
  });

  afterEach(function () {
    window.PDFViewerApplication.dispose();
    window.PDFViewerApplication = null;
    container.remove();
  });

  describe('#describe', function () {
    it('returns position and quote selectors', function () {
      var range = selectQuote();

      return pdfAnchoring.describe(container, range).then(function (selectors) {
        var selectorTypes = selectors.map(function (selector) {
          return selector.type;
        });
        assert.deepEqual(selectorTypes, ['TextPositionSelector', 'TextQuoteSelector']);
      });
    });

    it('returns a position selector with correct start/end offsets', function () {
      var range = selectQuote();

      // The expected offset is measured in the text of all pages joined
      // together with line breaks removed. `lastIndexOf` is used because the
      // quote also occurs on an earlier page.
      var flatText = fixtures.pdfContent.join('').replace(/\n/g, '');
      var expectedStart = flatText.lastIndexOf(QUOTE);

      return pdfAnchoring.describe(container, range).then(function (selectors) {
        var positionSelector = selectors[0];
        assert.equal(positionSelector.start, expectedStart);
        assert.equal(positionSelector.end, expectedStart + QUOTE.length);
      });
    });

    it('returns a quote selector with the correct quote', function () {
      var range = selectQuote();

      return pdfAnchoring.describe(container, range).then(function (selectors) {
        var quoteSelector = selectors[1];
        assert.deepEqual(quoteSelector, {
          type: 'TextQuoteSelector',
          exact: QUOTE,
          prefix: 'im one day, "have you heard that',
          suffix: ' is occupied again?" ',
        });
      });
    });
  });

  describe('#anchor', function () {
    it('anchors previously created selectors if the page is rendered', function () {
      var range = selectQuote();

      return pdfAnchoring.describe(container, range).then(function (selectors) {
        var positionSelector = selectors[0];
        var quoteSelector = selectors[1];

        // Check the full set of selectors as well as each selector on its own.
        var selectorSets = [
          [positionSelector, quoteSelector],
          [positionSelector],
          // FIXME - Anchoring a quote on its own does not currently work
          // [quoteSelector],
        ];

        // Anchor one set of selectors and compare the result with the
        // originally selected text. The set's selector types are logged on
        // failure so the failing combination can be identified.
        function anchorAndCheck(selectorSet) {
          var typeNames = selectorSet.map(function (selector) {
            return selector.type;
          });
          var failureMessage = 'anchoring failed with ' + typeNames.join(', ');

          return pdfAnchoring.anchor(container, selectorSet).then(function (anchoredRange) {
            assert.equal(anchoredRange.toString(), range.toString(), failureMessage);
          }).catch(function (err) {
            console.warn(failureMessage);
            throw err;
          });
        }

        return Promise.all(selectorSets.map(anchorAndCheck));
      });
    });

    it('anchors to a placeholder element if the page is not rendered', function () {
      var range = selectQuote();

      return pdfAnchoring.describe(container, range).then(function (selectors) {
        // Make a different page the rendered one, so the page holding the
        // quote is only an empty placeholder when anchoring runs.
        viewer.setCurrentPage(0);
        return pdfAnchoring.anchor(container, selectors);
      }).then(function (placeholderRange) {
        assert.equal(placeholderRange.toString(), 'Loading annotations…');
      });
    });
  });
});
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment