Unverified Commit 7f0a2338 authored by Robert Knight's avatar Robert Knight Committed by GitHub

Merge pull request #888 from hypothesis/modernize-fake-pdf-viewer

Modernize PDFViewerApplication fake
parents d53e41e8 dea7c287
'use strict';
const RenderingStates = require('../../pdfjs-rendering-states');
/**
* @typedef {Object} Options
* @property {Element} container - The container into which the fake PDF viewer
* should render the content
* @property {string[]} content - Array of strings containing the text for each
* page
*/
/**
 * Fake implementation of the parts of PDF.js that the Hypothesis client's
 * anchoring interacts with.
 *
 * This is a minimal fake implementation of PDF.js' PDFViewerApplication
 * interface. It emulates the parts of PDFViewerApplication that are relevant
 * to anchoring tests, and is used to create PDF anchoring tests which can
 * execute quickly and be easier to debug than the full PDF.js viewer
 * application.
 *
 * The general structure is to have `Fake{OriginalClassName}` classes for
 * each of the relevant classes in PDF.js.
 *
 * Instances expose a `pdfViewer` object with `pagesCount`, `getPageView` and
 * `getPageTextContent`. Page elements are looked up in `this._pages`, which
 * starts out empty.
 *
 * @param {Options} options
 */
function FakePDFViewerApplication(options) {
  /**
   * Throw unless `index` is an integer that identifies one of the pages.
   *
   * The integer check matters: `undefined`, `null`, `NaN` and fractional
   * values all compare false against both `< 0` and `>= length`, so without
   * it they would pass and fail later with an unrelated TypeError.
   *
   * @param {number} index
   * @throws {Error} If `index` is not a valid page index
   */
  function checkBounds(index) {
    const pageCount = options.content.length;
    if (!Number.isInteger(index) || index < 0 || index >= pageCount) {
      throw new Error(`Invalid page index ${index}`);
    }
  }

  this._checkBounds = checkBounds;
  this._content = options.content;
  this._container = options.container;
  this._pages = [];

  this.pdfViewer = {
    pagesCount: options.content.length,

    /**
     * Return a page view object for the page element stored at `index`.
     *
     * The page counts as rendered only if its element contains a
     * `.textLayer` descendant.
     */
    getPageView: (index) => {
      checkBounds(index);

      const page = this._pages[index];
      const textLayerEl = page.querySelector('.textLayer');

      return {
        div: page,
        textLayer: textLayerEl
          ? { textLayerDiv: textLayerEl, renderingDone: true }
          : null,
        renderingState: textLayerEl
          ? RenderingStates.FINISHED
          : RenderingStates.INITIAL,
      };
    },

    /**
     * Return a Promise for the text items of the page at `index`.
     */
    getPageTextContent: (index) => {
      checkBounds(index);

      return Promise.resolve({
        // The way that the page text is split into items will depend on
        // the PDF and the version of PDF.js - individual text items might be
        // just symbols, words, phrases or whole lines.
        //
        // Here we split items by line which matches the typical output for a
        // born-digital PDF.
        items: options.content[index].split(/\n/).map((line) => ({ str: line })),
      });
    },
  };
}
/**
 * Tear down the fake viewer by detaching every page element it currently
 * holds in `_pages` from the document.
 */
FakePDFViewerApplication.prototype.dispose = function () {
  for (const pageElement of this._pages) {
    pageElement.remove();
  }
};
const RenderingStates = require('../../pdfjs-rendering-states');
/**
* Create the DOM structure for a page which matches the structure produced by
......@@ -95,7 +33,7 @@ function createPage(content, rendered) {
const textLayer = document.createElement('div');
textLayer.classList.add('textLayer');
content.split(/\n/).forEach(function (item) {
content.split(/\n/).forEach((item) => {
const itemEl = document.createElement('div');
itemEl.textContent = item;
textLayer.appendChild(itemEl);
......@@ -106,25 +44,144 @@ function createPage(content, rendered) {
}
/**
* Set the index of the page which is currently visible in the viewport.
* @typedef {Object} FakePDFPageViewOptions
* @prop {boolean} rendered - Whether this page is "rendered", as if it were
* near the viewport, or not.
*/
/**
* Fake implementation of PDF.js `PDFPageView` class.
*
* Pages from `index` up to and including `lastRenderedPage` will be
* "rendered" and have a text layer available. Other pages will be "un-rendered"
* with no text layer available, but only a placeholder element for the whole
* page.
* The original is defined at https://github.com/mozilla/pdf.js/blob/master/web/pdf_page_view.js
*/
FakePDFViewerApplication.prototype.setCurrentPage = function (index, lastRenderedPage=index) {
const self = this;
/**
 * Fake page view holding the DOM element for one page and its rendering
 * status.
 */
class FakePDFPageView {
  /**
   * @param {string} text - Text of the page
   * @param {FakePDFPageViewOptions} options
   */
  constructor(text, options) {
    const pageEl = createPage(text, options.rendered);

    // A page element without a `.textLayer` descendant is treated as
    // un-rendered.
    const textLayerEl = pageEl.querySelector('.textLayer');

    /** Element for the whole page. */
    this.div = pageEl;

    /** Text layer info, or `null` if the page has no text layer. */
    this.textLayer = textLayerEl
      ? { textLayerDiv: textLayerEl, renderingDone: true }
      : null;

    /** `FINISHED` if the page has a text layer, `INITIAL` otherwise. */
    this.renderingState = textLayerEl
      ? RenderingStates.FINISHED
      : RenderingStates.INITIAL;
  }

  /**
   * Remove the page's element from the DOM.
   */
  dispose() {
    this.div.remove();
  }
}
const pages = this._content.map(function (text, idx) {
return createPage(text, idx >= index && idx <= lastRenderedPage);
});
/**
 * Fake implementation of PDF.js' `PDFViewer` class.
 *
 * Holds the text of every page and, once `setCurrentPage` has been called,
 * one `FakePDFPageView` per page whose element is attached to the container.
 *
 * The original is defined at https://github.com/mozilla/pdf.js/blob/master/web/pdf_viewer.js
 */
class FakePDFViewer {
  /**
   * @param {Options} options
   */
  constructor(options) {
    this._container = options.container;
    this._content = options.content;

    /** @type {FakePDFPageView[]} */
    this._pages = [];
  }

  /** Number of pages in the fake document. */
  get pagesCount() {
    return this._content.length;
  }

  /**
   * Return the page view for the page at `index`.
   *
   * Returns `undefined` while no page views exist, i.e. before the first
   * `setCurrentPage` call.
   *
   * @param {number} index
   * @throws {Error} If `index` is not a valid page index
   */
  getPageView(index) {
    this._checkBounds(index);
    return this._pages[index];
  }

  /**
   * Return a Promise for the text items of the page at `index`.
   *
   * @param {number} index
   * @throws {Error} If `index` is not a valid page index
   */
  getPageTextContent(index) {
    this._checkBounds(index);
    return Promise.resolve({
      // The way that the page text is split into items will depend on
      // the PDF and the version of PDF.js - individual text items might be
      // just symbols, words, phrases or whole lines.
      //
      // Here we split items by line which matches the typical output for a
      // born-digital PDF.
      items: this._content[index].split(/\n/).map((line) => ({ str: line })),
    });
  }

  /**
   * Set the index of the page which is currently visible in the viewport.
   *
   * Pages from `index` up to and including `lastRenderedPage` will be
   * "rendered" and have a text layer available. Other pages will be "un-rendered"
   * with no text layer available, but only a placeholder element for the whole
   * page.
   *
   * Every call rebuilds all page views and replaces the container's content.
   *
   * @param {number} index
   * @param {number} [lastRenderedPage]
   * @throws {Error} If `index` is not a valid page index
   */
  setCurrentPage(index, lastRenderedPage = index) {
    this._checkBounds(index);

    const pages = this._content.map(
      (text, idx) =>
        new FakePDFPageView(text, {
          rendered: idx >= index && idx <= lastRenderedPage,
        })
    );

    this._container.innerHTML = '';
    this._pages = pages;
    this._pages.forEach((page) => this._container.appendChild(page.div));
  }

  /**
   * Dispose of every page view currently held.
   */
  dispose() {
    this._pages.forEach((page) => page.dispose());
  }

  /**
   * Throw unless `index` is an integer that identifies one of the pages.
   *
   * The integer check matters: `undefined`, `null`, `NaN` and fractional
   * values all compare false against both `< 0` and `>= length`, so without
   * it they would pass and fail later with an unrelated TypeError.
   *
   * @param {number} index
   * @throws {Error} If `index` is not a valid page index
   */
  _checkBounds(index) {
    if (!Number.isInteger(index) || index < 0 || index >= this._content.length) {
      throw new Error(`Invalid page index ${index}`);
    }
  }
}
/**
* @typedef {Object} Options
* @property {Element} container - The container into which the fake PDF viewer
* should render the content
* @property {string[]} content - Array of strings containing the text for each
* page
*/
/**
 * Minimal stand-in for PDF.js' PDFViewerApplication.
 *
 * Only the parts of PDF.js that anchoring tests rely on are emulated: the
 * application owns a single fake viewer, exposed as `pdfViewer`.
 *
 * The original is defined at https://github.com/mozilla/pdf.js/blob/master/web/app.js
 */
class FakePDFViewerApplication {
  /**
   * @param {Options} options
   */
  constructor(options) {
    // Forward only the two documented options to the viewer.
    const { content, container } = options;
    this.pdfViewer = new FakePDFViewer({ content, container });
  }

  /**
   * Release whatever the fake PDF viewer created (DOM elements, timers etc.)
   * by disposing of the viewer.
   */
  dispose() {
    const { pdfViewer } = this;
    pdfViewer.dispose();
  }
}
module.exports = FakePDFViewerApplication;
......@@ -51,7 +51,7 @@ describe('annotator.anchoring.pdf', function () {
container: container,
content: fixtures.pdfPages,
});
viewer.setCurrentPage(0);
viewer.pdfViewer.setCurrentPage(0);
});
afterEach(function () {
......@@ -62,7 +62,7 @@ describe('annotator.anchoring.pdf', function () {
describe('#describe', function () {
it('returns position and quote selectors', function () {
viewer.setCurrentPage(2);
viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'Netherfield Park');
return pdfAnchoring.describe(container, range).then(function (selectors) {
const types = selectors.map(function (s) { return s.type; });
......@@ -71,7 +71,7 @@ describe('annotator.anchoring.pdf', function () {
});
it('returns a position selector with correct start/end offsets', function () {
viewer.setCurrentPage(2);
viewer.pdfViewer.setCurrentPage(2);
const quote = 'Netherfield Park';
const range = findText(container, quote);
const contentStr = fixtures.pdfPages.join('');
......@@ -85,7 +85,7 @@ describe('annotator.anchoring.pdf', function () {
});
it('returns a quote selector with the correct quote', function () {
viewer.setCurrentPage(2);
viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'Netherfield Park');
return pdfAnchoring.describe(container, range).then(function (selectors) {
const quote = selectors[1];
......@@ -108,7 +108,7 @@ describe('annotator.anchoring.pdf', function () {
// parent set, this fails.
viewer.setCurrentPage(3);
viewer.pdfViewer.setCurrentPage(3);
const quote = 'NODE B';
......@@ -136,7 +136,7 @@ describe('annotator.anchoring.pdf', function () {
});
it('rejects when text selection spans multiple pages', () => {
viewer.setCurrentPage(2, 3);
viewer.pdfViewer.setCurrentPage(2, 3);
const range = findText(container, 'occupied again? NODE A');
return pdfAnchoring.describe(container, range).catch(err => {
......@@ -147,7 +147,7 @@ describe('annotator.anchoring.pdf', function () {
describe('#anchor', function () {
it('anchors previously created selectors if the page is rendered', function () {
viewer.setCurrentPage(2);
viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'My dear Mr. Bennet');
return pdfAnchoring.describe(container, range).then(function (selectors) {
const position = selectors[0];
......@@ -186,7 +186,7 @@ describe('annotator.anchoring.pdf', function () {
offset: 100000,
}].forEach(({ offset }) => {
it('anchors using a quote if the position selector fails', function () {
viewer.setCurrentPage(0);
viewer.pdfViewer.setCurrentPage(0);
const range = findText(container, 'Pride And Prejudice');
return pdfAnchoring.describe(container, range).then(function (selectors) {
const position = selectors[0];
......@@ -203,10 +203,10 @@ describe('annotator.anchoring.pdf', function () {
});
it('anchors to a placeholder element if the page is not rendered', function () {
viewer.setCurrentPage(2);
viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'Netherfield Park');
return pdfAnchoring.describe(container, range).then(function (selectors) {
viewer.setCurrentPage(0);
viewer.pdfViewer.setCurrentPage(0);
return pdfAnchoring.anchor(container, selectors);
}).then(function (anchoredRange) {
assert.equal(anchoredRange.toString(), 'Loading annotations…');
......@@ -214,7 +214,7 @@ describe('annotator.anchoring.pdf', function () {
});
it('rejects if quote cannot be anchored', () => {
viewer.setCurrentPage(2);
viewer.pdfViewer.setCurrentPage(2);
const selectors = [{
type: 'TextQuoteSelector',
exact: 'phrase that does not exist in the PDF',
......@@ -226,7 +226,7 @@ describe('annotator.anchoring.pdf', function () {
});
it('re-anchors successfully using caches', () => {
viewer.setCurrentPage(2);
viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'said his lady');
let selectors;
return pdfAnchoring.describe(container, range).then(selectors_ => {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment