Merge pull request #888 from hypothesis/modernize-fake-pdf-viewer

Modernize PDFViewerApplication fake

Merge pull request #888 from hypothesis/modernize-fake-pdf-viewer
Modernize PDFViewerApplication fake
7f0a2338 · Robert Knight · GitHub · d53e41e8 · dea7c287 · 7f0a2338
Unverified Commit 7f0a2338 authored Feb 01, 2019 by Robert Knight Committed by GitHub Feb 01, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 154 additions and 97 deletions

fake-pdf-viewer-application.js src/annotator/anchoring/test/fake-pdf-viewer-application.js +142 -85

pdf-test.js src/annotator/anchoring/test/pdf-test.js +12 -12

No files found.
--- a/src/annotator/anchoring/test/fake-pdf-viewer-application.js
+++ b/src/annotator/anchoring/test/fake-pdf-viewer-application.js
 'use strict';

-const RenderingStates = require('../../pdfjs-rendering-states');
-
-/**
- * @typedef {Object} Options
- * @property {Element} container - The container into which the fake PDF viewer
- *           should render the content
- * @property {string[]} content - Array of strings containing the text for each
- *           page
- */
-
 /**
- * A minimal fake implementation of PDF.js' PDFViewerApplication interface.
+ * Fake implementation of the parts of PDF.js that the Hypothesis client's
+ * anchoring interacts with.
 *
- * This emulates the parts of PDFViewerApplication that are relevant to
- * anchoring tests.
+ * This is used to create PDF anchoring tests which execute quickly and are
+ * easier to debug than tests run against the full PDF.js viewer application.
 *
- * @param {Options} options
+ * The general structure is to have `Fake{OriginalClassName}` classes for
+ * each of the relevant classes in PDF.js.
 */
-function FakePDFViewerApplication(options) {
-  function checkBounds(index) {
-    if (index < 0 || index >= options.content.length) {
-      throw new Error('Invalid page index ' + index.toString());
-    }
-  }
-
-  this._checkBounds = checkBounds;
-  this._content = options.content;
-  this._container = options.container;
-  this._pages = [];
-
-  const self = this;
-
-  this.pdfViewer = {
-    pagesCount: options.content.length,
-
-    getPageView: function (index) {
-      checkBounds(index);
-
-      const page = self._pages[index];
-      const textLayerEl = page.querySelector('.textLayer');
-
-      return {
-        div: page,
-        textLayer: textLayerEl ?
-          { textLayerDiv: textLayerEl, renderingDone: true } : null,
-        renderingState: textLayerEl ? RenderingStates.FINISHED : RenderingStates.INITIAL,
-      };
-    },
-
-    getPageTextContent: function (index) {
-      checkBounds(index);
-      return Promise.resolve({
-        // The way that the page text is split into items will depend on
-        // the PDF and the version of PDF.js - individual text items might be
-        // just symbols, words, phrases or whole lines.
-        //
-        // Here we split items by line which matches the typical output for a
-        // born-digital PDF.
-        items: options.content[index].split(/\n/).map(function (line) {
-          return {str: line};
-        }),
-      });
-    },
-  };
-}

-/**
- * Remove the DOM elements created by the PDF viewer and cleanup any timeouts etc.
- */
-FakePDFViewerApplication.prototype.dispose = function () {
-  this._pages.forEach(function (pageEl) {
-    pageEl.remove();
-  });
-};
+const RenderingStates = require('../../pdfjs-rendering-states');

 /**
 * Create the DOM structure for a page which matches the structure produced by
@@ -95,7 +33,7 @@ function createPage(content, rendered) {
  const textLayer = document.createElement('div');
  textLayer.classList.add('textLayer');

-  content.split(/\n/).forEach(function (item) {
+  content.split(/\n/).forEach((item) => {
    const itemEl = document.createElement('div');
    itemEl.textContent = item;
    textLayer.appendChild(itemEl);
@@ -106,25 +44,144 @@ function createPage(content, rendered) {
 }

 /**
- * Set the index of the page which is currently visible in the viewport.
+ * @typedef FakePDFPageViewOptions
+ * @prop {boolean} [rendered] - Whether this page is "rendered", as if it were
+ *   near the viewport, or not.
+ */
+
+/**
+ * Fake implementation of PDF.js `PDFPageView` class.
 *
- * Pages from `index` up to and including `lastRenderedPage` will be
- * "rendered" and have a text layer available. Other pages will be "un-rendered"
- * with no text layer available, but only a placeholder element for the whole
- * page.
+ * The original is defined at https://github.com/mozilla/pdf.js/blob/master/web/pdf_page_view.js
 */
-FakePDFViewerApplication.prototype.setCurrentPage = function (index, lastRenderedPage=index) {
-  const self = this;
+class FakePDFPageView {
+  /**
+   * @param {string} text - Text of the page
+   * @param {FakePDFPageViewOptions} options
+   */
+  constructor(text, options) {
+    const pageEl = createPage(text, options.rendered);
+    const textLayerEl = pageEl.querySelector('.textLayer');
+
+    this.div = pageEl;
+    this.textLayer = textLayerEl
+      ? { textLayerDiv: textLayerEl, renderingDone: true }
+      : null;
+    this.renderingState = textLayerEl
+      ? RenderingStates.FINISHED
+      : RenderingStates.INITIAL;
+  }

-  this._checkBounds(index);
+  dispose() {
+    this.div.remove();
+  }
+}

-  const pages = this._content.map(function (text, idx) {
-    return createPage(text, idx >= index && idx <= lastRenderedPage);
-  });
+/**
+ * Fake implementation of PDF.js' `PDFViewer` class.
+ *
+ * The original is defined at https://github.com/mozilla/pdf.js/blob/master/web/pdf_viewer.js
+ */
+class FakePDFViewer {
+  /**
+   * @param {Options} options
+   */
+  constructor(options) {
+    this._container = options.container;
+    this._content = options.content;
+
+    /** @type {FakePDFPageView[]} */
+    this._pages = [];
+  }

-  this._container.innerHTML = '';
-  this._pages = pages;
-  this._pages.forEach(function (p) { self._container.appendChild(p); });
-};
+  get pagesCount() {
+    return this._content.length;
+  }
+
+  getPageView(index) {
+    this._checkBounds(index);
+    return this._pages[index];
+  }
+
+  getPageTextContent(index) {
+    this._checkBounds(index);
+    return Promise.resolve({
+      // The way that the page text is split into items will depend on
+      // the PDF and the version of PDF.js - individual text items might be
+      // just symbols, words, phrases or whole lines.
+      //
+      // Here we split items by line which matches the typical output for a
+      // born-digital PDF.
+      items: this._content[index].split(/\n/).map(line => ({ str: line })),
+    });
+  }
+
+  /**
+   * Set the index of the page which is currently visible in the viewport.
+   *
+   * Pages from `index` up to and including `lastRenderedPage` will be
+   * "rendered" and have a text layer available. Other pages will be "un-rendered"
+   * with no text layer available, but only a placeholder element for the whole
+   * page.
+   */
+  setCurrentPage(index, lastRenderedPage = index) {
+    this._checkBounds(index);
+
+    const pages = this._content.map(
+      (text, idx) =>
+        new FakePDFPageView(text, {
+          rendered: idx >= index && idx <= lastRenderedPage,
+        })
+    );
+
+    this._container.innerHTML = '';
+    this._pages = pages;
+    this._pages.forEach(page => this._container.appendChild(page.div));
+  }
+
+  dispose() {
+    this._pages.forEach(page => page.dispose());
+  }
+
+  _checkBounds(index) {
+    if (index < 0 || index >= this._content.length) {
+      throw new Error('Invalid page index ' + index.toString());
+    }
+  }
+}
+
+/**
+ * @typedef {Object} Options
+ * @property {Element} container - The container into which the fake PDF viewer
+ *           should render the content
+ * @property {string[]} content - Array of strings containing the text for each
+ *           page
+ */
+
+/**
+ * A minimal fake implementation of PDF.js' PDFViewerApplication interface.
+ *
+ * This emulates the parts of PDF.js that are relevant to anchoring tests.
+ *
+ * The original is defined at https://github.com/mozilla/pdf.js/blob/master/web/app.js
+ */
+class FakePDFViewerApplication {
+  /**
+   * @param {Options} options
+   */
+  constructor(options) {
+    this.pdfViewer = new FakePDFViewer({
+      content: options.content,
+      container: options.container,
+    });
+  }
+
+  /**
+   * Remove any DOM elements, timers etc. created by the fake PDF viewer.
+   */
+  dispose() {
+    this.pdfViewer.dispose();
+  }
+}

 module.exports = FakePDFViewerApplication;
--- a/src/annotator/anchoring/test/pdf-test.js
+++ b/src/annotator/anchoring/test/pdf-test.js
@@ -51,7 +51,7 @@ describe('annotator.anchoring.pdf', function () {
      container: container,
      content: fixtures.pdfPages,
    });
-    viewer.setCurrentPage(0);
+    viewer.pdfViewer.setCurrentPage(0);
  });

  afterEach(function () {
@@ -62,7 +62,7 @@ describe('annotator.anchoring.pdf', function () {

  describe('#describe', function () {
    it('returns position and quote selectors', function () {
-      viewer.setCurrentPage(2);
+      viewer.pdfViewer.setCurrentPage(2);
      const range = findText(container, 'Netherfield Park');
      return pdfAnchoring.describe(container, range).then(function (selectors) {
        const types = selectors.map(function (s) { return s.type; });
@@ -71,7 +71,7 @@ describe('annotator.anchoring.pdf', function () {
    });

    it('returns a position selector with correct start/end offsets', function () {
-      viewer.setCurrentPage(2);
+      viewer.pdfViewer.setCurrentPage(2);
      const quote = 'Netherfield Park';
      const range = findText(container, quote);
      const contentStr = fixtures.pdfPages.join('');
@@ -85,7 +85,7 @@ describe('annotator.anchoring.pdf', function () {
    });

    it('returns a quote selector with the correct quote', function () {
-      viewer.setCurrentPage(2);
+      viewer.pdfViewer.setCurrentPage(2);
      const range = findText(container, 'Netherfield Park');
      return pdfAnchoring.describe(container, range).then(function (selectors) {
        const quote = selectors[1];
@@ -108,7 +108,7 @@ describe('annotator.anchoring.pdf', function () {
      // parent set, this fails.


-      viewer.setCurrentPage(3);
+      viewer.pdfViewer.setCurrentPage(3);

      const quote = 'NODE B';

@@ -136,7 +136,7 @@ describe('annotator.anchoring.pdf', function () {
    });

    it('rejects when text selection spans multiple pages', () => {
-      viewer.setCurrentPage(2, 3);
+      viewer.pdfViewer.setCurrentPage(2, 3);
      const range = findText(container, 'occupied again? NODE A');

      return pdfAnchoring.describe(container, range).catch(err => {
@@ -147,7 +147,7 @@ describe('annotator.anchoring.pdf', function () {

  describe('#anchor', function () {
    it('anchors previously created selectors if the page is rendered', function () {
-      viewer.setCurrentPage(2);
+      viewer.pdfViewer.setCurrentPage(2);
      const range = findText(container, 'My dear Mr. Bennet');
      return pdfAnchoring.describe(container, range).then(function (selectors) {
        const position = selectors[0];
@@ -186,7 +186,7 @@ describe('annotator.anchoring.pdf', function () {
      offset: 100000,
    }].forEach(({ offset }) => {
      it('anchors using a quote if the position selector fails', function () {
-        viewer.setCurrentPage(0);
+        viewer.pdfViewer.setCurrentPage(0);
        const range = findText(container, 'Pride And Prejudice');
        return pdfAnchoring.describe(container, range).then(function (selectors) {
          const position = selectors[0];
@@ -203,10 +203,10 @@ describe('annotator.anchoring.pdf', function () {
    });

    it('anchors to a placeholder element if the page is not rendered', function () {
-      viewer.setCurrentPage(2);
+      viewer.pdfViewer.setCurrentPage(2);
      const range = findText(container, 'Netherfield Park');
      return pdfAnchoring.describe(container, range).then(function (selectors) {
-        viewer.setCurrentPage(0);
+        viewer.pdfViewer.setCurrentPage(0);
        return pdfAnchoring.anchor(container, selectors);
      }).then(function (anchoredRange) {
        assert.equal(anchoredRange.toString(), 'Loading annotations…');
@@ -214,7 +214,7 @@ describe('annotator.anchoring.pdf', function () {
    });

    it('rejects if quote cannot be anchored', () => {
-      viewer.setCurrentPage(2);
+      viewer.pdfViewer.setCurrentPage(2);
      const selectors = [{
        type: 'TextQuoteSelector',
        exact: 'phrase that does not exist in the PDF',
@@ -226,7 +226,7 @@ describe('annotator.anchoring.pdf', function () {
    });

    it('re-anchors successfully using caches', () => {
-      viewer.setCurrentPage(2);
+      viewer.pdfViewer.setCurrentPage(2);
      const range = findText(container, 'said his lady');
      let selectors;
      return pdfAnchoring.describe(container, range).then(selectors_ => {