Merge pull request #887 from hypothesis/convert-pdf-anchoring-to-js

Convert PDF anchoring code to JS

Merge pull request #887 from hypothesis/convert-pdf-anchoring-to-js
Convert PDF anchoring code to JS
d53e41e8 · Robert Knight · GitHub · c3faed17 · 9cfdc3c2 · c3faed17
Unverified Commit d53e41e8 authored Feb 01, 2019 by Robert Knight Committed by GitHub Feb 01, 2019
5 changed files
--- a/src/annotator/anchoring/pdf.coffee
+++ b/src/annotator/anchoring/pdf.coffee
-seek = require('dom-seek')
-
-# `dom-node-iterator` polyfills optional arguments of `createNodeIterator`
-# and properties of the returned `NodeIterator` for IE 11 compatibility.
-createNodeIterator = require('dom-node-iterator/polyfill')()
-xpathRange = require('./range')
-
-html = require('./html')
-RenderingStates = require('../pdfjs-rendering-states')
-{TextPositionAnchor, TextQuoteAnchor} = require('./types')
-
-# Caches for performance
-
-# Map of page index to page text content as a `Promise<string>`
-pageTextCache = {}
-# Two-dimensional map from `[quote][position]` to `{page, anchor}` intended to
-# optimize re-anchoring of a pair of quote and position selectors if the
-# position selector fails to anchor on its own.
-quotePositionCache = {}
-
-
-getSiblingIndex = (node) ->
-  siblings = Array.prototype.slice.call(node.parentNode.childNodes)
-  return siblings.indexOf(node)
-
-
-getNodeTextLayer = (node) ->
-  until node.classList?.contains('page')
-    node = node.parentNode
-  return node.getElementsByClassName('textLayer')[0]
-
-
-getPage = (pageIndex) ->
-  return PDFViewerApplication.pdfViewer.getPageView(pageIndex)
-
-
-getPageTextContent = (pageIndex) ->
-  if pageTextCache[pageIndex]?
-    return pageTextCache[pageIndex]
-  else
-    joinItems = ({items}) ->
-      # Skip empty items since PDF-js leaves their text layer divs blank.
-      # Excluding them makes our measurements match the rendered text layer.
-      # Otherwise, the selectors we generate would not match this stored text.
-      # See the appendText method of TextLayerBuilder in pdf.js.
-      nonEmpty = (item.str for item in items when /\S/.test(item.str))
-      textContent = nonEmpty.join('')
-      return textContent
-
-    pageTextCache[pageIndex] = PDFViewerApplication.pdfViewer.getPageTextContent(pageIndex)
-    .then(joinItems)
-    return pageTextCache[pageIndex]
-
-
-# Return the offset in the text for the whole document at which the text for
-# `pageIndex` begins.
-getPageOffset = (pageIndex) ->
-  index = -1
-
-  next = (offset) ->
-    if ++index is pageIndex
-      return Promise.resolve(offset)
-
-    return getPageTextContent(index)
-    .then((textContent) -> next(offset + textContent.length))
-
-  return next(0)
-
-
-# Return an {index, offset, textContent} object for the page where the given
-# `offset` in the full text of the document occurs.
-findPage = (offset) ->
-  index = 0
-  total = 0
-
-  # We call `count` once for each page, in order. The passed offset is found on
-  # the first page where the cumulative length of the text content exceeds the
-  # offset value.
-  #
-  # When we find the page the offset is on, we return an object containing the
-  # page index, the offset at the start of that page, and the textContent of
-  # that page.
-  #
-  # To understand this a little better, here's a worked example. Imagine a
-  # document with the following page lengths:
-  #
-  #    Page 0 has length 100
-  #    Page 1 has length 50
-  #    Page 2 has length 50
-  #
-  # Then here are the pages that various offsets are found on:
-  #
-  #    offset | index
-  #    --------------
-  #    0      | 0
-  #    99     | 0
-  #    100    | 1
-  #    101    | 1
-  #    149    | 1
-  #    150    | 2
-  #
-  count = (textContent) ->
-    lastPageIndex = PDFViewerApplication.pdfViewer.pagesCount - 1
-    if total + textContent.length > offset or index == lastPageIndex
-      offset = total
-      return Promise.resolve({index, offset, textContent})
-    else
-      index++
-      total += textContent.length
-      return getPageTextContent(index).then(count)
-
-  return getPageTextContent(0).then(count)
-
-
-# Search for a position anchor within a page, creating a placeholder and
-# anchoring to that if the page is not rendered.
-anchorByPosition = (page, anchor, options) ->
-  renderingState = page.renderingState
-  renderingDone = page.textLayer?.renderingDone
-  if renderingState is RenderingStates.FINISHED and renderingDone
-    root = page.textLayer.textLayerDiv
-    selector = anchor.toSelector(options)
-    return html.anchor(root, [selector])
-  else
-    div = page.div ? page.el
-    placeholder = div.getElementsByClassName('annotator-placeholder')[0]
-    unless placeholder?
-      placeholder = document.createElement('span')
-      placeholder.classList.add('annotator-placeholder')
-      placeholder.textContent = 'Loading annotations…'
-      div.appendChild(placeholder)
-    range = document.createRange()
-    range.setStartBefore(placeholder)
-    range.setEndAfter(placeholder)
-    return range
-
-
-# Search for a quote (with optional position hint) in the given pages.
-# Returns a `Promise<Range>` for the location of the quote.
-findInPages = ([pageIndex, rest...], quote, position) ->
-  unless pageIndex?
-    return Promise.reject(new Error('Quote not found'))
-
-  attempt = (info) ->
-    # Try to find the quote in the current page.
-    [page, content, offset] = info
-    root = {textContent: content}
-    anchor = new TextQuoteAnchor.fromSelector(root, quote)
-    if position?
-      hint = position.start - offset
-      hint = Math.max(0, hint)
-      hint = Math.min(hint, content.length)
-      return anchor.toPositionAnchor({hint})
-    else
-      return anchor.toPositionAnchor()
-
-  next = ->
-    return findInPages(rest, quote, position)
-
-  cacheAndFinish = (anchor) ->
-    if position
-      quotePositionCache[quote.exact] ?= {}
-      quotePositionCache[quote.exact][position.start] = {page, anchor}
-    return anchorByPosition(page, anchor)
-
-  page = getPage(pageIndex)
-  content = getPageTextContent(pageIndex)
-  offset = getPageOffset(pageIndex)
-
-  return Promise.all([page, content, offset])
-  .then(attempt)
-  .then(cacheAndFinish)
-  .catch(next)
-
-
-# When a position anchor is available, quote search can prioritize pages by
-# the position, searching pages outward starting from the page containing the
-# expected offset. This should speed up anchoring by searching fewer pages.
-prioritizePages = (position) ->
-  {pagesCount} = PDFViewerApplication.pdfViewer
-  pageIndices = [0...pagesCount]
-
-  sort = (pageIndex) ->
-    left = pageIndices.slice(0, pageIndex)
-    right = pageIndices.slice(pageIndex)
-    result = []
-    while left.length or right.length
-      if right.length
-        result.push(right.shift())
-      if left.length
-        result.push(left.pop())
-    return result
-
-  if position?
-    return findPage(position.start)
-    .then(({index}) -> return sort(index))
-  else
-    return Promise.resolve(pageIndices)
-
-
-###*
-# Anchor a set of selectors.
-#
-# This function converts a set of selectors into a document range.
-# It encapsulates the core anchoring algorithm, using the selectors alone or
-# in combination to establish the best anchor within the document.
-#
-# :param Element root: The root element of the anchoring context.
-# :param Array selectors: The selectors to try.
-# :param Object options: Options to pass to the anchor implementations.
-# :return: A Promise that resolves to a Range on success.
-# :rtype: Promise
-####
-exports.anchor = (root, selectors, options = {}) ->
-  # Selectors
-  position = null
-  quote = null
-
-  # Collect all the selectors
-  for selector in selectors ? []
-    switch selector.type
-      when 'TextPositionSelector'
-        position = selector
-      when 'TextQuoteSelector'
-        quote = selector
-
-  # Until we successfully anchor, we fail.
-  promise = Promise.reject('unable to anchor')
-
-  # Assert the quote matches the stored quote, if applicable
-  assertQuote = (range) ->
-    if quote?.exact? and range.toString() != quote.exact
-      throw new Error('quote mismatch')
-    else
-      return range
-
-  if position?
-    promise = promise.catch ->
-      return findPage(position.start)
-      .then ({index, offset, textContent}) ->
-        page = getPage(index)
-        start = position.start - offset
-        end = position.end - offset
-        length = end - start
-        assertQuote(textContent.substr(start, length))
-        anchor = new TextPositionAnchor(root, start, end)
-        return anchorByPosition(page, anchor, options)
-
-  if quote?
-    promise = promise.catch ->
-      if position? and quotePositionCache[quote.exact]?[position.start]?
-        {page, anchor} = quotePositionCache[quote.exact][position.start]
-        return anchorByPosition(page, anchor, options)
-
-      return prioritizePages(position)
-      .then((pageIndices) -> findInPages(pageIndices, quote, position))
-
-  return promise
-
-
-###*
-# Convert a DOM Range object into a set of selectors.
-#
-# Converts a DOM `Range` object describing a start and end point within a
-# `root` `Element` and converts it to a `[position, quote]` tuple of selectors
-# which can be saved into an annotation and later passed to `anchor` to map
-# the selectors back to a `Range`.
-#
-# :param Element root: The root Element
-# :param Range range: DOM Range object
-# :param Object options: Options passed to `TextQuoteAnchor` and
-#                        `TextPositionAnchor`'s `toSelector` methods.
-###
-exports.describe = (root, range, options = {}) ->
-
-  range = new xpathRange.BrowserRange(range).normalize()
-
-  startTextLayer = getNodeTextLayer(range.start)
-  endTextLayer = getNodeTextLayer(range.end)
-
-  # XXX: range covers only one page
-  if startTextLayer isnt endTextLayer
-    throw new Error('selecting across page breaks is not supported')
-
-  startRange = range.limit(startTextLayer)
-  endRange = range.limit(endTextLayer)
-
-  startPageIndex = getSiblingIndex(startTextLayer.parentNode)
-  endPageIndex = getSiblingIndex(endTextLayer.parentNode)
-
-  iter = createNodeIterator.call(document, startTextLayer, NodeFilter.SHOW_TEXT)
-
-  start = seek(iter, range.start)
-  end = seek(iter, range.end) + start + range.end.textContent.length
-
-  return getPageOffset(startPageIndex).then (pageOffset) ->
-    # XXX: range covers only one page
-    start += pageOffset
-    end += pageOffset
-
-    position = new TextPositionAnchor(root, start, end).toSelector(options)
-
-    r = document.createRange()
-    r.setStartBefore(startRange.start)
-    r.setEndAfter(endRange.end)
-
-    quote = TextQuoteAnchor.fromRange(root, r, options).toSelector(options)
-
-    return Promise.all([position, quote])
-
-
-###*
-# Clear the internal caches of page text contents and quote locations.
-#
-# This exists mainly as a helper for use in tests.
-###
-exports.purgeCache = ->
-  pageTextCache = {}
-  quotePositionCache = {}
--- a/src/annotator/anchoring/pdf.js
+++ b/src/annotator/anchoring/pdf.js
+'use strict';
+
+/* global PDFViewerApplication */
+
+const seek = require('dom-seek');
+
+// `dom-node-iterator` polyfills optional arguments of `createNodeIterator`
+// and properties of the returned `NodeIterator` for IE 11 compatibility.
+const createNodeIterator = require('dom-node-iterator/polyfill')();
+
+const xpathRange = require('./range');
+const html = require('./html');
+const RenderingStates = require('../pdfjs-rendering-states');
+const { TextPositionAnchor, TextQuoteAnchor } = require('./types');
+
+// Caches for performance.
+//
+// Both are declared with `let` because `purgeCache` replaces them with fresh
+// objects rather than clearing them in place.
+
+/**
+ * Map of page index to page text content as a `Promise<string>`.
+ *
+ * Populated lazily by `getPageTextContent`.
+ */
+let pageTextCache = {};
+
+/**
+ * 2D map from `[quote][position]` to `{page, anchor}` intended to optimize
+ * re-anchoring of a pair of quote and position selectors if the position
+ * selector fails to anchor on its own.
+ *
+ * The outer key is the quote selector's `exact` text and the inner key is the
+ * position selector's `start` offset. Entries are written by `findInPages`.
+ */
+let quotePositionCache = {};
+
+/**
+ * Return the position of `node` within the child nodes of its parent.
+ *
+ * @param {Node} node
+ * @return {number} Index of `node`, or -1 if it is not among the children
+ */
+function getSiblingIndex(node) {
+  const siblings = node.parentNode.childNodes;
+  return Array.prototype.indexOf.call(siblings, node);
+}
+
+/**
+ * Return the first `textLayer` element inside the nearest ancestor-or-self
+ * of `node` that has the `page` class.
+ *
+ * @param {Node} node
+ * @return {Element|undefined}
+ */
+function getNodeTextLayer(node) {
+  let pageEl = node;
+  // Nodes without a `classList` are skipped while walking up to the page element.
+  while (!(pageEl.classList && pageEl.classList.contains('page'))) {
+    pageEl = pageEl.parentNode;
+  }
+  const textLayers = pageEl.getElementsByClassName('textLayer');
+  return textLayers[0];
+}
+
+/**
+ * Look up the PDF.js view for a page of the current document.
+ *
+ * @param {number} pageIndex
+ * @return {PDFPageView} The value returned by the viewer's `getPageView`
+ */
+function getPage(pageIndex) {
+  const { pdfViewer } = PDFViewerApplication;
+  return pdfViewer.getPageView(pageIndex);
+}
+
+/**
+ * Fetch the text of a PDF page, memoizing the result per page index.
+ *
+ * @param {number} pageIndex
+ * @return {Promise<string>}
+ */
+function getPageTextContent(pageIndex) {
+  const cached = pageTextCache[pageIndex];
+  if (cached) {
+    return cached;
+  }
+
+  // Concatenate the strings of the PDF.js `TextItem`s found on the page.
+  //
+  // Items with no non-whitespace characters are dropped since PDF.js leaves
+  // their text layer divs blank. Excluding them keeps our measurements in
+  // line with the rendered text layer; otherwise the selectors we generate
+  // would not match this stored text.
+  // See the `appendText` method of `TextLayerBuilder` in PDF.js.
+  const toPageText = ({ items }) =>
+    items
+      .map(item => item.str)
+      .filter(str => /\S/.test(str))
+      .join('');
+
+  // FIXME - `pdfViewer.getPageTextContent` was removed in recent versions of PDF.js.
+  const pageText = PDFViewerApplication.pdfViewer
+    .getPageTextContent(pageIndex)
+    .then(toPageText);
+  pageTextCache[pageIndex] = pageText;
+
+  return pageText;
+}
+
+/**
+ * Compute where the text of page `pageIndex` starts within the text of the
+ * whole document.
+ *
+ * @param {number} pageIndex
+ * @return {Promise<number>} - Character position at which page text starts
+ */
+function getPageOffset(pageIndex) {
+  // Visit pages in order from the first one, accumulating the length of each
+  // page's text until `pageIndex` is reached.
+  const sumFrom = (index, offset) => {
+    if (index === pageIndex) {
+      return Promise.resolve(offset);
+    }
+    return getPageTextContent(index).then(textContent =>
+      sumFrom(index + 1, offset + textContent.length)
+    );
+  };
+
+  return sumFrom(0, 0);
+}
+
+/**
+ * Information about the page where a particular character position in the
+ * text of the document occurs.
+ *
+ * @typedef PageOffset
+ * @prop {number} index - Index of page containing offset
+ * @prop {number} offset -
+ *  Character position of the start of `textContent`
+ *  within the full text of the document
+ * @prop {string} textContent - Full text of page containing offset
+ */
+
+/**
+ * Find the index and text content of a page containing the character position
+ * `offset` within the complete text of the document.
+ *
+ * If `offset` lies at or beyond the end of the document's text, the last page
+ * is returned.
+ *
+ * @param {number} offset
+ * @return {Promise<PageOffset>}
+ */
+function findPage(offset) {
+  let index = 0;
+  // Character position at which the text of page `index` begins.
+  let pageStart = 0;
+
+  // We call `count` once for each page, in order. The passed offset is found on
+  // the first page where the cumulative length of the text content exceeds the
+  // offset value.
+  //
+  // When we find the page the offset is on, we return an object containing the
+  // page index, the offset at the start of that page, and the textContent of
+  // that page.
+  //
+  // To understand this a little better, here's a worked example. Imagine a
+  // document with the following page lengths:
+  //
+  //    Page 0 has length 100
+  //    Page 1 has length 50
+  //    Page 2 has length 50
+  //
+  // Then here are the pages that various offsets are found on:
+  //
+  //    offset | index
+  //    --------------
+  //    0      | 0
+  //    99     | 0
+  //    100    | 1
+  //    101    | 1
+  //    149    | 1
+  //    150    | 2
+  const count = textContent => {
+    const lastPageIndex = PDFViewerApplication.pdfViewer.pagesCount - 1;
+    if (pageStart + textContent.length > offset || index === lastPageIndex) {
+      // Offset is in current page. Note that the returned `offset` is the
+      // start of the page, not the `offset` argument.
+      return Promise.resolve({ index, offset: pageStart, textContent });
+    }
+
+    // Offset is within a subsequent page.
+    ++index;
+    pageStart += textContent.length;
+    return getPageTextContent(index).then(count);
+  };
+
+  return getPageTextContent(0).then(count);
+}
+
+/**
+ * Resolve a position anchor within a PDF page to a DOM Range.
+ *
+ * When the page and its text layer have finished rendering, the anchor is
+ * converted to a selector and located in the text layer via `html.anchor`.
+ *
+ * Otherwise (e.g. the page is off-screen and its text layer has not been
+ * created) a placeholder element is added to the page if one does not exist
+ * yet, and the returned range encloses that placeholder. The selector then
+ * needs to be re-anchored once the page is scrolled into view.
+ *
+ * NOTE(review): in the rendered case the return value is whatever
+ * `html.anchor` returns - presumably a `Promise<Range>`; confirm.
+ *
+ * @param {PDFPageView} page - The PDF.js viewer page
+ * @param {TextPositionAnchor} anchor - Anchor to locate in page
+ * @param {Object} options - Options for `anchor.toSelector`
+ * @return {Range}
+ */
+function anchorByPosition(page, anchor, options) {
+  const textLayer = page.textLayer;
+  const textRendered = textLayer ? textLayer.renderingDone : false;
+  const pageRendered = page.renderingState === RenderingStates.FINISHED;
+
+  if (pageRendered && textRendered) {
+    // Rendered page: delegate to HTML anchoring within the text layer.
+    return html.anchor(page.textLayer.textLayerDiv, [
+      anchor.toSelector(options),
+    ]);
+  }
+
+  // Un-rendered page: reuse the page's placeholder or create it on demand.
+  const container = page.div || page.el;
+  const existing = container.getElementsByClassName('annotator-placeholder');
+  let placeholder = existing[0];
+  if (!placeholder) {
+    placeholder = document.createElement('span');
+    placeholder.classList.add('annotator-placeholder');
+    placeholder.textContent = 'Loading annotations…';
+    container.appendChild(placeholder);
+  }
+
+  const placeholderRange = document.createRange();
+  placeholderRange.setStartBefore(placeholder);
+  placeholderRange.setEndAfter(placeholder);
+  return placeholderRange;
+}
+
+/**
+ * Search for a quote in the given pages.
+ *
+ * Pages are tried one at a time in the order given. Any failure while
+ * searching a page (including the quote not being found there) moves the
+ * search on to the next page.
+ *
+ * @param {number[]} pageIndexes - Pages to search in priority order
+ * @param {TextQuoteSelector} quoteSelector
+ * @param {TextPositionSelector} [positionHint] -
+ *   Position selector saved alongside the quote, if any. Its `start` offset
+ *   (relative to the whole document) is converted to a page-relative `hint`
+ *   for `TextQuoteAnchor#toPositionAnchor` and is also used as a cache key.
+ * @return {Promise<Range>} Location of quote. Rejects with a
+ *   "Quote not found" error once all pages have been tried.
+ */
+function findInPages(pageIndexes, quoteSelector, positionHint) {
+  if (pageIndexes.length === 0) {
+    // We reached the end of the document without finding a match for the quote.
+    return Promise.reject(new Error('Quote not found'));
+  }
+
+  const [pageIndex, ...rest] = pageIndexes;
+
+  const pageTextPromise = getPageTextContent(pageIndex);
+  const page = getPage(pageIndex);
+  const pageOffsetPromise = getPageOffset(pageIndex);
+
+  // Try to locate the quote within the text of the current page.
+  const attempt = ([pageText, pageOffset]) => {
+    const root = { textContent: pageText };
+    const quoteAnchor = TextQuoteAnchor.fromSelector(root, quoteSelector);
+    if (!positionHint) {
+      return quoteAnchor.toPositionAnchor();
+    }
+    // Make the hint relative to this page and clamp it to the page's text.
+    let hint = positionHint.start - pageOffset;
+    hint = Math.max(0, hint);
+    hint = Math.min(hint, pageText.length);
+    return quoteAnchor.toPositionAnchor({ hint });
+  };
+
+  const next = () => findInPages(rest, quoteSelector, positionHint);
+
+  // Remember where the quote was found so that a later call with the same
+  // quote/position pair can skip the search, then resolve the DOM range.
+  const cacheAndFinish = positionAnchor => {
+    if (positionHint) {
+      if (!quotePositionCache[quoteSelector.exact]) {
+        quotePositionCache[quoteSelector.exact] = {};
+      }
+      quotePositionCache[quoteSelector.exact][positionHint.start] = {
+        page,
+        anchor: positionAnchor,
+      };
+    }
+    return anchorByPosition(page, positionAnchor);
+  };
+
+  // First, get the text and text offset of the current page.
+  return Promise.all([pageTextPromise, pageOffsetPromise])
+    // Attempt to locate the quote in the current page.
+    .then(attempt)
+    // If the quote is located, find the DOM range and return it.
+    .then(cacheAndFinish)
+    // If the quote was not found, try the next page.
+    .catch(next);
+}
+
+/**
+ * Return a list of page indexes to search for a quote in priority order.
+ *
+ * When a position selector is available, quote search can be optimized by
+ * searching pages nearest the expected position first. Without one, pages
+ * are listed in document order.
+ *
+ * @param {TextPositionSelector} [position]
+ * @return {Promise<number[]>}
+ */
+function prioritizePages(position) {
+  const pageCount = PDFViewerApplication.pdfViewer.pagesCount;
+  const pageIndices = Array.from({ length: pageCount }, (_, i) => i);
+
+  if (!position) {
+    return Promise.resolve(pageIndices);
+  }
+
+  // Sort page indexes by offset from `pageIndex`: the page itself first, then
+  // alternating between the next page after it and the next page before it.
+  function sortPages(pageIndex) {
+    const left = pageIndices.slice(0, pageIndex);
+    const right = pageIndices.slice(pageIndex);
+    const result = [];
+    while (left.length > 0 || right.length > 0) {
+      if (right.length) {
+        result.push(right.shift());
+      }
+      if (left.length) {
+        result.push(left.pop());
+      }
+    }
+    return result;
+  }
+
+  return findPage(position.start).then(({ index }) => sortPages(index));
+}
+
+/**
+ * Anchor a set of selectors to a DOM Range.
+ *
+ * The position selector, if present, is tried first. If it is absent or fails
+ * to anchor, the quote selector is tried, using the position only as a hint.
+ *
+ * @param {HTMLElement} root
+ * @param {Array} [selectors] - Selector objects to anchor. A missing list is
+ *   treated as empty.
+ * @param {Object} options - Options to pass to selector anchoring
+ * @return {Promise<Range>} Rejects if none of the selectors could be anchored.
+ */
+function anchor(root, selectors, options = {}) {
+  const selectorList = selectors || [];
+  const position = selectorList.find(s => s.type === 'TextPositionSelector');
+  const quote = selectorList.find(s => s.type === 'TextQuoteSelector');
+
+  // Until a selector anchors successfully, the result is a rejection. The
+  // reason is a plain string (not an `Error`); kept as-is for compatibility
+  // with existing callers.
+  let result = Promise.reject('unable to anchor');
+
+  // Check that the text found at the stored position matches the stored
+  // quote. The check is skipped if there is no quote text to compare against.
+  const checkQuote = text => {
+    const hasExact =
+      Boolean(quote) && quote.exact !== undefined && quote.exact !== null;
+    if (hasExact && quote.exact !== text.toString()) {
+      throw new Error('quote mismatch');
+    }
+    return text;
+  };
+
+  if (position) {
+    result = result.catch(() => {
+      return findPage(position.start).then(({ index, offset, textContent }) => {
+        const page = getPage(index);
+        // Convert document-relative offsets to page-relative ones.
+        const start = position.start - offset;
+        const end = position.end - offset;
+        const length = end - start;
+
+        checkQuote(textContent.substr(start, length));
+
+        const positionAnchor = new TextPositionAnchor(root, start, end);
+        return anchorByPosition(page, positionAnchor, options);
+      });
+    });
+  }
+
+  if (quote) {
+    result = result.catch(() => {
+      // Reuse the location found by an earlier quote search for the same
+      // quote/position pair, if there was one.
+      const cachedByPosition = position && quotePositionCache[quote.exact];
+      const cached = cachedByPosition && cachedByPosition[position.start];
+      if (cached) {
+        return anchorByPosition(cached.page, cached.anchor, options);
+      }
+
+      return prioritizePages(position).then(pageIndices => {
+        return findInPages(pageIndices, quote, position);
+      });
+    });
+  }
+
+  return result;
+}
+
+/**
+ * Convert a DOM Range object into a set of selectors.
+ *
+ * Converts a DOM `Range` object into a `[position, quote]` tuple of selectors
+ * which can be saved with an annotation and later passed to `anchor` to
+ * convert the selectors back to a `Range`.
+ *
+ * Only ranges that start and end within the text layer of a single page are
+ * supported; for any other range the returned promise is rejected.
+ *
+ * @param {HTMLElement} root - The root element
+ * @param {Range} range
+ * @param {Object} options -
+ *   Options passed to `TextQuoteAnchor` and `TextPositionAnchor`'s
+ *   `toSelector` methods.
+ * @return {Promise<[TextPositionSelector, TextQuoteSelector]>}
+ */
+function describe(root, range, options = {}) {
+  const normalizedRange = new xpathRange.BrowserRange(range).normalize();
+
+  // Find the text layer of the page containing each end of the range.
+  const startTextLayer = getNodeTextLayer(normalizedRange.start);
+  const endTextLayer = getNodeTextLayer(normalizedRange.end);
+
+  if (startTextLayer !== endTextLayer) {
+    return Promise.reject(
+      new Error('selecting across page breaks is not supported')
+    );
+  }
+
+  // Both layers are the same element at this point, so these are two
+  // restrictions of the range to the same text layer.
+  const startRange = normalizedRange.limit(startTextLayer);
+  const endRange = normalizedRange.limit(endTextLayer);
+
+  // The text layer's parent is the page element; its position among its
+  // siblings is used as the page index.
+  const startPageIndex = getSiblingIndex(startTextLayer.parentNode);
+
+  const iter = createNodeIterator.call(
+    document,
+    startTextLayer,
+    NodeFilter.SHOW_TEXT
+  );
+  // The two `seek` calls share one iterator, so their order matters: the
+  // start must be located before the end.
+  // NOTE(review): presumably `seek` returns the number of characters passed
+  // over by that call, which is why `startPos` is added to the second
+  // result - confirm against the `dom-seek` documentation.
+  let startPos = seek(iter, normalizedRange.start);
+  let endPos =
+    seek(iter, normalizedRange.end) +
+    startPos +
+    normalizedRange.end.textContent.length;
+
+  return getPageOffset(startPageIndex).then(pageOffset => {
+    // Convert page-relative positions to document-relative ones.
+    startPos += pageOffset;
+    endPos += pageOffset;
+
+    const position = new TextPositionAnchor(root, startPos, endPos).toSelector(
+      options
+    );
+
+    // Build the range from which the quote selector is generated.
+    const quoteRange = document.createRange();
+    quoteRange.setStartBefore(startRange.start);
+    quoteRange.setEndAfter(endRange.end);
+
+    const quote = TextQuoteAnchor.fromRange(
+      root,
+      quoteRange,
+      options
+    ).toSelector(options);
+
+    return Promise.all([position, quote]);
+  });
+}
+
+/**
+ * Reset the page text and quote position caches kept by this module.
+ *
+ * Both caches are replaced with new empty objects. This exists mainly as a
+ * helper for use in tests.
+ */
+function purgeCache() {
+  quotePositionCache = {};
+  pageTextCache = {};
+}
+
+// Public API of this module. All other functions above are internal helpers.
+module.exports = {
+  anchor,
+  describe,
+  purgeCache,
+};
--- a/src/annotator/anchoring/test/fake-pdf-viewer-application.js
+++ b/src/annotator/anchoring/test/fake-pdf-viewer-application.js
@@ -108,16 +108,18 @@ function createPage(content, rendered) {
 /**
 * Set the index of the page which is currently visible in the viewport.
 *
- * The page which is visible will be "rendered" and have a text layer available.
- * For other pages, there will only be a placeholder element for the whole page.
+ * Pages from `index` up to and including `lastRenderedPage` will be
+ * "rendered" and have a text layer available. Other pages will be "un-rendered"
+ * with no text layer available, but only a placeholder element for the whole
+ * page.
 */
-FakePDFViewerApplication.prototype.setCurrentPage = function (index) {
+FakePDFViewerApplication.prototype.setCurrentPage = function (index, lastRenderedPage=index) {
  const self = this;

  this._checkBounds(index);

  const pages = this._content.map(function (text, idx) {
-    return createPage(text, idx === index /* rendered */);
+    return createPage(text, idx >= index && idx <= lastRenderedPage);
  });

  this._container.innerHTML = '';

--- a/src/annotator/anchoring/test/pdf-test.js
+++ b/src/annotator/anchoring/test/pdf-test.js
@@ -134,6 +134,15 @@ describe('annotator.anchoring.pdf', function () {
        assert.equal(position.end, expectedPos + quote.length);
      });
    });
+
+    it('rejects when text selection spans multiple pages', () => {
+      viewer.setCurrentPage(2, 3);
+      const range = findText(container, 'occupied again? NODE A');
+      // Fail if `describe` resolves; a bare `.catch` would pass vacuously.
+      return pdfAnchoring.describe(container, range).then(
+        () => { throw new Error('expected describe to reject'); },
+        err => assert.equal(err.message, 'selecting across page breaks is not supported'));
+    });
  });

  describe('#anchor', function () {
@@ -203,6 +212,41 @@ describe('annotator.anchoring.pdf', function () {
        assert.equal(anchoredRange.toString(), 'Loading annotations…');
      });
    });
+
+    it('rejects if quote cannot be anchored', () => {
+      viewer.setCurrentPage(2);
+      const selectors = [{
+        type: 'TextQuoteSelector',
+        exact: 'phrase that does not exist in the PDF',
+      }];
+      // Fail if `anchor` resolves; a bare `.catch` would pass vacuously.
+      return pdfAnchoring.anchor(container, selectors).then(
+        () => { throw new Error('expected anchor to reject'); },
+        err => assert.equal(err.message, 'Quote not found'));
+    });
+
+    it('re-anchors successfully using caches', () => {
+      viewer.setCurrentPage(2);
+      const range = findText(container, 'said his lady');
+      let selectors;
+      return pdfAnchoring.describe(container, range).then(selectors_ => {
+        selectors = selectors_;
+
+        // Adjust the position selector so that anchoring fails, and a fallback
+        // to the quote selector is required.
+        const position = selectors.find(s => s.type === 'TextPositionSelector');
+        position.start += 100;
+        position.end += 100;
+
+        return pdfAnchoring.anchor(container, selectors);
+      }).then(() => {
+        // Anchor again using the same selectors. This time anchoring will
+        // use the existing cache.
+        return pdfAnchoring.anchor(container, selectors);
+      }).then(range => {
+        assert.equal(range.toString(), 'said his lady');
+      });
+    });
  });

 });
--- a/src/shared/polyfills.js
+++ b/src/shared/polyfills.js
@@ -5,6 +5,7 @@ require('core-js/es6/promise');
 require('core-js/es6/map');
 require('core-js/es6/set');
 require('core-js/es6/symbol');
+require('core-js/fn/array/fill');
 require('core-js/fn/array/find');
 require('core-js/fn/array/find-index');
 require('core-js/fn/array/from');