Commit 5b836627 authored by Sean Hammond's avatar Sean Hammond Committed by GitHub

Merge pull request #262 from hypothesis/fix-pdf-quote-anchoring-2

Fix PDF quote anchoring (Part II)
parents cffcd255 2381e2c9
......@@ -8,8 +8,11 @@ RenderingStates = require('../pdfjs-rendering-states')
# Caches for performance
# Map of page index to Promise<string>
# Map of page index to page text content as a `Promise<string>`
pageTextCache = {}
# Two-dimensional map from `[quote][position]` to `{page, anchor}` intended to
# optimize re-anchoring of a pair of quote and position selectors if the
# position selector fails to anchor on its own.
quotePositionCache = {}
......@@ -132,7 +135,7 @@ anchorByPosition = (page, anchor, options) ->
# Returns a `Promise<Range>` for the location of the quote.
findInPages = ([pageIndex, rest...], quote, position) ->
unless pageIndex?
return Promise.reject('quote not found')
return Promise.reject(new Error('Quote not found'))
attempt = (info) ->
# Try to find the quote in the current page.
......@@ -151,8 +154,9 @@ findInPages = ([pageIndex, rest...], quote, position) ->
return findInPages(rest, quote, position)
cacheAndFinish = (anchor) ->
quotePositionCache[quote.exact] ?= {}
quotePositionCache[quote.exact][position.start] = {page, anchor}
if position
quotePositionCache[quote.exact] ?= {}
quotePositionCache[quote.exact][position.start] = {page, anchor}
return anchorByPosition(page, anchor)
page = getPage(pageIndex)
......@@ -160,8 +164,9 @@ findInPages = ([pageIndex, rest...], quote, position) ->
offset = getPageOffset(pageIndex)
return Promise.all([page, content, offset])
.then(attempt, next)
.then(attempt)
.then(cacheAndFinish)
.catch(next)
# When a position anchor is available, quote search can prioritize pages by
......
......@@ -18,7 +18,7 @@ function findText(container, text) {
var fixtures = {
// Each item in this list contains the text for one page of the "PDF"
pdfContent: [
pdfPages: [
'Pride And Prejudice And Zombies\n' +
'By Jane Austin and Seth Grahame-Smith ',
......@@ -49,7 +49,7 @@ describe('PDF anchoring', function () {
window.PDFViewerApplication = viewer = new FakePDFViewerApplication({
container: container,
content: fixtures.pdfContent,
content: fixtures.pdfPages,
});
viewer.setCurrentPage(0);
});
......@@ -74,7 +74,7 @@ describe('PDF anchoring', function () {
viewer.setCurrentPage(2);
var quote = 'Netherfield Park';
var range = findText(container, quote);
var contentStr = fixtures.pdfContent.join('');
var contentStr = fixtures.pdfPages.join('');
var expectedPos = contentStr.replace(/\n/g,'').lastIndexOf(quote);
return pdfAnchoring.describe(container, range).then(function (selectors) {
......@@ -125,7 +125,7 @@ describe('PDF anchoring', function () {
commonAncestorContainer: staticRange.commonAncestorContainer,
};
var contentStr = fixtures.pdfContent.join('');
var contentStr = fixtures.pdfPages.join('');
var expectedPos = contentStr.replace(/\n/g,'').lastIndexOf(quote);
return pdfAnchoring.describe(container, range).then(function (selectors) {
......@@ -139,7 +139,7 @@ describe('PDF anchoring', function () {
describe('#anchor', function () {
it('anchors previously created selectors if the page is rendered', function () {
viewer.setCurrentPage(2);
var range = findText(container, 'Netherfield Park');
var range = findText(container, 'My dear Mr. Bennet');
return pdfAnchoring.describe(container, range).then(function (selectors) {
var position = selectors[0];
var quote = selectors[1];
......@@ -149,8 +149,7 @@ describe('PDF anchoring', function () {
var subsets = [
[position, quote],
[position],
// FIXME - Anchoring a quote on its own does not currently work
// [quote],
[quote],
];
var subsetsAnchored = subsets.map(function (subset) {
var types = subset.map(function (s) { return s.type; });
......@@ -185,6 +184,24 @@ describe('PDF anchoring', function () {
});
});
it('anchors using a quote if the position selector refers to the wrong page', function () {
viewer.setCurrentPage(0);
var range = findText(container, 'Pride And Prejudice');
return pdfAnchoring.describe(container, range).then(function (selectors) {
var position = selectors[0];
var quote = selectors[1];
// Manipulate the position selector so that it refers to a location
// a long way away, on a different page, than the quote.
position.start += fixtures.pdfPages[0].length + 10;
position.end += fixtures.pdfPages[0].length + 10;
return pdfAnchoring.anchor(container, [position, quote]);
}).then(function (range) {
assert.equal(range.toString(), 'Pride And Prejudice');
});
});
it('anchors to a placeholder element if the page is not rendered', function () {
viewer.setCurrentPage(2);
var range = findText(container, 'Netherfield Park');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment