Commit 5b836627 authored by Sean Hammond's avatar Sean Hammond Committed by GitHub

Merge pull request #262 from hypothesis/fix-pdf-quote-anchoring-2

Fix PDF quote anchoring (Part II)
parents cffcd255 2381e2c9
...@@ -8,8 +8,11 @@ RenderingStates = require('../pdfjs-rendering-states') ...@@ -8,8 +8,11 @@ RenderingStates = require('../pdfjs-rendering-states')
# Caches for performance # Caches for performance
# Map of page index to Promise<string> # Map of page index to page text content as a `Promise<string>`
pageTextCache = {} pageTextCache = {}
# Two-dimensional map from `[quote][position]` to `{page, anchor}` intended to
# optimize re-anchoring of a pair of quote and position selectors if the
# position selector fails to anchor on its own.
quotePositionCache = {} quotePositionCache = {}
...@@ -132,7 +135,7 @@ anchorByPosition = (page, anchor, options) -> ...@@ -132,7 +135,7 @@ anchorByPosition = (page, anchor, options) ->
# Returns a `Promise<Range>` for the location of the quote. # Returns a `Promise<Range>` for the location of the quote.
findInPages = ([pageIndex, rest...], quote, position) -> findInPages = ([pageIndex, rest...], quote, position) ->
unless pageIndex? unless pageIndex?
return Promise.reject('quote not found') return Promise.reject(new Error('Quote not found'))
attempt = (info) -> attempt = (info) ->
# Try to find the quote in the current page. # Try to find the quote in the current page.
...@@ -151,8 +154,9 @@ findInPages = ([pageIndex, rest...], quote, position) -> ...@@ -151,8 +154,9 @@ findInPages = ([pageIndex, rest...], quote, position) ->
return findInPages(rest, quote, position) return findInPages(rest, quote, position)
cacheAndFinish = (anchor) -> cacheAndFinish = (anchor) ->
quotePositionCache[quote.exact] ?= {} if position
quotePositionCache[quote.exact][position.start] = {page, anchor} quotePositionCache[quote.exact] ?= {}
quotePositionCache[quote.exact][position.start] = {page, anchor}
return anchorByPosition(page, anchor) return anchorByPosition(page, anchor)
page = getPage(pageIndex) page = getPage(pageIndex)
...@@ -160,8 +164,9 @@ findInPages = ([pageIndex, rest...], quote, position) -> ...@@ -160,8 +164,9 @@ findInPages = ([pageIndex, rest...], quote, position) ->
offset = getPageOffset(pageIndex) offset = getPageOffset(pageIndex)
return Promise.all([page, content, offset]) return Promise.all([page, content, offset])
.then(attempt, next) .then(attempt)
.then(cacheAndFinish) .then(cacheAndFinish)
.catch(next)
# When a position anchor is available, quote search can prioritize pages by # When a position anchor is available, quote search can prioritize pages by
......
...@@ -18,7 +18,7 @@ function findText(container, text) { ...@@ -18,7 +18,7 @@ function findText(container, text) {
var fixtures = { var fixtures = {
// Each item in this list contains the text for one page of the "PDF" // Each item in this list contains the text for one page of the "PDF"
pdfContent: [ pdfPages: [
'Pride And Prejudice And Zombies\n' + 'Pride And Prejudice And Zombies\n' +
'By Jane Austin and Seth Grahame-Smith ', 'By Jane Austin and Seth Grahame-Smith ',
...@@ -49,7 +49,7 @@ describe('PDF anchoring', function () { ...@@ -49,7 +49,7 @@ describe('PDF anchoring', function () {
window.PDFViewerApplication = viewer = new FakePDFViewerApplication({ window.PDFViewerApplication = viewer = new FakePDFViewerApplication({
container: container, container: container,
content: fixtures.pdfContent, content: fixtures.pdfPages,
}); });
viewer.setCurrentPage(0); viewer.setCurrentPage(0);
}); });
...@@ -74,7 +74,7 @@ describe('PDF anchoring', function () { ...@@ -74,7 +74,7 @@ describe('PDF anchoring', function () {
viewer.setCurrentPage(2); viewer.setCurrentPage(2);
var quote = 'Netherfield Park'; var quote = 'Netherfield Park';
var range = findText(container, quote); var range = findText(container, quote);
var contentStr = fixtures.pdfContent.join(''); var contentStr = fixtures.pdfPages.join('');
var expectedPos = contentStr.replace(/\n/g,'').lastIndexOf(quote); var expectedPos = contentStr.replace(/\n/g,'').lastIndexOf(quote);
return pdfAnchoring.describe(container, range).then(function (selectors) { return pdfAnchoring.describe(container, range).then(function (selectors) {
...@@ -125,7 +125,7 @@ describe('PDF anchoring', function () { ...@@ -125,7 +125,7 @@ describe('PDF anchoring', function () {
commonAncestorContainer: staticRange.commonAncestorContainer, commonAncestorContainer: staticRange.commonAncestorContainer,
}; };
var contentStr = fixtures.pdfContent.join(''); var contentStr = fixtures.pdfPages.join('');
var expectedPos = contentStr.replace(/\n/g,'').lastIndexOf(quote); var expectedPos = contentStr.replace(/\n/g,'').lastIndexOf(quote);
return pdfAnchoring.describe(container, range).then(function (selectors) { return pdfAnchoring.describe(container, range).then(function (selectors) {
...@@ -139,7 +139,7 @@ describe('PDF anchoring', function () { ...@@ -139,7 +139,7 @@ describe('PDF anchoring', function () {
describe('#anchor', function () { describe('#anchor', function () {
it('anchors previously created selectors if the page is rendered', function () { it('anchors previously created selectors if the page is rendered', function () {
viewer.setCurrentPage(2); viewer.setCurrentPage(2);
var range = findText(container, 'Netherfield Park'); var range = findText(container, 'My dear Mr. Bennet');
return pdfAnchoring.describe(container, range).then(function (selectors) { return pdfAnchoring.describe(container, range).then(function (selectors) {
var position = selectors[0]; var position = selectors[0];
var quote = selectors[1]; var quote = selectors[1];
...@@ -149,8 +149,7 @@ describe('PDF anchoring', function () { ...@@ -149,8 +149,7 @@ describe('PDF anchoring', function () {
var subsets = [ var subsets = [
[position, quote], [position, quote],
[position], [position],
// FIXME - Anchoring a quote on its own does not currently work [quote],
// [quote],
]; ];
var subsetsAnchored = subsets.map(function (subset) { var subsetsAnchored = subsets.map(function (subset) {
var types = subset.map(function (s) { return s.type; }); var types = subset.map(function (s) { return s.type; });
...@@ -185,6 +184,24 @@ describe('PDF anchoring', function () { ...@@ -185,6 +184,24 @@ describe('PDF anchoring', function () {
}); });
}); });
it('anchors using a quote if the position selector refers to the wrong page', function () {
viewer.setCurrentPage(0);
var range = findText(container, 'Pride And Prejudice');
return pdfAnchoring.describe(container, range).then(function (selectors) {
var position = selectors[0];
var quote = selectors[1];
// Manipulate the position selector so that it refers to a location
// a long way away, on a different page, than the quote.
position.start += fixtures.pdfPages[0].length + 10;
position.end += fixtures.pdfPages[0].length + 10;
return pdfAnchoring.anchor(container, [position, quote]);
}).then(function (range) {
assert.equal(range.toString(), 'Pride And Prejudice');
});
});
it('anchors to a placeholder element if the page is not rendered', function () { it('anchors to a placeholder element if the page is not rendered', function () {
viewer.setCurrentPage(2); viewer.setCurrentPage(2);
var range = findText(container, 'Netherfield Park'); var range = findText(container, 'Netherfield Park');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment