Commit 47706128 authored by Robert Knight's avatar Robert Knight

Fail anchoring in PDFs if there is no quote selector

Changes in text rendering across PDF.js versions can render position
selectors invalid. Therefore any anchoring done with position selectors must
be checked against the quote, as we do with HTML annotations.

This commit disallows anchoring using only position selectors in PDFs
and restructures `anchor` control flow using async/await to make it
easier to follow.

We have been capturing quote selectors with PDF annotations forever, so
there should be no impact on old annotations.
parent 1651c1cc
......@@ -446,57 +446,71 @@ function prioritizePages(position) {
/**
* Anchor a set of selectors to a DOM Range.
*
* `selectors` must include a `TextQuoteSelector` and may include other selector
* types.
*
* @param {HTMLElement} root
* @param {Selector[]} selectors - Selector objects to anchor
* @param {Selector[]} selectors
* @return {Promise<Range>}
*/
export function anchor(root, selectors) {
const position = /** @type {TextPositionSelector|undefined} */ (
selectors.find(s => s.type === 'TextPositionSelector')
);
export async function anchor(root, selectors) {
const quote = /** @type {TextQuoteSelector|undefined} */ (
selectors.find(s => s.type === 'TextQuoteSelector')
);
/** @type {Promise<Range>} */
let result = Promise.reject('unable to anchor');
// The quote selector is required in order to check that text position
// selector results are still valid.
if (!quote) {
throw new Error('No quote selector found');
}
const position = /** @type {TextPositionSelector|undefined} */ (
selectors.find(s => s.type === 'TextPositionSelector')
);
if (position) {
result = result.catch(() => {
return findPage(position.start).then(({ index, offset, textContent }) => {
const start = position.start - offset;
const end = position.end - offset;
const length = end - start;
const matchedText = textContent.substr(start, length);
if (quote && quote.exact !== matchedText) {
throw new Error('quote mismatch');
}
// If we have a position selector, try using that first as it is the fastest
// anchoring method.
try {
const { index, offset, textContent } = await findPage(position.start);
const start = position.start - offset;
const end = position.end - offset;
const length = end - start;
const matchedText = textContent.substr(start, length);
if (quote.exact !== matchedText) {
throw new Error('quote mismatch');
}
return anchorByPosition(index, start, end);
});
});
}
const range = await anchorByPosition(index, start, end);
return range;
} catch {
// Fall back to quote selector
}
if (quote) {
result = result.catch(() => {
// If anchoring with the position failed, check for a cached quote-based
// match using the quote + position as a cache key.
try {
if (
position &&
quotePositionCache[quote.exact] &&
quotePositionCache[quote.exact][position.start]
) {
const { pageIndex, anchor } =
quotePositionCache[quote.exact][position.start];
return anchorByPosition(pageIndex, anchor.start, anchor.end);
const range = await anchorByPosition(
pageIndex,
anchor.start,
anchor.end
);
return range;
}
return prioritizePages(position?.start ?? 0).then(pageIndices => {
return findInPages(pageIndices, quote, position);
});
});
} catch {
// Fall back to uncached quote selector match
}
}
return result;
const pageIndices = await prioritizePages(position?.start ?? 0);
return findInPages(pageIndices, quote, position);
}
/**
......
......@@ -245,7 +245,7 @@ describe('annotator/anchoring/pdf', () => {
// Test that all of the selectors anchor and that each selector individually
// anchors correctly as well
const subsets = [[position, quote], [position], [quote]];
const subsets = [[position, quote], [quote]];
const subsetsAnchored = subsets.map(subset => {
const types = subset.map(s => {
return s.type;
......@@ -270,6 +270,21 @@ describe('annotator/anchoring/pdf', () => {
});
});
[[], [{ type: 'TextPositionSelector', start: 0, end: 200 }]].forEach(
selectors => {
it('fails to anchor if there is no quote selector', async () => {
let error;
try {
await pdfAnchoring.anchor(container, selectors);
} catch (err) {
error = err;
}
assert.instanceOf(error, Error);
assert.equal(error.message, 'No quote selector found');
});
}
);
it('anchors text in older PDF.js versions', async () => {
initViewer(fixtures.pdfPages, { newTextRendering: false });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment