Unverified Commit 1c80ed66 authored by Robert Knight's avatar Robert Knight Committed by GitHub

Merge pull request #1449 from hypothesis/fix-pdf-position-anchoring-bug

Fix PDF anchoring when annotation refers to last text on a page
parents 7cf89d61 8c45ac32
......@@ -9,9 +9,9 @@ const seek = require('dom-seek');
const createNodeIterator = require('dom-node-iterator/polyfill')();
const xpathRange = require('./range');
const html = require('./html');
const RenderingStates = require('../pdfjs-rendering-states');
const { TextPositionAnchor, TextQuoteAnchor } = require('./types');
const { toRange: textPositionToRange } = require('./text-position');
// Caches for performance.
......@@ -210,10 +210,9 @@ function findPage(offset) {
*
* @param {number} pageIndex - The PDF page index
* @param {TextPositionAnchor} anchor - Anchor to locate in page
* @param {Object} options - Options for `anchor.toSelector`
* @return {Promise<Range>}
*/
async function anchorByPosition(pageIndex, anchor, options) {
async function anchorByPosition(pageIndex, anchor) {
const page = await getPageView(pageIndex);
let renderingDone = false;
......@@ -221,11 +220,9 @@ async function anchorByPosition(pageIndex, anchor, options) {
renderingDone = page.textLayer.renderingDone;
}
if (page.renderingState === RenderingStates.FINISHED && renderingDone) {
// The page has been rendered. Use HTML anchoring to locate the quote in
// the text layer.
// The page has been rendered. Locate the position in the text layer.
const root = page.textLayer.textLayerDiv;
const selector = anchor.toSelector(options);
return html.anchor(root, [selector]);
return textPositionToRange(root, anchor.start, anchor.end);
}
// The page has not been rendered yet. Create a placeholder element and
......@@ -345,10 +342,9 @@ function prioritizePages(position) {
*
* @param {HTMLElement} root
* @param {Array} selectors - Selector objects to anchor
* @param {Object} options - Options to pass to selector anchoring
* @return {Promise<Range>}
*/
function anchor(root, selectors, options = {}) {
function anchor(root, selectors) {
const position = selectors.find(s => s.type === 'TextPositionSelector');
const quote = selectors.find(s => s.type === 'TextQuoteSelector');
......@@ -371,7 +367,7 @@ function anchor(root, selectors, options = {}) {
checkQuote(textContent.substr(start, length));
const anchor = new TextPositionAnchor(root, start, end);
return anchorByPosition(index, anchor, options);
return anchorByPosition(index, anchor);
});
});
}
......@@ -386,7 +382,7 @@ function anchor(root, selectors, options = {}) {
const { pageIndex, anchor } = quotePositionCache[quote.exact][
position.start
];
return anchorByPosition(pageIndex, anchor, options);
return anchorByPosition(pageIndex, anchor);
}
return prioritizePages(position).then(pageIndices => {
......
......@@ -188,6 +188,19 @@ describe('annotator/anchoring/pdf', function() {
});
});
// See https://github.com/hypothesis/client/issues/1329
it('anchors selectors that match the last text on the page', async () => {
viewer.pdfViewer.setCurrentPage(1);
const selectors = [
{
type: 'TextQuoteSelector',
exact: 'horde of the living dead.',
},
];
const anchoredRange = await pdfAnchoring.anchor(container, selectors);
assert.equal(anchoredRange.toString(), selectors[0].exact);
});
[
{
// Position on same page as quote but different text.
......
'use strict';
const { toRange } = require('../text-position');
describe('text-position', () => {
let container;
before(() => {
container = document.createElement('div');
container.innerHTML = `<h1>Test article</h1>
<p>First paragraph.</p>
<p>Second paragraph.</p>`;
});
after(() => {
container.remove();
});
describe('toRange', () => {
const testCase = (description, text) => ({
description,
text,
expected: text,
});
[
testCase('start text of root', 'Test article'),
testCase('a whole text node', 'First paragraph.'),
testCase('end text of root', 'Second paragraph.'),
testCase('part of a text node', 'rst paragraph'),
{
description: 'negative start offset',
start: -5,
end: 5,
expected: new Error('invalid start offset'),
},
{
description: 'invalid start offset',
start: 1000,
end: 1010,
expected: new Error('invalid start offset'),
},
{
description: 'invalid end offset',
start: 0,
end: 1000,
expected: new Error('invalid end offset'),
},
{
description: 'an empty range',
start: 0,
end: 0,
expected: '',
},
{
description: 'a range with end < start',
start: 10,
end: 5,
expected: '',
},
].forEach(({ description, start, end, expected, text }) => {
it(`returns a range with the correct text (${description})`, () => {
if (text) {
start = container.textContent.indexOf(text);
end = start + text.length;
}
if (expected instanceof Error) {
assert.throws(() => {
toRange(container, start, end);
}, expected.message);
} else {
const range = toRange(container, start, end);
assert.equal(range.toString(), expected);
}
});
});
});
});
'use strict';
/**
* Functions to convert between DOM ranges and characters offsets within the
* `textContent` of HTML elements.
*
* These were added to work around issues in `dom-anchor-text-position`'s
* `toRange` implementation. When the issue is resolved upstream, we may still
* want to keep the test suite for this module.
*
* See https://github.com/hypothesis/client/issues/1329
*/
/**
* Convert `start` and `end` character offset positions within the `textContent`
* of a `root` element into a `Range`.
*
* Throws if the `start` or `end` offsets are outside of the range `[0,
* root.textContent.length]`.
*
* @param {HTMLElement} root
* @param {number} start - Character offset within `root.textContent`
* @param {number} end - Character offset within `root.textContent`
* @return {Range} Range spanning text from `start` to `end`
*/
function toRange(root, start, end) {
// The `filter` and `expandEntityReferences` arguments are mandatory in IE
// although optional according to the spec.
const nodeIter = root.ownerDocument.createNodeIterator(
root,
NodeFilter.SHOW_TEXT,
null, // filter
false // expandEntityReferences
);
let startContainer;
let startOffset;
let endContainer;
let endOffset;
let textLength = 0;
let node;
while ((node = nodeIter.nextNode()) && (!startContainer || !endContainer)) {
const nodeText = node.nodeValue;
if (
!startContainer &&
start >= textLength &&
start <= textLength + nodeText.length
) {
startContainer = node;
startOffset = start - textLength;
}
if (
!endContainer &&
end >= textLength &&
end <= textLength + nodeText.length
) {
endContainer = node;
endOffset = end - textLength;
}
textLength += nodeText.length;
}
if (!startContainer) {
throw new Error('invalid start offset');
}
if (!endContainer) {
throw new Error('invalid end offset');
}
const range = root.ownerDocument.createRange();
range.setStart(startContainer, startOffset);
range.setEnd(endContainer, endOffset);
return range;
}
module.exports = {
toRange,
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment