Commit 11ac17b2 authored by Robert Knight

Re-introduce warning for page text / text layer mismatch

This warning is useful to detect situations where the page text and text layer
differ beyond just whitespace differences, which will result in
misalignment of highlights.
parent 0ec6612c
/* global PDFViewerApplication */
import { warnOnce } from '../../shared/warn-once';
import { translateOffsets } from '../util/normalize';
import { matchQuote } from './match-quote';
import { createPlaceholder } from './placeholder';
......@@ -324,6 +325,16 @@ async function anchorByPosition(pageIndex, start, end) {
isNotSpace
);
const textLayerQuote = stripSpaces(
textLayerStr.slice(textLayerStart, textLayerEnd)
);
const pageTextQuote = stripSpaces(pageText.slice(start, end));
if (textLayerQuote !== pageTextQuote) {
warnOnce(
'Text layer text does not match page text. Highlights will be mis-aligned.'
);
}
const startPos = new TextPosition(root, textLayerStart);
const endPos = new TextPosition(root, textLayerEnd);
return new TextRange(startPos, endPos).toRange();
......
......@@ -531,6 +531,11 @@ describe('annotator/anchoring/pdf', () => {
});
it('anchors with mismatch if text layer differs from PDF.js text API output', async () => {
const warnOnce = sinon.stub();
pdfAnchoring.$imports.$mock({
'../../shared/warn-once': { warnOnce },
});
viewer.pdfViewer.setCurrentPage(1);
const selection = 'zombie in possession';
const range = findText(container, selection);
......@@ -548,6 +553,10 @@ describe('annotator/anchoring/pdf', () => {
const anchoredRange = await pdfAnchoring.anchor(container, [quote]);
assert.equal(anchoredRange.toString(), 'zomby in possession o');
assert.calledWith(
warnOnce,
'Text layer text does not match page text. Highlights will be mis-aligned.'
);
});
it('anchors to a placeholder element if the page is not rendered', () => {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment