Commit 11ac17b2 authored by Robert Knight

Re-introduce warning for page text / text layer mismatch

This warning is useful to detect situations where the page text and text layer
differ beyond just whitespace differences, which will result in
misalignment of highlights.
parent 0ec6612c
/* global PDFViewerApplication */ /* global PDFViewerApplication */
import { warnOnce } from '../../shared/warn-once';
import { translateOffsets } from '../util/normalize'; import { translateOffsets } from '../util/normalize';
import { matchQuote } from './match-quote'; import { matchQuote } from './match-quote';
import { createPlaceholder } from './placeholder'; import { createPlaceholder } from './placeholder';
...@@ -324,6 +325,16 @@ async function anchorByPosition(pageIndex, start, end) { ...@@ -324,6 +325,16 @@ async function anchorByPosition(pageIndex, start, end) {
isNotSpace isNotSpace
); );
const textLayerQuote = stripSpaces(
textLayerStr.slice(textLayerStart, textLayerEnd)
);
const pageTextQuote = stripSpaces(pageText.slice(start, end));
if (textLayerQuote !== pageTextQuote) {
warnOnce(
'Text layer text does not match page text. Highlights will be mis-aligned.'
);
}
const startPos = new TextPosition(root, textLayerStart); const startPos = new TextPosition(root, textLayerStart);
const endPos = new TextPosition(root, textLayerEnd); const endPos = new TextPosition(root, textLayerEnd);
return new TextRange(startPos, endPos).toRange(); return new TextRange(startPos, endPos).toRange();
......
...@@ -531,6 +531,11 @@ describe('annotator/anchoring/pdf', () => { ...@@ -531,6 +531,11 @@ describe('annotator/anchoring/pdf', () => {
}); });
it('anchors with mismatch if text layer differs from PDF.js text API output', async () => { it('anchors with mismatch if text layer differs from PDF.js text API output', async () => {
const warnOnce = sinon.stub();
pdfAnchoring.$imports.$mock({
'../../shared/warn-once': { warnOnce },
});
viewer.pdfViewer.setCurrentPage(1); viewer.pdfViewer.setCurrentPage(1);
const selection = 'zombie in possession'; const selection = 'zombie in possession';
const range = findText(container, selection); const range = findText(container, selection);
...@@ -548,6 +553,10 @@ describe('annotator/anchoring/pdf', () => { ...@@ -548,6 +553,10 @@ describe('annotator/anchoring/pdf', () => {
const anchoredRange = await pdfAnchoring.anchor(container, [quote]); const anchoredRange = await pdfAnchoring.anchor(container, [quote]);
assert.equal(anchoredRange.toString(), 'zomby in possession o'); assert.equal(anchoredRange.toString(), 'zomby in possession o');
assert.calledWith(
warnOnce,
'Text layer text does not match page text. Highlights will be mis-aligned.'
);
}); });
it('anchors to a placeholder element if the page is not rendered', () => { it('anchors to a placeholder element if the page is not rendered', () => {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment