Commit d94df628 authored by Robert Knight

Show warning banner above PDF if there is no selectable text

 - Add `documentHasText` helper in `anchoring/pdf.js` which checks if
   the document appears to have selectable text

 - Add logic in `plugin/pdf.js` which uses the `documentHasText` helper
   to check whether the PDF has selectable text and display a banner
   above the PDF viewer if not

Fixes https://github.com/hypothesis/product-backlog/issues/1081
parent 901b4eb8
...@@ -96,6 +96,22 @@ async function getPageView(pageIndex) { ...@@ -96,6 +96,22 @@ async function getPageView(pageIndex) {
return /** @type {PDFPageView} */ (pageView); return /** @type {PDFPageView} */ (pageView);
} }
/**
 * Check whether the loaded PDF document contains any selectable text.
 *
 * Pages are examined one at a time, in order, and the scan stops at the first
 * page whose text is non-empty once surrounding whitespace is trimmed.
 *
 * @return {Promise<boolean>} - `true` if some page has text, `false` if every
 *   page is blank or the document has no pages
 */
export async function documentHasText() {
  const pdfViewer = getPdfViewer();

  for (let pageIndex = 0; pageIndex < pdfViewer.pagesCount; pageIndex += 1) {
    const text = await getPageTextContent(pageIndex);
    if (text.trim() !== '') {
      // One page with text is enough; skip fetching the remaining pages.
      return true;
    }
  }

  return false;
}
/** /**
* Return the text of a given PDF page. * Return the text of a given PDF page.
* *
......
...@@ -41,24 +41,44 @@ describe('annotator/anchoring/pdf', function () { ...@@ -41,24 +41,44 @@ describe('annotator/anchoring/pdf', function () {
let container; let container;
let viewer; let viewer;
beforeEach(function () { /**
* Initialize the fake PDF.js viewer.
*
* @param {string[]} content -
* Array containing the text content of each page of the loaded PDF document
*/
function initViewer(content) {
cleanupViewer();
// The rendered text for each page is cached during anchoring. // The rendered text for each page is cached during anchoring.
// Clear this here so that each test starts from the same state. // Clear this here so that each test starts from the same state.
pdfAnchoring.purgeCache(); pdfAnchoring.purgeCache();
container = document.createElement('div'); viewer = new FakePDFViewerApplication({
document.body.appendChild(container); container,
content,
window.PDFViewerApplication = viewer = new FakePDFViewerApplication({
container: container,
content: fixtures.pdfPages,
}); });
window.PDFViewerApplication = viewer;
if (viewer.pdfViewer.pagesCount > 0) {
viewer.pdfViewer.setCurrentPage(0); viewer.pdfViewer.setCurrentPage(0);
}
}
/**
 * Clean up any resources created by the fake PDF.js viewer.
 *
 * This is called both from `afterEach` and at the start of `initViewer`, so
 * it must be safe to call more than once for the same viewer.
 */
function cleanupViewer() {
  viewer?.dispose();
  // Drop the reference so that the next call does not dispose the same
  // viewer instance a second time.
  viewer = null;
  window.PDFViewerApplication = null;
}
beforeEach(function () {
container = document.createElement('div');
document.body.appendChild(container);
initViewer(fixtures.pdfPages);
}); });
afterEach(function () { afterEach(function () {
window.PDFViewerApplication.dispose(); cleanupViewer();
window.PDFViewerApplication = null;
container.remove(); container.remove();
}); });
...@@ -345,4 +365,24 @@ describe('annotator/anchoring/pdf', function () { ...@@ -345,4 +365,24 @@ describe('annotator/anchoring/pdf', function () {
}); });
}); });
}); });
describe('documentHasText', () => {
  it('returns true if PDF has selectable text', async () => {
    assert.isTrue(await pdfAnchoring.documentHasText());
  });

  // Each fixture carries its own description so that the generated tests have
  // distinct titles and a failure report identifies which case broke.
  [
    { description: 'completely empty document', content: [] },
    { description: 'single page with no text', content: [''] },
    { description: 'multiple pages with no text', content: ['', '', ''] },
  ].forEach(({ description, content }) => {
    it(`returns false if PDF does not have selectable text (${description})`, async () => {
      // Replace the default viewer (created in `beforeEach`) with one that
      // serves the text-free fixture.
      initViewer(content);
      assert.isFalse(await pdfAnchoring.documentHasText());
    });
  });
});
}); });
import debounce from 'lodash.debounce'; import debounce from 'lodash.debounce';
import { Fragment, createElement, render } from 'preact';
import * as pdfAnchoring from '../anchoring/pdf'; import * as pdfAnchoring from '../anchoring/pdf';
import Delegator from '../delegator'; import Delegator from '../delegator';
...@@ -12,6 +13,26 @@ import PDFMetadata from './pdf-metadata'; ...@@ -12,6 +13,26 @@ import PDFMetadata from './pdf-metadata';
* @typedef {import('../../types/annotator').HypothesisWindow} HypothesisWindow * @typedef {import('../../types/annotator').HypothesisWindow} HypothesisWindow
*/ */
/**
* A banner shown at the top of the PDF viewer if the PDF cannot be annotated
* by Hypothesis.
*/
function WarningBanner() {
return (
<Fragment>
This PDF does not contain selectable text. To annotate it with Hypothesis
you will need to{' '}
<a
target="_blank"
rel="noreferrer"
href="https://web.hypothes.is/help/how-to-ocr-optimize-pdfs/"
>
OCR-optimize it.
</a>
</Fragment>
);
}
export default class PDF extends Delegator { export default class PDF extends Delegator {
/** /**
* @param {Annotator} annotator * @param {Annotator} annotator
...@@ -35,6 +56,16 @@ export default class PDF extends Delegator { ...@@ -35,6 +56,16 @@ export default class PDF extends Delegator {
childList: true, childList: true,
subtree: true, subtree: true,
}); });
/**
* A banner shown at the top of the PDF viewer warning the user if the PDF
* is not suitable for use with Hypothesis.
*
* @type {HTMLElement|null}
*/
this._warningBanner = null;
this._checkForSelectableText();
} }
destroy() { destroy() {
...@@ -50,6 +81,68 @@ export default class PDF extends Delegator { ...@@ -50,6 +81,68 @@ export default class PDF extends Delegator {
return this.pdfMetadata.getMetadata(); return this.pdfMetadata.getMetadata();
} }
/**
* Check whether the PDF has selectable text and show a warning if not.
*/
async _checkForSelectableText() {
// Wait for PDF to load.
try {
await this.uri();
} catch (e) {
return;
}
try {
const hasText = await pdfAnchoring.documentHasText();
this._showNoSelectableTextWarning(!hasText);
} catch (err) {
/* istanbul ignore next */
console.warn('Unable to check for text in PDF:', err);
}
}
/**
* Set whether the warning about a PDF's suitability for use with Hypothesis
* is shown.
*
* @param {boolean} showWarning
*/
_showNoSelectableTextWarning(showWarning) {
if (!showWarning) {
this._warningBanner?.remove();
this._warningBanner = null;
return;
}
const mainContainer = /** @type {HTMLElement} */ (document.querySelector(
'#mainContainer'
));
const updateBannerHeight = () => {
/* istanbul ignore next */
if (!this._warningBanner) {
return;
}
const rect = this._warningBanner.getBoundingClientRect();
mainContainer.style.top = rect.height + 'px';
this._warningBanner.style.top = -rect.height + 'px';
};
this._warningBanner = document.createElement('div');
this._warningBanner.className = 'annotator-pdf-warning-banner';
mainContainer.appendChild(this._warningBanner);
render(<WarningBanner />, this._warningBanner);
// @ts-expect-error - TS is missing `ResizeObserver`
if (typeof ResizeObserver !== 'undefined') {
// Update the banner when the window is resized or the Hypothesis
// sidebar is opened.
// @ts-ignore
new ResizeObserver(updateBannerHeight).observe(this._warningBanner);
}
updateBannerHeight();
}
// This method (re-)anchors annotations when pages are rendered and destroyed. // This method (re-)anchors annotations when pages are rendered and destroyed.
_update() { _update() {
// A list of annotations that need to be refreshed. // A list of annotations that need to be refreshed.
......
...@@ -9,6 +9,8 @@ function delay(ms) { ...@@ -9,6 +9,8 @@ function delay(ms) {
describe('annotator/plugin/pdf', () => { describe('annotator/plugin/pdf', () => {
let container; let container;
let fakeAnnotator; let fakeAnnotator;
let fakePdfAnchoring;
let fakePDFMetadata;
let fakePDFViewerApplication; let fakePDFViewerApplication;
let pdfPlugin; let pdfPlugin;
...@@ -33,7 +35,19 @@ describe('annotator/plugin/pdf', () => { ...@@ -33,7 +35,19 @@ describe('annotator/plugin/pdf', () => {
anchoring: null, anchoring: null,
}; };
fakePdfAnchoring = {
documentHasText: sinon.stub().resolves(true),
};
fakePDFMetadata = {
getMetadata: sinon.stub().resolves({}),
getUri: sinon.stub().resolves('https://example.com/test.pdf'),
};
$imports.$mock({ $imports.$mock({
'./pdf-metadata': () => fakePDFMetadata,
'../anchoring/pdf': fakePdfAnchoring,
// Disable debouncing of updates. // Disable debouncing of updates.
'lodash.debounce': callback => callback, 'lodash.debounce': callback => callback,
}); });
...@@ -73,6 +87,51 @@ describe('annotator/plugin/pdf', () => { ...@@ -73,6 +87,51 @@ describe('annotator/plugin/pdf', () => {
}); });
}); });
/**
 * Look up the "no selectable text" warning banner in the current document.
 *
 * @return {Element|null} - The banner element, or `null` if none is present
 */
function getWarningBanner() {
  const bannerSelector = '.annotator-pdf-warning-banner';
  return document.querySelector(bannerSelector);
}
it('does not show a warning when PDF has selectable text', async () => {
  fakePdfAnchoring.documentHasText.resolves(true);

  pdfPlugin = createPDFPlugin();
  await delay(0); // Wait for text check to complete.

  assert.called(fakePdfAnchoring.documentHasText);
  assert.isNull(getWarningBanner());
});

it('does not show a warning if PDF does not load', async () => {
  fakePDFMetadata.getUri.rejects(new Error('Something went wrong'));

  pdfPlugin = createPDFPlugin();
  await delay(0); // Wait for text check to complete.

  // The text check must be skipped entirely when the document fails to load.
  assert.notCalled(fakePdfAnchoring.documentHasText);
  assert.isNull(getWarningBanner());
});

it('shows a warning when PDF has no selectable text', async () => {
  // The plugin attaches the banner to the `#mainContainer` element.
  const mainContainer = document.createElement('div');
  mainContainer.id = 'mainContainer';
  document.body.appendChild(mainContainer);

  try {
    fakePdfAnchoring.documentHasText.resolves(false);

    pdfPlugin = createPDFPlugin();
    await delay(0); // Wait for text check to complete.

    assert.called(fakePdfAnchoring.documentHasText);
    const banner = getWarningBanner();
    assert.isNotNull(banner);
    assert.isTrue(mainContainer.contains(banner));
    assert.include(
      banner.textContent,
      'This PDF does not contain selectable text'
    );
  } finally {
    // Remove the container (and the banner inside it) even if an assertion
    // fails, so that a failure here cannot leak a banner into later tests.
    mainContainer.remove();
  }
});
context('when the PDF viewer content changes', () => { context('when the PDF viewer content changes', () => {
async function triggerUpdate() { async function triggerUpdate() {
const element = document.createElement('div'); const element = document.createElement('div');
......
// SASS entry point for annotator styling // SASS entry point for annotator styling
@use "sass:meta"; @use "sass:meta";
@use "sass:color" as color;
@use '../variables' as var; @use '../variables' as var;
@use '../mixins/reset'; @use '../mixins/reset';
...@@ -81,6 +83,24 @@ ...@@ -81,6 +83,24 @@
transition: none !important; transition: none !important;
} }
// Banner stretched across the top edge of its positioned container, used to
// warn that a PDF has no selectable text.
.annotator-pdf-warning-banner {
  // The Z-index is necessary to raise the banner above the toolbar at the
  // top of the PDF.js viewer.
  z-index: 10000;

  position: absolute;
  top: 0;
  right: 0;
  left: 0;
  padding: 10px 5px;

  font-size: 12pt;
  font-weight: bold;

  // TODO - Replace highlight color with preferred warning color.
  background-color: var.$color-highlight;
  border-bottom: 3px solid color.adjust(var.$color-highlight, $lightness: -40%);
}
/* /*
Mobile layout Mobile layout
240-479 px 240-479 px
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment