Commit 2b18e89a authored by Robert Knight's avatar Robert Knight

Capture page number selectors in PDFs

Capture the page number and label for PDF selections using the same
`PageSelector` type that we use for selections in PDF-based VitalSource books.
This has various uses, but one of the immediate ones is that it will allow us to
do some testing of the VS page range behaviors with PDFs, which are more
convenient to test with.

Related to:

 - https://github.com/hypothesis/client/issues/5986.
 - https://github.com/hypothesis/client/issues/3720 (this proposes a more
   PDF-specific selector that would also capture the page region as a rect).
parent 133ef3e7
/* global PDFViewerApplication */ /* global PDFViewerApplication */
import { warnOnce } from '../../shared/warn-once'; import { warnOnce } from '../../shared/warn-once';
import type { import type {
PageSelector,
TextPositionSelector, TextPositionSelector,
TextQuoteSelector, TextQuoteSelector,
Selector, Selector,
...@@ -646,8 +647,11 @@ export async function describe( ...@@ -646,8 +647,11 @@ export async function describe(
).relativeTo(textLayer); ).relativeTo(textLayer);
const startPageIndex = getSiblingIndex(textLayer.parentNode!); const startPageIndex = getSiblingIndex(textLayer.parentNode!);
const pageNumber = startPageIndex + 1;
const pageOffset = await getPageOffset(startPageIndex); const pageOffset = await getPageOffset(startPageIndex);
const pageView = await getPageView(startPageIndex);
const position = { const position = {
type: 'TextPositionSelector', type: 'TextPositionSelector',
start: pageOffset + startPos.offset, start: pageOffset + startPos.offset,
...@@ -656,7 +660,13 @@ export async function describe( ...@@ -656,7 +660,13 @@ export async function describe(
const quote = TextQuoteAnchor.fromRange(root, textRange).toSelector(); const quote = TextQuoteAnchor.fromRange(root, textRange).toSelector();
return [position, quote]; const pageSelector: PageSelector = {
type: 'PageSelector',
index: startPageIndex,
label: pageView.pageLabel ?? pageNumber.toString(),
};
return [position, quote, pageSelector];
} }
/** /**
......
...@@ -135,6 +135,7 @@ class FakePDFPageView { ...@@ -135,6 +135,7 @@ class FakePDFPageView {
? RenderingStates.FINISHED ? RenderingStates.FINISHED
: RenderingStates.INITIAL; : RenderingStates.INITIAL;
this.pdfPage = new FakePDFPageProxy(text, config); this.pdfPage = new FakePDFPageProxy(text, config);
this.pageLabel = null;
} }
dispose() { dispose() {
......
...@@ -91,15 +91,19 @@ describe('annotator/anchoring/pdf', () => { ...@@ -91,15 +91,19 @@ describe('annotator/anchoring/pdf', () => {
}); });
describe('describe', () => { describe('describe', () => {
it('returns position and quote selectors', () => { it('returns position and quote selectors', async () => {
viewer.pdfViewer.setCurrentPage(2); viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'Netherfield Park'); const range = findText(container, 'Netherfield Park');
return pdfAnchoring.describe(container, range).then(selectors => {
const types = selectors.map(s => { const selectors = await pdfAnchoring.describe(container, range);
return s.type; selectors.sort((a, b) => a.type.localeCompare(b.type));
});
assert.deepEqual(types, ['TextPositionSelector', 'TextQuoteSelector']); const types = selectors.map(s => s.type);
}); assert.deepEqual(types, [
'PageSelector',
'TextPositionSelector',
'TextQuoteSelector',
]);
}); });
it('returns a position selector with correct start/end offsets', async () => { it('returns a position selector with correct start/end offsets', async () => {
...@@ -109,17 +113,18 @@ describe('annotator/anchoring/pdf', () => { ...@@ -109,17 +113,18 @@ describe('annotator/anchoring/pdf', () => {
const contentStr = fixtures.pdfPages.join(''); const contentStr = fixtures.pdfPages.join('');
const expectedPos = contentStr.replace(/\n/g, '').lastIndexOf(quote); const expectedPos = contentStr.replace(/\n/g, '').lastIndexOf(quote);
const [positionSelector] = await pdfAnchoring.describe(container, range); const selectors = await pdfAnchoring.describe(container, range);
const position = selectors.find(s => s.type === 'TextPositionSelector');
assert.equal(positionSelector.start, expectedPos); assert.equal(position.start, expectedPos);
assert.equal(positionSelector.end, expectedPos + quote.length); assert.equal(position.end, expectedPos + quote.length);
}); });
it('returns a quote selector with the correct quote', () => { it('returns a quote selector with the correct quote', () => {
viewer.pdfViewer.setCurrentPage(2); viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'Netherfield Park'); const range = findText(container, 'Netherfield Park');
return pdfAnchoring.describe(container, range).then(selectors => { return pdfAnchoring.describe(container, range).then(selectors => {
const quote = selectors[1]; const quote = selectors.find(s => s.type === 'TextQuoteSelector');
assert.deepEqual(quote, { assert.deepEqual(quote, {
type: 'TextQuoteSelector', type: 'TextQuoteSelector',
...@@ -130,6 +135,35 @@ describe('annotator/anchoring/pdf', () => { ...@@ -130,6 +135,35 @@ describe('annotator/anchoring/pdf', () => {
}); });
}); });
it('returns a page selector with the page number as the label', async () => {
viewer.pdfViewer.setCurrentPage(2);
const range = findText(container, 'Netherfield Park');
const selectors = await pdfAnchoring.describe(container, range);
const page = selectors.find(s => s.type === 'PageSelector');
assert.deepEqual(page, {
type: 'PageSelector',
index: 2,
label: '3',
});
});
it('returns a page selector with the custom page label as the label', async () => {
viewer.pdfViewer.setCurrentPage(2);
viewer.pdfViewer.getPageView(2).pageLabel = 'iv';
const range = findText(container, 'Netherfield Park');
const selectors = await pdfAnchoring.describe(container, range);
const page = selectors.find(s => s.type === 'PageSelector');
assert.deepEqual(page, {
type: 'PageSelector',
index: 2,
label: 'iv',
});
});
it('returns selector when range starts at end of text node with no next siblings', () => { it('returns selector when range starts at end of text node with no next siblings', () => {
// this problem is referenced in client issue #122 // this problem is referenced in client issue #122
// But what is happening is the startContainer is referencing a text // But what is happening is the startContainer is referencing a text
......
...@@ -91,6 +91,14 @@ export type PDFPageProxy = { ...@@ -91,6 +91,14 @@ export type PDFPageProxy = {
export type PDFPageView = { export type PDFPageView = {
/** Container element for the PDF page. */ /** Container element for the PDF page. */
div: HTMLElement; div: HTMLElement;
/**
* The page label that is displayed in the current page input field.
*
* If null, PDF.js will display a 1-based page number instead.
*/
pageLabel: string | null;
pdfPage: PDFPageProxy; pdfPage: PDFPageProxy;
textLayer: TextLayer | null; textLayer: TextLayer | null;
/** See `RenderingStates` enum in src/annotator/anchoring/pdf.js */ /** See `RenderingStates` enum in src/annotator/anchoring/pdf.js */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment