Unverified commit 2b5da551, authored by Robert Knight and committed by GitHub

Merge pull request #889 from hypothesis/update-pdfjs-text-api

Update PDF anchoring for changes in PDF.js API
parents 7f0a2338 37e41ea8
......@@ -72,9 +72,10 @@ function getPageTextContent(pageIndex) {
return textContent;
};
// FIXME - `pdfViewer.getPageTextContent` was removed in recent versions of PDF.js.
pageTextCache[pageIndex] = PDFViewerApplication.pdfViewer
.getPageTextContent(pageIndex)
pageTextCache[pageIndex] = getPage(pageIndex).pdfPage
.getTextContent({
normalizeWhitespace: true,
})
.then(joinItems);
return pageTextCache[pageIndex];
......
......@@ -43,6 +43,35 @@ function createPage(content, rendered) {
return pageEl;
}
/**
 * Test double standing in for PDF.js's `PDFPageProxy` class.
 *
 * The real class is defined in the PDF.js repository at
 * https://github.com/mozilla/pdf.js/blob/master/src/display/api.js
 */
class FakePDFPageProxy {
  /**
   * @param {string} pageText - Text of the page, with lines separated by "\n".
   */
  constructor(pageText) {
    this.pageText = pageText;
  }

  /**
   * Return the page text as a list of text items, one item per line.
   *
   * @param {Object} [params]
   * @param {boolean} [params.normalizeWhitespace] - Must be truthy; otherwise
   *   the returned promise rejects.
   * @return {Promise<{items: Array<{str: string}>}>}
   */
  getTextContent(params = {}) {
    if (params.normalizeWhitespace) {
      // How page text is divided into items depends on the PDF and on the
      // PDF.js version - an item may be a single symbol, a word, a phrase or
      // an entire line.
      //
      // This fake emits one item per line, which is what a born-digital PDF
      // typically produces.
      const items = [];
      for (const line of this.pageText.split(/\n/)) {
        items.push({ str: line });
      }
      return Promise.resolve({ items });
    }

    return Promise.reject(
      new Error('Expected `normalizeWhitespace` to be true')
    );
  }
}
/**
* @typedef FakePDFPageViewOptions
* @prop [boolean] rendered - Whether this page is "rendered", as if it were
......@@ -70,6 +99,7 @@ class FakePDFPageView {
this.renderingState = textLayerEl
? RenderingStates.FINISHED
: RenderingStates.INITIAL;
this.pdfPage = new FakePDFPageProxy(text);
}
dispose() {
......@@ -103,19 +133,6 @@ class FakePDFViewer {
return this._pages[index];
}
getPageTextContent(index) {
this._checkBounds(index);
return Promise.resolve({
// The way that the page text is split into items will depend on
// the PDF and the version of PDF.js - individual text items might be
// just symbols, words, phrases or whole lines.
//
// Here we split items by line which matches the typical output for a
// born-digital PDF.
items: this._content[index].split(/\n/).map(line => ({ str: line })),
});
}
/**
* Set the index of the page which is currently visible in the viewport.
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment