Unverified commit 2b5da551, authored by Robert Knight, committed by GitHub

Merge pull request #889 from hypothesis/update-pdfjs-text-api

Update PDF anchoring for changes in PDF.js API
parents 7f0a2338 37e41ea8
...@@ -72,9 +72,10 @@ function getPageTextContent(pageIndex) { ...@@ -72,9 +72,10 @@ function getPageTextContent(pageIndex) {
return textContent; return textContent;
}; };
// FIXME - `pdfViewer.getPageTextContent` was removed in recent versions of PDF.js. pageTextCache[pageIndex] = getPage(pageIndex).pdfPage
pageTextCache[pageIndex] = PDFViewerApplication.pdfViewer .getTextContent({
.getPageTextContent(pageIndex) normalizeWhitespace: true,
})
.then(joinItems); .then(joinItems);
return pageTextCache[pageIndex]; return pageTextCache[pageIndex];
......
...@@ -43,6 +43,35 @@ function createPage(content, rendered) { ...@@ -43,6 +43,35 @@ function createPage(content, rendered) {
return pageEl; return pageEl;
} }
/**
 * Test double standing in for PDF.js's `PDFPageProxy`.
 *
 * Only the text-extraction entry point is provided. The real class lives at
 * https://github.com/mozilla/pdf.js/blob/master/src/display/api.js
 */
class FakePDFPageProxy {
  /**
   * @param {string} pageText - Full text of the page; lines are separated by `\n`.
   */
  constructor(pageText) {
    this.pageText = pageText;
  }

  /**
   * Produce the page's text content as a list of text items.
   *
   * @param {Object} [params]
   * @param {boolean} [params.normalizeWhitespace] - Must be truthy, otherwise
   *   the returned promise rejects.
   * @return {Promise<{items: Array<{str: string}>}>}
   */
  getTextContent(params = {}) {
    if (params.normalizeWhitespace) {
      // How a page's text is divided into items varies with the PDF and the
      // PDF.js version: an item may be a single symbol, a word, a phrase or
      // an entire line.
      //
      // This fake emits one item per line, which is what a born-digital PDF
      // typically yields.
      const items = [];
      for (const line of this.pageText.split(/\n/)) {
        items.push({ str: line });
      }
      return Promise.resolve({ items });
    }

    return Promise.reject(
      new Error('Expected `normalizeWhitespace` to be true')
    );
  }
}
/** /**
* @typedef FakePDFPageViewOptions * @typedef FakePDFPageViewOptions
* @prop [boolean] rendered - Whether this page is "rendered", as if it were * @prop [boolean] rendered - Whether this page is "rendered", as if it were
...@@ -70,6 +99,7 @@ class FakePDFPageView { ...@@ -70,6 +99,7 @@ class FakePDFPageView {
this.renderingState = textLayerEl this.renderingState = textLayerEl
? RenderingStates.FINISHED ? RenderingStates.FINISHED
: RenderingStates.INITIAL; : RenderingStates.INITIAL;
this.pdfPage = new FakePDFPageProxy(text);
} }
dispose() { dispose() {
...@@ -103,19 +133,6 @@ class FakePDFViewer { ...@@ -103,19 +133,6 @@ class FakePDFViewer {
return this._pages[index]; return this._pages[index];
} }
getPageTextContent(index) {
this._checkBounds(index);
return Promise.resolve({
// The way that the page text is split into items will depend on
// the PDF and the version of PDF.js - individual text items might be
// just symbols, words, phrases or whole lines.
//
// Here we split items by line which matches the typical output for a
// born-digital PDF.
items: this._content[index].split(/\n/).map(line => ({ str: line })),
});
}
/** /**
* Set the index of the page which is currently visible in the viewport. * Set the index of the page which is currently visible in the viewport.
* *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment