Commit aade0062 authored by Robert Knight

Support new PDF.js fingerprint API

In https://github.com/mozilla/pdf.js/pull/13661 the API for retrieving
PDF fingerprints (aka. File Identifiers) changed. In generic builds of
PDF.js the old API remains available, but not in the non-generic one
that Firefox's built-in viewer uses.

This commit makes Hypothesis use the new API if available or the old API
otherwise. The fingerprint value should be the same in both cases.

Fixes #3673
parent f2512324
......@@ -16,6 +16,11 @@ import { normalizeURI } from '../util/url';
* @prop {string} documentFingerprint - The fingerprint of this PDF. This is
* referred to as the "File Identifier" in the PDF spec. It may be a hash of
* part of the content if the PDF file does not have a File Identifier.
*
* PDFs may have two file identifiers. The first is the "original" identifier
* which is not supposed to change if the file is updated and the second
* one is the "last modified" identifier. This property is the original
* identifier.
*/
/**
......@@ -117,7 +122,7 @@ export class PDFMetadata {
return this._loaded.then(app => {
let uri = getPDFURL(app);
if (!uri) {
uri = fingerprintToURN(app.pdfDocument.fingerprint);
uri = fingerprintToURN(getFingerprint(app));
}
return uri;
});
......@@ -138,8 +143,8 @@ export class PDFMetadata {
contentDispositionFilename,
metadata,
} = await app.pdfDocument.getMetadata();
const documentFingerprint = app.pdfDocument.fingerprint;
const documentFingerprint = getFingerprint(app);
const url = getPDFURL(app);
// Return the title metadata embedded in the PDF if available, otherwise
......@@ -176,8 +181,27 @@ export class PDFMetadata {
}
}
/**
 * Return the fingerprint (File Identifier) of the PDF loaded in `app`.
 *
 * When `app.pdfDocument.fingerprints` is an array, its first entry is
 * returned. Otherwise the value of `app.pdfDocument.fingerprint` is returned.
 *
 * @param {PDFViewerApplication} app
 */
function getFingerprint(app) {
  const doc = app.pdfDocument;
  return Array.isArray(doc.fingerprints)
    ? doc.fingerprints[0]
    : /** @type {string} */ (doc.fingerprint);
}
/**
 * Generate a URI from a PDF fingerprint suitable for storing as the main
 * or associated URI of an annotation.
 *
 * The result is the fingerprint prefixed with `urn:x-pdf:`.
 *
 * @param {string} fingerprint
 * @return {string}
 */
function fingerprintToURN(fingerprint) {
  return `urn:x-pdf:${fingerprint}`;
}
/**
......
......@@ -33,12 +33,20 @@ class FakePDFDocumentProxy {
fingerprint,
info,
metadata = null,
}) {
this.fingerprint = fingerprint;
// Whether to expose fingerprints via the new API (after
// https://github.com/mozilla/pdf.js/pull/13661) or the old one.
newFingerprintAPI = true,
}) {
this._contentDispositionFilename = contentDispositionFilename;
this._info = info;
this._metadata = metadata;
if (newFingerprintAPI) {
this.fingerprints = [fingerprint, null];
} else {
this.fingerprint = fingerprint;
}
}
async getMetadata() {
......@@ -67,10 +75,16 @@ class FakePDFViewerApplication {
* @prop {boolean} domEvents - Whether events are emitted on the DOM
* @prop {boolean} eventBusEvents - Whether the `eventBus` API is enabled
* @prop {boolean} initializedPromise - Whether the `initializedPromise` API is enabled
* @prop {boolean} newFingerprintAPI - Whether to emulate the new fingerprints API
*/
constructor(
url = '',
{ domEvents = false, eventBusEvents = true, initializedPromise = true } = {}
{
domEvents = false,
eventBusEvents = true,
initializedPromise = true,
newFingerprintAPI = true,
} = {}
) {
this.url = url;
this.documentInfo = undefined;
......@@ -78,6 +92,7 @@ class FakePDFViewerApplication {
this.pdfDocument = null;
this.dispatchDOMEvents = domEvents;
this.initialized = false;
this.newFingerprintAPI = newFingerprintAPI;
// Use `EventEmitter` as a fake version of PDF.js's `EventBus` class as the
// API for subscribing to events is the same.
......@@ -118,9 +133,10 @@ class FakePDFViewerApplication {
this.pdfDocument = new FakePDFDocumentProxy({
contentDispositionFilename,
fingerprint,
info,
metadata: metadata ? new FakeMetadata(metadata) : null,
fingerprint,
newFingerprintAPI: this.newFingerprintAPI,
});
if (this.dispatchDOMEvents) {
......@@ -235,8 +251,11 @@ describe('PDFMetadata', () => {
url: 'http://fake.com/',
};
function createPDFMetadata(metadata = testMetadata) {
const fakePDFViewerApplication = new FakePDFViewerApplication();
function createPDFMetadata(metadata = testMetadata, viewerOptions) {
const fakePDFViewerApplication = new FakePDFViewerApplication(
'',
viewerOptions
);
fakePDFViewerApplication.completeInit();
fakePDFViewerApplication.finishLoading(metadata);
return {
......@@ -254,14 +273,19 @@ describe('PDFMetadata', () => {
});
});
[true, false].forEach(newFingerprintAPI => {
it('returns the fingerprint as a URN when the PDF URL is a file:// URL', async () => {
const { pdfMetadata } = createPDFMetadata({
const { pdfMetadata } = createPDFMetadata(
{
url: 'file:///test.pdf',
fingerprint: 'fakeFingerprint',
});
},
{ newFingerprintAPI }
);
const uri = await pdfMetadata.getUri();
assert.equal(uri, 'urn:x-pdf:fakeFingerprint');
});
});
it('resolves relative URLs', async () => {
const { fakePDFViewerApplication, pdfMetadata } = createPDFMetadata({
......@@ -280,11 +304,15 @@ describe('PDFMetadata', () => {
});
describe('#getMetadata', () => {
[true, false].forEach(newFingerprintAPI => {
it('returns the document fingerprint in the `documentFingerprint` property', async () => {
const { pdfMetadata } = createPDFMetadata();
const { pdfMetadata } = createPDFMetadata(testMetadata, {
newFingerprintAPI,
});
const metadata = await pdfMetadata.getMetadata();
assert.equal(metadata.documentFingerprint, testMetadata.fingerprint);
});
});
it('returns the PDF URL in the `links` array', async () => {
const { pdfMetadata } = createPDFMetadata();
......
......@@ -51,7 +51,13 @@
/**
* @typedef PDFDocument
* @prop {string} [fingerprint] - PDF fingerprint in PDF.js before v2.10.377.
* May exist in later versions depending on the PDF.js build.
* @prop {[string, string|null]} [fingerprints] - PDF fingerprints in PDF.js after
* v2.10.377. See https://github.com/mozilla/pdf.js/pull/13661. The first
* entry of this array is the "original" fingerprint and the same as the
* `fingerprint` property in older versions. The second entry is the "modified"
* fingerprint. See "File Identifiers" section in the PDF spec.
* @prop {() => Promise<PDFDocumentMetadata>} getMetadata
*/
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment