Move PDF metadata extraction into a separate service

To make it easier to change the document metadata retrieved for PDFs, this commit moves the code responsible for extracting that metadata into a separate (tested) service. We also remove our reliance on the `PDFViewerApplication.loading` property, which was removed from the bundled version of PDF.js in August 2015. Instead, we simply check whether `PDFViewerApplication.documentFingerprint` is available. If it is not, we listen for the first 'documentload' event before extracting metadata from the `PDFViewerApplication`.

Move PDF metadata extraction into a separate service
To make it easier to change the document metadata retrieved for PDFs, this commit moves the code responsible for extracting that metadata into a separate (tested) service. We also remove our reliance on the `PDFViewerApplication.loading` property, which was removed from the bundled version of PDF.js in August 2015. Instead, we simply check whether `PDFViewerApplication.documentFingerprint` is available. If it is not, we listen for the first 'documentload' event before extracting metadata from the `PDFViewerApplication`.
533b5f4b · Sheetal Umesh Kumar · Nick Stenning · e4f05b17 · 533b5f4b · 533b5f4b
Commit 533b5f4b authored Jun 02, 2016 by Sheetal Umesh Kumar Committed by Nick Stenning Jun 09, 2016
3 changed files
--- a/h/static/scripts/annotator/plugin/pdf-metadata.js
+++ b/h/static/scripts/annotator/plugin/pdf-metadata.js
+'use strict';
+
+/**
+ * This PDFMetadata service extracts metadata about a loading/loaded PDF
+ * document from a PDF.js PDFViewerApplication object.
+ *
+ * This hides from users of this service the need to wait until the PDF document
+ * is loaded before extracting the relevant metadata.
+ *
+ * The constructor stores a promise on `this._loaded` that resolves with `app`.
+ * It resolves immediately when `app.documentFingerprint` is truthy; otherwise
+ * it resolves the first time a 'documentload' event is received on `window`.
+ * Nothing in this constructor rejects that promise.
+ *
+ * @param {Object} app - The viewer application object to read from. Only
+ *   `documentFingerprint` is inspected here.
+ */
+function PDFMetadata(app) {
+  this._loaded = new Promise(function (resolve) {
+    // One-shot handler: it detaches itself from `window` before resolving, so
+    // later 'documentload' events are not observed by this instance.
+    var finish = function () {
+      window.removeEventListener('documentload', finish);
+      resolve(app);
+    };
+
+    // A truthy fingerprint is taken to mean the document is already loaded.
+    if (app.documentFingerprint) {
+      resolve(app);
+    } else {
+      window.addEventListener('documentload', finish);
+    }
+  });
+}
+
+/**
+ * Returns a promise of the URI of the loaded PDF.
+ *
+ * The value is read from `app.url` only after the `_loaded` promise has
+ * resolved, so it reflects whatever `url` holds at that point rather than at
+ * construction time.
+ *
+ * @return {Promise} A promise that resolves to `app.url`.
+ */
+PDFMetadata.prototype.getUri = function () {
+  return this._loaded.then(function (app) {
+    return app.url;
+  });
+};
+
+/**
+ * Returns a promise of a metadata object, containing:
+ *
+ * title(string) - The document title
+ * link(array) - An array of link objects representing URIs for the document
+ * documentFingerprint(string) - The document fingerprint
+ *
+ * The title is chosen in this order of precedence:
+ *
+ * 1. `app.metadata.get('dc:title')`, when `app.metadata` exists, reports that
+ *    it has 'dc:title', and the value is not the literal string 'Untitled'.
+ * 2. `app.documentInfo.Title`, when it is present and truthy.
+ * 3. `document.title` of the hosting page.
+ *
+ * `link` always has two entries: first the 'urn:x-pdf:' URN built from the
+ * fingerprint, then the document URL.
+ *
+ * @return {Promise} A promise that resolves to the metadata object once the
+ *   `_loaded` promise has resolved.
+ */
+PDFMetadata.prototype.getMetadata = function () {
+  return this._loaded.then(function (app) {
+    // Fallback used when neither PDF metadata source yields a title.
+    var title = document.title;
+
+    // 'Untitled' is treated as "no usable dc:title", so the next source is
+    // consulted instead.
+    if (app.metadata && app.metadata.has('dc:title') && app.metadata.get('dc:title') !== 'Untitled') {
+      title = app.metadata.get('dc:title');
+    } else if (app.documentInfo && app.documentInfo.Title) {
+      title = app.documentInfo.Title;
+    }
+
+    // Order matters to consumers that index into the array: URN first, then
+    // the URL.
+    var link = [
+      {href: fingerprintToURN(app.documentFingerprint)},
+      {href: app.url}
+    ];
+
+    return {
+      title: title,
+      link: link,
+      documentFingerprint: app.documentFingerprint,
+    };
+  });
+};
+
+/**
+ * Builds the URN used to identify a PDF by its fingerprint.
+ *
+ * This is an experimental URN, as per
+ * http://tools.ietf.org/html/rfc3406#section-3.0
+ *
+ * The argument is coerced with `String()`, so a missing fingerprint produces
+ * 'urn:x-pdf:undefined' rather than throwing.
+ *
+ * @param {*} fingerprint - The document fingerprint.
+ * @return {string} 'urn:x-pdf:' followed by the stringified fingerprint.
+ */
+function fingerprintToURN(fingerprint) {
+  return 'urn:x-pdf:' + String(fingerprint);
+}
+
+module.exports = PDFMetadata;
--- a/h/static/scripts/annotator/plugin/pdf.coffee
+++ b/h/static/scripts/annotator/plugin/pdf.coffee
@@ -9,18 +9,12 @@ module.exports = class PDF extends Annotator.Plugin

  pluginInit: ->
    @annotator.anchoring = require('../anchoring/pdf')
+    PDFMetadata = require('./pdf-metadata')

    @pdfViewer = PDFViewerApplication.pdfViewer
    @pdfViewer.viewer.classList.add('has-transparent-text-layer')

-    if PDFViewerApplication.loading
-      @documentLoaded = new Promise (resolve) ->
-        finish = (evt) ->
-          window.removeEventListener('documentload', finish)
-          resolve()
-        window.addEventListener('documentload', finish)
-    else
-      @documentLoaded = Promise.resolve()
+    @pdfMetadata = new PDFMetadata(PDFViewerApplication)

    @observer = new MutationObserver((mutations) => this._update())
    @observer.observe(@pdfViewer.viewer, {
@@ -35,29 +29,10 @@ module.exports = class PDF extends Annotator.Plugin
    @observer.disconnect()

  uri: ->
-    @documentLoaded.then ->
-      PDFViewerApplication.url
+    @pdfMetadata.getUri()

  getMetadata: ->
-    @documentLoaded.then ->
-      info = PDFViewerApplication.documentInfo
-      metadata = PDFViewerApplication.metadata
-
-      # Taken from PDFViewerApplication#load
-      if metadata?.has('dc:title') and metadata.get('dc:title') isnt 'Untitled'
-        title = metadata.get('dc:title')
-      else if info?['Title']
-        title = info['Title']
-      else
-        title = document.title
-
-      # This is an experimental URN,
-      # as per http://tools.ietf.org/html/rfc3406#section-3.0
-      urn = "urn:x-pdf:" + PDFViewerApplication.documentFingerprint
-      link = [{href: urn}, {href: PDFViewerApplication.url}]
-      documentFingerprint = PDFViewerApplication.documentFingerprint
-
-      return {title, link, documentFingerprint}
+    @pdfMetadata.getMetadata()

  # This method (re-)anchors annotations when pages are rendered and destroyed.
  _update: ->

--- a/h/static/scripts/annotator/plugin/test/pdf-metadata-test.js
+++ b/h/static/scripts/annotator/plugin/test/pdf-metadata-test.js
+'use strict';
+
+var PDFMetadata = require('../pdf-metadata');
+
+// Tests for the PDFMetadata service. `assert` and `sinon` are not required in
+// this file; presumably the test harness provides them as globals - confirm.
+describe('pdf-metadata', function () {
+  describe('get pdf metadata when documentload event fires', function () {
+    it('should get pdf urn', function () {
+      // No documentFingerprint, so the constructor waits for 'documentload'.
+      var fakeApp = {};
+      var pdfMetadata = new PDFMetadata(fakeApp);
+
+      // The URL is set after construction but before the event is dispatched,
+      // which checks that `url` is read only once loading has completed.
+      var event = document.createEvent('Event');
+      event.initEvent('documentload', false, false);
+      fakeApp.url = 'http://example.com/foo.pdf';
+      window.dispatchEvent(event);
+
+      return pdfMetadata.getUri().then(function (uri) {
+        assert.equal('http://example.com/foo.pdf', uri);
+      });
+    });
+  });
+
+  describe('get pdf metadata when documentFingerprint is set', function () {
+    var pdfMetadata;
+    // NOTE(review): this fixture is created once and shared by every test in
+    // this describe block. The `has`/`get` stubs assigned inside the tests
+    // below are attached to it and are not reset in between.
+    // NOTE(review): the nested `metadata.metadata` object is never read by
+    // PDFMetadata, which only calls `metadata.has()` and `metadata.get()`.
+    var fakePDFViewerApplication = {
+      documentFingerprint: 'fakeFingerprint',
+      documentInfo: {
+        Title: 'fakeTitle',
+      },
+      metadata: {
+        metadata: {
+          'dc:title': 'fakeTitle',
+        }
+      },
+      url: 'fakeUrl',
+    };
+
+    beforeEach(function () {
+      // The fingerprint is truthy, so the load promise resolves immediately.
+      pdfMetadata = new PDFMetadata(fakePDFViewerApplication);
+    });
+
+    describe('#getUri', function () {
+      it('returns the PDF URL as its URI', function () {
+        return pdfMetadata.getUri().then(function (uri) {
+          assert.equal('fakeUrl', uri);
+        });
+      });
+    });
+
+    describe('#getMetadata', function () {
+      it('should get a metadataobject with dc:title', function () {
+        var expectedMetadata = {
+          title: 'dcTitle',
+          link: [{href: 'urn:x-pdf:' + fakePDFViewerApplication.documentFingerprint},
+            {href: fakePDFViewerApplication.url}],
+          documentFingerprint: fakePDFViewerApplication.documentFingerprint,
+        };
+
+        // Make the 'dc:title' branch win: `has` reports the key as present
+        // and `get` returns a value other than 'Untitled'.
+        fakePDFViewerApplication.metadata.has = sinon.stub().returns(true);
+        fakePDFViewerApplication.metadata.get = sinon.stub().returns('dcTitle');
+
+        return pdfMetadata.getMetadata().then(function (actualMetadata) {
+          assert.deepEqual(expectedMetadata, actualMetadata);
+        });
+      });
+
+      it('should get a metadataobject with documentInfo.Title', function () {
+        var expectedMetadata = {
+          title: fakePDFViewerApplication.documentInfo.Title,
+          link: [{href: 'urn:x-pdf:' + fakePDFViewerApplication.documentFingerprint},
+            {href: fakePDFViewerApplication.url}],
+          documentFingerprint: fakePDFViewerApplication.documentFingerprint,
+        };
+
+        // With `has` reporting false, the 'dc:title' branch is skipped and the
+        // title falls back to documentInfo.Title.
+        fakePDFViewerApplication.metadata.has = sinon.stub().returns(false);
+
+        return pdfMetadata.getMetadata().then(function (actualMetadata) {
+          assert.deepEqual(expectedMetadata, actualMetadata);
+        });
+      });
+    });
+  });
+});