Merge pull request #734 from hypothesis/decaf-document-meta

Convert annotator/plugin/document.coffee to JS

Merge pull request #734 from hypothesis/decaf-document-meta
Convert annotator/plugin/document.coffee to JS
72c87787 · Robert Knight · GitHub · 20d5471e · 56f051cd · 20d5471e
Unverified Commit 72c87787 authored May 28, 2018 by Robert Knight Committed by GitHub May 28, 2018
4 changed files
--- a/src/annotator/plugin/document.coffee
+++ b/src/annotator/plugin/document.coffee
-baseURI = require('document-base-uri')
-
-Plugin = require('../plugin')
-{ normalizeURI } = require('../util/url')
-
-###
-** Adapted from:
-** https://github.com/openannotation/annotator/blob/v1.2.x/src/plugin/document.coffee
-**
-** Annotator v1.2.10
-** https://github.com/openannotation/annotator
-**
-** Copyright 2015, the Annotator project contributors.
-** Dual licensed under the MIT and GPLv3 licenses.
-** https://github.com/openannotation/annotator/blob/master/LICENSE
-###
-
-module.exports = class Document extends Plugin
-
-  events:
-    'beforeAnnotationCreated': 'beforeAnnotationCreated'
-
-  pluginInit: ->
-    # Test seams.
-    @baseURI = @options.baseURI or baseURI
-    @document = @options.document or document
-
-    this.getDocumentMetadata()
-
-  # Returns the primary URI for the document being annotated
-  uri: =>
-    uri = decodeURIComponent(this._getDocumentHref())
-    for link in @metadata.link
-      if link.rel == "canonical"
-        uri = link.href
-    return uri
-
-  # Returns all uris for the document being annotated
-  uris: =>
-    uniqueUrls = {}
-    for link in @metadata.link
-      uniqueUrls[link.href] = true if link.href
-    return (href for href of uniqueUrls)
-
-  beforeAnnotationCreated: (annotation) =>
-    annotation.document = @metadata
-
-  getDocumentMetadata: =>
-    @metadata = {}
-
-    # first look for some common metadata types
-    # TODO: look for microdata/rdfa?
-    this._getHighwire()
-    this._getDublinCore()
-    this._getFacebook()
-    this._getEprints()
-    this._getPrism()
-    this._getTwitter()
-    this._getFavicon()
-
-    # extract out/normalize some things
-    this._getTitle()
-    this._getLinks()
-
-    return @metadata
-
-  _getHighwire: =>
-    return @metadata.highwire = this._getMetaTags("citation", "name", "_")
-
-  _getFacebook: =>
-    return @metadata.facebook = this._getMetaTags("og", "property", ":")
-
-  _getTwitter: =>
-    return @metadata.twitter = this._getMetaTags("twitter", "name", ":")
-
-  _getDublinCore: =>
-    return @metadata.dc = this._getMetaTags("dc", "name", ".")
-
-  _getPrism: =>
-    return @metadata.prism = this._getMetaTags("prism", "name", ".")
-
-  _getEprints: =>
-    return @metadata.eprints = this._getMetaTags("eprints", "name", ".")
-
-  _getMetaTags: (prefix, attribute, delimiter) =>
-    tags = {}
-    for meta in @document.querySelectorAll('meta')
-      name = meta.getAttribute(attribute)
-      content = meta.content
-      if name
-        match = name.match(RegExp("^#{prefix}#{delimiter}(.+)$", "i"))
-        if match
-          n = match[1]
-          if tags[n]
-            tags[n].push(content)
-          else
-            tags[n] = [content]
-    return tags
-
-  _getTitle: =>
-    if @metadata.highwire.title
-      @metadata.title = @metadata.highwire.title[0]
-    else if @metadata.eprints.title
-      @metadata.title = @metadata.eprints.title[0]
-    else if @metadata.prism.title
-      @metadata.title = @metadata.prism.title[0]
-    else if @metadata.facebook.title
-      @metadata.title = @metadata.facebook.title[0]
-    else if @metadata.twitter.title
-      @metadata.title = @metadata.twitter.title[0]
-    else if @metadata.dc.title
-      @metadata.title = @metadata.dc.title[0]
-    else
-      @metadata.title = @document.title
-
-  _getLinks: =>
-    # we know our current location is a link for the document
-    @metadata.link = [href: this._getDocumentHref()]
-
-    # look for some relevant link relations
-    for link in @document.querySelectorAll('link')
-      href = this._absoluteUrl(link.href) # get absolute url
-      rel = link.rel
-      type = link.type
-      lang = link.hreflang
-
-      if rel not in ["alternate", "canonical", "bookmark", "shortlink"] then continue
-
-      if rel is 'alternate'
-        # Ignore feeds resources
-        if type and type.match /^application\/(rss|atom)\+xml/ then continue
-        # Ignore alternate languages
-        if lang then continue
-
-      @metadata.link.push(href: href, rel: rel, type: type)
-
-    # look for links in scholar metadata
-    for name, values of @metadata.highwire
-
-      if name == "pdf_url"
-        for url in values
-          @metadata.link.push
-            href: this._absoluteUrl(url)
-            type: "application/pdf"
-
-      # kind of a hack to express DOI identifiers as links but it's a
-      # convenient place to look them up later, and somewhat sane since
-      # they don't have a type
-      if name == "doi"
-        for doi in values
-          if doi[0..3] != "doi:"
-            doi = "doi:" + doi
-          @metadata.link.push(href: doi)
-
-    # look for links in dublincore data
-    for name, values of @metadata.dc
-      if name == "identifier"
-        for id in values
-          if id[0..3] == "doi:"
-            @metadata.link.push(href: id)
-
-    # look for a link to identify the resource in dublincore metadata
-    dcRelationValues = @metadata.dc['relation.ispartof']
-    dcIdentifierValues = @metadata.dc['identifier']
-    if dcRelationValues && dcIdentifierValues
-      dcUrnRelationComponent =
-        dcRelationValues[dcRelationValues.length - 1]
-      dcUrnIdentifierComponent =
-        dcIdentifierValues[dcIdentifierValues.length - 1]
-      dcUrn = 'urn:x-dc:' +
-        encodeURIComponent(dcUrnRelationComponent) + '/' +
-        encodeURIComponent(dcUrnIdentifierComponent)
-      @metadata.link.push(href: dcUrn)
-      # set this as the documentFingerprint as a hint to include this in search queries
-      @metadata.documentFingerprint = dcUrn
-
-  _getFavicon: =>
-    for link in @document.querySelectorAll('link')
-      if link.rel in ["shortcut icon", "icon"]
-        @metadata["favicon"] = this._absoluteUrl(link.href)
-
-  # Hack to get a absolute url from a possibly relative one
-  _absoluteUrl: (url) ->
-    normalizeURI(url, @baseURI)
-
-  # Get the true URI record when it's masked via a different protocol.
-  # This happens when an href is set with a uri using the 'blob:' protocol
-  # but the document can set a different uri through a <base> tag.
-  _getDocumentHref: ->
-    href = @document.location.href
-    allowedSchemes = ['http:', 'https:', 'file:']
-
-    # Use the current document location if it has a recognized scheme.
-    if new URL(href).protocol in allowedSchemes
-      return href
-
-    # Otherwise, try using the location specified by the <base> element.
-    if @baseURI and (new URL(@baseURI).protocol in allowedSchemes)
-      return @baseURI
-
-    # Fall back to returning the document URI, even though the scheme is not
-    # in the allowed list.
-    return href
--- a/src/annotator/plugin/document.js
+++ b/src/annotator/plugin/document.js
+'use strict';
+
+/*
+** Adapted from:
+** https://github.com/openannotation/annotator/blob/v1.2.x/src/plugin/document.coffee
+**
+** Annotator v1.2.10
+** https://github.com/openannotation/annotator
+**
+** Copyright 2015, the Annotator project contributors.
+** Dual licensed under the MIT and GPLv3 licenses.
+** https://github.com/openannotation/annotator/blob/master/LICENSE
+*/
+
+const baseURI = require('document-base-uri');
+
+const Plugin = require('../plugin');
+const { normalizeURI } = require('../util/url');
+
+/**
+ * DocumentMeta reads metadata/links from the current HTML document and
+ * populates the `document` property of new annotations.
+ */
+class DocumentMeta extends Plugin {
+  constructor(element, options) {
+    super(element, options);
+
+    this.events = {
+      'beforeAnnotationCreated': 'beforeAnnotationCreated',
+    };
+  }
+
+  pluginInit() {
+    // Test seams.
+    this.baseURI = this.options.baseURI || baseURI;
+    this.document = this.options.document || document;
+
+    this.getDocumentMetadata();
+  }
+
+  /**
+   * Returns the primary URI for the document being annotated
+   *
+   * @return {string}
+   */
+  uri() {
+    let uri = decodeURIComponent(this._getDocumentHref());
+    for (let link of this.metadata.link) {
+      if (link.rel === 'canonical') {
+        uri = link.href;
+      }
+    }
+    return uri;
+  }
+
+  /**
+   * Returns all uris for the document being annotated
+   *
+   * @return {string[]}
+   */
+  uris() {
+    const uniqueUrls = {};
+    for (let link of this.metadata.link) {
+      if (link.href) { uniqueUrls[link.href] = true; }
+    }
+    return Object.keys(uniqueUrls);
+  }
+
+  /**
+   * Hook that augments new annotations with metadata about the document they
+   * came from.
+   */
+  beforeAnnotationCreated(annotation) {
+    annotation.document = this.metadata;
+  }
+
+  /**
+   * Return metadata for the current page.
+   */
+  getDocumentMetadata() {
+    this.metadata = {};
+
+    // first look for some common metadata types
+    // TODO: look for microdata/rdfa?
+    this._getHighwire();
+    this._getDublinCore();
+    this._getFacebook();
+    this._getEprints();
+    this._getPrism();
+    this._getTwitter();
+    this._getFavicon();
+
+    // extract out/normalize some things
+    this._getTitle();
+    this._getLinks();
+
+    return this.metadata;
+  }
+
+  _getHighwire() {
+    this.metadata.highwire = this._getMetaTags('citation', 'name', '_');
+  }
+
+  _getFacebook() {
+    this.metadata.facebook = this._getMetaTags('og', 'property', ':');
+  }
+
+  _getTwitter() {
+    this.metadata.twitter = this._getMetaTags('twitter', 'name', ':');
+  }
+
+  _getDublinCore() {
+    this.metadata.dc = this._getMetaTags('dc', 'name', '.');
+  }
+
+  _getPrism() {
+    this.metadata.prism = this._getMetaTags('prism', 'name', '.');
+  }
+
+  _getEprints() {
+    this.metadata.eprints = this._getMetaTags('eprints', 'name', '.');
+  }
+
+  _getMetaTags(prefix, attribute, delimiter) {
+    const tags = {};
+    for (let meta of Array.from(this.document.querySelectorAll('meta'))) {
+      const name = meta.getAttribute(attribute);
+      const { content } = meta;
+      if (name) {
+        const match = name.match(RegExp(`^${prefix}${delimiter}(.+)$`, 'i'));
+        if (match) {
+          const n = match[1];
+          if (tags[n]) {
+            tags[n].push(content);
+          } else {
+            tags[n] = [content];
+          }
+        }
+      }
+    }
+    return tags;
+  }
+
+  _getTitle() {
+    if (this.metadata.highwire.title) {
+      this.metadata.title = this.metadata.highwire.title[0];
+    } else if (this.metadata.eprints.title) {
+      this.metadata.title = this.metadata.eprints.title[0];
+    } else if (this.metadata.prism.title) {
+      this.metadata.title = this.metadata.prism.title[0];
+    } else if (this.metadata.facebook.title) {
+      this.metadata.title = this.metadata.facebook.title[0];
+    } else if (this.metadata.twitter.title) {
+      this.metadata.title = this.metadata.twitter.title[0];
+    } else if (this.metadata.dc.title) {
+      this.metadata.title = this.metadata.dc.title[0];
+    } else {
+      this.metadata.title = this.document.title;
+    }
+  }
+
+  _getLinks() {
+    // we know our current location is a link for the document
+    let href;
+    let type;
+    let values;
+    this.metadata.link = [{href: this._getDocumentHref()}];
+
+    // look for some relevant link relations
+    for (let link of Array.from(this.document.querySelectorAll('link'))) {
+      href = this._absoluteUrl(link.href); // get absolute url
+      const { rel } = link;
+      ({ type } = link);
+      const lang = link.hreflang;
+
+      if (!['alternate', 'canonical', 'bookmark', 'shortlink'].includes(rel)) { continue; }
+
+      if (rel === 'alternate') {
+        // Ignore feeds resources
+        if (type && type.match(/^application\/(rss|atom)\+xml/)) { continue; }
+        // Ignore alternate languages
+        if (lang) { continue; }
+      }
+
+      this.metadata.link.push({href, rel, type});
+    }
+
+    // look for links in scholar metadata
+    for (let name of Object.keys(this.metadata.highwire)) {
+      values = this.metadata.highwire[name];
+      if (name === 'pdf_url') {
+        for (let url of values) {
+          this.metadata.link.push({
+            href: this._absoluteUrl(url),
+            type: 'application/pdf',
+          });
+        }
+      }
+
+      // kind of a hack to express DOI identifiers as links but it's a
+      // convenient place to look them up later, and somewhat sane since
+      // they don't have a type
+      if (name === 'doi') {
+        for (let doi of values) {
+          if (doi.slice(0, 4) !== 'doi:') {
+            doi = `doi:${doi}`;
+          }
+          this.metadata.link.push({href: doi});
+        }
+      }
+    }
+
+    // look for links in dublincore data
+    for (let name of Object.keys(this.metadata.dc)) {
+      values = this.metadata.dc[name];
+      if (name === 'identifier') {
+        for (let id of values) {
+          if (id.slice(0, 4) === 'doi:') {
+            this.metadata.link.push({href: id});
+          }
+        }
+      }
+    }
+
+    // look for a link to identify the resource in dublincore metadata
+    const dcRelationValues = this.metadata.dc['relation.ispartof'];
+    const dcIdentifierValues = this.metadata.dc.identifier;
+    if (dcRelationValues && dcIdentifierValues) {
+      const dcUrnRelationComponent =
+        dcRelationValues[dcRelationValues.length - 1];
+      const dcUrnIdentifierComponent =
+        dcIdentifierValues[dcIdentifierValues.length - 1];
+      const dcUrn = 'urn:x-dc:' +
+        encodeURIComponent(dcUrnRelationComponent) + '/' +
+        encodeURIComponent(dcUrnIdentifierComponent);
+      this.metadata.link.push({href: dcUrn});
+      // set this as the documentFingerprint as a hint to include this in search queries
+      this.metadata.documentFingerprint = dcUrn;
+    }
+  }
+
+  _getFavicon() {
+    for (let link of Array.from(this.document.querySelectorAll('link'))) {
+      if (['shortcut icon', 'icon'].includes(link.rel)) {
+        this.metadata.favicon = this._absoluteUrl(link.href);
+      }
+    }
+  }
+
+  // Hack to get a absolute url from a possibly relative one
+  _absoluteUrl(url) {
+    return normalizeURI(url, this.baseURI);
+  }
+
+  // Get the true URI record when it's masked via a different protocol.
+  // This happens when an href is set with a uri using the 'blob:' protocol
+  // but the document can set a different uri through a <base> tag.
+  _getDocumentHref() {
+    const { href } = this.document.location;
+    const allowedSchemes = ['http:', 'https:', 'file:'];
+
+    // Use the current document location if it has a recognized scheme.
+    const scheme = new URL(href).protocol;
+    if (allowedSchemes.includes(scheme)) {
+      return href;
+    }
+
+    // Otherwise, try using the location specified by the <base> element.
+    if (this.baseURI && allowedSchemes.includes(new URL(this.baseURI).protocol)) {
+      return this.baseURI;
+    }
+
+    // Fall back to returning the document URI, even though the scheme is not
+    // in the allowed list.
+    return href;
+  }
+}
+
+module.exports = DocumentMeta;
--- a/src/annotator/plugin/test/document-test.coffee
+++ b/src/annotator/plugin/test/document-test.coffee
-$ = require('jquery')
-Document = require('../document')
-
-###
-** Adapted from:
-** https://github.com/openannotation/annotator/blob/v1.2.x/test/spec/plugin/document_spec.coffee
-**
-** Annotator v1.2.10
-** https://github.com/openannotation/annotator
-**
-** Copyright 2015, the Annotator project contributors.
-** Dual licensed under the MIT and GPLv3 licenses.
-** https://github.com/openannotation/annotator/blob/master/LICENSE
-###
-
-describe 'Document', ->
-  testDocument = null
-
-  beforeEach ->
-    testDocument = new Document($('<div></div>')[0], {})
-    testDocument.pluginInit()
-
-  afterEach ->
-    $(document).unbind()
-
-  describe 'annotation should have some metadata', ->
-    # Add some metadata to the page
-    head = $("head")
-    head.append('<link rel="alternate" href="foo.pdf" type="application/pdf"></link>')
-    head.append('<link rel="alternate" href="foo.doc" type="application/msword"></link>')
-    head.append('<link rel="bookmark" href="http://example.com/bookmark"></link>')
-    head.append('<link rel="shortlink" href="http://example.com/bookmark/short"></link>')
-    head.append('<link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>')
-    head.append('<meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">')
-    head.append('<meta name="citation_title" content="Foo">')
-    head.append('<meta name="citation_pdf_url" content="foo.pdf">')
-    head.append('<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">')
-    head.append('<meta name="dc:identifier" content="foobar-abcxyz">')
-    head.append('<meta name="dc.relation.ispartof" content="isbn:123456789">')
-    head.append('<meta name="DC.type" content="Article">')
-    head.append('<meta property="og:url" content="http://example.com">')
-    head.append('<meta name="twitter:site" content="@okfn">')
-    head.append('<link rel="icon" href="http://example.com/images/icon.ico"></link>')
-    head.append('<meta name="eprints.title" content="Computer Lib / Dream Machines">')
-    head.append('<meta name="prism.title" content="Literary Machines">')
-    head.append('<link rel="alternate" href="feed" type="application/rss+xml"></link>')
-    head.append('<link rel="canonical" href="http://example.com/canonical"></link>')
-
-    metadata = null
-
-    beforeEach ->
-      metadata = testDocument.metadata
-
-    it 'should have metadata', ->
-      assert.ok(metadata)
-
-    it 'should have a title, derived from highwire metadata if possible', ->
-      assert.equal(metadata.title, 'Foo')
-
-    it 'should have links with absolute hrefs and types', ->
-      assert.ok(metadata.link)
-      assert.equal(metadata.link.length, 10)
-      assert.equal(metadata.link[1].rel, "alternate")
-      assert.match(metadata.link[1].href, /^.+foo\.pdf$/)
-      assert.equal(metadata.link[1].type, "application/pdf")
-      assert.equal(metadata.link[2].rel, "alternate")
-      assert.match(metadata.link[2].href, /^.+foo\.doc$/)
-      assert.equal(metadata.link[2].type, "application/msword")
-      assert.equal(metadata.link[3].rel, "bookmark")
-      assert.equal(metadata.link[3].href, "http://example.com/bookmark")
-      assert.equal(metadata.link[4].rel, "shortlink")
-      assert.equal(metadata.link[4].href, "http://example.com/bookmark/short")
-      assert.equal(metadata.link[5].rel, "canonical")
-      assert.equal(metadata.link[5].href, "http://example.com/canonical")
-      assert.equal(metadata.link[6].href, "doi:10.1175/JCLI-D-11-00015.1")
-      assert.match(metadata.link[7].href, /.+foo\.pdf$/)
-      assert.equal(metadata.link[7].type, "application/pdf")
-      assert.equal(metadata.link[8].href, "doi:10.1175/JCLI-D-11-00015.1")
-
-      # Link derived from dc resource identifiers in the form of urn:x-dc:<container>/<identifier>
-      # Where <container> is the percent-encoded value of the last dc.relation.ispartof meta element
-      # and <identifier> is the percent-encoded value of the last dc.identifier meta element.
-      assert.equal(
-        metadata.link[9].href
-        "urn:x-dc:isbn%3A123456789/foobar-abcxyz"
-      )
-
-    it 'should ignore atom and RSS feeds and alternate languages', ->
-      assert.equal(metadata.link.length, 10)
-
-    it 'should have highwire metadata', ->
-      assert.ok(metadata.highwire)
-      assert.deepEqual(metadata.highwire.pdf_url, ['foo.pdf'])
-      assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1'])
-      assert.deepEqual(metadata.highwire.title, ['Foo'])
-
-    it 'should have dublincore metadata', ->
-      assert.ok(metadata.dc)
-      assert.deepEqual(metadata.dc.identifier, ["doi:10.1175/JCLI-D-11-00015.1", "foobar-abcxyz"])
-      assert.deepEqual(metadata.dc['relation.ispartof'], ["isbn:123456789"])
-      assert.deepEqual(metadata.dc.type, ["Article"])
-
-    it 'should have facebook metadata', ->
-      assert.ok(metadata.facebook)
-      assert.deepEqual(metadata.facebook.url, ["http://example.com"])
-
-    it 'should have eprints metadata', ->
-      assert.ok(metadata.eprints)
-      assert.deepEqual(metadata.eprints.title, ['Computer Lib / Dream Machines'])
-
-    it 'should have prism metadata', ->
-      assert.ok(metadata.prism)
-      assert.deepEqual(metadata.prism.title, ['Literary Machines'])
-
-      it 'should have twitter card metadata', ->
-        assert.ok(metadata.twitter)
-        assert.deepEqual(metadata.twitter.site, ['@okfn'])
-
-    it 'should have unique uris', ->
-      uris = testDocument.uris()
-      assert.equal(uris.length, 8)
-
-    it 'uri() returns the canonical uri', ->
-      uri = testDocument.uri()
-      assert.equal(uri, metadata.link[5].href)
-
-    it 'should have a favicon', ->
-      assert.equal(
-        metadata.favicon
-        'http://example.com/images/icon.ico'
-      )
-
-    it 'should have a documentFingerprint as the dc resource identifiers URN href', ->
-      assert.equal(metadata.documentFingerprint, metadata.link[9].href)
-
-
-  describe '#_absoluteUrl', ->
-
-    it 'should add the protocol when the url starts with two slashes', ->
-      result = testDocument._absoluteUrl('//example.com/')
-      expected = "#{document.location.protocol}//example.com/"
-      assert.equal(result, expected)
-
-    it 'should add a trailing slash when given an empty path', ->
-      result = testDocument._absoluteUrl('http://example.com')
-      assert.equal(result, 'http://example.com/')
-
-    it 'should make a relative path into an absolute url', ->
-      result = testDocument._absoluteUrl('path')
-      expected = (
-        document.location.protocol + '//' +
-          document.location.host +
-          document.location.pathname.replace(/[^\/]+$/, '') +
-          'path'
-      )
-      assert.equal(result, expected)
-
-    it 'should make an absolute path into an absolute url', ->
-      result = testDocument._absoluteUrl('/path')
-      expected = (
-        document.location.protocol + '//' +
-          document.location.host +
-          '/path'
-      )
-      assert.equal(result, expected)
-
-  describe '#uri', ->
-
-    beforeEach ->
-      # Remove any existing canonical links which would otherwise override the
-      # document's own location.
-      canonicalLink = document.querySelector('link[rel="canonical"]')
-      if canonicalLink
-        canonicalLink.remove()
-
-    # Create a blank HTML document with a faked `href` and `baseURI` and
-    # return a `Document` instance which reads metadata from it.
-    createDoc = (href, baseURI, htmlDoc) ->
-      if !htmlDoc
-        # Create a blank DOM Document
-        htmlDoc = document.implementation.createHTMLDocument()
-
-      # `Document.location` is not overridable. In order to fake the
-      # location in tests, create a proxy object in front of our blank HTML
-      # document.
-      fakeDocument =
-        createElement: htmlDoc.createElement.bind(htmlDoc),
-        querySelectorAll: htmlDoc.querySelectorAll.bind(htmlDoc),
-        location:
-          href: href
-      doc = new Document($('<div></div>')[0], {
-        document: fakeDocument,
-        baseURI: baseURI,
-      })
-      doc.pluginInit()
-      doc
-
-    [
-      'http://publisher.org/book',
-      'https://publisher.org/book',
-      'file:///Users/jim/book',
-    ].forEach (href) ->
-      it "should return the document's URL if it has an allowed scheme", ->
-        baseURI = 'https://publisher.org/'
-        doc = createDoc(href, baseURI)
-        assert.equal(doc.uri(), href)
-
-    it "should return the baseURI if the document's URL does not have an allowed scheme", ->
-      href = 'blob:1234-5678'
-      baseURI = 'https://publisher.org/book'
-      doc = createDoc(href, baseURI)
-      assert.equal(doc.uri(), baseURI)
-
-    [
-      # The base URI is not available in IE if the document has no `<base>`
-      # tags. This is a limitation of `document-base-uri`.
-      ['https://publisher.org/article', undefined],
-      # Ignore base URIs with non-HTTP/HTTPS/file protocols, which can be
-      # created by a `<base>` tag.
-      ['blob:1234', 'doi:foo'],
-      ['chrome://foo', 'chrome://blah'],
-    ].forEach ([href, baseURI]) ->
-      it "should return the document's URL if it and the baseURI do not have an allowed scheme", ->
-        doc = createDoc(href, baseURI)
-        assert.equal(doc.uri(), href)
-
-    it 'returns the canonical URI if present', ->
-      htmlDoc = document.implementation.createHTMLDocument()
-      canonicalLink = htmlDoc.createElement('link')
-      canonicalLink.rel = 'canonical'
-      canonicalLink.href = 'https://publisher.org/canonical'
-      htmlDoc.head.appendChild(canonicalLink)
-
-      doc = createDoc('https://publisher.org/not-canonical', null, htmlDoc)
-
-      assert.equal doc.uri(), canonicalLink.href
--- a/src/annotator/plugin/test/document-test.js
+++ b/src/annotator/plugin/test/document-test.js
+'use strict';
+
+/*
+** Adapted from:
+** https://github.com/openannotation/annotator/blob/v1.2.x/test/spec/plugin/document_spec.coffee
+**
+** Annotator v1.2.10
+** https://github.com/openannotation/annotator
+**
+** Copyright 2015, the Annotator project contributors.
+** Dual licensed under the MIT and GPLv3 licenses.
+** https://github.com/openannotation/annotator/blob/master/LICENSE
+*/
+
+const $ = require('jquery');
+
+const DocumentMeta = require('../document');
+
+describe('DocumentMeta', function() {
+  let testDocument = null;
+
+  beforeEach(function() {
+    testDocument = new DocumentMeta($('<div></div>')[0], {});
+    testDocument.pluginInit();
+  });
+
+  afterEach(() => $(document).unbind());
+
+  describe('annotation should have some metadata', function() {
+    // Add some metadata to the page
+    const head = $('head');
+    head.append('<link rel="alternate" href="foo.pdf" type="application/pdf"></link>');
+    head.append('<link rel="alternate" href="foo.doc" type="application/msword"></link>');
+    head.append('<link rel="bookmark" href="http://example.com/bookmark"></link>');
+    head.append('<link rel="shortlink" href="http://example.com/bookmark/short"></link>');
+    head.append('<link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>');
+    head.append('<meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">');
+    head.append('<meta name="citation_title" content="Foo">');
+    head.append('<meta name="citation_pdf_url" content="foo.pdf">');
+    head.append('<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">');
+    head.append('<meta name="dc:identifier" content="foobar-abcxyz">');
+    head.append('<meta name="dc.relation.ispartof" content="isbn:123456789">');
+    head.append('<meta name="DC.type" content="Article">');
+    head.append('<meta property="og:url" content="http://example.com">');
+    head.append('<meta name="twitter:site" content="@okfn">');
+    head.append('<link rel="icon" href="http://example.com/images/icon.ico"></link>');
+    head.append('<meta name="eprints.title" content="Computer Lib / Dream Machines">');
+    head.append('<meta name="prism.title" content="Literary Machines">');
+    head.append('<link rel="alternate" href="feed" type="application/rss+xml"></link>');
+    head.append('<link rel="canonical" href="http://example.com/canonical"></link>');
+
+    let metadata = null;
+
+    beforeEach(() => metadata = testDocument.metadata);
+
+    it('should have metadata', () => assert.ok(metadata));
+
+    it('should have a title, derived from highwire metadata if possible', () => {
+      assert.equal(metadata.title, 'Foo');
+    });
+
+    it('should have links with absolute hrefs and types', function() {
+      assert.ok(metadata.link);
+      assert.equal(metadata.link.length, 10);
+      assert.equal(metadata.link[1].rel, 'alternate');
+      assert.match(metadata.link[1].href, /^.+foo\.pdf$/);
+      assert.equal(metadata.link[1].type, 'application/pdf');
+      assert.equal(metadata.link[2].rel, 'alternate');
+      assert.match(metadata.link[2].href, /^.+foo\.doc$/);
+      assert.equal(metadata.link[2].type, 'application/msword');
+      assert.equal(metadata.link[3].rel, 'bookmark');
+      assert.equal(metadata.link[3].href, 'http://example.com/bookmark');
+      assert.equal(metadata.link[4].rel, 'shortlink');
+      assert.equal(metadata.link[4].href, 'http://example.com/bookmark/short');
+      assert.equal(metadata.link[5].rel, 'canonical');
+      assert.equal(metadata.link[5].href, 'http://example.com/canonical');
+      assert.equal(metadata.link[6].href, 'doi:10.1175/JCLI-D-11-00015.1');
+      assert.match(metadata.link[7].href, /.+foo\.pdf$/);
+      assert.equal(metadata.link[7].type, 'application/pdf');
+      assert.equal(metadata.link[8].href, 'doi:10.1175/JCLI-D-11-00015.1');
+
+      // Link derived from dc resource identifiers in the form of urn:x-dc:<container>/<identifier>
+      // Where <container> is the percent-encoded value of the last dc.relation.ispartof meta element
+      // and <identifier> is the percent-encoded value of the last dc.identifier meta element.
+      assert.equal(
+        metadata.link[9].href,
+        'urn:x-dc:isbn%3A123456789/foobar-abcxyz'
+      );
+    });
+
+    it('should ignore atom and RSS feeds and alternate languages', () => {
+      assert.equal(metadata.link.length, 10);
+    });
+
+    it('should have highwire metadata', function() {
+      assert.ok(metadata.highwire);
+      assert.deepEqual(metadata.highwire.pdf_url, ['foo.pdf']);
+      assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
+      assert.deepEqual(metadata.highwire.title, ['Foo']);
+    });
+
+    it('should have dublincore metadata', function() {
+      assert.ok(metadata.dc);
+      assert.deepEqual(metadata.dc.identifier, ['doi:10.1175/JCLI-D-11-00015.1', 'foobar-abcxyz']);
+      assert.deepEqual(metadata.dc['relation.ispartof'], ['isbn:123456789']);
+      assert.deepEqual(metadata.dc.type, ['Article']);
+    });
+
+    it('should have facebook metadata', function() {
+      assert.ok(metadata.facebook);
+      assert.deepEqual(metadata.facebook.url, ['http://example.com']);
+    });
+
+    it('should have eprints metadata', function() {
+      assert.ok(metadata.eprints);
+      assert.deepEqual(metadata.eprints.title, ['Computer Lib / Dream Machines']);
+    });
+
+    it('should have prism metadata', function() {
+      assert.ok(metadata.prism);
+      assert.deepEqual(metadata.prism.title, ['Literary Machines']);
+
+      it('should have twitter card metadata', function() {
+        assert.ok(metadata.twitter);
+        assert.deepEqual(metadata.twitter.site, ['@okfn']);
+      });
+    });
+
+    it('should have unique uris', function() {
+      const uris = testDocument.uris();
+      assert.equal(uris.length, 8);
+    });
+
+    it('uri() returns the canonical uri', function() {
+      const uri = testDocument.uri();
+      assert.equal(uri, metadata.link[5].href);
+    });
+
+    it('should have a favicon', () =>
+      assert.equal(
+        metadata.favicon,
+        'http://example.com/images/icon.ico'
+      )
+    );
+
+    it('should have a documentFingerprint as the dc resource identifiers URN href', () => {
+      assert.equal(metadata.documentFingerprint, metadata.link[9].href);
+    });
+  });
+
+
+  describe('#_absoluteUrl', function() {
+
+    it('should add the protocol when the url starts with two slashes', function() {
+      const result = testDocument._absoluteUrl('//example.com/');
+      const expected = `${document.location.protocol}//example.com/`;
+      assert.equal(result, expected);
+    });
+
+    it('should add a trailing slash when given an empty path', function() {
+      const result = testDocument._absoluteUrl('http://example.com');
+      assert.equal(result, 'http://example.com/');
+    });
+
+    it('should make a relative path into an absolute url', function() {
+      const result = testDocument._absoluteUrl('path');
+      const expected = (
+        document.location.protocol + '//' +
+          document.location.host +
+          document.location.pathname.replace(/[^\/]+$/, '') +
+          'path'
+      );
+      assert.equal(result, expected);
+    });
+
+    it('should make an absolute path into an absolute url', function() {
+      const result = testDocument._absoluteUrl('/path');
+      const expected = (
+        document.location.protocol + '//' +
+          document.location.host +
+          '/path'
+      );
+      assert.equal(result, expected);
+    });
+  });
+
+  describe('#uri', function() {
+
+    beforeEach(function() {
+      // Remove any existing canonical links which would otherwise override the
+      // document's own location.
+      const canonicalLink = document.querySelector('link[rel="canonical"]');
+      if (canonicalLink) {
+        canonicalLink.remove();
+      }
+    });
+
+    // Create a blank HTML document with a faked `href` and `baseURI` and
+    // return a `DocumentMeta` instance which reads metadata from it.
+    const createDoc = function(href, baseURI, htmlDoc) {
+      if (!htmlDoc) {
+        // Create a blank DOM DocumentMeta
+        htmlDoc = document.implementation.createHTMLDocument();
+      }
+
+      // `DocumentMeta.location` is not overridable. In order to fake the
+      // location in tests, create a proxy object in front of our blank HTML
+      // document.
+      const fakeDocument = {
+        createElement: htmlDoc.createElement.bind(htmlDoc), // eslint-disable-line no-restricted-properties
+        querySelectorAll: htmlDoc.querySelectorAll.bind(htmlDoc),  // eslint-disable-line no-restricted-properties
+        location: {
+          href,
+        },
+      };
+      const doc = new DocumentMeta($('<div></div>')[0], {
+        document: fakeDocument,
+        baseURI,
+      });
+      doc.pluginInit();
+      return doc;
+    };
+
+    [
+      'http://publisher.org/book',
+      'https://publisher.org/book',
+      'file:///Users/jim/book',
+    ].forEach(href =>
+      it("should return the document's URL if it has an allowed scheme", function() {
+        const baseURI = 'https://publisher.org/';
+        const doc = createDoc(href, baseURI);
+        assert.equal(doc.uri(), href);
+      })
+    );
+
+    it("should return the baseURI if the document's URL does not have an allowed scheme", function() {
+      const href = 'blob:1234-5678';
+      const baseURI = 'https://publisher.org/book';
+      const doc = createDoc(href, baseURI);
+      assert.equal(doc.uri(), baseURI);
+    });
+
+    [
+      // The base URI is not available in IE if the document has no `<base>`
+      // tags. This is a limitation of `document-base-uri`.
+      ['https://publisher.org/article', undefined],
+      // Ignore base URIs with non-HTTP/HTTPS/file protocols, which can be
+      // created by a `<base>` tag.
+      ['blob:1234', 'doi:foo'],
+      ['chrome://foo', 'chrome://blah'],
+    ].forEach(function(...args) {
+      const [href, baseURI] = Array.from(args[0]);
+      it("should return the document's URL if it and the baseURI do not have an allowed scheme", function() {
+        const doc = createDoc(href, baseURI);
+        assert.equal(doc.uri(), href);
+      });
+    });
+
+    it('returns the canonical URI if present', function() {
+      const htmlDoc = document.implementation.createHTMLDocument();
+      const canonicalLink = htmlDoc.createElement('link');
+      canonicalLink.rel = 'canonical';
+      canonicalLink.href = 'https://publisher.org/canonical';
+      htmlDoc.head.appendChild(canonicalLink);
+
+      const doc = createDoc('https://publisher.org/not-canonical', null, htmlDoc);
+
+      assert.equal(doc.uri(), canonicalLink.href);
+    });
+  });
+});