Change the scheme for the document-identifying URN that’s derived from DC metadata

“dc.relation.ispartof” is now used instead of “dc.source”. The goal is to identify a resource that’s part of another resource, so the more specific term fits better.

Change the scheme for the document-identifying URN that’s derived from DC metadata
“dc.relation.ispartof” is now used instead of “dc.source”. The goal is to identify a resource that’s part of another resource, so the more specific term fits better.
82ac90d0 · Juan Corona · 7b9cbbb7 · 82ac90d0 · 82ac90d0
Commit 82ac90d0 authored Jul 19, 2017 by Juan Corona
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 22 deletions

document.coffee src/annotator/plugin/document.coffee +13 -14

document-test.coffee src/annotator/plugin/test/document-test.coffee +8 -8

No files found.
--- a/src/annotator/plugin/document.coffee
+++ b/src/annotator/plugin/document.coffee
@@ -159,21 +159,20 @@ module.exports = class Document extends Plugin
          if id[0..3] == "doi:"
            @metadata.link.push(href: id)

-    # look for a link to identify the resource in dublincore data
-    dcMetaConcat = []
-    # sort by name for a consistent ordering of the concatenated metadata
-    for name in Object.keys(@metadata.dc).sort()
-      # right now only look for these two terms
-      if name == 'source' || name == 'identifier'
-        values = @metadata.dc[name].sort().map (v) -> encodeURIComponent(v)
-        dcMetaConcat.push(name + ':' + values.join(','))
-
-    dcMetaUrn = 'urn:x-dc-meta:' + dcMetaConcat.join('/')
-    if dcMetaConcat.length == 2
-      # only do this if both terms are provided, for now
-      @metadata.link.push(href: dcMetaUrn)
+    # look for a link to identify the resource in dublincore metadata
+    dcRelationValues = @metadata.dc['relation.ispartof']
+    dcIdentifierValues = @metadata.dc['identifier']
+    if dcRelationValues && dcIdentifierValues
+      dcUrnRelationComponent =
+        dcRelationValues[dcRelationValues.length - 1]
+      dcUrnIdentifierComponent =
+        dcIdentifierValues[dcIdentifierValues.length - 1]
+      dcUrn = 'urn:x-dc:' +
+        encodeURIComponent(dcUrnRelationComponent) + '/' +
+        encodeURIComponent(dcUrnIdentifierComponent)
+      @metadata.link.push(href: dcUrn)
      # set this as the documentFingerprint as a hint to include this in search queries
-      @metadata.documentFingerprint = dcMetaUrn
+      @metadata.documentFingerprint = dcUrn

  _getFavicon: =>
    for link in $("link")

--- a/src/annotator/plugin/test/document-test.coffee
+++ b/src/annotator/plugin/test/document-test.coffee
@@ -41,8 +41,8 @@ describe 'Document', ->
    head.append('<meta name="citation_title" content="Foo">')
    head.append('<meta name="citation_pdf_url" content="foo.pdf">')
    head.append('<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">')
-    head.append('<meta name="dc:identifier" content="isbn:123456789">')
-    head.append('<meta name="dc:source" content="urn:example:abcxyz">')
+    head.append('<meta name="dc:identifier" content="foobar-abcxyz">')
+    head.append('<meta name="dc.relation.ispartof" content="isbn:123456789">')
    head.append('<meta name="DC.type" content="Article">')
    head.append('<meta property="og:url" content="http://example.com">')
    head.append('<meta name="twitter:site" content="@okfn">')
@@ -85,12 +85,12 @@ describe 'Document', ->
      assert.equal(metadata.link[7].type, "application/pdf")
      assert.equal(metadata.link[8].href, "doi:10.1175/JCLI-D-11-00015.1")

-      # link derived from dc resource identifiers
-      # should be in the form of `urn:x-dc-meta: identifier: <doi>,<isbn> / source: <example>`
-      # it's a comma separated list for when there's multiple identifiers
+      # Link derived from dc resource identifiers in the form of urn:x-dc:<container>/<identifier>
+      # Where <container> is the percent-encoded value of the last dc.relation.ispartof meta element
+      # and <identifier> is the percent-encoded value of the last dc.identifier meta element.
      assert.equal(
        metadata.link[9].href
-        "urn:x-dc-meta:identifier:doi%3A10.1175%2FJCLI-D-11-00015.1,isbn%3A123456789/source:urn%3Aexample%3Aabcxyz"
+        "urn:x-dc:isbn%3A123456789/foobar-abcxyz"
      )

    it 'should ignore atom and RSS feeds and alternate languages', ->
@@ -104,8 +104,8 @@ describe 'Document', ->

    it 'should have dublincore metadata', ->
      assert.ok(metadata.dc)
-      assert.deepEqual(metadata.dc.identifier, ["doi:10.1175/JCLI-D-11-00015.1", "isbn:123456789"])
-      assert.deepEqual(metadata.dc.source, ["urn:example:abcxyz"])
+      assert.deepEqual(metadata.dc.identifier, ["doi:10.1175/JCLI-D-11-00015.1", "foobar-abcxyz"])
+      assert.deepEqual(metadata.dc['relation.ispartof'], ["isbn:123456789"])
      assert.deepEqual(metadata.dc.type, ["Article"])

    it 'should have facebook metadata', ->