Commit 452c4f71 authored by Robert Knight

Decaffeinate document.coffee

Initial automated CoffeeScript => JS conversion using:

```
decaffeinate <CoffeeScript file>
eslint --fix <JS file>
```
parent 5b813e73
baseURI = require('document-base-uri')
Plugin = require('../plugin')
{ normalizeURI } = require('../util/url')
###
** Adapted from:
** https://github.com/openannotation/annotator/blob/v1.2.x/src/plugin/document.coffee
**
** Annotator v1.2.10
** https://github.com/openannotation/annotator
**
** Copyright 2015, the Annotator project contributors.
** Dual licensed under the MIT and GPLv3 licenses.
** https://github.com/openannotation/annotator/blob/master/LICENSE
###
# Plugin which collects metadata about the page being annotated (title,
# favicon, related links and several families of <meta> tags) and attaches
# it to each annotation as `annotation.document`.
module.exports = class Document extends Plugin

  # Event name => handler method name.
  # NOTE(review): presumably wired up by the `Plugin` base class - confirm.
  events:
    'beforeAnnotationCreated': 'beforeAnnotationCreated'

  # Resolve the base URI and DOM document to read from (both overridable via
  # `@options`), then collect the metadata once.
  pluginInit: ->
    # Test seams.
    @baseURI = @options.baseURI or baseURI
    @document = @options.document or document

    this.getDocumentMetadata()

  # Returns the primary URI for the document being annotated.
  #
  # This is the href of the `rel="canonical"` entry in `@metadata.link` if
  # there is one (the last such entry wins), otherwise the percent-decoded
  # document href.
  uri: =>
    href = this._getDocumentHref()
    try
      uri = decodeURIComponent(href)
    catch err
      # `decodeURIComponent` throws a URIError for malformed escape sequences
      # (eg. a stray "%"). Fall back to the href as-is rather than failing.
      throw err unless err instanceof URIError
      uri = href

    for link in @metadata.link
      if link.rel == "canonical"
        uri = link.href
    return uri

  # Returns all uris for the document being annotated, without duplicates.
  uris: =>
    uniqueUrls = {}
    for link in @metadata.link
      uniqueUrls[link.href] = true if link.href
    return (href for href of uniqueUrls)

  # Event handler: attach the collected metadata to a new annotation.
  beforeAnnotationCreated: (annotation) =>
    annotation.document = @metadata

  # (Re)build `@metadata` from the current state of `@document` and return it.
  getDocumentMetadata: =>
    @metadata = {}

    # first look for some common metadata types
    # TODO: look for microdata/rdfa?
    this._getHighwire()
    this._getDublinCore()
    this._getFacebook()
    this._getEprints()
    this._getPrism()
    this._getTwitter()
    this._getFavicon()

    # extract out/normalize some things. These read the tag families
    # collected above, so they must run last.
    this._getTitle()
    this._getLinks()

    return @metadata

  # The following helpers each collect one family of <meta> tags, keyed by
  # the part of the tag name that follows the family prefix.

  _getHighwire: =>
    return @metadata.highwire = this._getMetaTags("citation", "name", "_")

  _getFacebook: =>
    return @metadata.facebook = this._getMetaTags("og", "property", ":")

  _getTwitter: =>
    return @metadata.twitter = this._getMetaTags("twitter", "name", ":")

  _getDublinCore: =>
    return @metadata.dc = this._getMetaTags("dc", "name", ".")

  _getPrism: =>
    return @metadata.prism = this._getMetaTags("prism", "name", ".")

  _getEprints: =>
    return @metadata.eprints = this._getMetaTags("eprints", "name", ".")

  # Collect the `content` of every <meta> element whose `attribute` value has
  # the form "<prefix><delimiter><key>" (case-insensitive).
  #
  # Returns an object mapping each <key> to an array of content values, in
  # document order.
  #
  # `prefix` and `delimiter` are interpolated into the pattern unescaped, so
  # a "." delimiter matches any single character (eg. "dc:identifier" is
  # collected alongside "dc.identifier").
  _getMetaTags: (prefix, attribute, delimiter) =>
    # The pattern is the same for every element, so build it once.
    pattern = RegExp("^#{prefix}#{delimiter}(.+)$", "i")
    hasOwn = Object::hasOwnProperty

    tags = {}
    for meta in @document.querySelectorAll('meta')
      name = meta.getAttribute(attribute)
      content = meta.content
      if name
        match = name.match(pattern)
        if match
          n = match[1]
          # Keys come from the page, so check for an *own* property. A plain
          # truthiness test would trip over inherited members such as
          # "constructor" or "toString".
          if hasOwn.call(tags, n)
            tags[n].push(content)
          else
            tags[n] = [content]
    return tags

  # Set `@metadata.title` from the first tag family which provides a title,
  # falling back to the document's own title.
  _getTitle: =>
    # Tag families in order of preference.
    for source in ['highwire', 'eprints', 'prism', 'facebook', 'twitter', 'dc']
      titles = @metadata[source].title
      if titles
        return @metadata.title = titles[0]
    @metadata.title = @document.title

  # Populate `@metadata.link` (and `@metadata.documentFingerprint`, when
  # available) from <link> elements and the collected <meta> tags.
  _getLinks: =>
    # we know our current location is a link for the document
    @metadata.link = [href: this._getDocumentHref()]

    # look for some relevant link relations
    for link in @document.querySelectorAll('link')
      rel = link.rel
      type = link.type
      lang = link.hreflang

      continue unless rel in ["alternate", "canonical", "bookmark", "shortlink"]

      if rel is 'alternate'
        # Ignore feeds resources
        continue if type and type.match(/^application\/(rss|atom)\+xml/)
        # Ignore alternate languages
        continue if lang

      # Only resolve the URL for links that are going to be kept.
      href = this._absoluteUrl(link.href) # get absolute url
      @metadata.link.push(href: href, rel: rel, type: type)

    # look for links in scholar metadata. Iterate over the tags (rather than
    # looking the two names up directly) so that links keep the order in
    # which the tags appear in the page.
    for name, values of @metadata.highwire
      if name == "pdf_url"
        for url in values
          @metadata.link.push(
            href: this._absoluteUrl(url)
            type: "application/pdf"
          )

      # kind of a hack to express DOI identifiers as links but it's a
      # convenient place to look them up later, and somewhat sane since
      # they don't have a type
      if name == "doi"
        for doi in values
          if doi[0..3] != "doi:"
            doi = "doi:" + doi
          @metadata.link.push(href: doi)

    # look for links in dublincore data
    dcIdentifierValues = @metadata.dc['identifier']
    for id in (dcIdentifierValues or [])
      if id[0..3] == "doi:"
        @metadata.link.push(href: id)

    # look for a link to identify the resource in dublincore metadata
    dcRelationValues = @metadata.dc['relation.ispartof']
    if dcRelationValues && dcIdentifierValues
      # Use the last value of each tag.
      dcUrnRelationComponent =
        dcRelationValues[dcRelationValues.length - 1]
      dcUrnIdentifierComponent =
        dcIdentifierValues[dcIdentifierValues.length - 1]
      dcUrn = 'urn:x-dc:' +
        encodeURIComponent(dcUrnRelationComponent) + '/' +
        encodeURIComponent(dcUrnIdentifierComponent)
      @metadata.link.push(href: dcUrn)
      # set this as the documentFingerprint as a hint to include this in search queries
      @metadata.documentFingerprint = dcUrn

  # Set `@metadata.favicon` from the page's icon <link>, if any. When there
  # are several, the last one in the document wins.
  _getFavicon: =>
    for link in @document.querySelectorAll('link')
      if link.rel in ["shortcut icon", "icon"]
        @metadata["favicon"] = this._absoluteUrl(link.href)

  # Hack to get an absolute url from a possibly relative one
  _absoluteUrl: (url) ->
    normalizeURI(url, @baseURI)

  # Get the true URI record when it's masked via a different protocol.
  # This happens when an href is set with a uri using the 'blob:' protocol
  # but the document can set a different uri through a <base> tag.
  _getDocumentHref: ->
    href = @document.location.href
    allowedSchemes = ['http:', 'https:', 'file:']

    # Use the current document location if it has a recognized scheme.
    if new URL(href).protocol in allowedSchemes
      return href

    # Otherwise, try using the location specified by the <base> element.
    if @baseURI and (new URL(@baseURI).protocol in allowedSchemes)
      return @baseURI

    # Fall back to returning the document URI, even though the scheme is not
    # in the allowed list.
    return href
This diff is collapsed.
$ = require('jquery')
Document = require('../document')
###
** Adapted from:
** https://github.com/openannotation/annotator/blob/v1.2.x/test/spec/plugin/document_spec.coffee
**
** Annotator v1.2.10
** https://github.com/openannotation/annotator
**
** Copyright 2015, the Annotator project contributors.
** Dual licensed under the MIT and GPLv3 licenses.
** https://github.com/openannotation/annotator/blob/master/LICENSE
###
describe 'Document', ->
  testDocument = null

  # Every test gets a fresh plugin instance which has already read its
  # metadata from the real page.
  beforeEach ->
    testDocument = new Document($('<div></div>')[0], {})
    testDocument.pluginInit()

  afterEach ->
    $(document).unbind()

  describe 'annotation should have some metadata', ->
    # Metadata markup added to the page's <head>, in this order. This runs
    # when the suite is defined, ie. before any plugin instance is created.
    fixtureMarkup = [
      '<link rel="alternate" href="foo.pdf" type="application/pdf"></link>'
      '<link rel="alternate" href="foo.doc" type="application/msword"></link>'
      '<link rel="bookmark" href="http://example.com/bookmark"></link>'
      '<link rel="shortlink" href="http://example.com/bookmark/short"></link>'
      '<link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>'
      '<meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">'
      '<meta name="citation_title" content="Foo">'
      '<meta name="citation_pdf_url" content="foo.pdf">'
      '<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">'
      '<meta name="dc:identifier" content="foobar-abcxyz">'
      '<meta name="dc.relation.ispartof" content="isbn:123456789">'
      '<meta name="DC.type" content="Article">'
      '<meta property="og:url" content="http://example.com">'
      '<meta name="twitter:site" content="@okfn">'
      '<link rel="icon" href="http://example.com/images/icon.ico"></link>'
      '<meta name="eprints.title" content="Computer Lib / Dream Machines">'
      '<meta name="prism.title" content="Literary Machines">'
      '<link rel="alternate" href="feed" type="application/rss+xml"></link>'
      '<link rel="canonical" href="http://example.com/canonical"></link>'
    ]
    pageHead = $("head")
    for markup in fixtureMarkup
      pageHead.append(markup)

    metadata = null

    beforeEach ->
      metadata = testDocument.metadata

    it 'should have metadata', ->
      assert.ok(metadata)

    it 'should have a title, derived from highwire metadata if possible', ->
      assert.equal(metadata.title, 'Foo')

    it 'should have links with absolute hrefs and types', ->
      links = metadata.link
      assert.ok(links)
      assert.equal(links.length, 10)

      # Entries derived from <link> elements, in document order. Entry 0 is
      # the document's own location and is not checked here.
      assert.equal(links[1].rel, "alternate")
      assert.match(links[1].href, /^.+foo\.pdf$/)
      assert.equal(links[1].type, "application/pdf")
      assert.equal(links[2].rel, "alternate")
      assert.match(links[2].href, /^.+foo\.doc$/)
      assert.equal(links[2].type, "application/msword")
      assert.equal(links[3].rel, "bookmark")
      assert.equal(links[3].href, "http://example.com/bookmark")
      assert.equal(links[4].rel, "shortlink")
      assert.equal(links[4].href, "http://example.com/bookmark/short")
      assert.equal(links[5].rel, "canonical")
      assert.equal(links[5].href, "http://example.com/canonical")

      # Entries derived from highwire and dublincore <meta> tags.
      assert.equal(links[6].href, "doi:10.1175/JCLI-D-11-00015.1")
      assert.match(links[7].href, /.+foo\.pdf$/)
      assert.equal(links[7].type, "application/pdf")
      assert.equal(links[8].href, "doi:10.1175/JCLI-D-11-00015.1")

      # Link derived from dc resource identifiers in the form of urn:x-dc:<container>/<identifier>
      # Where <container> is the percent-encoded value of the last dc.relation.ispartof meta element
      # and <identifier> is the percent-encoded value of the last dc.identifier meta element.
      assert.equal(links[9].href, "urn:x-dc:isbn%3A123456789/foobar-abcxyz")

    it 'should ignore atom and RSS feeds and alternate languages', ->
      assert.equal(metadata.link.length, 10)

    it 'should have highwire metadata', ->
      assert.ok(metadata.highwire)
      assert.deepEqual(metadata.highwire.pdf_url, ['foo.pdf'])
      assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1'])
      assert.deepEqual(metadata.highwire.title, ['Foo'])

    it 'should have dublincore metadata', ->
      assert.ok(metadata.dc)
      assert.deepEqual(metadata.dc.identifier, ["doi:10.1175/JCLI-D-11-00015.1", "foobar-abcxyz"])
      assert.deepEqual(metadata.dc['relation.ispartof'], ["isbn:123456789"])
      assert.deepEqual(metadata.dc.type, ["Article"])

    it 'should have facebook metadata', ->
      assert.ok(metadata.facebook)
      assert.deepEqual(metadata.facebook.url, ["http://example.com"])

    it 'should have eprints metadata', ->
      assert.ok(metadata.eprints)
      assert.deepEqual(metadata.eprints.title, ['Computer Lib / Dream Machines'])

    it 'should have prism metadata', ->
      assert.ok(metadata.prism)
      assert.deepEqual(metadata.prism.title, ['Literary Machines'])

    it 'should have twitter card metadata', ->
      assert.ok(metadata.twitter)
      assert.deepEqual(metadata.twitter.site, ['@okfn'])

    it 'should have unique uris', ->
      uris = testDocument.uris()
      assert.equal(uris.length, 8)

    it 'uri() returns the canonical uri', ->
      uri = testDocument.uri()
      assert.equal(uri, metadata.link[5].href)

    it 'should have a favicon', ->
      assert.equal(metadata.favicon, 'http://example.com/images/icon.ico')

    it 'should have a documentFingerprint as the dc resource identifiers URN href', ->
      assert.equal(metadata.documentFingerprint, metadata.link[9].href)

  describe '#_absoluteUrl', ->
    it 'should add the protocol when the url starts with two slashes', ->
      result = testDocument._absoluteUrl('//example.com/')
      expected = "#{document.location.protocol}//example.com/"
      assert.equal(result, expected)

    it 'should add a trailing slash when given an empty path', ->
      result = testDocument._absoluteUrl('http://example.com')
      assert.equal(result, 'http://example.com/')

    it 'should make a relative path into an absolute url', ->
      result = testDocument._absoluteUrl('path')
      # Directory part of the current page's path, ie. with the final
      # segment removed.
      expected = (
        document.location.protocol + '//' +
        document.location.host +
        document.location.pathname.replace(/[^\/]+$/, '') +
        'path'
      )
      assert.equal(result, expected)

    it 'should make an absolute path into an absolute url', ->
      result = testDocument._absoluteUrl('/path')
      expected = (
        document.location.protocol + '//' +
        document.location.host +
        '/path'
      )
      assert.equal(result, expected)

  describe '#uri', ->
    beforeEach ->
      # Remove any existing canonical links which would otherwise override the
      # document's own location.
      canonicalLink = document.querySelector('link[rel="canonical"]')
      if canonicalLink
        canonicalLink.remove()

    # Create a blank HTML document with a faked `href` and `baseURI` and
    # return a `Document` instance which reads metadata from it.
    createDoc = (href, baseURI, htmlDoc) ->
      if !htmlDoc
        # Create a blank DOM Document
        htmlDoc = document.implementation.createHTMLDocument()

      # `Document.location` is not overridable. In order to fake the
      # location in tests, create a proxy object in front of our blank HTML
      # document.
      fakeDocument =
        createElement: htmlDoc.createElement.bind(htmlDoc),
        querySelectorAll: htmlDoc.querySelectorAll.bind(htmlDoc),
        location:
          href: href

      doc = new Document($('<div></div>')[0], {
        document: fakeDocument,
        baseURI: baseURI,
      })
      doc.pluginInit()
      doc

    # Document locations whose scheme is accepted as-is.
    allowedHrefs = [
      'http://publisher.org/book',
      'https://publisher.org/book',
      'file:///Users/jim/book',
    ]
    allowedHrefs.forEach (href) ->
      it "should return the document's URL if it has an allowed scheme", ->
        baseURI = 'https://publisher.org/'
        doc = createDoc(href, baseURI)
        assert.equal(doc.uri(), href)

    it "should return the baseURI if the document's URL does not have an allowed scheme", ->
      href = 'blob:1234-5678'
      baseURI = 'https://publisher.org/book'
      doc = createDoc(href, baseURI)
      assert.equal(doc.uri(), baseURI)

    # [href, baseURI] pairs for which the base URI cannot be used.
    unusableBaseCases = [
      # The base URI is not available in IE if the document has no `<base>`
      # tags. This is a limitation of `document-base-uri`.
      ['https://publisher.org/article', undefined],
      # Ignore base URIs with non-HTTP/HTTPS/file protocols, which can be
      # created by a `<base>` tag.
      ['blob:1234', 'doi:foo'],
      ['chrome://foo', 'chrome://blah'],
    ]
    unusableBaseCases.forEach ([href, baseURI]) ->
      it "should return the document's URL if it and the baseURI do not have an allowed scheme", ->
        doc = createDoc(href, baseURI)
        assert.equal(doc.uri(), href)

    it 'returns the canonical URI if present', ->
      htmlDoc = document.implementation.createHTMLDocument()
      canonicalLink = htmlDoc.createElement('link')
      canonicalLink.rel = 'canonical'
      canonicalLink.href = 'https://publisher.org/canonical'
      htmlDoc.head.appendChild(canonicalLink)
      doc = createDoc('https://publisher.org/not-canonical', null, htmlDoc)

      assert.equal(doc.uri(), canonicalLink.href)
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment