Commit 77038342 authored by Ed Summers

Improved document metadata extraction, which now replaces the previous getHref jschannel RPC call

parent eb9f9a7d
......@@ -160,8 +160,11 @@ class Annotator.Host extends Annotator
@drag.last = null
)
.bind('getDocumentMetadata', =>
return @plugins.Document.getDocumentMetadata()
.bind('getDocumentInfo', =>
return {
uri: @plugins.Document.uri()
metadata: @plugins.Document.metadata
}
)
scanDocument: (reason = "something happened") =>
......
# Annotator plugin that collects metadata about the page being annotated:
# the title, <meta> tags in the Google Scholar (citation_*), Dublin Core
# (dc.*) and Open Graph (og:*) vocabularies, and related links. The result is
# kept in @metadata and attached to each new annotation as
# `annotation.document`.
class Annotator.Plugin.Document extends Annotator.Plugin

  # Use the jQuery instance that Annotator exposes. CoffeeScript does not
  # shadow variables, so every method below shares this single binding.
  $ = Annotator.$

  events:
    'beforeAnnotationCreated': 'beforeAnnotationCreated'

  # Called when the plugin is initialised: extracts the metadata right away.
  pluginInit: ->
    @metadata = null
    this.getDocumentMetadata()

  # returns the primary URI for the document being annotated
  #
  # Defaults to the decoded current location; a rel="canonical" link found in
  # @metadata.link takes precedence (the last one wins if there are several).
  uri: =>
    uri = decodeURIComponent document.location.href
    # The links live in @metadata.link. @metadata itself is a plain object,
    # so iterating over it directly would never see the canonical link.
    for link in (@metadata?.link or [])
      if link.rel == "canonical"
        uri = link.href
    return uri

  # returns all uris for the document being annotated
  #
  # Duplicates are removed; entries without an href are ignored. Returns an
  # empty list when no metadata (or no link list) is available.
  uris: =>
    uniqueUrls = {}
    for link in (@metadata?.link or [])
      uniqueUrls[link.href] = true if link.href
    return (href for href of uniqueUrls)

  # Event handler: attaches the document metadata to a new annotation,
  # extracting it first if that has not happened yet.
  beforeAnnotationCreated: (annotation) =>
    if not @metadata
      @metadata = this.getDocumentMetadata()
    annotation.document = @metadata

  # Rebuilds @metadata from the current DOM and returns it.
  getDocumentMetadata: =>
    @metadata = {}

    # first look for some common metadata types
    # TODO: look for microdata/rdfa?
    this._getScholar()
    this._getDublinCore()
    this._getOpenGraph()

    # extract out/normalize some things
    # (these read the scholar/dc buckets filled in above, so order matters)
    this._getTitle()
    this._getLinks()

    return @metadata

  # Collects <meta name="citation_*"> tags into @metadata.scholar, keyed by
  # the full name; each key maps to the list of content values found.
  _getScholar: =>
    @metadata.scholar = {}
    for meta in $("meta")
      name = $(meta).prop("name")
      content = $(meta).prop("content")
      if name.match(/^citation_/)
        (@metadata.scholar[name] ?= []).push(content)

  # Collects <meta name="dc.*"> tags into @metadata.dc, keyed by the part of
  # the name after "dc."; each key maps to the list of content values found.
  _getDublinCore: =>
    @metadata.dc = {}
    for meta in $("meta")
      name = $(meta).prop("name")
      content = $(meta).prop("content")
      nameParts = name.split(".")
      if nameParts.length == 2 and nameParts[0] == "dc"
        n = nameParts[1]
        (@metadata.dc[n] ?= []).push(content)

  # Collects <meta property="og:*"> tags into @metadata.og, keyed by the part
  # of the property after "og:"; each key maps to a list of content values.
  _getOpenGraph: =>
    @metadata.og = {}
    for meta in $("meta")
      # "property" is read as an attribute (not a DOM property) and is
      # undefined on <meta> tags that do not carry it.
      property = $(meta).attr("property")
      content = $(meta).prop("content")
      if property
        match = property.match(/^og:(.+)$/)
        if match
          n = match[1]
          (@metadata.og[n] ?= []).push(content)

  # Chooses the document title: citation_title, then dc.title, then <title>.
  _getTitle: =>
    if @metadata.scholar.citation_title
      @metadata.title = @metadata.scholar.citation_title[0]
    else if @metadata.dc.title
      # dc values are stored as lists; take the first one so that the title
      # is always a string, as in the citation_title branch.
      @metadata.title = @metadata.dc.title[0]
    else
      @metadata.title = $("head title").text()

  # Builds @metadata.link: the current location, alternate/canonical <link>
  # elements, citation_pdf_url entries, and DOIs expressed as "doi:" hrefs.
  _getLinks: =>
    # we know our current location is a link for the document
    @metadata.link = [href: document.location.href]

    # look for some relevant link relations
    for link in $("link")
      l = $(link)
      href = this._absoluteUrl(l.prop('href')) # get absolute url
      rel = l.prop('rel')
      type = l.prop('type')
      if rel in ["alternate", "canonical"]
        @metadata.link.push(href: href, rel: rel, type: type)

    # look for links in scholar metadata
    for name, values of @metadata.scholar

      if name == "citation_pdf_url"
        for url in values
          @metadata.link.push
            href: this._absoluteUrl(url)
            type: "application/pdf"

      # kind of a hack to express DOI identifiers as links but it's a
      # convenient place to look them up later, and somewhat sane since
      # they don't have a type
      if name == "citation_doi"
        for doi in values
          if doi[0..3] != "doi:"
            doi = "doi:" + doi
          @metadata.link.push(href: doi)

    # look for links in dublincore data
    for name, values of @metadata.dc
      if name == "identifier"
        for id in values
          if id[0..3] == "doi:"
            @metadata.link.push(href: id)

  # Resolves a possibly relative url against the current document.
  #
  # Assigning to an anchor's href and reading it back lets the browser do the
  # resolution. The url is never spliced into markup, so quotes in it cannot
  # break out of an attribute, and nothing is fetched as a side effect.
  _absoluteUrl: (url) ->
    anchor = document.createElement('a')
    anchor.href = url
    return anchor.href
......@@ -343,14 +343,18 @@ class Hypothesis extends Annotator
# Get the location of the annotated document
@provider.call
method: 'getHref'
success: (href) =>
method: 'getDocumentInfo'
success: (info) =>
href = info.uri
@plugins.Document.metadata = info.metadata
options = angular.extend {}, (@options.Store or {}),
annotationData:
uri: href
loadFromSearch:
limit: 1000
uri: href
<<<<<<< HEAD
this.addStore(options)
addStore: (options) ->
......@@ -361,15 +365,9 @@ class Hypothesis extends Annotator
return unless href?
console.log "Loaded annotions for '" + href + "'."
for href in this.getSynonymURLs href
console.log "Also loading annotations for: " + href
this.plugins.Store.loadAnnotationsFromSearch uri: href
# get metadata for the annotated document
@provider.call
method: 'getDocumentMetadata'
success: (metadata) =>
this.plugins.Document.metadata = metadata
for uri in @plugins.Document.uris()
console.log "Also loading annotations for: " + uri
this.plugins.Store.loadAnnotationsFromSearch uri: uri
class DraftProvider
drafts: []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment