Commit 99b11b74 authored by Robert Knight

Avoid parsing ignored `<link>` types in `DocumentMeta#_getLinks`

Clean up `DocumentMeta#_getLinks` by removing several unnecessary variables and
narrowing the scope of several others.

In the process, avoid trying to normalize the URIs for `<link>` elements
which are going to be ignored anyway because of their `rel` values.

As a side effect this should fix https://github.com/hypothesis/lms/issues/375
because the `<link>` elements shown in https://github.com/hypothesis/lms/issues/375#issuecomment-444637947
will never get parsed. This doesn't close that issue because invalid
URIs could still occur in the remaining `<link>` tags which are parsed.
parent 5d68724e
...@@ -160,34 +160,34 @@ class DocumentMeta extends Plugin { ...@@ -160,34 +160,34 @@ class DocumentMeta extends Plugin {
} }
_getLinks() { _getLinks() {
// we know our current location is a link for the document // We know our current location is a link for the document.
let href;
let type;
let values;
this.metadata.link = [{href: this._getDocumentHref()}]; this.metadata.link = [{href: this._getDocumentHref()}];
// look for some relevant link relations // Extract links from certain `<link>` tags.
for (let link of Array.from(this.document.querySelectorAll('link'))) { const linkElements = Array.from(this.document.querySelectorAll('link'));
href = this._absoluteUrl(link.href); // get absolute url for (let link of linkElements) {
const { rel } = link; if (!['alternate', 'canonical', 'bookmark', 'shortlink'].includes(link.rel)) {
({ type } = link); continue;
const lang = link.hreflang; }
if (!['alternate', 'canonical', 'bookmark', 'shortlink'].includes(rel)) { continue; } if (link.rel === 'alternate') {
// Ignore RSS feed links.
if (rel === 'alternate') { if (link.type && link.type.match(/^application\/(rss|atom)\+xml/)) {
// Ignore feeds resources continue;
if (type && type.match(/^application\/(rss|atom)\+xml/)) { continue; } }
// Ignore alternate languages // Ignore alternate languages.
if (lang) { continue; } if (link.hreflang) {
continue;
}
} }
this.metadata.link.push({href, rel, type}); const href = this._absoluteUrl(link.href);
this.metadata.link.push({href, rel: link.rel, type: link.type});
} }
// look for links in scholar metadata // look for links in scholar metadata
for (let name of Object.keys(this.metadata.highwire)) { for (let name of Object.keys(this.metadata.highwire)) {
values = this.metadata.highwire[name]; const values = this.metadata.highwire[name];
if (name === 'pdf_url') { if (name === 'pdf_url') {
for (let url of values) { for (let url of values) {
this.metadata.link.push({ this.metadata.link.push({
...@@ -212,7 +212,7 @@ class DocumentMeta extends Plugin { ...@@ -212,7 +212,7 @@ class DocumentMeta extends Plugin {
// look for links in dublincore data // look for links in dublincore data
for (let name of Object.keys(this.metadata.dc)) { for (let name of Object.keys(this.metadata.dc)) {
values = this.metadata.dc[name]; const values = this.metadata.dc[name];
if (name === 'identifier') { if (name === 'identifier') {
for (let id of values) { for (let id of values) {
if (id.slice(0, 4) === 'doi:') { if (id.slice(0, 4) === 'doi:') {
...@@ -247,7 +247,6 @@ class DocumentMeta extends Plugin { ...@@ -247,7 +247,6 @@ class DocumentMeta extends Plugin {
} }
} }
// Hack to get a absolute url from a possibly relative one
_absoluteUrl(url) { _absoluteUrl(url) {
return normalizeURI(url, this.baseURI); return normalizeURI(url, this.baseURI);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment