Commit 295a8886 authored by Eduardo Sanz García, committed by Robert Knight

Rearrange tests in `html-metadata.js`

I have created a few additional `describe` sub-sections and regrouped
the tests by the metadata property they verify. This allows for more
atomic testing.
parent 907f0011
......@@ -29,231 +29,273 @@ describe('HTMLMetadata', () => {
});
describe('#getDocumentMetadata', () => {
let metadata = null;
beforeEach(() => {
// Add some metadata to the page
it('should ignore `<meta>` elements if "content" attribute are not present', () => {
tempDocumentHead.innerHTML = `
<link rel="alternate" href="foo.pdf" type="application/pdf"></link>
<link rel="alternate" href="foo.doc" type="application/msword"></link>
<link rel="bookmark" href="http://example.com/bookmark"></link>
<link rel="shortlink" href="http://example.com/bookmark/short"></link>
<link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>
<meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">
<meta name="citation_title" content="Foo">
<meta name="citation_pdf_url" content="foo.pdf">
<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
<meta name="dc:identifier" content="foobar-abcxyz">
<meta name="dc.relation.ispartof" content="isbn:123456789">
<meta name="DC.type" content="Article">
<meta property="og:url" content="http://example.com">
<meta name="twitter:site" content="@okfn">
<link rel="icon" href="http://example.com/images/icon.ico"></link>
<meta name="eprints.title" content="Computer Lib / Dream Machines">
<meta name="prism.title" content="Literary Machines">
<link rel="alternate" href="feed" type="application/rss+xml"></link>
<link rel="canonical" href="http://example.com/canonical"></link>
<meta name="citation_doi" content>
<meta name="dc.type">
`;
metadata = testDocument.getDocumentMetadata();
const metadata = testDocument.getDocumentMetadata();
assert.isEmpty(metadata.highwire);
assert.isEmpty(metadata.dc);
});
it('should return title', () => {
// Populate all supported title sources.
tempDocument.title = 'Test document title';
tempDocumentHead.innerHTML = `
<meta name="eprints.title" content="Eprints title">
<meta name="prism.title" content="PRISM title">
<meta name="dc.title" content="Dublin Core title">
<meta name="citation_title" content="Highwire title">
<meta property="og:title" content="Facebook title">
<meta name="twitter:title" content="Twitter title">
`;
// Title values, in order of source priority.
const sources = [
{
metaName: 'citation_title',
value: 'Highwire title',
},
{
metaName: 'eprints.title',
value: 'Eprints title',
},
{
metaName: 'prism.title',
value: 'PRISM title',
},
{
metaAttr: 'property',
metaName: 'og:title',
value: 'Facebook title',
},
{
metaName: 'twitter:title',
value: 'Twitter title',
},
{
metaName: 'dc.title',
value: 'Dublin Core title',
},
{
value: 'Test document title',
},
];
describe('metadata.title', () => {
it('should return title', () => {
// Populate all supported title sources.
tempDocument.title = 'Test document title';
tempDocumentHead.innerHTML = `
<meta name="eprints.title" content="Eprints title">
<meta name="prism.title" content="PRISM title">
<meta name="dc.title" content="Dublin Core title">
<meta name="citation_title" content="Highwire title">
<meta property="og:title" content="Facebook title">
<meta name="twitter:title" content="Twitter title">
`;
for (let source of sources) {
const metadata = testDocument.getDocumentMetadata();
assert.equal(metadata.title, source.value);
// Remove this title source. The next iteration should return the next
// title value in the priority order.
if (source.metaName) {
const attr = source.metaAttr ?? 'name';
tempDocumentHead
.querySelector(`meta[${attr}="${source.metaName}"]`)
.remove();
// Title values, in order of source priority.
const sources = [
{
metaName: 'citation_title',
value: 'Highwire title',
},
{
metaName: 'eprints.title',
value: 'Eprints title',
},
{
metaName: 'prism.title',
value: 'PRISM title',
},
{
metaAttr: 'property',
metaName: 'og:title',
value: 'Facebook title',
},
{
metaName: 'twitter:title',
value: 'Twitter title',
},
{
metaName: 'dc.title',
value: 'Dublin Core title',
},
{
value: 'Test document title',
},
];
for (let source of sources) {
const metadata = testDocument.getDocumentMetadata();
assert.equal(metadata.title, source.value);
// Remove this title source. The next iteration should return the next
// title value in the priority order.
if (source.metaName) {
const attr = source.metaAttr ?? 'name';
tempDocumentHead
.querySelector(`meta[${attr}="${source.metaName}"]`)
.remove();
}
}
}
});
it('should return links with absolute hrefs and types', () => {
assert.ok(metadata.link);
assert.equal(metadata.link.length, 10);
assert.equal(metadata.link[1].rel, 'alternate');
assert.match(metadata.link[1].href, /^.+foo\.pdf$/);
assert.equal(metadata.link[1].type, 'application/pdf');
assert.equal(metadata.link[2].rel, 'alternate');
assert.match(metadata.link[2].href, /^.+foo\.doc$/);
assert.equal(metadata.link[2].type, 'application/msword');
assert.equal(metadata.link[3].rel, 'bookmark');
assert.equal(metadata.link[3].href, 'http://example.com/bookmark');
assert.equal(metadata.link[4].rel, 'shortlink');
assert.equal(metadata.link[4].href, 'http://example.com/bookmark/short');
assert.equal(metadata.link[5].rel, 'canonical');
assert.equal(metadata.link[5].href, 'http://example.com/canonical');
assert.equal(metadata.link[6].href, 'doi:10.1175/JCLI-D-11-00015.1');
assert.match(metadata.link[7].href, /.+foo\.pdf$/);
assert.equal(metadata.link[7].type, 'application/pdf');
assert.equal(metadata.link[8].href, 'doi:10.1175/JCLI-D-11-00015.1');
// Link derived from dc resource identifiers in the form of urn:x-dc:<container>/<identifier>
// Where <container> is the percent-encoded value of the last dc.relation.ispartof meta element
// and <identifier> is the percent-encoded value of the last dc.identifier meta element.
assert.equal(
metadata.link[9].href,
'urn:x-dc:isbn%3A123456789/foobar-abcxyz'
);
});
});
it('should ignore atom and RSS feeds and alternate languages', () => {
assert.equal(metadata.link.length, 10);
});
describe('metadata.highwire', () => {
it('should return Highwire metadata', () => {
tempDocumentHead.innerHTML = `
<meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">
<meta name="citation_title" content="Foo">
<meta name="citation_pdf_url" content="foo.pdf">
`;
const metadata = testDocument.getDocumentMetadata();
it('should return Highwire metadata', () => {
assert.ok(metadata.highwire);
assert.deepEqual(metadata.highwire.pdf_url, ['foo.pdf']);
assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
assert.deepEqual(metadata.highwire.title, ['Foo']);
assert.ok(metadata.highwire);
assert.deepEqual(metadata.highwire.pdf_url, ['foo.pdf']);
assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
assert.deepEqual(metadata.highwire.title, ['Foo']);
});
});
context('in "meta" elements', () => {
it('should ignore if "content" attribute is not present', () => {
describe('metadata.dc', () => {
it('should return Dublin Core metadata', () => {
tempDocumentHead.innerHTML = `
<meta name="citation_doi" content>
<meta name="DC.type">
<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
<meta name="dc.identifier" content="foobar-abcxyz">
<meta name="dc.relation.ispartof" content="isbn:123456789">
<meta name="dc.type" content="Article">
`;
const metadata = testDocument.getDocumentMetadata();
assert.isEmpty(metadata.highwire);
assert.isEmpty(metadata.dc);
assert.ok(metadata.dc);
assert.deepEqual(metadata.dc.identifier, [
'doi:10.1175/JCLI-D-11-00015.1',
'foobar-abcxyz',
]);
assert.deepEqual(metadata.dc['relation.ispartof'], ['isbn:123456789']);
assert.deepEqual(metadata.dc.type, ['Article']);
});
it('should lower-case the value of the specified attribute', () => {
it('should work with any delimiter character', () => {
tempDocumentHead.innerHTML = `
<meta name="CITATION_DOI" content="10.1175/JCLI-D-11-00015.1">
<meta property="OG:URL" content="https://fb.com">
<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
<meta name="dc:identifier" content="doi:10.1175/JCLI-D-11-00015.2">
<meta name="dc_identifier" content="doi:10.1175/JCLI-D-11-00015.3">
`;
const metadata = testDocument.getDocumentMetadata();
assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
assert.deepEqual(metadata.facebook.url, ['https://fb.com']);
assert.ok(metadata.dc);
assert.deepEqual(metadata.dc.identifier, [
'doi:10.1175/JCLI-D-11-00015.1',
'doi:10.1175/JCLI-D-11-00015.2',
'doi:10.1175/JCLI-D-11-00015.3',
]);
});
});
it('should return Dublin Core metadata', () => {
// `metadata` is assigned outside this test, in the enclosing suite.
const { dc } = metadata;
assert.ok(dc);

// Both `identifier` values are expected, in this order.
const expectedIdentifiers = ['doi:10.1175/JCLI-D-11-00015.1', 'foobar-abcxyz'];
assert.deepEqual(dc.identifier, expectedIdentifiers);
assert.deepEqual(dc['relation.ispartof'], ['isbn:123456789']);
assert.deepEqual(dc.type, ['Article']);
});
describe('metadata.facebook', () => {
it('should return Facebook metadata', () => {
tempDocumentHead.innerHTML =
'<meta property="og:url" content="http://example.com">';
const metadata = testDocument.getDocumentMetadata();
it('should return Facebook metadata', () => {
assert.ok(metadata.facebook);
assert.deepEqual(metadata.facebook.url, ['http://example.com']);
assert.ok(metadata.facebook);
assert.deepEqual(metadata.facebook.url, ['http://example.com']);
});
});
it('should return eprints metadata', () => {
// `metadata` is assigned outside this test, in the enclosing suite.
const { eprints } = metadata;
assert.ok(eprints);
assert.deepEqual(eprints.title, ['Computer Lib / Dream Machines']);
});
describe('metadata.twitter', () => {
it('should return Twitter card metadata', () => {
tempDocumentHead.innerHTML =
'<meta name="twitter:site" content="@okfn">';
const metadata = testDocument.getDocumentMetadata();
it('should return PRISM metadata', () => {
assert.ok(metadata.prism);
assert.deepEqual(metadata.prism.title, ['Literary Machines']);
assert.ok(metadata.twitter);
assert.deepEqual(metadata.twitter.site, ['@okfn']);
});
});
it('should return Twitter card metadata', () => {
assert.ok(metadata.twitter);
assert.deepEqual(metadata.twitter.site, ['@okfn']);
it('should lower-case the value of the specified attribute', () => {
// The `name` / `property` attribute values are upper-cased on purpose; the
// assertions below read them back through lower-case keys.
tempDocumentHead.innerHTML = `
<meta name="CITATION_DOI" content="10.1175/JCLI-D-11-00015.1">
<meta property="OG:URL" content="https://fb.com">
`;
const { highwire, facebook } = testDocument.getDocumentMetadata();
assert.deepEqual(highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
assert.deepEqual(facebook.url, ['https://fb.com']);
});
it('should return favicon URL', () => {
assert.equal(metadata.favicon, 'http://example.com/images/icon.ico');
});
describe('metadata.link', () => {
let metadata = null;
it('should set `documentFingerprint` to the dc resource identifiers URN href', () => {
assert.equal(metadata.documentFingerprint, metadata.link[9].href);
});
beforeEach(() => {
tempDocumentHead.innerHTML = `
<link rel="alternate" href="foo.pdf" type="application/pdf"></link>
<link rel="alternate" href="foo.doc" type="application/msword"></link>
<link rel="bookmark" href="http://example.com/bookmark"></link>
<link rel="shortlink" href="http://example.com/bookmark/short"></link>
<link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>
<link rel="icon" href="http://example.com/images/icon.ico"></link>
<link rel="canonical" href="http://example.com/canonical"></link>
<meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">
<meta name="citation_pdf_url" content="foo.pdf">
<meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
<meta name="dc.identifier" content="foobar-abcxyz">
<meta name="dc.relation.ispartof" content="isbn:123456789">
`;
it('should ignore `<link>` tags with invalid URIs', () => {
tempDocumentHead.innerHTML = `
<link rel="alternate" href="https://example.com/foo">
<link rel="alternate" href="http://a:b:c">
`;
metadata = testDocument.getDocumentMetadata();
});
const metadata = testDocument.getDocumentMetadata();
it('should return links with absolute hrefs and types', () => {
// `metadata` is populated by the enclosing `beforeEach`. Entry 0 is not
// inspected here (sibling tests describe it as the document's location).
const links = metadata.link;
assert.ok(links);
assert.equal(links.length, 10);

// Entries 1-5: the `<link>` elements that are kept, in fixture order.
const pdfAlternate = links[1];
assert.equal(pdfAlternate.rel, 'alternate');
assert.match(pdfAlternate.href, /^.+foo\.pdf$/);
assert.equal(pdfAlternate.type, 'application/pdf');

const docAlternate = links[2];
assert.equal(docAlternate.rel, 'alternate');
assert.match(docAlternate.href, /^.+foo\.doc$/);
assert.equal(docAlternate.type, 'application/msword');

const bookmark = links[3];
assert.equal(bookmark.rel, 'bookmark');
assert.equal(bookmark.href, 'http://example.com/bookmark');

const shortlink = links[4];
assert.equal(shortlink.rel, 'shortlink');
assert.equal(shortlink.href, 'http://example.com/bookmark/short');

const canonical = links[5];
assert.equal(canonical.rel, 'canonical');
assert.equal(canonical.href, 'http://example.com/canonical');

// Entries 6-8: presumably derived from the `<meta>` elements in the fixture
// (citation DOI, citation PDF URL, then the DOI-style dc.identifier).
assert.equal(links[6].href, 'doi:10.1175/JCLI-D-11-00015.1');
const metaPdf = links[7];
assert.match(metaPdf.href, /.+foo\.pdf$/);
assert.equal(metaPdf.type, 'application/pdf');
assert.equal(links[8].href, 'doi:10.1175/JCLI-D-11-00015.1');

// Entry 9: link derived from dc resource identifiers, in the form
// urn:x-dc:<container>/<identifier>, where <container> is the
// percent-encoded value of the last dc.relation.ispartof meta element and
// <identifier> is the percent-encoded value of the last dc.identifier meta
// element.
assert.equal(links[9].href, 'urn:x-dc:isbn%3A123456789/foobar-abcxyz');
});
// There should be one link with the document location and one for the
// valid `<link>` tag.
assert.deepEqual(metadata.link.length, 2);
assert.deepEqual(metadata.link[1], {
rel: 'alternate',
href: 'https://example.com/foo',
type: '',
it('should return favicon URL', () => {
// `metadata` comes from the enclosing `beforeEach`, whose fixture contains a
// `<link rel="icon">` element with this href.
const { favicon } = metadata;
assert.equal(favicon, 'http://example.com/images/icon.ico');
});
});
it('should ignore favicons with invalid URIs', () => {
tempDocumentHead.innerHTML = `
<link rel="favicon" href="http://a:b:c">
`;
const metadata = testDocument.getDocumentMetadata();
assert.isUndefined(metadata.favicon);
});
it('should set `documentFingerprint` to the dc resource identifiers URN href', () => {
// `metadata` comes from the enclosing `beforeEach`. Entry 9 of `link` is the
// one compared against the `urn:x-dc:` href in the links test of this suite.
const { documentFingerprint, link } = metadata;
assert.equal(documentFingerprint, link[9].href);
});
it('should ignore `<meta>` PDF links with invalid URIs', () => {
tempDocumentHead.innerHTML = `
<meta name="citation_pdf_url" content="http://a:b:c">
`;
const metadata = testDocument.getDocumentMetadata();
it('should ignore atom and RSS feeds and alternate languages', () => {
// Baseline: the links reported when `<head>` contains nothing at all.
tempDocumentHead.innerHTML = '';
const baselineLinks = testDocument.getDocumentMetadata().link;

// Add only links that are expected to be skipped: an RSS feed, an Atom feed
// and an alternate-language version (`hreflang`). The latter was missing
// from this fixture even though the test title covers it.
tempDocumentHead.innerHTML = `
<link rel="alternate" href="feed" type="application/rss+xml"></link>
<link rel="alternate" href="feed" type="application/atom+xml"></link>
<link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>
`;
const links = testDocument.getDocumentMetadata().link;

// None of the added elements may contribute an entry.
assert.equal(links.length, baselineLinks.length);
});
it('should ignore `pdf_url` links with invalid URIs', () => {
// The only metadata on the page is a PDF URL that cannot be parsed.
tempDocumentHead.innerHTML = `
<meta name="citation_pdf_url" content="http://a:b:c">
`;
const { link: links } = testDocument.getDocumentMetadata();

// Only the link for the document's location should remain; the invalid PDF
// link must have been dropped.
assert.equal(links.length, 1);
});
it('should ignore `<link>` tags with invalid URIs', () => {
// Two alternates: the first href is a valid URL, the second
// (`http://a:b:c`) is the invalid one that should be dropped.
tempDocumentHead.innerHTML = `
<link rel="alternate" href="https://example.com/foo">
<link rel="alternate" href="http://a:b:c">
`;
const metadata = testDocument.getDocumentMetadata();

// There should be one link with the document location and one for the
// valid `<link>` tag. `length` is a primitive, so it is compared with
// `equal` (as in the other link-count assertions) rather than `deepEqual`.
assert.equal(metadata.link.length, 2);
assert.deepEqual(metadata.link[1], {
rel: 'alternate',
href: 'https://example.com/foo',
type: '',
});
});
// There should only be one link for the document's location.
// The invalid PDF link should be ignored.
assert.equal(metadata.link.length, 1);
it('should ignore favicons with invalid URIs', () => {
// Use `rel="icon"`, the same relation the positive favicon test relies on,
// so that the element is actually considered as a favicon candidate and the
// invalid href is what causes it to be ignored.
// NOTE(review): the previous fixture used `rel="favicon"`, which presumably
// is never treated as a favicon at all — confirm against the implementation.
tempDocumentHead.innerHTML = `
<link rel="icon" href="http://a:b:c">
`;
const metadata = testDocument.getDocumentMetadata();
assert.isUndefined(metadata.favicon);
});
});
});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment