Rearrange tests in `html-metadata.js`

I have created a few additional `describe` sub-sections and re-grouped tests by the resulting metadata property. This allows for more atomic testing.

Rearrange tests in `html-metadata.js`
I have created a few additional `describe` sub-sections and re-grouped tests by the resulting metadata property. This allows for more atomic testing.
295a8886 · Eduardo Sanz García · Robert Knight · 907f0011 · 295a8886
Commit 295a8886 authored Feb 11, 2022 by Eduardo Sanz García Committed by Robert Knight Mar 01, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 229 additions and 187 deletions

html-metadata-test.js src/annotator/integrations/test/html-metadata-test.js +229 -187

No files found.
--- a/src/annotator/integrations/test/html-metadata-test.js
+++ b/src/annotator/integrations/test/html-metadata-test.js
@@ -29,231 +29,273 @@ describe('HTMLMetadata', () => {
  });

  describe('#getDocumentMetadata', () => {
-    let metadata = null;
-
-    beforeEach(() => {
-      // Add some metadata to the page
+    it('should ignore `<meta>` elements if "content" attribute are not present', () => {
      tempDocumentHead.innerHTML = `
-        <link rel="alternate" href="foo.pdf" type="application/pdf"></link>
-        <link rel="alternate" href="foo.doc" type="application/msword"></link>
-        <link rel="bookmark" href="http://example.com/bookmark"></link>
-        <link rel="shortlink" href="http://example.com/bookmark/short"></link>
-        <link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>
-        <meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">
-        <meta name="citation_title" content="Foo">
-        <meta name="citation_pdf_url" content="foo.pdf">
-        <meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
-        <meta name="dc:identifier" content="foobar-abcxyz">
-        <meta name="dc.relation.ispartof" content="isbn:123456789">
-        <meta name="DC.type" content="Article">
-        <meta property="og:url" content="http://example.com">
-        <meta name="twitter:site" content="@okfn">
-        <link rel="icon" href="http://example.com/images/icon.ico"></link>
-        <meta name="eprints.title" content="Computer Lib / Dream Machines">
-        <meta name="prism.title" content="Literary Machines">
-        <link rel="alternate" href="feed" type="application/rss+xml"></link>
-        <link rel="canonical" href="http://example.com/canonical"></link>
+        <meta name="citation_doi" content>
+        <meta name="dc.type">
      `;
-
-      metadata = testDocument.getDocumentMetadata();
+      const metadata = testDocument.getDocumentMetadata();
+      assert.isEmpty(metadata.highwire);
+      assert.isEmpty(metadata.dc);
    });

-    it('should return title', () => {
-      // Populate all supported title sources.
-      tempDocument.title = 'Test document title';
-      tempDocumentHead.innerHTML = `
-  <meta name="eprints.title" content="Eprints title">
-  <meta name="prism.title" content="PRISM title">
-  <meta name="dc.title" content="Dublin Core title">
-  <meta name="citation_title" content="Highwire title">
-  <meta property="og:title" content="Facebook title">
-  <meta name="twitter:title" content="Twitter title">
-  `;
-
-      // Title values, in order of source priority.
-      const sources = [
-        {
-          metaName: 'citation_title',
-          value: 'Highwire title',
-        },
-        {
-          metaName: 'eprints.title',
-          value: 'Eprints title',
-        },
-        {
-          metaName: 'prism.title',
-          value: 'PRISM title',
-        },
-        {
-          metaAttr: 'property',
-          metaName: 'og:title',
-          value: 'Facebook title',
-        },
-        {
-          metaName: 'twitter:title',
-          value: 'Twitter title',
-        },
-        {
-          metaName: 'dc.title',
-          value: 'Dublin Core title',
-        },
-        {
-          value: 'Test document title',
-        },
-      ];
+    describe('metadata.title', () => {
+      it('should return title', () => {
+        // Populate all supported title sources.
+        tempDocument.title = 'Test document title';
+        tempDocumentHead.innerHTML = `
+          <meta name="eprints.title" content="Eprints title">
+          <meta name="prism.title" content="PRISM title">
+          <meta name="dc.title" content="Dublin Core title">
+          <meta name="citation_title" content="Highwire title">
+          <meta property="og:title" content="Facebook title">
+          <meta name="twitter:title" content="Twitter title">
+        `;

-      for (let source of sources) {
-        const metadata = testDocument.getDocumentMetadata();
-        assert.equal(metadata.title, source.value);
-
-        // Remove this title source. The next iteration should return the next
-        // title value in the priority order.
-        if (source.metaName) {
-          const attr = source.metaAttr ?? 'name';
-          tempDocumentHead
-            .querySelector(`meta[${attr}="${source.metaName}"]`)
-            .remove();
+        // Title values, in order of source priority.
+        const sources = [
+          {
+            metaName: 'citation_title',
+            value: 'Highwire title',
+          },
+          {
+            metaName: 'eprints.title',
+            value: 'Eprints title',
+          },
+          {
+            metaName: 'prism.title',
+            value: 'PRISM title',
+          },
+          {
+            metaAttr: 'property',
+            metaName: 'og:title',
+            value: 'Facebook title',
+          },
+          {
+            metaName: 'twitter:title',
+            value: 'Twitter title',
+          },
+          {
+            metaName: 'dc.title',
+            value: 'Dublin Core title',
+          },
+          {
+            value: 'Test document title',
+          },
+        ];
+
+        for (let source of sources) {
+          const metadata = testDocument.getDocumentMetadata();
+          assert.equal(metadata.title, source.value);
+
+          // Remove this title source. The next iteration should return the next
+          // title value in the priority order.
+          if (source.metaName) {
+            const attr = source.metaAttr ?? 'name';
+            tempDocumentHead
+              .querySelector(`meta[${attr}="${source.metaName}"]`)
+              .remove();
+          }
        }
-      }
-    });
-
-    it('should return links with absolute hrefs and types', () => {
-      assert.ok(metadata.link);
-      assert.equal(metadata.link.length, 10);
-      assert.equal(metadata.link[1].rel, 'alternate');
-      assert.match(metadata.link[1].href, /^.+foo\.pdf$/);
-      assert.equal(metadata.link[1].type, 'application/pdf');
-      assert.equal(metadata.link[2].rel, 'alternate');
-      assert.match(metadata.link[2].href, /^.+foo\.doc$/);
-      assert.equal(metadata.link[2].type, 'application/msword');
-      assert.equal(metadata.link[3].rel, 'bookmark');
-      assert.equal(metadata.link[3].href, 'http://example.com/bookmark');
-      assert.equal(metadata.link[4].rel, 'shortlink');
-      assert.equal(metadata.link[4].href, 'http://example.com/bookmark/short');
-      assert.equal(metadata.link[5].rel, 'canonical');
-      assert.equal(metadata.link[5].href, 'http://example.com/canonical');
-      assert.equal(metadata.link[6].href, 'doi:10.1175/JCLI-D-11-00015.1');
-      assert.match(metadata.link[7].href, /.+foo\.pdf$/);
-      assert.equal(metadata.link[7].type, 'application/pdf');
-      assert.equal(metadata.link[8].href, 'doi:10.1175/JCLI-D-11-00015.1');
-
-      // Link derived from dc resource identifiers in the form of urn:x-dc:<container>/<identifier>
-      // Where <container> is the percent-encoded value of the last dc.relation.ispartof meta element
-      // and <identifier> is the percent-encoded value of the last dc.identifier meta element.
-      assert.equal(
-        metadata.link[9].href,
-        'urn:x-dc:isbn%3A123456789/foobar-abcxyz'
-      );
+      });
    });

-    it('should ignore atom and RSS feeds and alternate languages', () => {
-      assert.equal(metadata.link.length, 10);
-    });
+    describe('metadata.highwire', () => {
+      it('should return Highwire metadata', () => {
+        tempDocumentHead.innerHTML = `
+          <meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">
+          <meta name="citation_title" content="Foo">
+          <meta name="citation_pdf_url" content="foo.pdf">
+        `;
+        const metadata = testDocument.getDocumentMetadata();

-    it('should return Highwire metadata', () => {
-      assert.ok(metadata.highwire);
-      assert.deepEqual(metadata.highwire.pdf_url, ['foo.pdf']);
-      assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
-      assert.deepEqual(metadata.highwire.title, ['Foo']);
+        assert.ok(metadata.highwire);
+        assert.deepEqual(metadata.highwire.pdf_url, ['foo.pdf']);
+        assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
+        assert.deepEqual(metadata.highwire.title, ['Foo']);
+      });
    });

-    context('in "meta" elements', () => {
-      it('should ignore if "content" attribute is not present', () => {
+    describe('metadata.dc', () => {
+      it('should return Dublin Core metadata', () => {
        tempDocumentHead.innerHTML = `
-          <meta name="citation_doi" content>
-          <meta name="DC.type">
+          <meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
+          <meta name="dc.identifier" content="foobar-abcxyz">
+          <meta name="dc.relation.ispartof" content="isbn:123456789">
+          <meta name="dc.type" content="Article">
        `;
        const metadata = testDocument.getDocumentMetadata();
-        assert.isEmpty(metadata.highwire);
-        assert.isEmpty(metadata.dc);
+
+        assert.ok(metadata.dc);
+        assert.deepEqual(metadata.dc.identifier, [
+          'doi:10.1175/JCLI-D-11-00015.1',
+          'foobar-abcxyz',
+        ]);
+        assert.deepEqual(metadata.dc['relation.ispartof'], ['isbn:123456789']);
+        assert.deepEqual(metadata.dc.type, ['Article']);
      });

-      it('should lower-case the value of the specified attribute', () => {
+      it('should work with any delimiter character', () => {
        tempDocumentHead.innerHTML = `
-          <meta name="CITATION_DOI" content="10.1175/JCLI-D-11-00015.1">
-          <meta property="OG:URL" content="https://fb.com">
+          <meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
+          <meta name="dc:identifier" content="doi:10.1175/JCLI-D-11-00015.2">
+          <meta name="dc_identifier" content="doi:10.1175/JCLI-D-11-00015.3">
        `;
        const metadata = testDocument.getDocumentMetadata();
-        assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
-        assert.deepEqual(metadata.facebook.url, ['https://fb.com']);
+
+        assert.ok(metadata.dc);
+        assert.deepEqual(metadata.dc.identifier, [
+          'doi:10.1175/JCLI-D-11-00015.1',
+          'doi:10.1175/JCLI-D-11-00015.2',
+          'doi:10.1175/JCLI-D-11-00015.3',
+        ]);
      });
    });

-    it('should return Dublin Core metadata', () => {
-      assert.ok(metadata.dc);
-      assert.deepEqual(metadata.dc.identifier, [
-        'doi:10.1175/JCLI-D-11-00015.1',
-        'foobar-abcxyz',
-      ]);
-      assert.deepEqual(metadata.dc['relation.ispartof'], ['isbn:123456789']);
-      assert.deepEqual(metadata.dc.type, ['Article']);
-    });
+    describe('metadata.facebook', () => {
+      it('should return Facebook metadata', () => {
+        tempDocumentHead.innerHTML =
+          '<meta property="og:url" content="http://example.com">';
+        const metadata = testDocument.getDocumentMetadata();

-    it('should return Facebook metadata', () => {
-      assert.ok(metadata.facebook);
-      assert.deepEqual(metadata.facebook.url, ['http://example.com']);
+        assert.ok(metadata.facebook);
+        assert.deepEqual(metadata.facebook.url, ['http://example.com']);
+      });
    });

-    it('should return eprints metadata', () => {
-      assert.ok(metadata.eprints);
-      assert.deepEqual(metadata.eprints.title, [
-        'Computer Lib / Dream Machines',
-      ]);
-    });
+    describe('metadata.twitter', () => {
+      it('should return Twitter card metadata', () => {
+        tempDocumentHead.innerHTML =
+          '<meta name="twitter:site" content="@okfn">';
+        const metadata = testDocument.getDocumentMetadata();

-    it('should return PRISM metadata', () => {
-      assert.ok(metadata.prism);
-      assert.deepEqual(metadata.prism.title, ['Literary Machines']);
+        assert.ok(metadata.twitter);
+        assert.deepEqual(metadata.twitter.site, ['@okfn']);
+      });
    });

-    it('should return Twitter card metadata', () => {
-      assert.ok(metadata.twitter);
-      assert.deepEqual(metadata.twitter.site, ['@okfn']);
+    it('should lower-case the value of the specified attribute', () => {
+      tempDocumentHead.innerHTML = `
+        <meta name="CITATION_DOI" content="10.1175/JCLI-D-11-00015.1">
+        <meta property="OG:URL" content="https://fb.com">
+      `;
+      const metadata = testDocument.getDocumentMetadata();
+      assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
+      assert.deepEqual(metadata.facebook.url, ['https://fb.com']);
    });

-    it('should return favicon URL', () => {
-      assert.equal(metadata.favicon, 'http://example.com/images/icon.ico');
-    });
+    describe('metadata.link', () => {
+      let metadata = null;

-    it('should set `documentFingerprint` to the dc resource identifiers URN href', () => {
-      assert.equal(metadata.documentFingerprint, metadata.link[9].href);
-    });
+      beforeEach(() => {
+        tempDocumentHead.innerHTML = `
+          <link rel="alternate" href="foo.pdf" type="application/pdf"></link>
+          <link rel="alternate" href="foo.doc" type="application/msword"></link> 
+          <link rel="bookmark" href="http://example.com/bookmark"></link>
+          <link rel="shortlink" href="http://example.com/bookmark/short"></link>
+          <link rel="alternate" href="es/foo.html" hreflang="es" type="text/html"></link>
+          <link rel="icon" href="http://example.com/images/icon.ico"></link>
+          <link rel="canonical" href="http://example.com/canonical"></link>
+
+          <meta name="citation_doi" content="10.1175/JCLI-D-11-00015.1">
+          <meta name="citation_pdf_url" content="foo.pdf">
+          <meta name="dc.identifier" content="doi:10.1175/JCLI-D-11-00015.1">
+          <meta name="dc.identifier" content="foobar-abcxyz">
+          <meta name="dc.relation.ispartof" content="isbn:123456789">
+        `;

-    it('should ignore `<link>` tags with invalid URIs', () => {
-      tempDocumentHead.innerHTML = `
-        <link rel="alternate" href="https://example.com/foo">
-        <link rel="alternate" href="http://a:b:c">
-      `;
+        metadata = testDocument.getDocumentMetadata();
+      });

-      const metadata = testDocument.getDocumentMetadata();
+      it('should return links with absolute hrefs and types', () => {
+        assert.ok(metadata.link);
+        assert.equal(metadata.link.length, 10);
+        assert.equal(metadata.link[1].rel, 'alternate');
+        assert.match(metadata.link[1].href, /^.+foo\.pdf$/);
+        assert.equal(metadata.link[1].type, 'application/pdf');
+        assert.equal(metadata.link[2].rel, 'alternate');
+        assert.match(metadata.link[2].href, /^.+foo\.doc$/);
+        assert.equal(metadata.link[2].type, 'application/msword');
+        assert.equal(metadata.link[3].rel, 'bookmark');
+        assert.equal(metadata.link[3].href, 'http://example.com/bookmark');
+        assert.equal(metadata.link[4].rel, 'shortlink');
+        assert.equal(
+          metadata.link[4].href,
+          'http://example.com/bookmark/short'
+        );
+        assert.equal(metadata.link[5].rel, 'canonical');
+        assert.equal(metadata.link[5].href, 'http://example.com/canonical');
+        assert.equal(metadata.link[6].href, 'doi:10.1175/JCLI-D-11-00015.1');
+        assert.match(metadata.link[7].href, /.+foo\.pdf$/);
+        assert.equal(metadata.link[7].type, 'application/pdf');
+        assert.equal(metadata.link[8].href, 'doi:10.1175/JCLI-D-11-00015.1');
+
+        // Link derived from dc resource identifiers in the form of urn:x-dc:<container>/<identifier>
+        // Where <container> is the percent-encoded value of the last dc.relation.ispartof meta element
+        // and <identifier> is the percent-encoded value of the last dc.identifier meta element.
+        assert.equal(
+          metadata.link[9].href,
+          'urn:x-dc:isbn%3A123456789/foobar-abcxyz'
+        );
+      });

-      // There should be one link with the document location and one for the
-      // valid `<link>` tag.
-      assert.deepEqual(metadata.link.length, 2);
-      assert.deepEqual(metadata.link[1], {
-        rel: 'alternate',
-        href: 'https://example.com/foo',
-        type: '',
+      it('should return favicon URL', () => {
+        assert.equal(metadata.favicon, 'http://example.com/images/icon.ico');
      });
-    });

-    it('should ignore favicons with invalid URIs', () => {
-      tempDocumentHead.innerHTML = `
-        <link rel="favicon" href="http://a:b:c">
-      `;
-      const metadata = testDocument.getDocumentMetadata();
-      assert.isUndefined(metadata.favicon);
-    });
+      it('should set `documentFingerprint` to the dc resource identifiers URN href', () => {
+        assert.equal(metadata.documentFingerprint, metadata.link[9].href);
+      });

-    it('should ignore `<meta>` PDF links with invalid URIs', () => {
-      tempDocumentHead.innerHTML = `
-        <meta name="citation_pdf_url" content="http://a:b:c">
-      `;
-      const metadata = testDocument.getDocumentMetadata();
+      it('should ignore atom and RSS feeds and alternate languages', () => {
+        tempDocumentHead.innerHTML = '';
+        const links0 = testDocument.getDocumentMetadata().link;
+
+        tempDocumentHead.innerHTML = `
+          <link rel="alternate" href="feed" type="application/rss+xml"></link>
+          <link rel="alternate" href="feed" type="application/atom+xml"></link>
+        `;
+        const links1 = testDocument.getDocumentMetadata().link;
+
+        assert.equal(links1.length - links0.length, 0);
+      });
+
+      it('should ignore `pdf_url` links with invalid URIs', () => {
+        tempDocumentHead.innerHTML = `
+          <meta name="citation_pdf_url" content="http://a:b:c">
+        `;
+        const metadata = testDocument.getDocumentMetadata();
+
+        // There should only be one link for the document's location.
+        // The invalid PDF link should be ignored.
+        assert.equal(metadata.link.length, 1);
+      });
+
+      it('should ignore `<link>` tags with invalid URIs', () => {
+        tempDocumentHead.innerHTML = `
+          <link rel="alternate" href="https://example.com/foo">
+          <link rel="alternate" href="http://a:b:c">
+        `;
+        const metadata = testDocument.getDocumentMetadata();
+
+        // There should be one link with the document location and one for the
+        // valid `<link>` tag.
+        assert.deepEqual(metadata.link.length, 2);
+        assert.deepEqual(metadata.link[1], {
+          rel: 'alternate',
+          href: 'https://example.com/foo',
+          type: '',
+        });
+      });

-      // There should only be one link for the document's location.
-      // The invalid PDF link should be ignored.
-      assert.equal(metadata.link.length, 1);
+      it('should ignore favicons with invalid URIs', () => {
+        tempDocumentHead.innerHTML = `
+          <link rel="favicon" href="http://a:b:c">
+        `;
+        const metadata = testDocument.getDocumentMetadata();
+
+        assert.isUndefined(metadata.favicon);
+      });
    });
  });