Commit 9a277f8a authored by Eduardo Sanz García, committed by Eduardo

Convert `<meta name="...[to lowercase]">`

Previously, meta tags with the following attributes:

```
<meta name="DC_IDENTIFIER" .../>
<meta name="EPRINTS.TITLE_" .../>
<meta property="OG:URL" .../>
<meta name="CITATION_DOI" .../>
<meta name="PRISM_TITLE" .../>
<meta name="TWITTER:SITE" .../>
```

produced a metadata dictionary like this:

```
{
  dc: {IDENTIFIER: [...] },
  eprints: {TITLE: [...]},
  facebook: {URL: [...]},
  highwire: {DOI: [...]},
  twitter: {SITE: [...]}
}
```

Now, with the modifications introduced in this PR, it produces a
metadata dictionary where all the keys are lowercase:
```
{
  dc: {identifier: [...] },
  eprints: {title: [...]},
  facebook: {url: [...]},
  highwire: {doi: [...]},
  twitter: {site: [...]}
}
```

I have confirmed that `h` expects the lowercase versions of the keys
`identifier` and `doi`:

https://github.com/hypothesis/h/blob/73dacc722367c44903ff3c6f610712ba8f43934d/h/util/document_claims.py#L222

https://github.com/hypothesis/h/blob/73dacc722367c44903ff3c6f610712ba8f43934d/h/util/document_claims.py#L247

Hence, if these keys are not provided in lowercase, they will be
ignored and document equivalence will not work.
parent f0946ae6
......@@ -125,11 +125,11 @@ export class HTMLMetadata {
if (name && content) {
const match = name.match(RegExp(`^${prefix}(.+)$`, 'i'));
if (match) {
const n = match[1];
if (tags[n]) {
tags[n].push(content);
const key = match[1].toLowerCase();
if (tags[key]) {
tags[key].push(content);
} else {
tags[n] = [content];
tags[key] = [content];
}
}
}
......
......@@ -157,7 +157,8 @@ describe('HTMLMetadata', () => {
assert.deepEqual(metadata.highwire.title, ['Foo']);
});
it('should ignore meta tags without content', () => {
context('in "meta" elements', () => {
it('should ignore if "content" attribute is not present', () => {
tempDocumentHead.innerHTML = `
<meta name="citation_doi" content>
<meta name="DC.type">
......@@ -167,6 +168,17 @@ describe('HTMLMetadata', () => {
assert.isEmpty(metadata.dc);
});
it('should lower-case the value of the specified attribute', () => {
tempDocumentHead.innerHTML = `
<meta name="CITATION_DOI" content="10.1175/JCLI-D-11-00015.1">
<meta property="OG:URL" content="https://fb.com">
`;
const metadata = testDocument.getDocumentMetadata();
assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
assert.deepEqual(metadata.facebook.url, ['https://fb.com']);
});
});
it('should return Dublin Core metadata', () => {
assert.ok(metadata.dc);
assert.deepEqual(metadata.dc.identifier, [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment