Commit 9a277f8a authored by Eduardo Sanz García, committed by Eduardo

Convert `<meta name="...[to lowercase]">`

Previously the following attributes in the meta tag:

```
<meta name="DC_IDENTIFIER" .../>
<meta name="EPRINTS.TITLE" .../>
<meta property="OG:URL" .../>
<meta name="CITATION_DOI" .../>
<meta name="PRISM_TITLE" .../>
<meta name="TWITTER:SITE" .../>
```

produced a metadata dictionary like this:

```
{
  dc: {IDENTIFIER: [...] },
  eprints: {TITLE: [...]},
  facebook: {URL: [...]},
  highwire: {DOI: [...]},
  twitter: {SITE: [...]}
}
```

Now, with the modifications introduced in this PR, it produces a
metadata dictionary where all the keys are lowercase:
```
{
  dc: {identifier: [...] },
  eprints: {title: [...]},
  facebook: {url: [...]},
  highwire: {doi: [...]},
  twitter: {site: [...]}
}
```

I have confirmed that `h` expects the lowercase version of the keys
`identifier` and `doi`:

https://github.com/hypothesis/h/blob/73dacc722367c44903ff3c6f610712ba8f43934d/h/util/document_claims.py#L222

https://github.com/hypothesis/h/blob/73dacc722367c44903ff3c6f610712ba8f43934d/h/util/document_claims.py#L247

Hence, if these keys are not provided in lowercase, they will be
ignored and document equivalence will not work.
parent f0946ae6
...@@ -125,11 +125,11 @@ export class HTMLMetadata { ...@@ -125,11 +125,11 @@ export class HTMLMetadata {
if (name && content) { if (name && content) {
const match = name.match(RegExp(`^${prefix}(.+)$`, 'i')); const match = name.match(RegExp(`^${prefix}(.+)$`, 'i'));
if (match) { if (match) {
const n = match[1]; const key = match[1].toLowerCase();
if (tags[n]) { if (tags[key]) {
tags[n].push(content); tags[key].push(content);
} else { } else {
tags[n] = [content]; tags[key] = [content];
} }
} }
} }
......
...@@ -157,7 +157,8 @@ describe('HTMLMetadata', () => { ...@@ -157,7 +157,8 @@ describe('HTMLMetadata', () => {
assert.deepEqual(metadata.highwire.title, ['Foo']); assert.deepEqual(metadata.highwire.title, ['Foo']);
}); });
it('should ignore meta tags without content', () => { context('in "meta" elements', () => {
it('should ignore if "content" attribute is not present', () => {
tempDocumentHead.innerHTML = ` tempDocumentHead.innerHTML = `
<meta name="citation_doi" content> <meta name="citation_doi" content>
<meta name="DC.type"> <meta name="DC.type">
...@@ -167,6 +168,17 @@ describe('HTMLMetadata', () => { ...@@ -167,6 +168,17 @@ describe('HTMLMetadata', () => {
assert.isEmpty(metadata.dc); assert.isEmpty(metadata.dc);
}); });
it('should lower-case the value of the specified attribute', () => {
tempDocumentHead.innerHTML = `
<meta name="CITATION_DOI" content="10.1175/JCLI-D-11-00015.1">
<meta property="OG:URL" content="https://fb.com">
`;
const metadata = testDocument.getDocumentMetadata();
assert.deepEqual(metadata.highwire.doi, ['10.1175/JCLI-D-11-00015.1']);
assert.deepEqual(metadata.facebook.url, ['https://fb.com']);
});
});
it('should return Dublin Core metadata', () => { it('should return Dublin Core metadata', () => {
assert.ok(metadata.dc); assert.ok(metadata.dc);
assert.deepEqual(metadata.dc.identifier, [ assert.deepEqual(metadata.dc.identifier, [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment