Commit 4d05b426 authored by Robert Knight

Strip unknown query params when generating Internet Archive embed URLs

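Internet Archive details pages may include query string params in
addition to "start" and "end" params in the path. In that case invalid
URLs were being generated, because the path params were rewritten into
a query string in front of the existing one (e.g.
"PATH/start/360/end/420.3?q=ignoreme" became
"PATH?start=360&end=420.3?q=ignoreme").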

Use a more robust approach to generating embed URLs and add some missing
test cases. Any unrecognized query params are ignored when generating
the embed URLs.
parent d2eaac97
'use strict';
var queryString = require('query-string');
/**
* Return an HTML5 audio player with the given src URL.
*/
......@@ -125,20 +127,44 @@ var embedGenerators = [
return null;
}
var groups = /(embed|details)\/(.+)$/.exec(link.href);
if (!groups) {
// Extract the unique slug from the path.
var slugMatch = /^\/(embed|details)\/(.+)/.exec(link.pathname);
if (!slugMatch) {
return null;
}
var path = groups[2]; // group 2 is the path
// Extract start and end times, which may appear either as query string
// params or path params.
var slug = slugMatch[2];
var linkParams = queryString.parse(link.search);
var startTime = linkParams.start;
var endTime = linkParams.end;
if (!startTime) {
var startPathParam = slug.match(/\/start\/([^\/]+)/);
if (startPathParam) {
startTime = startPathParam[1];
slug = slug.replace(startPathParam[0], '');
}
}
// Convert `/details` paths to `/embed` paths. TV News Archive links put
// the start & end times in the paths whereas the embed links always use
// "start" and "end" query params.
path = path.replace('/start/', '?start=');
path = path.replace('/end/', '&end=');
if (!endTime) {
var endPathParam = slug.match(/\/end\/([^\/]+)/);
if (endPathParam) {
endTime = endPathParam[1];
slug = slug.replace(endPathParam[0], '');
}
}
return iframe('https://archive.org/embed/' + path);
// Generate embed URL.
var iframeUrl = new URL(`https://archive.org/embed/${slug}`);
if (startTime) {
iframeUrl.searchParams.append('start', startTime);
}
if (endTime) {
iframeUrl.searchParams.append('end', endTime);
}
return iframe(iframeUrl.href);
},
......
......@@ -93,10 +93,13 @@ describe('media-embedder', function () {
it('replaces internet archive links with iframes', function () {
var urls = [
// Video details page.
'https://archive.org/details/PATH',
'https://archive.org/details/PATH?start=360&end=420.3',
'https://archive.org/details/PATH?start=360&end=420.3&unknownparam=1',
// TV News Archive video details page.
'https://archive.org/details/PATH/start/360/end/420.3',
'https://archive.org/details/PATH/start/360/end/420.3?q=ignoreme',
// Embed link generated by the "Share" links on the details pages.
'https://archive.org/embed/PATH?start=360&end=420.3',
......@@ -108,11 +111,10 @@ describe('media-embedder', function () {
assert.equal(element.children[0].tagName, 'IFRAME');
// For both url flavors the expected result is the same.
// details flavor: we match, transform, and iframe
// embed flavor: we just match and iframe
var actual = element.children[0].src;
var expected = 'https://archive.org/embed/PATH?start=360&end=420.3';
var expected = url.indexOf('start') !== -1 ?
'https://archive.org/embed/PATH?start=360&end=420.3' :
'https://archive.org/embed/PATH';
assert.equal(actual, expected);
});
});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment