Commit 505d13e9 authored by Robert Knight

Get VitalSource chapter titles via `getTOC` API

Work around an issue where the `getCurrentPage` API can return incorrect
chapter titles [1] by using titles from the `getTOC` API instead.

[1] https://github.com/hypothesis/client/issues/4986
parent dc5075bf
......@@ -246,6 +246,28 @@ describe('annotator/integrations/vitalsource', () => {
throw new Error('Unknown book');
}
}
async getTOC() {
if (this._format === 'epub') {
const toc = [
{
cfi: '/1[foo]!/4/10',
title: 'Chapter one (from TOC)',
},
{
cfi: '/2[bar]!/2/8',
title: 'Chapter two (from TOC)',
},
{
cfi: '/4[bar]!/2/8',
title: 'Chapter three (from TOC)',
},
];
return { ok: true, data: toc };
} else {
return { ok: false };
}
}
}
describe('VitalSourceContentIntegration', () => {
......@@ -356,7 +378,7 @@ describe('annotator/integrations/vitalsource', () => {
type: 'EPUBContentSelector',
url: '/pages/chapter_02.xhtml',
cfi: '/2',
title: 'Chapter two',
title: 'Chapter two (from TOC)',
});
const pageSelector = selectors.find(s => s.type === 'PageSelector');
......
import { TinyEmitter } from 'tiny-emitter';
import { documentCFI } from '../../shared/cfi';
import { ListenerCollection } from '../../shared/listener-collection';
import { FeatureFlags } from '../features';
import { onDocumentReady } from '../frame-observer';
......@@ -93,10 +94,75 @@ type PageInfo = {
* segment. For PDF-based books, a chapter will often have multiple pages
* and all these pages will have the same title. In EPUBs, each content
* document will typically have a different title.
*
* WARNING: This information may be incorrect in some books. See
* https://github.com/hypothesis/client/issues/4986. Data from
* {@link MosaicBookElement.getTOC} can be used instead.
*/
chapterTitle: string;
};
/**
 * A single item in a book's table of contents.
 *
 * The data originates from the EPUB's Navigation Document [1] or, for older
 * books, its Navigation Control File ("toc.ncx") [2].
 *
 * [1] https://www.w3.org/publishing/epub3/epub-packages.html#sec-package-nav
 * [2] https://www.niso.org/publications/ansiniso-z3986-2005-r2012-specifications-digital-talking-book
 *
 * Example entry:
 *
 * ```
 * {
 *   "title": "acknowledgments",
 *   "path": "/OEBPS/Text/fm.htm#heading_id_4",
 *   "level": 2,
 *   "cfi": "/6/14[;vnd.vst.idref=fm.htm]!/4/20[heading_id_4]",
 *   "page": "9"
 * }
 * ```
 */
type TableOfContentsEntry = {
  /** Title of the segment of the book that this entry refers to. */
  title: string;

  /**
   * Path of the EPUB Content Document that contains the location this entry
   * points to.
   */
  path: string;

  /** Depth of this entry within the table of contents tree, counted from 1. */
  level: number;

  /**
   * Canonical Fragment Identifier for the location in the book that this
   * entry points to.
   */
  cfi: string;

  /** Page label associated with this location in the book. */
  page: string;
};
/**
 * Envelope describing the outcome of a data fetch: either the fetched value
 * or an indication that the fetch failed.
 *
 * Several {@link MosaicBookElement} methods resolve to this container type.
 */
type DataResponse<T> = {
  /** Whether the fetch succeeded. */
  ok: boolean;

  /** The fetched value. Absent when `ok` is false. */
  data?: T;

  /** HTTP status code of the fetch. May be `undefined` when `ok` is false. */
  status: number | undefined;
};
/**
* `<mosaic-book>` custom element in the VitalSource container frame.
*
......@@ -114,6 +180,9 @@ type MosaicBookElement = HTMLElement & {
*/
getCurrentPage(): Promise<PageInfo>;
/** Retrieve the book's table of contents. */
getTOC(): Promise<DataResponse<TableOfContentsEntry[]>>;
/**
* Navigate the book to the page or content document whose CFI matches `cfi`.
*/
......@@ -436,7 +505,23 @@ export class VitalSourceContentIntegration
return selectors;
}
const pageInfo = await this._bookElement.getCurrentPage();
const [pageInfo, toc] = await Promise.all([
this._bookElement.getCurrentPage(),
this._bookElement.getTOC(),
]);
let title = pageInfo.chapterTitle;
// Find the first table of contents entry that corresponds to the current page,
// and use its title instead of `pageInfo.chapterTitle`. This works around
// https://github.com/hypothesis/client/issues/4986.
const pageCFI = documentCFI(pageInfo.cfi);
const tocEntry = toc.data?.find(
entry => documentCFI(entry.cfi) === pageCFI
);
if (tocEntry) {
title = tocEntry.title;
}
// We generate an "EPUBContentSelector" with a CFI for all VS books,
// although for PDF-based books the CFI is a string generated from the
......@@ -446,7 +531,7 @@ export class VitalSourceContentIntegration
type: 'EPUBContentSelector',
cfi: pageInfo.cfi,
url: pageInfo.absoluteURL,
title: pageInfo.chapterTitle,
title,
},
];
......
......@@ -114,3 +114,23 @@ export function compareCFIs(a: string, b: string): number {
};
return compareArrays(parseCFI(a), parseCFI(b));
}
/**
 * Extract the content-document portion of a CFI.
 *
 * Assertions are removed from `cfi`, then everything from the first step
 * indirection [1] ("!") onwards is dropped. If the CFI contains no step
 * indirection, the whole assertion-free CFI is returned.
 *
 * A typical CFI has three parts: a path within the table of contents that
 * identifies a content document, a step indirection, and the path of an
 * element inside that document. Only the first part is kept.
 *
 * [1] https://idpf.org/epub/linking/cfi/#sec-path-indirection
 *
 * @example
 * documentCFI('/6/152[;vnd.vst.idref=ch13_01]!/4/2[ch13_sec_1]') // Returns "/6/152"
 */
export function documentCFI(cfi: string): string {
  const withoutAssertions = stripCFIAssertions(cfi);
  const indirectionAt = withoutAssertions.indexOf('!');

  // No indirection: the CFI is already just a document path.
  if (indirectionAt < 0) {
    return withoutAssertions;
  }
  return withoutAssertions.substring(0, indirectionAt);
}
import { compareCFIs, stripCFIAssertions } from '../cfi';
import { compareCFIs, documentCFI, stripCFIAssertions } from '../cfi';
describe('sidebar/util/cfi', () => {
describe('stripCFIAssertions', () => {
......@@ -102,4 +102,21 @@ describe('sidebar/util/cfi', () => {
});
});
});
describe('documentCFI', () => {
  it('returns part of CFI before first step indirection', () => {
    // [input CFI, expected document path]
    const cases = [
      // Typical CFI with one step indirection.
      ['/2/4/8!/10/12', '/2/4/8'],
      // Rarer case of CFI with multiple step indirections.
      ['/2/4/8!/10/12!/2/4', '/2/4/8'],
    ];
    for (const [input, expected] of cases) {
      assert.equal(documentCFI(input), expected);
    }
  });

  it('strips assertions', () => {
    // Both the document path and the element path carry bracketed assertions.
    const cfiWithAssertions = '/6/152[;vnd.vst.idref=ch13_01]!/4/2[ch13_sec_1]';
    assert.equal(documentCFI(cfiWithAssertions), '/6/152');
  });
});
});
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment