Commit 8d92de32 authored by Robert Knight

Add functions for comparing CFIs and stripping assertions

When sorting EPUB annotations by document location, we will need to consider
both the chapter/page and the text position within the chapter/page. Add
utilities for comparing CFIs that will be useful for this purpose.

The full sorting rules are specified by
https://idpf.org/epub/linking/cfi/#sec-sorting. These utilities currently only
implement the steps needed to compare simple CFIs which identify a location in
the book's spine (eg. "/2/4/6"), rather than a "full" CFI which also contains a
step indirection ("!") followed by the location of the selected content in the
chapter.
parent 2d25a77e
/**
 * Compare two arrays.
 *
 * Arrays are compared as a sequence of values in priority order: the first
 * index at which the elements differ decides the result. If the two arrays are
 * of different length but their common indexes have the same values, the
 * shorter array is considered less than the longer one.
 *
 * This logic is similar to how eg. tuples are compared in Python.
 *
 * Elements of the same type are compared with `<` / `>` (numerically for
 * numbers, by code unit order for strings). If the elements at an index have
 * different types, the number is ordered before the string. Relying on
 * `<` / `>` alone for such pairs would coerce a non-numeric string to NaN,
 * make both comparisons false, and cause differing elements to be silently
 * treated as equal.
 *
 * @param a - The first array
 * @param b - The second array
 * @return A value that is negative, zero or positive depending on whether `a`
 *   is less-than, equal-to or greater-than `b`
 */
function compareArrays(
  a: Array<number | string>,
  b: Array<number | string>
): number {
  // Only the indexes present in both arrays can be compared element-wise.
  const commonLength = Math.min(a.length, b.length);

  for (let i = 0; i < commonLength; i++) {
    const left = a[i];
    const right = b[i];

    if (left === right) {
      continue;
    }

    // Mixed number/string pair: pick a fixed order instead of letting the
    // relational operators coerce the string to a number.
    if (typeof left !== typeof right) {
      return typeof left === 'number' ? -1 : 1;
    }

    if (left < right) {
      return -1;
    }
    if (left > right) {
      return 1;
    }
  }

  // All common indexes are equal, so the shorter array sorts first.
  return a.length - b.length;
}
/**
 * Remove every assertion from a Canonical Fragment Identifier.
 *
 * An assertion is a `[...]` enclosed section. Assertions act as checks on the
 * validity of the step numbers and do not affect the sort order of a CFI.
 *
 * A circumflex (`^`) escapes the character that follows it, so `^[` and `^]`
 * neither open nor close an assertion. When the input contains at least one
 * `[`, escape circumflexes are not copied to the output; an escaped character
 * that lies outside an assertion is copied without its circumflex. Inputs with
 * no `[` at all are returned unchanged.
 *
 * @example
 * stripCFIAssertions("/6/14[chap05ref]") // returns "/6/14"
 *
 * @param cfi - The CFI to strip
 * @return The CFI with all `[...]` sections removed
 */
export function stripCFIAssertions(cfi: string): string {
  // Nothing to strip when there is no opening bracket anywhere.
  if (cfi.indexOf('[') === -1) {
    return cfi;
  }

  const kept: string[] = [];
  // True when the previous character was an unescaped `^`.
  let afterCaret = false;
  // True while scanning the inside of a `[...]` section.
  let insideBrackets = false;

  for (const char of cfi) {
    if (afterCaret) {
      // An escaped character is always literal: it can never start or end an
      // assertion, and it is kept only when it lies outside one.
      afterCaret = false;
      if (!insideBrackets) {
        kept.push(char);
      }
      continue;
    }

    if (char === '^') {
      afterCaret = true;
      continue;
    }

    if (char === '[') {
      insideBrackets = true;
      continue;
    }

    if (char === ']' && insideBrackets) {
      insideBrackets = false;
      continue;
    }

    if (!insideBrackets) {
      kept.push(char);
    }
  }

  return kept.join('');
}
/**
 * Compare two Canonical Fragment Identifiers.
 *
 * The full sorting rules for CFIs are specified by https://idpf.org/epub/linking/cfi/#sec-sorting.
 *
 * This function currently only implements what is necessary to compare simple
 * CFIs that specify a location within a book's spine, without any step
 * indirections ("!"). These CFIs consist of a "/"-delimited sequence of numbers,
 * with optional assertions in `[...]` brackets (eg. "/2/4[chapter2ref]").
 *
 * Per the sorting rules linked above, the input CFIs are assumed to be
 * unescaped. This means that they may contain circumflex (^) escape characters,
 * but don't have the additional escaping that is needed when CFIs are used
 * inside URIs or HTML.
 *
 * Each CFI has its assertions stripped and is then split on "/". Segments
 * that start with a decimal integer are compared as numbers; any other
 * segment (including the empty segment before a leading "/") is kept as a
 * string.
 *
 * @example
 * compareCFIs("/2/3[chap3ref]", "/2/10[chap10ref]") // returns -1
 *
 * @param a - The first CFI
 * @param b - The second CFI
 * @return A value that is negative, zero or positive depending on
 * whether `a` is less-than, equal-to or greater-than `b`
 */
export function compareCFIs(a: string, b: string): number {
  // Turn a CFI into the list of steps that is compared element by element.
  const parseCFI = (cfi: string): Array<number | string> =>
    stripCFIAssertions(cfi)
      .split('/')
      .map(step => {
        // CFI steps are decimal integers. Pass the radix explicitly so that
        // a malformed step such as "0x10" is not read as hexadecimal.
        const intVal = parseInt(step, 10);
        return Number.isNaN(intVal) ? step : intVal;
      });

  return compareArrays(parseCFI(a), parseCFI(b));
}
import { compareCFIs, stripCFIAssertions } from '../cfi';
// Tests for the CFI utilities: assertion stripping and CFI comparison.
describe('sidebar/util/cfi', () => {
  describe('stripCFIAssertions', () => {
    it('returns CFI without assertions unchanged', () => {
      const stripped = stripCFIAssertions('/1/2/3/10');
      assert.equal(stripped, '/1/2/3/10');
    });

    it('removes assertions from CFI', () => {
      // Single assertion on the last step.
      const single = stripCFIAssertions('/1/2[chap4ref]');
      assert.equal(single, '/1/2');

      // One assertion on every step.
      const multiple = stripCFIAssertions(
        '/1[part1ref]/2[chapter2ref]/3[subsectionref]'
      );
      assert.equal(multiple, '/1/2/3');
    });

    it('ignores escaped characters', () => {
      // Escaped brackets inside an assertion must not open or close it.
      const nested = stripCFIAssertions('/1/2[chap4^[ignoreme^]ref]');
      assert.equal(nested, '/1/2');

      const repeated = stripCFIAssertions('/1/2[a^[b^]]/3[c^[d^]]');
      assert.equal(repeated, '/1/2/3');
    });
  });

  describe('compareCFIs', () => {
    // Each entry is [first CFI, second CFI, expected comparison result].
    const cases = [
      // Trivial cases
      ['/2/4', '/2/1', 1],
      ['/2/1', '/2/4', -1],
      ['/2/4', '/2/4', 0],

      // Check numeric steps are treated as numbers and not as strings.
      ['/2/3', '/2/10', -1],
      ['/2/10', '/2/3', 1],

      // Check that assertions are ignored.
      ['/2/4[foo]', '/2/4[bar]', 0],

      // CFIs with many steps
      ['/1/2/3/4/5/6/a/b/c', '/1/2/3/4/5/7/a/b/c', -1],

      // Check that steps that can't be parsed as numbers are handled as
      // strings. Non-numeric steps are not allowed by the grammar, but we
      // try to handle them gracefully.
      ['/2/4/def', '/2/4/abc', 1],

      // Empty CFIs
      ['', '', 0],
    ];

    for (const [first, second, outcome] of cases) {
      it('compares CFIs', () => {
        const message = `comparing CFIs "${first}" and "${second}" failed`;
        assert.equal(compareCFIs(first, second), outcome, message);
      });
    }
  });
});
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment