Commit 907f0011 authored by Robert Knight

Make side-by-side mode work with VitalSource "Great Book" books

Many classic texts that are freely available in VitalSource use `<div
class="para">` elements for paragraphs of text, instead of `<p>` elements.
Modify the logic for finding the main content of an HTML document to consider
these. We will probably want to make these selectors a configurable argument to
`guessMainContentArea` in future. Since there are only two of them and it is
unlikely to have ill effects outside of VitalSource books, I made it a general
feature of the HTML side-by-side mode implementation.
parent bda664a8
import { intersectRects, rectContains, rectIntersects } from '../util/geometry';
/**
 * Selectors (CSS) matching the elements that may belong to the main content
 * of a page.
 */
const contentSelectors = [
  // Standard HTML paragraphs.
  'p',

  // VitalSource "Great Book" format ebooks mark paragraphs with this class.
  '.para',
];
/**
* Attempt to guess the region of the page that contains the main content.
*
......@@ -21,7 +32,8 @@ export function guessMainContentArea(root) {
// In future we might want to expand this to consider other text containers,
// since some pages, especially eg. in ebooks, may not have any paragraphs
// (eg. instead they may only contain tables or lists or headings).
const paragraphs = Array.from(root.querySelectorAll('p'))
const contentSelector = contentSelectors.join(',');
const paragraphs = Array.from(root.querySelectorAll(contentSelector))
.map(p => {
// Gather some data about them.
const rect = p.getBoundingClientRect();
......
......@@ -6,6 +6,11 @@ import {
describe('annotator/integrations/html-side-by-side', () => {
let contentElements;
/**
 * Record `element` in the suite's `contentElements` list and attach it to
 * the end of `document.body`.
 *
 * NOTE(review): the list is presumably consumed by a cleanup hook that
 * removes the elements again; that hook is not visible here — confirm.
 *
 * @param {Element} element - Node to track and insert into the page.
 */
function addContentElementToDocument(element) {
  // Track first, then attach.
  contentElements.push(element);
  document.body.append(element);
}
function createContent(paragraphs) {
const paraElements = paragraphs.map(({ content, left, width }) => {
const el = document.createElement('p');
......@@ -18,9 +23,7 @@ describe('annotator/integrations/html-side-by-side', () => {
const root = document.createElement('div');
root.append(...paraElements);
document.body.append(root);
contentElements.push(root);
addContentElementToDocument(root);
return root;
}
......@@ -63,6 +66,23 @@ describe('annotator/integrations/html-side-by-side', () => {
assert.deepEqual(area, { left: 10, right: 110 });
});
// Generate one test per supported paragraph markup. The markup is included
// in the test title so that each generated test has a unique name and a
// failure report identifies which variant broke.
[
  '<p>content</p>',
  // Paragraphs in VitalSource "Great Book" format ebooks.
  '<div class="para">content</div>',
].forEach(contentHTML => {
  it(`finds content area with various paragraph types (${contentHTML})`, () => {
    // Build a container holding a single paragraph of the given type.
    const content = document.createElement('div');
    content.innerHTML = contentHTML;
    addContentElementToDocument(content);

    // Only checks that some content area was found for this markup.
    const area = guessMainContentArea(content);
    assert.ok(area);
  });
});
it('ignores the positions of hidden paragraphs', () => {
const paragraphs = [];
for (let i = 0; i < 10; i++) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment