Commit 907f0011 authored by Robert Knight

Make side-by-side mode work with VitalSource "Great Book" books

Many classic texts that are freely available in VitalSource use `<div
class="para">` elements for paragraphs of text, instead of `<p>` elements.
Modify the logic for finding the main content of an HTML document to consider
these. We will probably want to make these selectors a configurable argument to
`guessMainContentArea` in future. Since there are only two of them and it is
unlikely to have ill effects outside of VitalSource books, I made it a general
feature of the HTML side-by-side mode implementation.
parent bda664a8
import { intersectRects, rectContains, rectIntersects } from '../util/geometry'; import { intersectRects, rectContains, rectIntersects } from '../util/geometry';
/**
 * Selectors matching elements that may belong to a page's main content.
 *
 * - `p`: ordinary HTML paragraphs.
 * - `.para`: paragraphs in VitalSource "Great Book" format ebooks.
 */
const contentSelectors = ['p', '.para'];
/** /**
* Attempt to guess the region of the page that contains the main content. * Attempt to guess the region of the page that contains the main content.
* *
...@@ -21,7 +32,8 @@ export function guessMainContentArea(root) { ...@@ -21,7 +32,8 @@ export function guessMainContentArea(root) {
// In future we might want to expand this to consider other text containers, // In future we might want to expand this to consider other text containers,
// since some pages, especially eg. in ebooks, may not have any paragraphs // since some pages, especially eg. in ebooks, may not have any paragraphs
// (eg. instead they may only contain tables or lists or headings). // (eg. instead they may only contain tables or lists or headings).
const paragraphs = Array.from(root.querySelectorAll('p')) const contentSelector = contentSelectors.join(',');
const paragraphs = Array.from(root.querySelectorAll(contentSelector))
.map(p => { .map(p => {
// Gather some data about them. // Gather some data about them.
const rect = p.getBoundingClientRect(); const rect = p.getBoundingClientRect();
......
...@@ -6,6 +6,11 @@ import { ...@@ -6,6 +6,11 @@ import {
describe('annotator/integrations/html-side-by-side', () => { describe('annotator/integrations/html-side-by-side', () => {
let contentElements; let contentElements;
/**
 * Record `element` in the suite's `contentElements` list and attach it to
 * the end of the document body.
 *
 * NOTE(review): the list is presumably used to remove the elements again
 * during test teardown; the teardown code is not visible here, so confirm.
 *
 * @param {Element} element - Content root to track and add to the page.
 */
function addContentElementToDocument(element) {
  // Track first, then attach, so the element is recorded even if attaching
  // throws.
  contentElements.push(element);

  const { body } = document;
  body.append(element);
}
function createContent(paragraphs) { function createContent(paragraphs) {
const paraElements = paragraphs.map(({ content, left, width }) => { const paraElements = paragraphs.map(({ content, left, width }) => {
const el = document.createElement('p'); const el = document.createElement('p');
...@@ -18,9 +23,7 @@ describe('annotator/integrations/html-side-by-side', () => { ...@@ -18,9 +23,7 @@ describe('annotator/integrations/html-side-by-side', () => {
const root = document.createElement('div'); const root = document.createElement('div');
root.append(...paraElements); root.append(...paraElements);
addContentElementToDocument(root);
document.body.append(root);
contentElements.push(root);
return root; return root;
} }
...@@ -63,6 +66,23 @@ describe('annotator/integrations/html-side-by-side', () => { ...@@ -63,6 +66,23 @@ describe('annotator/integrations/html-side-by-side', () => {
assert.deepEqual(area, { left: 10, right: 110 }); assert.deepEqual(area, { left: 10, right: 110 });
}); });
// Markup variants that must each be recognised as main-content paragraphs.
[
  '<p>content</p>',
  // Paragraphs in VitalSource "Great Book" format ebooks.
  '<div class="para">content</div>',
].forEach(contentHTML => {
  // Include the markup in the title so each generated case has a unique name;
  // otherwise a failure report cannot tell which variant broke.
  it(`finds content area with various paragraph types (${contentHTML})`, () => {
    const content = document.createElement('div');
    content.innerHTML = contentHTML;
    addContentElementToDocument(content);

    const area = guessMainContentArea(content);

    // Only checks that some content area was detected, not its coordinates.
    assert.ok(area);
  });
});
it('ignores the positions of hidden paragraphs', () => { it('ignores the positions of hidden paragraphs', () => {
const paragraphs = []; const paragraphs = [];
for (let i = 0; i < 10; i++) { for (let i = 0; i < 10; i++) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment