Generate absolute URLs in EPUBContentSelector selectors

Change `EPUBContentSelector` selectors captured in VS books to include the resolved URL of the content document (e.g. "https://jigsaw.vitalsource.com/books/{book_id}/...") rather than just the path ("/books/{book_id}/..."). This may make it easier to work with these URLs outside of the VitalSource viewer, and should also make them more consistent with `EPUBContentSelector` selectors captured in other ebook readers in the future.

Generate absolute URLs in EPUBContentSelector selectors
Change `EPUBContentSelector` selectors captured in VS books to include the resolved URL of the content document (e.g. "https://jigsaw.vitalsource.com/books/{book_id}/...") rather than just the path ("/books/{book_id}/..."). This may make it easier to work with these URLs outside of the VitalSource viewer, and should also make them more consistent with `EPUBContentSelector` selectors captured in other ebook readers in the future.
91a563e1 · Robert Knight · 265e59fc · 91a563e1 · 91a563e1 · 91a563e1
Commit 91a563e1 authored Dec 16, 2022 by Robert Knight
4 changed files
--- a/src/annotator/integrations/test/vitalsource-test.js
+++ b/src/annotator/integrations/test/vitalsource-test.js
@@ -54,6 +54,10 @@ class FakeVitalSourceViewer {
  }
 }

+function resolveURL(relativeURL) {
+  return new URL(relativeURL, document.baseURI).toString();
+}
+
 describe('annotator/integrations/vitalsource', () => {
  let featureFlags;
  let fakeViewer;
@@ -376,7 +380,7 @@ describe('annotator/integrations/vitalsource', () => {
        assert.ok(cfiSelector);
        assert.deepEqual(cfiSelector, {
          type: 'EPUBContentSelector',
-          url: '/pages/chapter_02.xhtml',
+          url: resolveURL('/pages/chapter_02.xhtml'),
          cfi: '/2',
          title: 'Chapter two (from TOC)',
        });
@@ -401,7 +405,7 @@ describe('annotator/integrations/vitalsource', () => {
        assert.ok(cfiSelector);
        assert.deepEqual(cfiSelector, {
          type: 'EPUBContentSelector',
-          url: '/pages/2',
+          url: resolveURL('/pages/2'),
          cfi: '/1',
          title: 'First chapter',
        });
@@ -479,7 +483,7 @@ describe('annotator/integrations/vitalsource', () => {
        const ann = createAnnotationWithSelector({
          type: 'EPUBContentSelector',
          cfi: '/2/4',
-          url: '/chapters/02.xhtml',
+          url: resolveURL('/chapters/02.xhtml'),
        });

        integration.navigateToSegment(ann);
@@ -491,7 +495,7 @@ describe('annotator/integrations/vitalsource', () => {
        const integration = createIntegration();
        const ann = createAnnotationWithSelector({
          type: 'EPUBContentSelector',
-          url: '/chapters/02.xhtml',
+          url: resolveURL('/chapters/02.xhtml'),
        });

        integration.navigateToSegment(ann);
@@ -506,7 +510,7 @@ describe('annotator/integrations/vitalsource', () => {
        const segment = await integration.segmentInfo();
        assert.deepEqual(segment, {
          cfi: '/2',
-          url: '/pages/chapter_02.xhtml',
+          url: resolveURL('/pages/chapter_02.xhtml'),
        });
      });
    });

--- a/src/annotator/integrations/vitalsource.ts
+++ b/src/annotator/integrations/vitalsource.ts
@@ -175,6 +175,18 @@ function makeContentFrameScrollable(frame: HTMLIFrameElement) {
  attrObserver.observe(frame, { attributes: true });
 }

+/**
+ * Return `url` with the origin and fragment removed, keeping path and query.
+ *
+ * eg. "https://jigsaw.vitalsource.com/books/123/chapter.html?foo#bar" =>
+ * "/books/123/chapter.html?foo"
+ */
+function stripOrigin(url: string) {
+  // Resolve input URL in case it doesn't already have an origin.
+  const parsed = new URL(url, document.baseURI);
+  return parsed.pathname + parsed.search;
+}
+
 /**
 * Integration for the content frame in VitalSource's Bookshelf ebook reader.
 *
@@ -312,23 +324,13 @@ export class VitalSourceContentIntegration
      return selectors;
    }

-    const [pageInfo, toc] = await Promise.all([
-      this._bookElement.getCurrentPage(),
-      this._bookElement.getTOC(),
-    ]);
-
-    let title = pageInfo.chapterTitle;
-
-    // Find the first table of contents entry that corresponds to the current page,
-    // and use its title instead of `pageInfo.chapterTitle`. This works around
-    // https://github.com/hypothesis/client/issues/4986.
-    const pageCFI = documentCFI(pageInfo.cfi);
-    const tocEntry = toc.data?.find(
-      entry => documentCFI(entry.cfi) === pageCFI
-    );
-    if (tocEntry) {
-      title = tocEntry.title;
-    }
+    const {
+      cfi,
+      index: pageIndex,
+      page: pageLabel,
+      title,
+      url,
+    } = await this._getPageInfo(true /* includeTitle */);

    // We generate an "EPUBContentSelector" with a CFI for all VS books,
    // although for PDF-based books the CFI is a string generated from the
@@ -336,8 +338,8 @@ export class VitalSourceContentIntegration
    const extraSelectors: Selector[] = [
      {
        type: 'EPUBContentSelector',
-        cfi: pageInfo.cfi,
-        url: pageInfo.absoluteURL,
+        cfi,
+        url,
        title,
      },
    ];
@@ -351,8 +353,8 @@ export class VitalSourceContentIntegration
    if (bookInfo.format === 'pbk') {
      extraSelectors.push({
        type: 'PageSelector',
-        index: pageInfo.index,
-        label: pageInfo.page,
+        index: pageIndex,
+        label: pageLabel,
      });
    }

@@ -425,7 +427,7 @@ export class VitalSourceContentIntegration
    if (selector?.cfi) {
      this._bookElement.goToCfi(selector.cfi);
    } else if (selector?.url) {
-      this._bookElement.goToURL(selector.url);
+      this._bookElement.goToURL(stripOrigin(selector.url));
    } else {
      throw new Error('No segment information available');
    }
@@ -437,14 +439,53 @@ export class VitalSourceContentIntegration
    return true;
  }

-  async segmentInfo(): Promise<SegmentInfo> {
-    const pageInfo = await this._bookElement.getCurrentPage();
+  /**
+   * Retrieve information about the currently displayed content document or
+   * page.
+   *
+   * @param includeTitle - Whether to fetch the title. This involves some extra
+   *   work so should be skipped when not required.
+   */
+  async _getPageInfo(includeTitle: boolean) {
+    const [pageInfo, toc] = await Promise.all([
+      this._bookElement.getCurrentPage(),
+      includeTitle ? this._bookElement.getTOC() : undefined,
+    ]);
+
+    let title;
+
+    if (toc) {
+      title = pageInfo.chapterTitle;
+
+      // Find the first table of contents entry that corresponds to the current page,
+      // and use its title instead of `pageInfo.chapterTitle`. This works around
+      // https://github.com/hypothesis/client/issues/4986.
+      const pageCFI = documentCFI(pageInfo.cfi);
+      const tocEntry = toc.data?.find(
+        entry => documentCFI(entry.cfi) === pageCFI
+      );
+      if (tocEntry) {
+        title = tocEntry.title;
+      }
+    }
+
    return {
      cfi: pageInfo.cfi,
-      url: pageInfo.absoluteURL,
+      index: pageInfo.index,
+      page: pageInfo.page,
+      title,
+
+      // The `pageInfo.absoluteURL` URL is an absolute path that does not
+      // include the origin of VitalSource's CDN.
+      url: new URL(pageInfo.absoluteURL, document.baseURI).toString(),
    };
  }

+  async segmentInfo(): Promise<SegmentInfo> {
+    const { cfi, url } = await this._getPageInfo(false /* includeTitle */);
+    return { cfi, url };
+  }
+
  async uri() {
    if (this._bookIsSingleDocument()) {
      const bookInfo = this._bookElement.getBookInfo();

--- a/src/types/api.ts
+++ b/src/types/api.ts
@@ -76,8 +76,8 @@ export type EPUBContentSelector = {
  type: 'EPUBContentSelector';

  /**
-   * URL of the content document. This can either be an absolute HTTP URL, or
-   * a URL that is relative to the root of the EPUB.
+   * URL of the content document. This should be an absolute HTTPS URL if
+   * available, but may be relative to the root of the EPUB.
   */
  url: string;


--- a/src/types/vitalsource.ts
+++ b/src/types/vitalsource.ts
@@ -174,6 +174,8 @@ export type MosaicBookElement = HTMLElement & {

  /**
   * Navigate the book to the page or content document whose URL matches `url`.
+   *
+   * `url` must be a relative URL with an absolute path (eg. "/books/123/chapter01.xhtml").
   */
  goToURL(url: string): void;
 };