Add ImageTextLayer class

Implement a class that creates a transparent text layer on top of an image containing text. This is useful for supporting annotation in documents which are rendered images or canvases that don't have browser-accessible text in them, such as VitalSource. It is the caller's responsibility to determine the location and character value of text in the image.

Add ImageTextLayer class
Implement a class that creates a transparent text layer on top of an image containing text. This is useful for supporting annotation in documents which are rendered images or canvases that don't have browser-accessible text in them, such as VitalSource. It is the caller's responsibility to determine the location and character value of text in the image.
41db2a37 · Robert Knight · 4d145d7d · 41db2a37 · 41db2a37
Commit 41db2a37 authored Nov 18, 2021 by Robert Knight
Hide whitespace changes
Inline Side-by-side

Showing with 432 additions and 0 deletions

image-text-layer.js src/annotator/integrations/image-text-layer.js +196 -0

image-text-layer-test.js src/annotator/integrations/test/image-text-layer-test.js +236 -0

No files found.
--- a/src/annotator/integrations/image-text-layer.js
+++ b/src/annotator/integrations/image-text-layer.js
+import { ListenerCollection } from '../../shared/listener-collection';
+
+/**
+ * @typedef Rect
+ * @prop {number} left
+ * @prop {number} right
+ * @prop {number} top
+ * @prop {number} bottom
+ */
+
+/**
+ * Return the smallest rectangle which encloses both `a` and `b`.
+ *
+ * @param {Rect|null} a - Box accumulated so far, or `null` if there is none
+ * @param {Rect} b - Box to combine with `a`
+ * @return {Rect} `b` itself if `a` is missing, otherwise a new rect whose
+ *   edges are the outermost edges of the two inputs
+ */
+function mergeBoxes(a, b) {
+  // With nothing accumulated yet, the incoming box is the union.
+  return a
+    ? {
+        left: Math.min(a.left, b.left),
+        right: Math.max(a.right, b.right),
+        top: Math.min(a.top, b.top),
+        bottom: Math.max(a.bottom, b.bottom),
+      }
+    : b;
+}
+
+/**
+ * ImageTextLayer maintains a transparent text layer on top of an image
+ * which contains text. This enables the text in the image to be selected
+ * and highlighted.
+ *
+ * The text is split into whitespace-separated words. Each word is rendered as
+ * an absolutely positioned, transparent `<span>` which is scaled using a CSS
+ * transform so that it covers the word's bounding box in the image.
+ *
+ * This is similar to the one that PDF.js creates for us in our standard PDF
+ * viewer.
+ */
+export class ImageTextLayer {
+  /**
+   * Create a text layer which is displayed on top of `image`.
+   *
+   * As a side effect this sets `position: relative` on the parent element of
+   * `image`, so that the text layer can be positioned relative to it.
+   *
+   * @param {Element} image - Rendered image on which to overlay the text layer.
+   *   The text layer will be inserted into the DOM as the next sibling of `image`.
+   * @param {Rect[]} charBoxes - Bounding boxes for characters in the image.
+   *   Coordinates should be in the range [0-1], where 0 is the top/left corner
+   *   of the image and 1 is the bottom/right.
+   * @param {string} text - Characters in the image corresponding to `charBoxes`
+   * @throws {Error} If `charBoxes` and `text` have different lengths
+   */
+  constructor(image, charBoxes, text) {
+    if (charBoxes.length !== text.length) {
+      throw new Error('Char boxes length does not match text length');
+    }
+
+    // Create container for text layer and position it above the image.
+    const containerParent = /** @type {HTMLElement} */ (image.parentNode);
+    const container = document.createElement('hypothesis-text-layer');
+    containerParent.insertBefore(container, image.nextSibling);
+
+    // Position text layer over image. For now we assume that the image's top-left
+    // corner aligns with the top-left corner of its container.
+    containerParent.style.position = 'relative';
+    container.style.position = 'absolute';
+    container.style.top = '0';
+    container.style.left = '0';
+
+    // Use multiply blending to make text in the image more readable when
+    // the corresponding text in the text layer is selected or highlighted.
+    // We apply a similar effect in PDF.js.
+    container.style.mixBlendMode = 'multiply';
+
+    // Set a fixed font style on the container and create a canvas using the same
+    // font which we can use to measure the "natural" size of the text. This is
+    // later used when scaling the text to fit the underlying part of the image.
+    const fontSize = 16;
+    const fontFamily = 'sans-serif';
+    container.style.fontSize = `${fontSize}px`;
+    container.style.fontFamily = fontFamily;
+    const canvas = document.createElement('canvas');
+    const context = /** @type {CanvasRenderingContext2D} */ (
+      canvas.getContext('2d')
+    );
+    context.font = `${fontSize}px ${fontFamily}`;
+
+    // Split the text into words and create an element for each in the text layer.
+
+    // Union of the character boxes of the word currently being accumulated,
+    // or `null` if no non-whitespace character has been seen since the last
+    // word was emitted.
+    /** @type {Rect|null} */
+    let currentWordBox = null;
+    let currentWordText = '';
+
+    /**
+     * @typedef TextBox
+     * @prop {HTMLElement} span
+     * @prop {Rect} box
+     * @prop {number} width - Natural width of the text
+     * @prop {number} height - Natural height of the text
+     */
+
+    /** @type {TextBox[]} */
+    const boxes = [];
+
+    // Actual height of text boxes before scaling using CSS transforms.
+    let boxHeight = 0;
+
+    // Emit a span for the word accumulated so far (if any) and reset the
+    // accumulator. Does nothing when called between consecutive whitespace
+    // characters or before the first word.
+    const addCurrentWordToTextLayer = () => {
+      if (!currentWordBox) {
+        return;
+      }
+
+      const span = document.createElement('span');
+      span.style.position = 'absolute';
+      span.style.color = 'transparent';
+      span.style.transformOrigin = 'top left';
+
+      span.textContent = currentWordText;
+
+      // Each word is followed by a single space text node, so the container's
+      // text content is the words joined (and terminated) by spaces.
+      container.append(span);
+      container.append(' ');
+
+      // Measure the initial height of text boxes. We only do this once as it
+      // should be the same for all boxes, since they use the same font.
+      if (!boxHeight) {
+        boxHeight = span.getBoundingClientRect().height;
+      }
+
+      boxes.push({
+        span,
+        box: currentWordBox,
+        width: context.measureText(currentWordText).width,
+        height: boxHeight,
+      });
+
+      currentWordBox = null;
+      currentWordText = '';
+    };
+
+    for (let i = 0; i < charBoxes.length; i++) {
+      const char = text[i];
+      if (/\s/.test(char)) {
+        // Whitespace ends the current word; its own box is not used.
+        addCurrentWordToTextLayer();
+        continue;
+      }
+
+      const charBox = charBoxes[i];
+      currentWordBox = mergeBoxes(currentWordBox, charBox);
+      currentWordText += char;
+    }
+    // Flush the final word, which is not followed by whitespace.
+    addCurrentWordToTextLayer();
+
+    // Position and scale text boxes to fit current image size.
+    const updateBoxSizes = () => {
+      // Measure the image once per update rather than once per dimension.
+      const { width: imageWidth, height: imageHeight } =
+        image.getBoundingClientRect();
+      container.style.width = `${imageWidth}px`;
+      container.style.height = `${imageHeight}px`;
+
+      for (const { span, box, width, height } of boxes) {
+        // Convert the word's relative [0-1] coordinates to pixels.
+        const left = box.left * imageWidth;
+        const top = box.top * imageHeight;
+        const right = box.right * imageWidth;
+        const bottom = box.bottom * imageHeight;
+
+        span.style.left = `${left}px`;
+        span.style.top = `${top}px`;
+
+        // Stretch the span from its natural size to the size of the word in
+        // the image.
+        const scaleX = (right - left) / width;
+        const scaleY = (bottom - top) / height;
+
+        span.style.transform = `scale(${scaleX}, ${scaleY})`;
+      }
+    };
+
+    updateBoxSizes();
+
+    /**
+     * Container element for the text layer.
+     *
+     * This is exposed so that callers can tweak the style if needed (eg.
+     * to set a z-index value).
+     */
+    this.container = container;
+
+    // Adjust box sizes when image is resized. We currently assume this
+    // corresponds to a frame resize. We could use `ResizeObserver` instead in
+    // supporting browsers.
+    this._pendingResizeTimer = 0;
+    this._listeners = new ListenerCollection();
+    this._listeners.add(window, 'resize', () => {
+      // Cancel any update which is already scheduled. This coalesces a burst
+      // of resize events into a single update, and ensures that `destroy`,
+      // which only knows about the most recent timer, cancels all pending work.
+      clearTimeout(this._pendingResizeTimer);
+      this._pendingResizeTimer = setTimeout(updateBoxSizes, 50);
+    });
+  }
+
+  /**
+   * Remove the text layer from the DOM, stop listening for window resizes
+   * and cancel any pending size update.
+   */
+  destroy() {
+    this.container.remove();
+    this._listeners.removeAll();
+    clearTimeout(this._pendingResizeTimer);
+  }
+}
--- a/src/annotator/integrations/test/image-text-layer-test.js
+++ b/src/annotator/integrations/test/image-text-layer-test.js
+import { ImageTextLayer } from '../image-text-layer';
+
+// Sizes of characters and lines used in character bounding boxes generated by
+// these tests, expressed as percentages of the image size.
+
+// Width of each character's bounding box.
+const charWidth = 4;
+// Height of each character's bounding box.
+const charHeight = 8;
+// Horizontal distance between the left edges of consecutive characters.
+const charSpacing = 5;
+// Vertical distance between the top edges of consecutive lines.
+const lineSpacing = 10;
+
+/**
+ * Compute where a word generated by `createCharBoxes` is expected to appear
+ * in the text layer.
+ *
+ * @param {number} imageWidth - Width of the image in pixels
+ * @param {number} imageHeight - Height of the image in pixels
+ * @param {number} lineIndex - Zero-based index of the line containing the word
+ * @param {string} text - The word
+ * @return {number[]} `[left, top, width, height]` tuple in pixels, relative to
+ *   the top-left corner of the image
+ */
+function expectedBoxOffsetAndSize(imageWidth, imageHeight, lineIndex, text) {
+  // Pixels per percentage point along each axis.
+  const pxPerPercentX = imageWidth / 100;
+  const pxPerPercentY = imageHeight / 100;
+
+  // A word spans from the left edge of its first character to the right edge
+  // of its last: one `charSpacing` step per character after the first, plus
+  // the width of the final character.
+  const leadingSteps = (text.length - 1) * charSpacing * pxPerPercentX;
+  const lastCharWidth = charWidth * pxPerPercentX;
+
+  // Every word starts at the left edge of its own line.
+  const left = 0;
+  const top = lineSpacing * lineIndex * pxPerPercentY;
+
+  return [left, top, leadingSteps + lastCharWidth, charHeight * pxPerPercentY];
+}
+
+/**
+ * Create character bounding box data for text in an image.
+ *
+ * One box is generated per character, including spaces. A space ends the
+ * current line, so the generated data positions each word on a separate line.
+ *
+ * @param {string} text
+ * @return {{ left: number, right: number, top: number, bottom: number }[]}
+ *   Boxes with coordinates expressed as fractions of the image size
+ */
+function createCharBoxes(text) {
+  let line = 0;
+  let column = 0;
+
+  return Array.from(text, char => {
+    // Top-left corner of this character, as a percentage of the image size.
+    const x = column * charSpacing;
+    const y = line * lineSpacing;
+
+    const box = {
+      left: x / 100,
+      right: (x + charWidth) / 100,
+      top: y / 100,
+      bottom: (y + charHeight) / 100,
+    };
+
+    // The space itself sits at the end of its line; the next character
+    // starts a new line.
+    if (char === ' ') {
+      column = 0;
+      line += 1;
+    } else {
+      column += 1;
+    }
+
+    return box;
+  });
+}
+
+describe('ImageTextLayer', () => {
+  // Text layers and page containers created by the current test. These are
+  // cleaned up in `afterEach`.
+  let textLayers;
+  let containers;
+
+  /**
+   * Create a 500x500px `<img>` inside a `<div>` and attach it to the document.
+   */
+  function createPageImage() {
+    const container = document.createElement('div');
+    const image = document.createElement('img');
+
+    // Image size chosen so 1% == 5px, to make the math easy.
+    image.style.width = '500px';
+    image.style.height = '500px';
+
+    container.append(image);
+
+    document.body.append(container);
+    containers.push(container);
+
+    return { container, image };
+  }
+
+  /**
+   * Create an `ImageTextLayer` which is destroyed when the test finishes.
+   * Arguments are forwarded to the `ImageTextLayer` constructor.
+   */
+  function createTextLayer(...args) {
+    const textLayer = new ImageTextLayer(...args);
+    textLayers.push(textLayer);
+    return textLayer;
+  }
+
+  /** Return the `<span>` elements, one per word, in `textLayer`. */
+  function getWordBoxes(textLayer) {
+    return [...textLayer.container.querySelectorAll('span')];
+  }
+
+  beforeEach(() => {
+    containers = [];
+    textLayers = [];
+  });
+
+  afterEach(() => {
+    containers.forEach(c => c.remove());
+    textLayers.forEach(tl => tl.destroy());
+  });
+
+  it('creates a <hypothesis-text-layer> element above the image', () => {
+    const { container, image } = createPageImage();
+
+    // nb. Text starts with a space to exercise an extra code path in the text
+    // layer builder.
+    const imageText = ' some text in the image';
+
+    createTextLayer(image, createCharBoxes(imageText), imageText);
+
+    const textLayerEl = container.querySelector('hypothesis-text-layer');
+    assert.instanceOf(textLayerEl, HTMLElement);
+    assert.equal(image.nextSibling, textLayerEl);
+    assert.equal(container.style.position, 'relative');
+    assert.equal(textLayerEl.style.position, 'absolute');
+    assert.equal(textLayerEl.style.mixBlendMode, 'multiply');
+  });
+
+  it('throws if char box array and text have different lengths', () => {
+    const { image } = createPageImage();
+    const imageText = 'some text in the image';
+
+    assert.throws(() => {
+      const charBoxes = createCharBoxes(imageText).slice(0, -1);
+      createTextLayer(image, charBoxes, imageText);
+    }, 'Char boxes length does not match text length');
+  });
+
+  it('creates elements in the text layer for each word in the image', () => {
+    const { image } = createPageImage();
+    const imageText = 'some words here';
+    const textLayer = createTextLayer(
+      image,
+      createCharBoxes(imageText),
+      imageText
+    );
+
+    // Each word span is followed by a space, hence the trailing space.
+    assert.equal(textLayer.container.textContent, 'some words here ');
+    const wordSpans = getWordBoxes(textLayer);
+    assert.equal(wordSpans.length, imageText.split(' ').length);
+    assert.deepEqual(
+      wordSpans.map(ws => ws.textContent),
+      ['some', 'words', 'here']
+    );
+
+    // Measure the rendered position of each word relative to the image,
+    // rounded down to whole pixels.
+    const imageBox = image.getBoundingClientRect();
+    const wordBoxPositions = wordSpans.map(span => {
+      const wordBox = span.getBoundingClientRect();
+      return [
+        wordBox.left - imageBox.left,
+        wordBox.top - imageBox.top,
+        wordBox.width,
+        wordBox.height,
+      ].map(coord => Math.floor(coord));
+    });
+
+    // Always pass the radix so the "500px" style values are parsed as decimal.
+    const imageWidth = parseInt(image.style.width, 10);
+    const imageHeight = parseInt(image.style.height, 10);
+
+    const expectedPositions = [
+      expectedBoxOffsetAndSize(imageWidth, imageHeight, 0, 'some'),
+      expectedBoxOffsetAndSize(imageWidth, imageHeight, 1, 'words'),
+      expectedBoxOffsetAndSize(imageWidth, imageHeight, 2, 'here'),
+    ];
+    assert.deepEqual(wordBoxPositions, expectedPositions);
+  });
+
+  it('updates size and position of text layer elements when window is resized', () => {
+    const { image } = createPageImage();
+    const imageText = 'some text in the image';
+
+    // Fake timers are installed before the text layer is created so that the
+    // delayed update after a resize can be triggered with `clock.tick`.
+    const clock = sinon.useFakeTimers();
+    try {
+      const textLayer = createTextLayer(
+        image,
+        createCharBoxes(imageText),
+        imageText
+      );
+
+      const originalBoxes = getWordBoxes(textLayer).map(box =>
+        box.getBoundingClientRect()
+      );
+
+      // Rescale image to 3/5 of original size.
+      image.style.width = '300px';
+      image.style.height = '300px';
+
+      // Notify text layer that image has been resized. We currently assume
+      // that this always corresponds to a window resize.
+      window.dispatchEvent(new Event('resize'));
+      clock.tick(100);
+
+      // Check that the positions and sizes of each text box were changed to
+      // reflect the new scale of the image.
+      const ratio = 3 / 5;
+      const imageBox = image.getBoundingClientRect();
+      const newBoxes = getWordBoxes(textLayer).map(box =>
+        box.getBoundingClientRect()
+      );
+
+      const tolerance = 0.01;
+      assert.equal(originalBoxes.length, newBoxes.length);
+      for (const [i, originalBox] of originalBoxes.entries()) {
+        const newBox = newBoxes[i];
+
+        const leftGap = originalBox.left - imageBox.left;
+        const newLeftGap = newBox.left - imageBox.left;
+        assert.approximately(leftGap * ratio, newLeftGap, tolerance);
+
+        const topGap = originalBox.top - imageBox.top;
+        const newTopGap = newBox.top - imageBox.top;
+        assert.approximately(topGap * ratio, newTopGap, tolerance);
+
+        assert.approximately(
+          originalBox.width * ratio,
+          newBox.width,
+          tolerance
+        );
+        assert.approximately(
+          originalBox.height * ratio,
+          newBox.height,
+          tolerance
+        );
+      }
+    } finally {
+      clock.restore();
+    }
+  });
+
+  describe('#destroy', () => {
+    it('removes the <hypothesis-text-layer> element', () => {
+      const { container, image } = createPageImage();
+      const imageText = 'some text in the image';
+      const textLayer = createTextLayer(
+        image,
+        createCharBoxes(imageText),
+        imageText
+      );
+
+      textLayer.destroy();
+
+      const textLayerEl = container.querySelector('hypothesis-text-layer');
+      assert.isNull(textLayerEl);
+    });
+  });
+});