Commit aea38355, authored by Lyza Danger Gardner and committed by Lyza Gardner

Add utility to trim text-content Ranges

Remove any leading or trailing whitespace and/or nodes from the supplied
Range. The resulting range starts exactly at the first non-whitespace
character in the Range and ends exactly at the last non-whitespace
character in the Range.

`trimRange` assumes the provided Range starts and ends on a text node
and will throw otherwise.
parent b3d76b42
import { textNodes } from '../../../test-util/compare-dom';
import { trimRange } from '../trim-range';
import { TextRange } from '../text-range';
/**
 * Tests for `trimRange`: trimming leading and trailing whitespace from a DOM
 * Range whose boundaries lie in text nodes.
 */
describe('annotator/anchoring/trim-range', () => {
  // Element most recently attached to `document.body`; removed after each test.
  let container;

  // Fixture markup for the container-changing tests. Do not re-indent the
  // literal: its whitespace is part of the markup assigned to `innerHTML`.
  const htmlContent = `
<div id="A">
<div id="B">&#8233;z<p id="C"><span id="D">&nbsp; charm&nbsp;</span> a </p> </div>
<div id="E">
<p id="F"> <span id="G">&nbsp;<span id="H"><!-- comment -->f </span></span></p>
</div>
<div id="I"></div>
</div>
<div id="J"> <span id="K"> t </span><div id="L"></div><div id="M">&nbsp;<div> </div>
`;

  afterEach(() => {
    container?.remove();
  });

  /**
   * Attach a new `<div>` with the given markup to the document body and
   * remember it in `container` so that `afterEach` removes it.
   */
  const addContent = innerHTML => {
    container = document.createElement('div');
    container.innerHTML = innerHTML;
    document.body.appendChild(container);
    return container;
  };

  /**
   * Attach a new `<p>` with the given markup to the document body. The caller
   * is expected to assign the result to `container` so that it is cleaned up.
   */
  const addTextContent = text => {
    const wrappingNode = document.createElement('p');
    wrappingNode.innerHTML = text;
    document.body.appendChild(wrappingNode);
    return wrappingNode;
  };

  /**
   * Return a Range that selects text content in `container` from `startOffset`
   * to `endOffset`. Will select entire textContent of `container` if no offsets
   * are provided.
   *
   * @param {Element} container
   * @param {number} [startOffset] - or 0 if not present
   * @param {number} [endOffset] - or length of text content if not present
   */
  const rangeFromOffsets = (container, startOffset, endOffset) => {
    return TextRange.fromOffsets(
      container,
      startOffset ?? 0,
      endOffset ?? container.textContent.length
    ).toRange();
  };

  /**
   * Return a Range that starts before `startElement` and ends after
   * `endElement`. The returned Range will start and end on a Text node.
   */
  const textRangeBetween = (startElement, endElement) => {
    const range = new Range();
    range.setStartBefore(startElement);
    range.setEndAfter(endElement);
    return TextRange.fromRange(range).toRange();
  };

  describe('Trimming by adjusting offsets within existing start and end containers', () => {
    // Each row: [markup, initial start offset, expected trimmed start offset]
    [
      ['Non-whitespace', 0, 0],
      ['&nbsp;One whitespace', 0, 1],
      ['&nbsp;&#8194;Multiple whitespace', 0, 2],
      ['Internal whitespace', 8, 9],
      ['Internal&#5760; whitespace', 8, 10],
    ].forEach(([testContent, initialStartOffset, expectedTrimmedOffset]) => {
      // The case is included in the title so that a failure identifies its row.
      it(`trims start offset forward until following character is non-whitespace ("${testContent}" from offset ${initialStartOffset})`, () => {
        container = addTextContent(testContent);
        const textRange = rangeFromOffsets(container, initialStartOffset);
        const trimmedRange = trimRange(textRange);

        assert.equal(textRange.startContainer, trimmedRange.startContainer);
        assert.equal(trimmedRange.startOffset, expectedTrimmedOffset);
      });
    });

    // Each row: [markup, initial end offset, expected trimmed end offset]
    [
      ['End', 3, 3],
      ['End ', 4, 3],
      ['End &nbsp;&#8197; ', 6, 3],
      ['Mid space', 4, 3],
      ['Mid &nbsp;&#8197;spaces', 6, 3],
    ].forEach(([testContent, initialEndOffset, trimmedEndOffset]) => {
      it(`trims end offset back until preceding character is non-whitespace ("${testContent}" to offset ${initialEndOffset})`, () => {
        container = addTextContent(testContent);
        const textRange = rangeFromOffsets(container, 0, initialEndOffset);
        const trimmedRange = trimRange(textRange);

        assert.equal(textRange.endContainer, trimmedRange.endContainer);
        assert.equal(trimmedRange.endOffset, trimmedEndOffset);
      });
    });
  });

  describe('Trimming by changing start or end containers and their offsets', () => {
    // `range.from`/`range.to` select the elements the initial range spans
    // (`to` defaults to `from`). `expected.start` is the element whose first
    // text node should become the trimmed start container.
    [
      {
        range: { from: '#A' },
        expected: { start: '#B', offset: 1, char: 'z' },
      },
      {
        range: { from: '#B' },
        expected: { start: '#B', offset: 1, char: 'z' },
      },
      {
        range: { from: '#C' },
        expected: { start: '#D', offset: 2, char: 'c' },
      },
      {
        range: { from: '#D' },
        expected: { start: '#D', offset: 2, char: 'c' },
      },
      {
        range: { from: '#E' },
        expected: { start: '#H', offset: 0, char: 'f' },
      },
      {
        range: { from: '#F' },
        expected: { start: '#H', offset: 0, char: 'f' },
      },
      {
        range: { from: '#G' },
        expected: { start: '#H', offset: 0, char: 'f' },
      },
      {
        range: { from: '#I', to: '#J' },
        expected: { start: '#K', offset: 1, char: 't' },
      },
      {
        range: { from: '#I', to: '#L' },
        expected: { start: '#K', offset: 1, char: 't' },
      },
    ].forEach(({ range, expected }) => {
      const label = range.to ? `${range.from} to ${range.to}` : range.from;

      it(`moves start position forward to first non-whitespace character in range (${label})`, () => {
        container = addContent(htmlContent);
        const textRange = textRangeBetween(
          document.querySelector(range.from),
          document.querySelector(range.to ?? range.from)
        );
        const trimmedRange = trimRange(textRange);

        assert.equal(
          trimmedRange.startContainer,
          textNodes(document.querySelector(expected.start))[0],
          `Trimmed startContainer is first text node inside of ${expected.start}`
        );
        assert.equal(trimmedRange.startOffset, expected.offset);
        assert.equal(
          trimmedRange.startContainer.textContent.charAt(
            trimmedRange.startOffset
          ),
          expected.char,
          `First character at trimmed start position is ${expected.char}`
        );
      });
    });

    // `expected.end` is the element whose last text node should become the
    // trimmed end container.
    [
      {
        range: { from: '#A' },
        expected: { end: '#H', offset: 1, char: 'f' },
      },
      {
        range: { from: '#B' },
        expected: { end: '#C', offset: 2, char: 'a' },
      },
      {
        range: { from: '#C' },
        expected: { end: '#C', offset: 2, char: 'a' },
      },
      {
        range: { from: '#D' },
        expected: { end: '#D', offset: 7, char: 'm' },
      },
      {
        range: { from: '#E' },
        expected: { end: '#H', offset: 1, char: 'f' },
      },
      {
        range: { from: '#F' },
        expected: { end: '#H', offset: 1, char: 'f' },
      },
      {
        range: { from: '#G' },
        expected: { end: '#H', offset: 1, char: 'f' },
      },
      {
        range: { from: '#H' },
        expected: { end: '#H', offset: 1, char: 'f' },
      },
      {
        range: { from: '#A', to: '#J' },
        expected: { end: '#K', offset: 2, char: 't' },
      },
    ].forEach(({ range, expected }) => {
      const label = range.to ? `${range.from} to ${range.to}` : range.from;

      it(`moves end position backward to nearest non-whitespace character (${label})`, () => {
        container = addContent(htmlContent);
        const textRange = textRangeBetween(
          document.querySelector(range.from),
          document.querySelector(range.to ?? range.from)
        );
        const trimmedRange = trimRange(textRange);
        const endTextNodes = textNodes(document.querySelector(expected.end));

        assert.equal(
          trimmedRange.endContainer,
          endTextNodes[endTextNodes.length - 1],
          `Trimmed endContainer is last text node inside of ${expected.end}`
        );
        assert.equal(trimmedRange.endOffset, expected.offset);
        assert.equal(
          trimmedRange.endContainer.textContent.charAt(
            trimmedRange.endOffset - 1
          ),
          expected.char,
          `Character before trimmed end position is ${expected.char}`
        );
      });
    });
  });

  it('throws if the range contains no non-whitespace text content', () => {
    container = addContent('<p id="empty">&nbsp;&nbsp;</p>');
    const textRange = textRangeBetween(
      document.querySelector('#empty'),
      document.querySelector('#empty')
    );

    assert.throws(
      () => trimRange(textRange),
      RangeError,
      'Range contains no non-whitespace text'
    );
  });

  it('throws if the range does not start or end on a text node', () => {
    container = addContent('<p id="notText">Some text</p>');
    const pElement = document.querySelector('p#notText');

    // `setStartBefore`/`setEndAfter` put both boundaries in the parent of
    // `pElement`, so neither boundary container is a text node.
    const range = new Range();
    range.setStartBefore(pElement);
    range.setEndAfter(pElement);
    assert.throws(
      () => trimRange(range),
      RangeError,
      'Range startContainer is not a text node'
    );

    // Move only the start into the text node; the end is still outside it.
    const pTextNode = textNodes(pElement)[0];
    range.setStart(pTextNode, 0);
    assert.throws(
      () => trimRange(range),
      RangeError,
      'Range endContainer is not a text node'
    );
  });
});
/**
 * Direction in which a string or a sequence of nodes is scanned for
 * non-whitespace content: `Forwards` seeks from the start towards the end,
 * `Backwards` seeks from the end towards the start.
 */
enum TrimDirection {
  Forwards = 1,
  Backwards = 2,
}
/**
 * A boundary point for a Range: a node plus an offset into it, in the shape
 * accepted by `Range.setStart` and `Range.setEnd`.
 */
type RangePosition = {
  /** Node that contains the boundary point. */
  node: Node;
  /** Offset of the boundary point within `node`. */
  offset: number;
};
/**
 * Find the offset in `text` nearest to `baseOffset` that borders a
 * non-whitespace character when seeking in `direction`.
 *
 * Seeking forwards, the result is the index of the first non-whitespace
 * character at or after `baseOffset`. Seeking backwards, it is the index just
 * past the last non-whitespace character before `baseOffset`. Whitespace is
 * whatever `String.prototype.trim()` strips.
 *
 * @returns The adjusted offset, or -1 when only whitespace (or nothing) lies
 *   between `baseOffset` and the relevant end of the string.
 */
function closestNonSpaceInString(
  text: string,
  baseOffset: number,
  direction: TrimDirection
): number {
  // Index of the character that sits immediately "inside" the offset.
  const adjacentIndex =
    direction === TrimDirection.Forwards ? baseOffset : baseOffset - 1;

  // `charAt` yields '' for out-of-bounds indexes, which falls through below.
  if (text.charAt(adjacentIndex).trim().length > 0) {
    // Nothing to skip: the offset already borders a non-whitespace character.
    return baseOffset;
  }

  const seekingBackwards = direction === TrimDirection.Backwards;

  // The part of the string that may be searched, and the same part with the
  // whitespace nearest to `baseOffset` stripped away.
  const searchable = seekingBackwards
    ? text.substring(0, baseOffset)
    : text.substring(baseOffset);
  const stripped = seekingBackwards
    ? searchable.trimEnd()
    : searchable.trimStart();

  if (stripped.length === 0) {
    return -1;
  }

  // Number of whitespace characters that have to be skipped over.
  const skipped = searchable.length - stripped.length;
  return seekingBackwards ? baseOffset - skipped : baseOffset + skipped;
}
/**
 * Locate a replacement start position (`TrimDirection.Forwards`) or end
 * position (`TrimDirection.Backwards`) for `range` in a text node other than
 * the one the boundary currently sits in.
 *
 * Text nodes under the range's common ancestor are visited one at a time,
 * starting after the relevant boundary node and moving towards the opposite
 * boundary node. The search stops at the first node that contains a
 * non-whitespace character, or once the opposite boundary node has been
 * examined.
 *
 * @returns The node found and the offset within it bordering its nearest
 *   non-whitespace character
 * @throws {RangeError} If no text node with non-whitespace characters found
 */
function closestNonSpaceInRange(
  range: Range,
  direction: TrimDirection
): RangePosition {
  const seekingForwards = direction === TrimDirection.Forwards;
  const root = range.commonAncestorContainer;
  const textIter = root.ownerDocument!.createNodeIterator(
    root,
    NodeFilter.SHOW_TEXT
  );

  // Node the search starts from, and node beyond which it must not continue.
  const [fromNode, untilNode] = seekingForwards
    ? [range.startContainer, range.endContainer]
    : [range.endContainer, range.startContainer];

  // Walk the iterator up to the node the search starts from.
  let candidate = textIter.nextNode();
  while (candidate && candidate !== fromNode) {
    candidate = textIter.nextNode();
  }

  if (direction === TrimDirection.Backwards) {
    // Turn the iterator around. The first `previousNode()` after a
    // `nextNode()` hands back the same node, i.e. the starting node again.
    candidate = textIter.previousNode();
  }

  let foundOffset = -1;
  while (candidate && foundOffset === -1 && candidate !== untilNode) {
    candidate = seekingForwards
      ? textIter.nextNode()
      : textIter.previousNode();
    if (!candidate) {
      break;
    }
    // Examine the whole node, from the edge facing the starting node.
    const content = candidate.textContent!;
    foundOffset = closestNonSpaceInString(
      content,
      seekingForwards ? 0 : content.length,
      direction
    );
  }

  if (candidate && foundOffset >= 0) {
    return { node: candidate, offset: foundOffset };
  }

  /* istanbul ignore next */
  throw new RangeError('No text nodes with non-whitespace text found in range');
}
/**
 * Produce a copy of `range` with leading and trailing whitespace excluded.
 * The supplied range is not modified. In the returned Range:
 *
 * - `startContainer` and `endContainer` are text nodes that each contribute
 *   at least one non-whitespace character to the Range's text content
 * - `startOffset` sits immediately before the first non-whitespace character
 *   and `endOffset` immediately after the last one
 *
 * Whitespace characters are those removed by `String.prototype.trim()`.
 *
 * @param range - A DOM Range whose `startContainer` and `endContainer` are
 *   both text nodes, and which contains at least one non-whitespace character.
 * @throws {RangeError} If the range's text is empty or whitespace only, or if
 *   its start or end container is not a text node
 */
export function trimRange(range: Range): Range {
  if (range.toString().trim().length === 0) {
    throw new RangeError('Range contains no non-whitespace text');
  }

  const { startContainer, endContainer } = range;
  if (startContainer.nodeType !== Node.TEXT_NODE) {
    throw new RangeError('Range startContainer is not a text node');
  }
  if (endContainer.nodeType !== Node.TEXT_NODE) {
    throw new RangeError('Range endContainer is not a text node');
  }

  const result = range.cloneRange();

  // First try to fix each boundary up inside the text node it already uses.
  const startInNode = closestNonSpaceInString(
    startContainer.textContent!,
    range.startOffset,
    TrimDirection.Forwards
  );
  const endInNode = closestNonSpaceInString(
    endContainer.textContent!,
    range.endOffset,
    TrimDirection.Backwards
  );

  const startResolved = startInNode >= 0;
  // An end offset of 0 would leave none of the node's text inside the range,
  // so it does not count as a usable end position.
  const endResolved = endInNode > 0;

  if (startResolved) {
    result.setStart(startContainer, startInNode);
  }
  if (endResolved) {
    result.setEnd(endContainer, endInNode);
  }

  if (!startResolved) {
    // Only whitespace lies between `startOffset` and the end of the start
    // container: look for the start position in a later text node.
    const { node, offset } = closestNonSpaceInRange(
      result,
      TrimDirection.Forwards
    );
    if (node && offset >= 0) {
      result.setStart(node, offset);
    }
  }

  if (!endResolved) {
    // Only whitespace lies between the beginning of the end container and
    // `endOffset`: look for the end position in an earlier text node.
    const found = closestNonSpaceInRange(result, TrimDirection.Backwards);
    if (found.node && found.offset > 0) {
      result.setEnd(found.node, found.offset);
    }
  }

  return result;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment