Commit 166b60fa authored by Robert Knight's avatar Robert Knight

Avoid sanitizing HTML generated by KaTeX.

KaTeX escapes its own input and we trust it to generate safe HTML as
output. Passing HTML generated by KaTeX through ngSanitize can mangle
the result.  eg. The formatting of the infinity symbol is corrupted when
using this test block for example:

  $$\lim_{x \to \infty} \exp(-x) = 0$$

This commit rewrites the rendering logic to only sanitize the markdown
output from Showdown.

This additionally fixes an issue where text that _happened_ to also
be valid markdown in the rendered math would be rendered as markdown,
which is not what we want.
parent fedd429c
'use strict'; 'use strict';
var escapeHtml = require('escape-html');
var katex = require('katex'); var katex = require('katex');
var showdown = require('showdown'); var showdown = require('showdown');
...@@ -12,12 +13,6 @@ function targetBlank() { ...@@ -12,12 +13,6 @@ function targetBlank() {
var converter; var converter;
/**
* Render markdown to HTML.
*
* This function does *not* sanitize the HTML in any way, that is the caller's
* responsibility.
*/
function renderMarkdown(markdown) { function renderMarkdown(markdown) {
if (!converter) { if (!converter) {
// see https://github.com/showdownjs/showdown#valid-options // see https://github.com/showdownjs/showdown#valid-options
...@@ -34,55 +29,102 @@ function renderMarkdown(markdown) { ...@@ -34,55 +29,102 @@ function renderMarkdown(markdown) {
return converter.makeHtml(markdown); return converter.makeHtml(markdown);
} }
function mathPlaceholder(id) {
return '{math:' + id.toString() + '}';
}
/** /**
* Replaces inline math between '\(' and '\)' delimiters * Parses a string containing mixed markdown and LaTeX in between
* with math rendered by KaTeX. * '$$..$$' or '\( ... \)' delimiters and returns an object containing a
* list of math blocks found in the string, plus the input string with math
* blocks replaced by placeholders.
*/ */
function renderInlineMath(text) { function extractMath(content) {
var mathStart = text.indexOf('\\('); var mathBlocks = [];
if (mathStart !== -1) { var pos = 0;
var mathEnd = text.indexOf('\\)', mathStart); var replacedContent = content;
if (mathEnd !== -1) {
var markdownSection = text.slice(0, mathStart); while (true) {
var mathSection = text.slice(mathStart + 2, mathEnd); var blockMathStart = replacedContent.indexOf('$$', pos);
var renderedMath = katex.renderToString(mathSection); var inlineMathStart = replacedContent.indexOf('\\(', pos);
return markdownSection +
renderedMath + if (blockMathStart === -1 && inlineMathStart === -1) {
renderInlineMath(text.slice(mathEnd + 2)); break;
} }
var mathStart;
var mathEnd;
if (blockMathStart !== -1 &&
(inlineMathStart === -1 || blockMathStart < inlineMathStart)) {
mathStart = blockMathStart;
mathEnd = replacedContent.indexOf('$$', mathStart + 2);
} else {
mathStart = inlineMathStart;
mathEnd = replacedContent.indexOf('\\)', mathStart + 2);
} }
return text;
}
/** if (mathEnd === -1) {
* Renders mixed blocks of markdown and LaTeX to HTML. break;
* } else {
* LaTeX blocks are delimited by '$$' (for blocks) or '\(' and '\)' mathEnd = mathEnd + 2;
* (for inline math). }
*
* @param {string} text - The markdown and LaTeX to render var id = mathBlocks.length + 1;
* @param {(string) => string} $sanitize - A function that sanitizes HTML to var placeholder = mathPlaceholder(id);
* remove any potentially unsafe tokens (eg. <script> tags). mathBlocks.push({
* @return {string} The sanitized HTML id: id,
*/ expression: replacedContent.slice(mathStart + 2, mathEnd - 2),
function renderMathAndMarkdown(text, $sanitize) { inline: inlineMathStart !== -1,
var html = text.split('$$').map(function (part, index) { });
if (index % 2 === 0) {
// Plain markdown block var replacement;
return renderMarkdown(renderInlineMath(part)); if (inlineMathStart !== -1) {
replacement = placeholder;
} else { } else {
// Math block // Add new lines before and after math blocks so that they render
// as separate paragraphs
replacement = '\n\n' + placeholder + '\n\n';
}
replacedContent = replacedContent.slice(0, mathStart) +
replacement +
replacedContent.slice(mathEnd);
pos = mathStart + replacement.length;
}
return {
mathBlocks: mathBlocks,
content: replacedContent,
};
}
function insertMath(html, mathBlocks) {
return mathBlocks.reduce(function (html, block) {
var renderedMath;
try { try {
// \\displaystyle tells KaTeX to render the math in display style if (block.inline) {
// (full sized fonts). renderedMath = katex.renderToString(block.expression);
return katex.renderToString("\\displaystyle {" + part + "}"); } else {
} catch (error) { // '\displaystyle {}' results in full-height fonts being used
return part; // for blocks.
renderedMath = katex.renderToString('\\displaystyle {' + block.expression + '}');
} }
} catch (err) {
renderedMath = escapeHtml(block.expression);
} }
}).join(''); return html.replace(mathPlaceholder(block.id), renderedMath);
}, html);
}
return $sanitize(html); function renderMathAndMarkdown(markdown, sanitizeFn) {
// KaTeX takes care of escaping its input, so we want to avoid passing its
// output through the HTML sanitizer. Therefore we first extract the math
// blocks from the input, render and sanitize the remaining markdown and then
// render and re-insert the math blocks back into the output.
var mathInfo = extractMath(markdown);
var markdownHTML = sanitizeFn(renderMarkdown(mathInfo.content));
var mathAndMarkdownHTML = insertMath(markdownHTML, mathInfo.mathBlocks);
return mathAndMarkdownHTML;
} }
module.exports = renderMathAndMarkdown; module.exports = renderMathAndMarkdown;
...@@ -6,6 +6,14 @@ describe('render-markdown', function () { ...@@ -6,6 +6,14 @@ describe('render-markdown', function () {
var render; var render;
var renderMarkdown; var renderMarkdown;
function fakeSanitize(html) {
return '{safe}' + html + '{/safe}';
}
function noopSanitize(html) {
return html;
}
beforeEach(function () { beforeEach(function () {
renderMarkdown = proxyquire('../render-markdown', { renderMarkdown = proxyquire('../render-markdown', {
katex: { katex: {
...@@ -14,8 +22,9 @@ describe('render-markdown', function () { ...@@ -14,8 +22,9 @@ describe('render-markdown', function () {
}, },
}, },
}); });
render = function (markdown) { render = function (markdown, sanitizeFn) {
return renderMarkdown(markdown, function (html) { return html; }); sanitizeFn = sanitizeFn || noopSanitize;
return renderMarkdown(markdown, sanitizeFn);
}; };
}); });
...@@ -43,23 +52,33 @@ describe('render-markdown', function () { ...@@ -43,23 +52,33 @@ describe('render-markdown', function () {
}); });
it('should sanitize the result', function () { it('should sanitize the result', function () {
var sanitize = function (html) { assert.equal(renderMarkdown('one **two** three', fakeSanitize),
return '<safe>' + html + '</safe>'; '{safe}<p>one <strong>two</strong> three</p>{/safe}');
};
assert.equal(renderMarkdown('one **two** three', sanitize),
'<safe><p>one <strong>two</strong> three</p></safe>');
}); });
}); });
describe('math blocks', function () { describe('math blocks', function () {
it('should render LaTeX blocks', function () { it('should render LaTeX blocks', function () {
assert.equal(render('$$x*2$$'), 'math:\\displaystyle {x*2}'); assert.equal(render('$$x*2$$'), '<p>math:\\displaystyle {x*2}</p>');
}); });
it('should render mixed blocks', function () { it('should render mixed blocks', function () {
assert.equal(render('one $$x*2$$ two $$x*3$$ three'), assert.equal(render('one $$x*2$$ two $$x*3$$ three'),
'<p>one </p>math:\\displaystyle {x*2}<p>two </p>' + '<p>one </p>\n\n<p>math:\\displaystyle {x*2}</p>\n\n' +
'math:\\displaystyle {x*3}<p>three</p>'); '<p>two </p>\n\n<p>math:\\displaystyle {x*3}</p>\n\n<p>three</p>');
});
it('should not sanitize math renderer output', function () {
var fakeSanitize = function (html) {
return html.toLowerCase();
};
assert.equal(render('$$X*2$$ FOO', fakeSanitize),
'<p>math:\\displaystyle {X*2}</p>\n\n<p>foo</p>');
});
it('should render mixed inline and block math', function () {
assert.equal(render('one \\(x*2\\) three $$x*3$$'),
'<p>one math:x*2 three </p>\n\n<p>math:\\displaystyle {x*3}</p>');
}); });
}); });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment