Commit 166b60fa authored by Robert Knight

Avoid sanitizing HTML generated by KaTeX.

KaTeX escapes its own input and we trust it to generate safe HTML as
output. Passing HTML generated by KaTeX through ngSanitize can mangle
the result. For example, the formatting of the infinity symbol is corrupted
when rendering this test block:

  $$\lim_{x \to \infty} \exp(-x) = 0$$

This commit rewrites the rendering logic to only sanitize the markdown
output from Showdown.

This additionally fixes an issue where text that _happened_ to also
be valid markdown in the rendered math would be rendered as markdown,
which is not what we want.
parent fedd429c
'use strict';
var escapeHtml = require('escape-html');
var katex = require('katex');
var showdown = require('showdown');
......@@ -12,12 +13,6 @@ function targetBlank() {
var converter;
/**
* Render markdown to HTML.
*
* This function does *not* sanitize the HTML in any way, that is the caller's
* responsibility.
*/
function renderMarkdown(markdown) {
if (!converter) {
// see https://github.com/showdownjs/showdown#valid-options
......@@ -34,55 +29,102 @@ function renderMarkdown(markdown) {
return converter.makeHtml(markdown);
}
/**
 * Builds the placeholder token that stands in for a math block while the
 * surrounding markdown is being rendered.
 *
 * @param {number} id - Identifier of the math block.
 * @return {string} A token of the form '{math:<id>}'.
 */
function mathPlaceholder(id) {
  var label = id.toString();
  var token = '{math:' + label + '}';
  return token;
}
/**
* Replaces inline math between '\(' and '\)' delimiters
* with math rendered by KaTeX.
* Parses a string containing mixed markdown and LaTeX in between
* '$$..$$' or '\( ... \)' delimiters and returns an object containing a
* list of math blocks found in the string, plus the input string with math
* blocks replaced by placeholders.
*/
function renderInlineMath(text) {
var mathStart = text.indexOf('\\(');
if (mathStart !== -1) {
var mathEnd = text.indexOf('\\)', mathStart);
if (mathEnd !== -1) {
var markdownSection = text.slice(0, mathStart);
var mathSection = text.slice(mathStart + 2, mathEnd);
var renderedMath = katex.renderToString(mathSection);
return markdownSection +
renderedMath +
renderInlineMath(text.slice(mathEnd + 2));
/**
 * Parses a string containing mixed markdown and LaTeX in between
 * '$$..$$' or '\( ... \)' delimiters and returns an object containing a
 * list of math blocks found in the string, plus the input string with math
 * blocks replaced by placeholders.
 *
 * Scanning stops at the first opening delimiter that has no matching closing
 * delimiter; everything from that point on is left untouched.
 *
 * @param {string} content - Mixed markdown and LaTeX source.
 * @return {{mathBlocks: Array<{id: number, expression: string, inline: boolean}>,
 *           content: string}}
 *   `mathBlocks` lists the extracted expressions (without delimiters) in
 *   order of appearance; `content` is the input with each math section
 *   replaced by its placeholder.
 */
function extractMath(content) {
  var mathBlocks = [];
  var pos = 0;
  var replacedContent = content;

  while (true) {
    var blockMathStart = replacedContent.indexOf('$$', pos);
    var inlineMathStart = replacedContent.indexOf('\\(', pos);

    if (blockMathStart === -1 && inlineMathStart === -1) {
      break;
    }

    // Whichever opening delimiter appears first decides the kind of THIS
    // section. The mere presence of an inline delimiter further along the
    // string must not turn a '$$' block into inline math.
    var isBlock = blockMathStart !== -1 &&
      (inlineMathStart === -1 || blockMathStart < inlineMathStart);

    var mathStart;
    var mathEnd;
    if (isBlock) {
      mathStart = blockMathStart;
      mathEnd = replacedContent.indexOf('$$', mathStart + 2);
    } else {
      mathStart = inlineMathStart;
      mathEnd = replacedContent.indexOf('\\)', mathStart + 2);
    }

    if (mathEnd === -1) {
      // Unterminated math section: leave the remainder as plain markdown.
      break;
    }
    // Both closing delimiters are two characters long; make mathEnd exclusive.
    mathEnd = mathEnd + 2;

    var id = mathBlocks.length + 1;
    var placeholder = mathPlaceholder(id);
    mathBlocks.push({
      id: id,
      expression: replacedContent.slice(mathStart + 2, mathEnd - 2),
      inline: !isBlock,
    });

    var replacement;
    if (isBlock) {
      // Add new lines before and after math blocks so that they render
      // as separate paragraphs
      replacement = '\n\n' + placeholder + '\n\n';
    } else {
      replacement = placeholder;
    }

    replacedContent = replacedContent.slice(0, mathStart) +
                      replacement +
                      replacedContent.slice(mathEnd);
    // Resume scanning just after the inserted placeholder.
    pos = mathStart + replacement.length;
  }

  return {
    mathBlocks: mathBlocks,
    content: replacedContent,
  };
}
/**
* Renders mixed blocks of markdown and LaTeX to HTML.
*
* LaTeX blocks are delimited by '$$' (for blocks) or '\(' and '\)'
* (for inline math).
*
* @param {string} text - The markdown and LaTeX to render
* @param {(string) => string} $sanitize - A function that sanitizes HTML to
* remove any potentially unsafe tokens (eg. <script> tags).
* @return {string} The sanitized HTML
*/
function renderMathAndMarkdown(text, $sanitize) {
var html = text.split('$$').map(function (part, index) {
if (index % 2 === 0) {
// Plain markdown block
return renderMarkdown(renderInlineMath(part));
} else {
// Math block
try {
// \\displaystyle tells KaTeX to render the math in display style
// (full sized fonts).
return katex.renderToString("\\displaystyle {" + part + "}");
} catch (error) {
return part;
/**
 * Renders each math block with KaTeX and substitutes the result for the
 * block's placeholder in `html`.
 *
 * If KaTeX throws while rendering a block, the HTML-escaped source
 * expression is inserted instead.
 *
 * @param {string} html - HTML containing math placeholders.
 * @param {Array<{id: number, expression: string, inline: boolean}>} mathBlocks
 *   The math blocks to render and insert.
 * @return {string} `html` with the first occurrence of each block's
 *   placeholder replaced by the rendered math.
 */
function insertMath(html, mathBlocks) {
  return mathBlocks.reduce(function (currentHtml, block) {
    var renderedMath;
    try {
      if (block.inline) {
        renderedMath = katex.renderToString(block.expression);
      } else {
        // '\displaystyle {}' results in full-height fonts being used
        // for blocks.
        renderedMath = katex.renderToString('\\displaystyle {' + block.expression + '}');
      }
    } catch (err) {
      renderedMath = escapeHtml(block.expression);
    }
    // Use a replacer function rather than a replacement string: in a string,
    // sequences such as '$&', "$'" or '$$' are interpreted as special
    // replacement patterns and would corrupt math output that contains '$'.
    return currentHtml.replace(mathPlaceholder(block.id), function () {
      return renderedMath;
    });
  }, html);
}
return $sanitize(html);
/**
 * Renders mixed markdown and LaTeX to HTML, sanitizing only the markdown
 * part of the output.
 *
 * @param {string} markdown - The markdown and LaTeX to render.
 * @param {(html: string) => string} sanitizeFn - Sanitizes the HTML produced
 *   from the markdown.
 * @return {string} The sanitized markdown HTML with rendered math inserted.
 */
function renderMathAndMarkdown(markdown, sanitizeFn) {
  // Step 1: pull the math out so that only placeholders reach the markdown
  // renderer and the sanitizer. KaTeX escapes its own input, and running its
  // output through the HTML sanitizer can mangle it.
  var extracted = extractMath(markdown);

  // Step 2: render and sanitize the markdown that remains.
  var safeMarkdownHTML = sanitizeFn(renderMarkdown(extracted.content));

  // Step 3: render the math and put it back in place of the placeholders.
  return insertMath(safeMarkdownHTML, extracted.mathBlocks);
}
module.exports = renderMathAndMarkdown;
......@@ -6,6 +6,14 @@ describe('render-markdown', function () {
var render;
var renderMarkdown;
// Test double for the sanitizer: wraps its input in '{safe}' ... '{/safe}'
// markers so tests can see exactly which part of the output was sanitized.
function fakeSanitize(html) {
  var opened = '{safe}' + html;
  return opened + '{/safe}';
}
// Pass-through sanitizer: hands the HTML back untouched.
function noopSanitize(html) {
  var untouched = html;
  return untouched;
}
beforeEach(function () {
renderMarkdown = proxyquire('../render-markdown', {
katex: {
......@@ -14,8 +22,9 @@ describe('render-markdown', function () {
},
},
});
render = function (markdown) {
return renderMarkdown(markdown, function (html) { return html; });
render = function (markdown, sanitizeFn) {
sanitizeFn = sanitizeFn || noopSanitize;
return renderMarkdown(markdown, sanitizeFn);
};
});
......@@ -43,23 +52,33 @@ describe('render-markdown', function () {
});
it('should sanitize the result', function () {
var sanitize = function (html) {
return '<safe>' + html + '</safe>';
};
assert.equal(renderMarkdown('one **two** three', sanitize),
'<safe><p>one <strong>two</strong> three</p></safe>');
assert.equal(renderMarkdown('one **two** three', fakeSanitize),
'{safe}<p>one <strong>two</strong> three</p>{/safe}');
});
});
describe('math blocks', function () {
it('should render LaTeX blocks', function () {
assert.equal(render('$$x*2$$'), 'math:\\displaystyle {x*2}');
assert.equal(render('$$x*2$$'), '<p>math:\\displaystyle {x*2}</p>');
});
it('should render mixed blocks', function () {
assert.equal(render('one $$x*2$$ two $$x*3$$ three'),
'<p>one </p>math:\\displaystyle {x*2}<p>two </p>' +
'math:\\displaystyle {x*3}<p>three</p>');
'<p>one </p>\n\n<p>math:\\displaystyle {x*2}</p>\n\n' +
'<p>two </p>\n\n<p>math:\\displaystyle {x*3}</p>\n\n<p>three</p>');
});
// Checks that the sanitizer is applied to the markdown output only, not to
// the rendered math.
it('should not sanitize math renderer output', function () {
// Local sanitizer that lower-cases its input, making it visible which parts
// of the output went through it. It has the same name as the fakeSanitize
// helper declared earlier in this file and takes precedence inside this test.
var fakeSanitize = function (html) {
return html.toLowerCase();
};
// The upper-case 'X' inside the math block is expected to survive, while the
// plain-text 'FOO' is expected to come back lower-cased.
assert.equal(render('$$X*2$$ FOO', fakeSanitize),
'<p>math:\\displaystyle {X*2}</p>\n\n<p>foo</p>');
});
// Checks input that contains both an inline '\( ... \)' section and a
// '$$ ... $$' block. Here the inline section precedes the block: the inline
// math is expected to stay inside the surrounding paragraph, while the block
// math is expected in a paragraph of its own with a '\displaystyle' wrapper.
it('should render mixed inline and block math', function () {
assert.equal(render('one \\(x*2\\) three $$x*3$$'),
'<p>one math:x*2 three </p>\n\n<p>math:\\displaystyle {x*3}</p>');
});
});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment