Commit 23a681a9 authored by Randall Leeds

Move PDF cache into pdf anchoring module

parent 4d0019c9
...@@ -7,6 +7,11 @@ html = require('./html') ...@@ -7,6 +7,11 @@ html = require('./html')
{TextPositionAnchor, TextQuoteAnchor} = require('./types') {TextPositionAnchor, TextQuoteAnchor} = require('./types')
# Caches for performance
pageTextCache = {}
quotePositionCache = {}
getSiblingIndex = (node) -> getSiblingIndex = (node) ->
siblings = Array.prototype.slice.call(node.parentNode.childNodes) siblings = Array.prototype.slice.call(node.parentNode.childNodes)
return siblings.indexOf(node) return siblings.indexOf(node)
...@@ -22,34 +27,35 @@ getPage = (pageIndex) -> ...@@ -22,34 +27,35 @@ getPage = (pageIndex) ->
return PDFViewerApplication.pdfViewer.pages[pageIndex] return PDFViewerApplication.pdfViewer.pages[pageIndex]
getPageTextContent = (pageIndex, cache) -> getPageTextContent = (pageIndex) ->
if cache[pageIndex]? if pageTextCache[pageIndex]?
return Promise.resolve(cache[pageIndex]) return Promise.resolve(pageTextCache[pageIndex])
else else
joinItems = ({items}) -> joinItems = ({items}) ->
nonEmpty = (item.str for item in items when /\S/.test(item.str)) nonEmpty = (item.str for item in items when /\S/.test(item.str))
textContent = nonEmpty.join('') textContent = nonEmpty.join('')
cache[pageIndex] = textContent pageTextCache[pageIndex] = textContent
return textContent return textContent
return PDFViewerApplication.pdfViewer.getPageTextContent(pageIndex) return PDFViewerApplication.pdfViewer.getPageTextContent(pageIndex)
.then(joinItems) .then(joinItems)
getPageOffset = (pageIndex, cache = {}) ->
getPageOffset = (pageIndex) ->
index = -1 index = -1
next = (offset) -> next = (offset) ->
if ++index is pageIndex if ++index is pageIndex
return Promise.resolve(offset) return Promise.resolve(offset)
return getPageTextContent(index, cache) return getPageTextContent(index)
.then((textContent) -> next(offset + textContent.length)) .then((textContent) -> next(offset + textContent.length))
return next(0) return next(0)
findPage = (offset, cache = {}) -> findPage = (offset) ->
index = 0 index = 0
total = 0 total = 0
...@@ -60,9 +66,9 @@ findPage = (offset, cache = {}) -> ...@@ -60,9 +66,9 @@ findPage = (offset, cache = {}) ->
else else
index++ index++
total += textContent.length total += textContent.length
return getPageTextContent(index, cache).then(count) return getPageTextContent(index).then(count)
return getPageTextContent(0, cache).then(count) return getPageTextContent(0).then(count)
###* ###*
...@@ -79,8 +85,6 @@ findPage = (offset, cache = {}) -> ...@@ -79,8 +85,6 @@ findPage = (offset, cache = {}) ->
# :rtype: Promise # :rtype: Promise
#### ####
exports.anchor = (root, selectors, options = {}) -> exports.anchor = (root, selectors, options = {}) ->
cache = options.cache ? {}
# Selectors # Selectors
position = null position = null
quote = null quote = null
...@@ -125,7 +129,7 @@ exports.anchor = (root, selectors, options = {}) -> ...@@ -125,7 +129,7 @@ exports.anchor = (root, selectors, options = {}) ->
if position? if position?
promise = promise.catch -> promise = promise.catch ->
return findPage(position.start, cache.pageText) return findPage(position.start)
.then ({index, offset, textContent}) -> .then ({index, offset, textContent}) ->
page = getPage(index) page = getPage(index)
start = position.start - offset start = position.start - offset
...@@ -140,14 +144,14 @@ exports.anchor = (root, selectors, options = {}) -> ...@@ -140,14 +144,14 @@ exports.anchor = (root, selectors, options = {}) ->
{pagesCount} = PDFViewerApplication.pdfViewer {pagesCount} = PDFViewerApplication.pdfViewer
if position? if position?
if cache.quotePosition[quote.exact]?[position.start]? if quotePositionCache[quote.exact]?[position.start]?
{page, anchor} = cache.quotePosition[quote.exact][position.start] {page, anchor} = quotePositionCache[quote.exact][position.start]
return anchorByPosition(page, anchor) return anchorByPosition(page, anchor)
findInPages = ([pageIndex, rest...]) -> findInPages = ([pageIndex, rest...]) ->
page = getPage(pageIndex) page = getPage(pageIndex)
content = getPageTextContent(pageIndex, cache.pageText) content = getPageTextContent(pageIndex)
offset = getPageOffset(pageIndex, cache.pageText) offset = getPageOffset(pageIndex)
Promise.all([content, offset, page]) Promise.all([content, offset, page])
.then (results) -> .then (results) ->
[content, offset, page] = results [content, offset, page] = results
...@@ -157,8 +161,8 @@ exports.anchor = (root, selectors, options = {}) -> ...@@ -157,8 +161,8 @@ exports.anchor = (root, selectors, options = {}) ->
pageOptions.hint = position.start - offset pageOptions.hint = position.start - offset
anchor = new TextQuoteAnchor.fromSelector(root, quote) anchor = new TextQuoteAnchor.fromSelector(root, quote)
anchor = anchor.toPositionAnchor(pageOptions) anchor = anchor.toPositionAnchor(pageOptions)
cache.quotePosition[quote.exact] ?= {} quotePositionCache[quote.exact] ?= {}
cache.quotePosition[quote.exact][position.start] = {page, anchor} quotePositionCache[quote.exact][position.start] = {page, anchor}
return anchorByPosition(page, anchor) return anchorByPosition(page, anchor)
.catch -> .catch ->
if rest.length if rest.length
...@@ -169,7 +173,7 @@ exports.anchor = (root, selectors, options = {}) -> ...@@ -169,7 +173,7 @@ exports.anchor = (root, selectors, options = {}) ->
pages = [0...pagesCount] pages = [0...pagesCount]
if position? if position?
return findPage(position.start, cache.pageText) return findPage(position.start)
.then ({index, offset, textContent}) -> .then ({index, offset, textContent}) ->
left = pages.slice(0, index) left = pages.slice(0, index)
right = pages.slice(index) right = pages.slice(index)
...@@ -187,7 +191,6 @@ exports.anchor = (root, selectors, options = {}) -> ...@@ -187,7 +191,6 @@ exports.anchor = (root, selectors, options = {}) ->
exports.describe = (root, range, options = {}) -> exports.describe = (root, range, options = {}) ->
cache = options.cache ? {}
range = new xpathRange.BrowserRange(range).normalize() range = new xpathRange.BrowserRange(range).normalize()
startTextLayer = getNodeTextLayer(range.start) startTextLayer = getNodeTextLayer(range.start)
...@@ -208,7 +211,7 @@ exports.describe = (root, range, options = {}) -> ...@@ -208,7 +211,7 @@ exports.describe = (root, range, options = {}) ->
start = seek(iter, range.start) start = seek(iter, range.start)
end = seek(iter, range.end) + start + range.end.textContent.length end = seek(iter, range.end) + start + range.end.textContent.length
return getPageOffset(startPageIndex, cache.pageText).then (pageOffset) -> return getPageOffset(startPageIndex).then (pageOffset) ->
# XXX: range covers only one page # XXX: range covers only one page
start += pageOffset start += pageOffset
end += pageOffset end += pageOffset
...@@ -222,3 +225,8 @@ exports.describe = (root, range, options = {}) -> ...@@ -222,3 +225,8 @@ exports.describe = (root, range, options = {}) ->
quote = TextQuoteAnchor.fromRange(root, r, options).toSelector(options) quote = TextQuoteAnchor.fromRange(root, r, options).toSelector(options)
return Promise.all([position, quote]) return Promise.all([position, quote])
exports.purgeCache = ->
pageTextCache = {}
quotePositionCache = {}
...@@ -9,21 +9,7 @@ class PDF extends Annotator.Plugin ...@@ -9,21 +9,7 @@ class PDF extends Annotator.Plugin
pdfViewer: null pdfViewer: null
pluginInit: -> pluginInit: ->
cache = { @annotator.anchoring = require('../anchoring/pdf')
pageText: {}
quotePosition: {}
}
anchoring = require('../anchoring/pdf')
@annotator.anchoring = {
anchor: (root, selectors, options = {}) ->
options = extend({}, options, {cache})
return anchoring.anchor(root, selectors, options)
describe: (root, range, options = {}) ->
options = extend({}, options, {cache})
return anchoring.describe(root, range, options)
}
@pdfViewer = PDFViewerApplication.pdfViewer @pdfViewer = PDFViewerApplication.pdfViewer
@pdfViewer.viewer.classList.add('has-transparent-text-layer') @pdfViewer.viewer.classList.add('has-transparent-text-layer')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment