Commit e2f8fd13 authored by csillag

Added PDF support - we can now annotate PDF documents

 * Update dom-text-mapper to fcd26632 (master branch)
 * Update dom-text-matcher to fcd26632 (master branch)
 * Update Annotator to f1e08bdd (maintenance branch)

 * Updated assets.py for the changed Annotator files and libraries
 * Updated JSChannel with a patch to allow talking to pdf.js
 * Updated our code to follow Annotator API changes
 * Updated the heatmap for two-phase anchoring and virtual anchors
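
For orientation, here is a rough sketch of the anchor structure that the updated
heatmap and guest code rely on (a hedged illustration, not part of the diff: the
field names come from the code below, while the literal values and someAnnotation
are made up):

  # Two-phase anchoring, roughly: phase 1 resolves an annotation target to
  # character positions (the "virtual" anchor); phase 2 turns those positions
  # into highlight elements ("physical" anchors), one entry per rendered page.
  someAnnotation = { id: "a1", text: "example note" }  # hypothetical annotation
  anchor =
    annotation: someAnnotation   # the annotation this anchor belongs to
    virtual:                     # phase 1: character positions in the corpus
      start: 12345
      end: 12391
      startPage: 3
      endPage: 3
    physical: {}                 # phase 2: highlights per page index, filled in only when that page is rendered
    allRendered: false           # true once every involved page has its highlights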

The change in embed.txt fixes a sporadic code insertion problem.

For pages with no script tags, our embed script sometimes crashed.
I had already fixed this problem for YepNope (see #607), but now
it has come back in our own code.

I am not sure why this has not happened before,
but it is definitely happening now.

I applied the same fix that worked with YepNope. It worked again.
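
For reference, the fix follows the usual defensive pattern (a minimal sketch,
assuming a hypothetical injectScript helper; this is not the literal embed.txt
diff): document.getElementsByTagName("script")[0] is undefined on a page with
no script tags, so fall back to <head> or <body> instead of crashing.

  # Sketch only (assumed pattern), not the actual embed.txt code
  injectScript = (src) ->
    el = document.createElement "script"
    el.src = src
    first = document.getElementsByTagName("script")[0]
    if first?
      # Normal case: insert next to the first existing script tag
      first.parentNode.insertBefore el, first
    else
      # No script tags on the page: append to <head> (or <body>) instead
      (document.head or document.body).appendChild el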
parent 686fe9cf
@@ -12,6 +12,8 @@ class Annotator.Guest extends Annotator
# Plugin configuration
options:
FuzzyAnchoring: {}
PDF: {}
Document: {}
# Internal state
@@ -40,7 +42,7 @@ class Annotator.Guest extends Annotator
formatted = {}
if annotation.document?
formatted['uri'] = @plugins.Document.uri()
for k, v of annotation when k isnt 'highlights'
for k, v of annotation when k not in ['highlights', 'anchors']
formatted[k] = v
# Work around issue in jschannel where a repeated object is considered
# recursive, even if it is not its own ancestor.
@@ -65,6 +67,14 @@ class Annotator.Guest extends Annotator
if not @plugins[name]
this.addPlugin(name, opts)
this.subscribe "annotationPhysicallyAnchored", (anchor) =>
if anchor.annotation.id? # Is this a finished annotation?
@plugins.Heatmap._update()
this.subscribe "annotationPhysicallyUnAnchored", (anchor) =>
if anchor.annotation.id? # Is this a finished annotation?
@plugins.Heatmap._update()
# Scan the document text with the DOM Text libraries
this.scanDocument "Annotator initialized"
@@ -135,9 +145,7 @@ class Annotator.Guest extends Annotator
scanDocument: (reason = "something happened") =>
try
console.log "Analyzing host frame, because " + reason + "..."
r = this._scan()
scanTime = r.time
console.log "Traversal+scan took " + scanTime + " ms."
this._scan()
catch e
console.log e.message
console.log e.stack
@@ -149,8 +157,6 @@ class Annotator.Guest extends Annotator
setTimeout =>
unless @selectedRanges?.length
@panel?.notify method: 'back'
this._setupMatching()
@domMatcher.setRootNode @wrapper[0]
this
# These methods aren't used in the iframe-hosted configuration of Annotator.
@@ -342,11 +348,7 @@ class Annotator.Guest extends Annotator
# Uncomment the traces below to investigate this further.
deleteAnnotation: (annotation) ->
if annotation.deleted
# console.log "Not deleting annotation the second time."
# console.trace()
return
else
# console.log "Deleting an annotation in " + @role + "."
# console.trace()
annotation.deleted = true
super
@@ -115,6 +115,32 @@ class Annotator.Plugin.Heatmap extends Annotator.Plugin
.interpolate(d3.interpolateHcl)
c(v).toString()
_collectPendingVirtualAnnotations: (startIndex, endIndex) ->
results = []
for index in [startIndex .. endIndex]
anchors = @annotator.anchors[index]
if anchors?
$.merge results, (anchor.annotation for anchor in anchors when not anchor.allRendered)
results
_scrollTo: (where, up) ->
wrapper = @annotator.wrapper
defaultView = wrapper[0].ownerDocument.defaultView
pad = defaultView.innerHeight * .2
where?.scrollintoview
complete: ->
if this.parentNode is this.ownerDocument
scrollable = $(this.ownerDocument.body)
else
scrollable = $(this)
top = scrollable.scrollTop()
correction = pad * (if up then -1 else +1)
scrollable.stop().animate {scrollTop: top + correction}, 300
_scrollUpTo: (where) -> this._scrollTo where, true
_scrollDownTo: (where) -> this._scrollTo where, false
_update: =>
wrapper = @annotator.wrapper
highlights = wrapper.find('.annotator-hl')
@@ -123,6 +149,17 @@ class Annotator.Plugin.Heatmap extends Annotator.Plugin
# Keep track of buckets of annotations above and below the viewport
above = []
below = []
# Get the page numbers
mapper = @annotator.domMapper
firstPage = 0
currentPage = mapper.getPageIndex()
lastPage = mapper.getPageCount() - 1
# Collect the pending virtual anchors from above and below
$.merge above, this._collectPendingVirtualAnnotations 0, currentPage-1
$.merge below, this._collectPendingVirtualAnnotations currentPage+1, lastPage
comments = @annotator.comments.slice()
# Construct control points for the heatmap highlights
@@ -322,50 +359,54 @@ class Annotator.Plugin.Heatmap extends Annotator.Plugin
# Does one of a few things when a tab is clicked depending on type
.on 'click', (bucket) =>
d3.event.stopPropagation()
highlights = wrapper.find('.annotator-hl')
pad = defaultView.innerHeight * .2
# If it's the upper tab, scroll to next bucket above
if @isUpper bucket
# If it's the upper tab, scroll to next bucket above (virtual version)
if (@isUpper bucket)
@dynamicBucket = true
threshold = defaultView.pageYOffset
{next} = highlights.toArray().reduce (acc, hl) ->
{pos, next} = acc
if pos < $(hl).offset().top < threshold
pos: $(hl).offset().top
next: $(hl)
# Find the next annotation, based on character position
{next} = @buckets[bucket].reduce (acc, ann) ->
{start, next} = acc
if start < ann.anchors[0].virtual.start
start: ann.anchors[0].virtual.start
next: ann
else
acc
, {pos: 0, next: null}
next?.scrollintoview
complete: ->
if this.parentNode is this.ownerDocument
scrollable = $(this.ownerDocument.body)
else
scrollable = $(this)
top = scrollable.scrollTop()
scrollable.stop().animate {scrollTop: top - pad}, 300
# If it's the lower tab, scroll to next bucket below
else if @isLower bucket
, {start: 0, next: null}
anchor = next.anchors[0] # This is where we want to go
startPage = anchor.virtual.startPage
if anchor.physical[startPage]? # Is this rendered?
hl = anchor.physical[startPage].highlights
this._scrollUpTo $(hl)
else # Not rendered yet
@pendingScroll =
anchor: anchor
page: startPage
@annotator.domMapper.setPageIndex startPage
# If it's the lower tab, scroll to next bucket below (virtual version)
else if (@isLower bucket)
@dynamicBucket = true
threshold = defaultView.pageYOffset + defaultView.innerHeight - pad
{next} = highlights.toArray().reduce (acc, hl) ->
{pos, next} = acc
if threshold < $(hl).offset().top < pos
pos: $(hl).offset().top
next: $(hl)
# Find the next annotation, based on character position
{next} = @buckets[bucket].reduce (acc, ann) ->
{start, next} = acc
if ann.anchors[0].virtual.start < start
start: ann.anchors[0].virtual.start
next: ann
else
acc
, {pos: Number.MAX_VALUE, next: null}
next?.scrollintoview
complete: ->
if this.parentNode is this.ownerDocument
scrollable = $(this.ownerDocument.body)
else
scrollable = $(this)
top = scrollable.scrollTop()
scrollable.stop().animate {scrollTop: top + pad}, 300
, {start: @annotator.domMapper.getCorpus().length, next: null}
anchor = next.anchors[0] # This is where we want to go
startPage = anchor.virtual.startPage
if anchor.physical[startPage]? # Is this rendered?
hl = anchor.physical[startPage].highlights
this._scrollDownTo $(hl)
else # Not rendered yet
# Pass this value to our listener
@pendingScroll =
anchor: anchor
page: startPage
@annotator.domMapper.setPageIndex startPage
# If it's neither of the above, load the bucket into the viewer
else
@@ -392,6 +433,14 @@ class Annotator.Plugin.Heatmap extends Annotator.Plugin
if @dynamicBucket
this._fillDynamicBucket()
# Event handler to finish scrolling when we have to wait for rendering
@annotator.subscribe "annotationPhysicallyAnchored", (anchor) =>
if @pendingScroll? and anchor is @pendingScroll.anchor
# The wanted annotation has been anchored.
hl = anchor.physical[@pendingScroll.page].highlights
this._scrollDownTo $(hl)
delete @pendingScroll
_fillDynamicBucket: =>
top = window.pageYOffset
bottom = top + $(window).innerHeight()
......
# Annotator plugin for fuzzy text matching
class Annotator.Plugin.FuzzyAnchoring extends Annotator.Plugin
pluginInit: ->
# Initialize the text matcher library
@textFinder = new DomTextMatcher => @annotator.domMapper.getCorpus()
# Register our fuzzy strategies
@annotator.virtualAnchoringStrategies.push
# Two-phased fuzzy text matching strategy. (Using context and quote.)
# This can handle document structure changes,
# and also content changes.
name: "two-phase fuzzy"
code: this.createVirtualAnchorWithTwoPhaseFuzzyMatching
@annotator.virtualAnchoringStrategies.push
# Naive fuzzy text matching strategy. (Using only the quote.)
# This can handle document structure changes,
# and also content changes.
name: "one-phase fuzzy"
code: this.createVirtualAnchorWithFuzzyMatching
createVirtualAnchorWithTwoPhaseFuzzyMatching: (target) =>
# Fetch the quote and the context
quoteSelector = @annotator.findSelector target.selector, "TextQuoteSelector"
prefix = quoteSelector?.prefix
suffix = quoteSelector?.suffix
quote = quoteSelector?.exact
# No context, no joy
unless (prefix? and suffix?) then return null
# Fetch the expected start and end positions
posSelector = @annotator.findSelector target.selector, "TextPositionSelector"
expectedStart = posSelector?.start
expectedEnd = posSelector?.end
options =
contextMatchDistance: @annotator.domMapper.getCorpus().length * 2
contextMatchThreshold: 0.5
patternMatchThreshold: 0.5
flexContext: true
result = @textFinder.searchFuzzyWithContext prefix, suffix, quote,
expectedStart, expectedEnd, false, options
# If we did not get a result, give up
unless result.matches.length
# console.log "Fuzzy matching did not return any results. Giving up on two-phase strategy."
return null
# here is our result
match = result.matches[0]
# console.log "2-phase fuzzy found match at: [" + match.start + ":" +
# match.end + "]: '" + match.found + "' (exact: " + match.exact + ")"
# OK, we have everything
# Compile the data required to store this virtual anchor
start: match.start
end: match.end
startPage: @annotator.domMapper.getPageIndexForPos match.start
endPage: @annotator.domMapper.getPageIndexForPos match.end
quote: match.found
diffHTML: unless match.exact then match.comparison.diffHTML
diffCaseOnly: unless match.exact then match.exactExceptCase
createVirtualAnchorWithFuzzyMatching: (target) =>
# Fetch the quote
quoteSelector = @annotator.findSelector target.selector, "TextQuoteSelector"
quote = quoteSelector?.exact
# No quote, no joy
unless quote? then return null
# For too short quotes, this strategy is bound to return false positives.
# See https://github.com/hypothesis/h/issues/853 for details.
return unless quote.length >= 32
# Get a starting position for the search
posSelector = @annotator.findSelector target.selector, "TextPositionSelector"
expectedStart = posSelector?.start
# Get full document length
len = @annotator.domMapper.getCorpus().length
# If we don't have the position saved, start at the middle of the doc
expectedStart ?= len / 2
# Do the fuzzy search
options =
matchDistance: len * 2
withFuzzyComparison: true
result = @textFinder.searchFuzzy quote, expectedStart, false, options
# If we did not get a result, give up
unless result.matches.length
# console.log "Fuzzy matching did not return any results. Giving up on one-phase strategy."
return null
# here is our result
match = result.matches[0]
# console.log "1-phase fuzzy found match at: [" + match.start + ":" +
# match.end + "]: '" + match.found + "' (exact: " + match.exact + ")"
# OK, we have everything
# Compile the data required to store this virtual anchor
start: match.start
end: match.end
startPage: @annotator.domMapper.getPageIndexForPos match.start
endPage: @annotator.domMapper.getPageIndexForPos match.end
quote: match.found
diffHTML: unless match.exact then match.comparison.diffHTML
diffCaseOnly: unless match.exact then match.exactExceptCase
@@ -29,7 +29,7 @@ Annotator::setupPlugins = (config={}, options={}) ->
win = util.getGlobal()
# Set up the default plugins.
plugins = ['Unsupported', 'Auth', 'Tags', 'Filter', 'Store', 'AnnotateItPermissions']
plugins = ['Unsupported', 'Auth', 'Tags', 'Filter', 'Store', 'AnnotateItPermissions', "FuzzyAnchoring", "PDF"]
# If Showdown is included add the Markdown plugin.
if win.Showdown
......
# Document mapper module for PDF.js documents
class window.PDFTextMapper extends window.PageTextMapperCore
# Are we working with a PDF document?
@applicable: -> PDFView?.initialized ? false
requiresSmartStringPadding: true
# Get the number of pages
getPageCount: -> PDFView.pages.length
# Where are we in the document?
getPageIndex: -> PDFView.page - 1
# Jump to a given page
setPageIndex: (index) -> PDFView.page = index + 1
# Determine whether a given page has been rendered
_isPageRendered: (index) ->
return PDFView.pages[index]?.textLayer?.renderingDone
# Get the root DOM node of a given page
getRootNodeForPage: (index) ->
PDFView.pages[index].textLayer.textLayerDiv
constructor: ->
@setEvents()
# Install watchers for various events to detect page rendering/unrendering
setEvents: ->
# Detect page rendering
addEventListener "pagerender", (evt) =>
index = evt.detail.pageNumber - 1
@_onPageRendered index
# Detect page un-rendering
addEventListener "DOMNodeRemoved", (evt) =>
node = evt.target
if node.nodeType is Node.ELEMENT_NODE and node.nodeName.toLowerCase() is "div" and node.className is "textLayer"
index = parseInt node.parentNode.id.substr(13) - 1
# Forget info about the removed DOM subtree
@_unmapPage @pageInfo[index]
# Do something about cross-page selections
window.DomTextMapper.instances.push
id: "cross-page catcher"
rootNode: document.getElementById "viewer"
performUpdateOnNode: (node, data) =>
if "viewer" is node.getAttribute? "id"
# This event escaped the pages.
# Must be a cross-page selection.
if data.start? and data.end?
startPage = @getPageForNode data.start
endPage = @getPageForNode data.end
for index in [ startPage.index .. endPage.index ]
#console.log "Should rescan page #" + index
@_updateMap @pageInfo[index]
documentChanged: ->
timestamp: ->
_extractionPattern: /[ ]+/g
_parseExtractedText: (text) => text.replace @_extractionPattern, " "
# Extract the text from the PDF
scan: ->
console.log "Scanning document for text..."
# Create a promise
pendingScan = new PDFJS.Promise()
# Tell the Find Controller to go digging
PDFFindController.extractText()
# When all the text has been extracted
PDFJS.Promise.all(PDFFindController.extractTextPromises).then =>
# PDF.js text extraction has finished.
# Post-process the extracted text
@pageInfo = ({ content: @_parseExtractedText page } for page in PDFFindController.pageContents)
# Do some basic calculations with the content
@_onHavePageContents()
# OK, we are ready to rock.
pendingScan.resolve()
# Do whatever we need to do after scanning
@_onAfterScan()
# Return the promise
pendingScan
# Look up the page for a given DOM node
getPageForNode: (node) ->
# Search for the root of this page
div = node
while (
(div.nodeType isnt Node.ELEMENT_NODE) or
not div.getAttribute("class")? or
(div.getAttribute("class") isnt "textLayer")
)
div = div.parentNode
# Fetch the page number from the id. ("pageContainerN")
index = parseInt div.parentNode.id.substr(13) - 1
# Look up the page
@pageInfo[index]
# Annotator plugin for annotating documents handled by PDF.js
class Annotator.Plugin.PDF extends Annotator.Plugin
pluginInit: ->
@annotator.documentAccessStrategies.unshift
# Strategy to handle PDF documents rendered by PDF.js
name: "PDF.js"
mapper: PDFTextMapper
@@ -213,7 +213,7 @@ class Range.BrowserRange
if window.DomTextMapper? and changed
# console.log "Ranged normalization changed the DOM, updating d-t-m"
window.DomTextMapper.changed nr.commonAncestor, "range normalization"
window.DomTextMapper.changed nr.commonAncestor, "range normalization", nr
new Range.NormalizedRange(nr)
......
// Generated by CoffeeScript 1.6.3
/*
** Annotator 1.2.6-dev-b9ec63a
** Annotator 1.2.6-dev-33543a5
** https://github.com/okfn/annotator/
**
** Copyright 2012 Aron Carroll, Rufus Pollock, and Nick Stenning.
** Dual licensed under the MIT and GPLv3 licenses.
** https://github.com/okfn/annotator/blob/master/LICENSE
**
** Built at: 2013-10-02 17:26:42Z
** Built at: 2013-10-27 05:12:46Z
*/
......
// Generated by CoffeeScript 1.6.3
/*
** Annotator 1.2.6-dev-0aaf331
** Annotator 1.2.6-dev-33543a5
** https://github.com/okfn/annotator/
**
** Copyright 2012 Aron Carroll, Rufus Pollock, and Nick Stenning.
** Dual licensed under the MIT and GPLv3 licenses.
** https://github.com/okfn/annotator/blob/master/LICENSE
**
** Built at: 2013-10-24 01:17:53Z
** Built at: 2013-10-27 05:12:46Z
*/
......
// Generated by CoffeeScript 1.6.3
/*
** Annotator 1.2.6-dev-f1e08bd
** https://github.com/okfn/annotator/
**
** Copyright 2012 Aron Carroll, Rufus Pollock, and Nick Stenning.
** Dual licensed under the MIT and GPLv3 licenses.
** https://github.com/okfn/annotator/blob/master/LICENSE
**
** Built at: 2013-11-12 02:02:47Z
*/
/*
//
*/
// Generated by CoffeeScript 1.6.3
(function() {
var _ref,
__bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; },
__hasProp = {}.hasOwnProperty,
__extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; };
Annotator.Plugin.FuzzyAnchoring = (function(_super) {
__extends(FuzzyAnchoring, _super);
function FuzzyAnchoring() {
this.createVirtualAnchorWithFuzzyMatching = __bind(this.createVirtualAnchorWithFuzzyMatching, this);
this.createVirtualAnchorWithTwoPhaseFuzzyMatching = __bind(this.createVirtualAnchorWithTwoPhaseFuzzyMatching, this);
_ref = FuzzyAnchoring.__super__.constructor.apply(this, arguments);
return _ref;
}
FuzzyAnchoring.prototype.pluginInit = function() {
var _this = this;
this.textFinder = new DomTextMatcher(function() {
return _this.annotator.domMapper.getCorpus();
});
this.annotator.virtualAnchoringStrategies.push({
name: "two-phase fuzzy",
code: this.createVirtualAnchorWithTwoPhaseFuzzyMatching
});
return this.annotator.virtualAnchoringStrategies.push({
name: "one-phase fuzzy",
code: this.createVirtualAnchorWithFuzzyMatching
});
};
FuzzyAnchoring.prototype.createVirtualAnchorWithTwoPhaseFuzzyMatching = function(target) {
var expectedEnd, expectedStart, match, options, posSelector, prefix, quote, quoteSelector, result, suffix;
quoteSelector = this.annotator.findSelector(target.selector, "TextQuoteSelector");
prefix = quoteSelector != null ? quoteSelector.prefix : void 0;
suffix = quoteSelector != null ? quoteSelector.suffix : void 0;
quote = quoteSelector != null ? quoteSelector.exact : void 0;
if (!((prefix != null) && (suffix != null))) {
return null;
}
posSelector = this.annotator.findSelector(target.selector, "TextPositionSelector");
expectedStart = posSelector != null ? posSelector.start : void 0;
expectedEnd = posSelector != null ? posSelector.end : void 0;
options = {
contextMatchDistance: this.annotator.domMapper.getCorpus().length * 2,
contextMatchThreshold: 0.5,
patternMatchThreshold: 0.5,
flexContext: true
};
result = this.textFinder.searchFuzzyWithContext(prefix, suffix, quote, expectedStart, expectedEnd, false, options);
if (!result.matches.length) {
return null;
}
match = result.matches[0];
return {
start: match.start,
end: match.end,
startPage: this.annotator.domMapper.getPageIndexForPos(match.start),
endPage: this.annotator.domMapper.getPageIndexForPos(match.end),
quote: match.found,
diffHTML: !match.exact ? match.comparison.diffHTML : void 0,
diffCaseOnly: !match.exact ? match.exactExceptCase : void 0
};
};
FuzzyAnchoring.prototype.createVirtualAnchorWithFuzzyMatching = function(target) {
var expectedStart, len, match, options, posSelector, quote, quoteSelector, result;
quoteSelector = this.annotator.findSelector(target.selector, "TextQuoteSelector");
quote = quoteSelector != null ? quoteSelector.exact : void 0;
if (quote == null) {
return null;
}
if (!(quote.length >= 32)) {
return;
}
posSelector = this.annotator.findSelector(target.selector, "TextPositionSelector");
expectedStart = posSelector != null ? posSelector.start : void 0;
len = this.annotator.domMapper.getCorpus().length;
if (expectedStart == null) {
expectedStart = len / 2;
}
options = {
matchDistance: len * 2,
withFuzzyComparison: true
};
result = this.textFinder.searchFuzzy(quote, expectedStart, false, options);
if (!result.matches.length) {
return null;
}
match = result.matches[0];
return {
start: match.start,
end: match.end,
startPage: this.annotator.domMapper.getPageIndexForPos(match.start),
endPage: this.annotator.domMapper.getPageIndexForPos(match.end),
quote: match.found,
diffHTML: !match.exact ? match.comparison.diffHTML : void 0,
diffCaseOnly: !match.exact ? match.exactExceptCase : void 0
};
};
return FuzzyAnchoring;
})(Annotator.Plugin);
}).call(this);
//
//@ sourceMappingURL=annotator.fuzzyanchoring.map
\ No newline at end of file
{"version":3,"file":"annotator.fuzzyanchoring.js","sources":["_preamble.coffee","_annotator_mapsrc/src/plugin/fuzzyanchoring.coffee"],"names":[],"mappings":";AAAA;;;;;;;;;;CAAA;CAAA;;;;;;;ACCA;CAAA,GAAA,EAAA;KAAA;;oSAAA;;CAAA,CAAM,IAAgB,GAAP;CAEb;;;;;;;CAAA;;CAAA,EAAY,MAAA,CAAZ;CAEE,SAAA,EAAA;CAAA,EAAkB,CAAjB,EAAD,GAAiC,CAAjC,IAAkB;CAAmB,IAAA,IAAS,MAAV;CAAlB,MAAe;CAAjC,GAGC,EAAD,GAAU,iBAA2B;CAInC,CAAM,EAAN,IAAA,SAAA;CAAA,CACM,EAAN,IAAA,oCADA;CAPF,OAGA;CAOC,GAAA,KAAS,IAAV,aAAqC;CAInC,CAAM,EAAN,IAAA,SAAA;CAAA,CACM,EAAN,IAAA,4BADA;CAhBQ,OAYV;CAZF,IAAY;;CAAZ,EAmB8C,GAAA,GAAC,mCAA/C;CAEE,SAAA,2FAAA;CAAA,CAAyD,CAAzC,CAAC,EAAjB,EAAgB,CAAU,GAAV,CAAhB,MAAgB;CAAhB,EACS,GAAT,OAAsB;CADtB,EAES,GAAT,OAAsB;CAFtB,EAGQ,EAAR,CAAA,OAAqB;AAGd,CAAP,GAAA,EAAA,UAAQ;CAA0B,GAAA,WAAO;QANzC;CAAA,CASuD,CAAzC,CAAC,EAAf,EAAc,CAAU,EAAxB,CAAc,UAAA;CATd,EAUgB,GAAhB,KAA2B,EAA3B;CAVA,EAWc,GAAd,KAAA;CAXA,EAcE,GADF,CAAA;CACE,CAAsB,CAA0C,CAAzC,EAAD,EAAtB,CAAgC,WAAhC;CAAA,CACuB,CADvB,KACA,aAAA;CADA,CAEuB,CAFvB,KAEA,aAAA;CAFA,CAGa,EAHb,IAGA,GAAA;CAjBF,OAAA;CAAA,CAkBoD,CAA3C,CAAC,CAAD,CAAT,CAAS,GAAW,CAAX,EAAA,SAAA;AAIF,CAAP,GAAA,EAAA,CAAqB;CAEnB,GAAA,WAAO;QAxBT;CAAA,EA2BQ,EAAR,CAAA,CAAuB;aAMvB;CAAA,CAAO,GAAP,GAAA;CAAA,CACK,CAAL,EAAU,GAAV;CADA,CAEW,EAAC,CAA4C,GAAxD,CAAA,SAAW;CAFX,CAGS,CAAA,CAAC,CAA4C,EAAtD,CAAA,CAAmB,SAAV;CAHT,CAIO,GAAP,GAAA;AACiB,CALjB,CAKU,CAAwB,EAAZ,CALtB,EAKA,EAAkD;AAC7B,CANrB,CAMc,CAAwB,EAAZ,CAN1B,EAMA,IAAA,GAAc;CAzC8B;CAnB9C,IAmB8C;;CAnB9C,EA8DsC,GAAA,GAAC,2BAAvC;CAEE,SAAA,mEAAA;CAAA,CAAyD,CAAzC,CAAC,EAAjB,EAAgB,CAAU,GAAV,CAAhB,MAAgB;CAAhB,EACQ,EAAR,CAAA,OAAqB;CAGrB,GAAO,EAAP,OAAA;CAAmB,GAAA,WAAO;QAJ1B;AAQA,CAAA,CAAA,EAAA,CAAmB,CAAnB;CAAA,aAAA;QARA;CAAA,CAWuD,CAAzC,CAAC,EAAf,EAAc,CAAU,EAAxB,CAAc,UAAA;CAXd,EAYgB,GAAhB,KAA2B,EAA3B;CAZA,EAeA,CAAO,EAAP,GAAgB;;GAGC,KAAjB;QAlBA;CAAA,EAsBE,GADF,CAAA;CACE,CAAe,CAAA,KAAf,KAAA;CAAA,CACqB,EADrB,IACA,WAAA;CAvBF,OAAA;CAAA,CAwBwC,CAA/B,CAAC,CAAD,CAAT,CAAS,GAAW,CAAX,EAAA;AAGF,CAAP,GAAA,EAAA,CAAqB;CAEnB,GAAA,WAAO;QA7BT;CAAA,EAgCQ,EAAR,CAAA,CAAuB;aAMvB;CAAA,CAAO,GAAP,GAAA;CAAA,CACK,CAAL,EAAU,GAAV;CADA,CAEW,EAAC,CAA4C,GAAxD,CAAA,SAAW;CAFX,CAGS,CAAA,CAAC,CAA4C,EAAtD,CAAA,CAAmB,SAAV;CAHT,CAIO,GAAP,GAAA;AACiB,CALjB,CAKU,CAAwB,EAAZ,CALtB,EAKA,EAAkD;AAC7B,CANrB,CAMc,CAAwB,EAAZ,CAN1B,EAMA,IAAA,GAAc;CA9CsB;CA9DtC,IA8DsC;;CA9DtC;;CAF4C,QAAS;CAAvD"}
\ No newline at end of file
// Generated by CoffeeScript 1.6.3
/*
** Annotator 1.2.6-dev-f1e08bd
** https://github.com/okfn/annotator/
**
** Copyright 2012 Aron Carroll, Rufus Pollock, and Nick Stenning.
** Dual licensed under the MIT and GPLv3 licenses.
** https://github.com/okfn/annotator/blob/master/LICENSE
**
** Built at: 2013-11-12 02:02:48Z
*/
/*
//
*/
// Generated by CoffeeScript 1.6.3
(function() {
var _ref,
__bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; },
__hasProp = {}.hasOwnProperty,
__extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; };
window.PDFTextMapper = (function(_super) {
__extends(PDFTextMapper, _super);
PDFTextMapper.applicable = function() {
var _ref;
return (_ref = typeof PDFView !== "undefined" && PDFView !== null ? PDFView.initialized : void 0) != null ? _ref : false;
};
PDFTextMapper.prototype.requiresSmartStringPadding = true;
PDFTextMapper.prototype.getPageCount = function() {
return PDFView.pages.length;
};
PDFTextMapper.prototype.getPageIndex = function() {
return PDFView.page - 1;
};
PDFTextMapper.prototype.setPageIndex = function(index) {
return PDFView.page = index + 1;
};
PDFTextMapper.prototype._isPageRendered = function(index) {
var _ref, _ref1;
return (_ref = PDFView.pages[index]) != null ? (_ref1 = _ref.textLayer) != null ? _ref1.renderingDone : void 0 : void 0;
};
PDFTextMapper.prototype.getRootNodeForPage = function(index) {
return PDFView.pages[index].textLayer.textLayerDiv;
};
function PDFTextMapper() {
this._parseExtractedText = __bind(this._parseExtractedText, this);
this.setEvents();
}
PDFTextMapper.prototype.setEvents = function() {
var _this = this;
addEventListener("pagerender", function(evt) {
var index;
index = evt.detail.pageNumber - 1;
return _this._onPageRendered(index);
});
addEventListener("DOMNodeRemoved", function(evt) {
var index, node;
node = evt.target;
if (node.nodeType === Node.ELEMENT_NODE && node.nodeName.toLowerCase() === "div" && node.className === "textLayer") {
index = parseInt(node.parentNode.id.substr(13) - 1);
return _this._unmapPage(_this.pageInfo[index]);
}
});
return window.DomTextMapper.instances.push({
id: "cross-page catcher",
rootNode: document.getElementById("viewer"),
performUpdateOnNode: function(node, data) {
var endPage, index, startPage, _i, _ref, _ref1, _results;
if ("viewer" === (typeof node.getAttribute === "function" ? node.getAttribute("id") : void 0)) {
if ((data.start != null) && (data.end != null)) {
startPage = _this.getPageForNode(data.start);
endPage = _this.getPageForNode(data.end);
_results = [];
for (index = _i = _ref = startPage.index, _ref1 = endPage.index; _ref <= _ref1 ? _i <= _ref1 : _i >= _ref1; index = _ref <= _ref1 ? ++_i : --_i) {
_results.push(_this._updateMap(_this.pageInfo[index]));
}
return _results;
}
}
},
documentChanged: function() {},
timestamp: function() {}
});
};
PDFTextMapper.prototype._extractionPattern = /[ ]+/g;
PDFTextMapper.prototype._parseExtractedText = function(text) {
return text.replace(this._extractionPattern, " ");
};
PDFTextMapper.prototype.scan = function() {
var pendingScan,
_this = this;
console.log("Scanning document for text...");
pendingScan = new PDFJS.Promise();
PDFFindController.extractText();
PDFJS.Promise.all(PDFFindController.extractTextPromises).then(function() {
var page;
_this.pageInfo = (function() {
var _i, _len, _ref, _results;
_ref = PDFFindController.pageContents;
_results = [];
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
page = _ref[_i];
_results.push({
content: this._parseExtractedText(page)
});
}
return _results;
}).call(_this);
_this._onHavePageContents();
pendingScan.resolve();
return _this._onAfterScan();
});
return pendingScan;
};
PDFTextMapper.prototype.getPageForNode = function(node) {
var div, index;
div = node;
while ((div.nodeType !== Node.ELEMENT_NODE) || (div.getAttribute("class") == null) || (div.getAttribute("class") !== "textLayer")) {
div = div.parentNode;
}
index = parseInt(div.parentNode.id.substr(13) - 1);
return this.pageInfo[index];
};
return PDFTextMapper;
})(window.PageTextMapperCore);
Annotator.Plugin.PDF = (function(_super) {
__extends(PDF, _super);
function PDF() {
_ref = PDF.__super__.constructor.apply(this, arguments);
return _ref;
}
PDF.prototype.pluginInit = function() {
return this.annotator.documentAccessStrategies.unshift({
name: "PDF.js",
mapper: PDFTextMapper
});
};
return PDF;
})(Annotator.Plugin);
}).call(this);
//
//@ sourceMappingURL=annotator.pdf.map
\ No newline at end of file
{"version":3,"file":"annotator.pdf.js","sources":["_preamble.coffee","_annotator_mapsrc/src/plugin/pdf.coffee"],"names":[],"mappings":";AAAA;;;;;;;;;;CAAA;CAAA;;;;;;;ACCA;CAAA,GAAA,EAAA;KAAA;;oSAAA;;CAAA,CAAM,IAAM;CAGV;;CAAA,EAAa,CAAb,KAAa,CAAb,GAAC;CAAe,GAAA,MAAA;CAAH,EAA0B;CAAvC,IAAa;;CAAb,EAE4B,CAF5B,sBAEA;;CAFA,EAKc,MAAA,GAAd;CAAyB,IAAK,EAAN,MAAP;CALjB,IAKc;;CALd,EAQc,MAAA,GAAd;CAAyB,EAAO,CAAf,GAAO,MAAP;CARjB,IAQc;;CARd,EAWc,EAAA,IAAC,GAAf;CAAiC,EAAO,CAAf,CAAe,EAAR,MAAP;CAXzB,IAWc;;CAXd,EAciB,EAAA,IAAC,MAAlB;CACE,SAAA,CAAA;CAAA,IAAsC,CAAtC;CAfF,IAciB;;CAdjB,EAkBoB,EAAA,IAAC,SAArB;CACU,IAAM,EAAP,EAAuB,IAA9B;CAnBF,IAkBoB;;CAGP,EAAA,CAAA,mBAAA;CACX,gEAAA;CAAA,GAAC,EAAD,GAAA;CAtBF,IAqBa;;CArBb,EAyBW,MAAX;CAEE,SAAA,EAAA;CAAA,CAA+B,CAAA,GAA/B,GAAgC,GAAhC,IAAA;CACE,IAAA,OAAA;CAAA,EAAQ,EAAR,CAAkB,EAAlB,EAAQ;CACP,IAAA,UAAD;CAFF,MAA+B;CAA/B,CAKmC,CAAA,GAAnC,GAAoC,OAApC;CACE,UAAA,CAAA;CAAA,EAAO,CAAP,EAAA,EAAA;CACA,GAAG,CAAiB,GAApB,CAAmF,EAAzC,CAAvC;CACD,CAAmC,CAA3B,CAAa,CAArB,CAAiB,EAAT,EAAR;CAGC,IAAA,GAAqB,EAAtB,OAAA;UAN+B;CAAnC,MAAmC;CAS5B,GAAP,EAAM,GAAwB,IAA9B;CACE,CAAA,MAAA,YAAA;CAAA,CACU,MAAV,MAAU;CADV,CAEqB,CAAA,CAAA,IAArB,CAAsB,UAAtB;CACE,aAAA,sCAAA;CAAA,EAAe,CAAZ,IAAA,EAAH;CAGE,GAAG,QAAH,MAAA,EAAG;CACD,EAAY,CAAoB,CAAnB,IAAb,KAAA;CAAA,EACU,CAAoB,CAAnB,EAAX,OAAA;AACA,CAAA;GAAA,iBAAa,sHAAb;CAEE,IAAC,GAAqB,EAAtB;CAFF;+BAHF;cAHF;YADmB;CAFrB,QAEqB;CAFrB,CAYiB,CAAA,KAAjB,CAAiB,MAAjB;CAZA,CAaW,CAAA,KAAX,CAAA;CA9BO,OAgBT;CAzCF,IAyBW;;CAzBX,EAyDoB,IAzDpB,WAyDA;;CAzDA,EA0DqB,CAAA,KAAC,UAAtB;CAAoC,CAA6B,CAAlC,CAAI,GAAJ,MAAA,KAAA;CA1D/B,IA0DqB;;CA1DrB,EA6DM,CAAN,KAAM;CACJ,SAAA,CAAA;SAAA,GAAA;CAAA,EAAA,GAAA,CAAO,wBAAP;CAAA,EAGkB,CAAA,CAAK,CAAvB,CAAkB,IAAlB;CAHA,KAMA,KAAA,MAAiB;CANjB,EASA,CAAA,CAAK,CAAL,CAAa,EAAiD,QAA3B,EAAnC;CAIE,GAAA,QAAA;CAAA,IAAC,GAAD;;CAAa;CAAA;gBAAA,2BAAA;6BAAA;CAAA;CAAA,CAAW,EAAC,GAAV,OAAA,KAAS;CAAX;CAAA;;CAAb;CAAA,IAGC,GAAD,WAAA;CAHA,MAMA,CAAA,GAAW;CAGV,IAAA,OAAD,GAAA;CAbF,MAA8D;CAV1D,YA0BJ;CAvFF,IA6DM;;CA7DN,EA2FgB,CAAA,KAAC,KAAjB;CAEE,SAAA;CAAA,EAAA,CAAA,EAAA;CACA,EACM,CAAmB,CAAJ,EAElB,CAFA,GAED,CAFA,CAAA,sBAAA;CAIA,EAAA,KAAA,EAAA;CANF,MACA;CADA,CASkC,CAA1B,EAAR,CAAA,EAAQ,EAAuB;CAG9B,GAAA,CAAS,GAAA,KAAV;CAzGF,IA2FgB;;CA3FhB;;CAHiC,KAAM;;CAAzC,CAgHM,IAAgB,GAAP;CAEb;;;;;CAAA;;CAAA,EAAY,MAAA,CAAZ;CACG,GAAA,GAAD,EAAU,IAAV,WAAmC;CAEjC,CAAM,EAAN,IAAA;CAAA,CACQ,IAAR,EAAA,KADA;CAHQ,OACV;CADF,IAAY;;CAAZ;;CAFiC,QAAS;CAhH5C"}
\ No newline at end of file
@@ -268,6 +268,8 @@
else if (null !== (oMatch = cfg.origin.match(/^https?:\/\/(?:[-a-zA-Z0-9_\.])+(?::\d+)?/))) {
cfg.origin = oMatch[0].toLowerCase();
validOrigin = true;
} else if (cfg.origin == "resource://pdf.js") {
validOrigin = true;
}
}
......
# Common functions for all page-based document mapper modules
class window.PageTextMapperCore
CONTEXT_LEN: 32
# Get the page index for a given character position
getPageIndexForPos: (pos) ->
for info in @pageInfo
if info.start <= pos < info.end
return info.index
console.log "Not on page " + info.index
return -1
# A new page was rendered
_onPageRendered: (index) =>
#console.log "Allegedly rendered page #" + index
# Is it really rendered?
unless @_isPageRendered index
#console.log "Page #" + index + " is not really rendered yet."
setTimeout (=> @_onPageRendered index), 1000
return
# Collect info about the new DOM subtree
@_mapPage @pageInfo[index]
# Determine whether a given page has been rendered and mapped
isPageMapped: (index) ->
return @pageInfo[index]?.domMapper?
# Create the mappings for a given page
_mapPage: (info) ->
info.node = @getRootNodeForPage info.index
info.domMapper = new DomTextMapper("d-t-m for page #" + info.index)
info.domMapper.setRootNode info.node
info.domMapper.documentChanged()
if @requiresSmartStringPadding
info.domMapper.setExpectedContent info.content
info.domMapper.scan()
renderedContent = info.domMapper.getCorpus()
if renderedContent isnt info.content
console.log "Oops. Mismatch between rendered and extracted text, while mapping page #" + info.index + "!"
console.trace()
console.log "Rendered: " + renderedContent
console.log "Extracted: " + info.content
# Announce the newly available page
setTimeout ->
event = document.createEvent "UIEvents"
event.initUIEvent "docPageMapped", false, false, window, 0
event.pageIndex = info.index
window.dispatchEvent event
# Update the mappings for a given page
_updateMap: (info) ->
#console.log "Updating mappings for page #" + info.index
info.domMapper.scan()
# Delete the mappings for a given page
_unmapPage: (info) ->
delete info.domMapper
# Announce the unavailable page
event = document.createEvent "UIEvents"
event.initUIEvent "docPageUnmapped", false, false, window, 0
event.pageIndex = info.index
window.dispatchEvent event
# Look up info about a given DOM node, uniting page and node info
getInfoForNode: (node) ->
pageData = @getPageForNode node
nodeData = pageData.domMapper.getInfoForNode node
# Copy info about the node
info = {}
for k,v of nodeData
info[k] = v
# Correct the character offsets with those of the page
info.start += pageData.start
info.end += pageData.start
info.pageIndex = pageData.index
info
# Return some data about a given character range
getMappingsForCharRange: (start, end, pages) ->
#console.log "Get mappings for char range [" + start + "; " + end + "], for pages " + pages + "."
# Check which pages these are on
startIndex = @getPageIndexForPos start
endIndex = @getPageIndexForPos end
#console.log "These are on pages [" + startIndex + ".." + endIndex + "]."
# Function to get the relevant section inside a given page
getSection = (index) =>
info = @pageInfo[index]
# Calculate in-page offsets
realStart = (Math.max info.start, start) - info.start
realEnd = (Math.min info.end, end) - info.start
# Get the range inside the page
mappings = info.domMapper.getMappingsForCharRange realStart, realEnd
mappings.sections[0]
# Get the section for all involved pages
sections = {}
for index in pages ? [startIndex..endIndex]
sections[index] = getSection index
# Return the data
sections: sections
getCorpus: -> @_corpus
getContextForCharRange: (start, end) ->
prefixStart = Math.max 0, start - @CONTEXT_LEN
prefixLen = start - prefixStart
prefix = @_corpus.substr prefixStart, prefixLen
suffix = @_corpus.substr end, @CONTEXT_LEN
[prefix.trim(), suffix.trim()]
# Call this in scan, when you have the page contents
_onHavePageContents: ->
# Join all the text together
@_corpus = (info.content for info in @pageInfo).join " "
# Go over the pages, and calculate some basic info
pos = 0
@pageInfo.forEach (info, i) =>
info.index = i
info.len = info.content.length
info.start = pos
info.end = (pos += info.len + 1)
# Call this in scan, after resolving the promise
_onAfterScan: ->
# Go over the pages again, and map the rendered ones
@pageInfo.forEach (info, i) =>
if @_isPageRendered i
@_mapPage info