Commit 94465e93 authored by RawKStar77's avatar RawKStar77

Convert coffee files in scripts/vendor to js, modify scripts/vendor/.gitignore

parent c55570c6
*.min.js
dom_text.js
page_text_mapper_core.js
# Maps character offsets in the rendered text of a document to DOM nodes.
class window.DomTextMapper
# Class-level check; this implementation always reports itself as applicable.
@applicable: -> true
# When true, selectNode does not select text nodes whose parent is a <table>.
USE_TABLE_TEXT_WORKAROUND = true
# When true, selectNode stays silent when selecting a whitespace-only node fails.
USE_EMPTY_TEXT_WORKAROUND = true
# Tag names for which selectNode selects the child nodes instead of the element.
SELECT_CHILDREN_INSTEAD = ["thead", "tbody", "tfoot", "ol", "a", "caption", "p", "span", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "li", "form"]
# Number of characters taken on each side by getContextForCharRange.
CONTEXT_LEN = 32
# Number of instances created so far; used to build the default id.
@instances: 0
# Create a mapper working on the whole document.
# id: optional label used as the log prefix; when omitted, a numbered
#     default ("d-t-m #<n>") is generated from the instance counter.
constructor: (id) ->
  @id = id
  @setRealRoot()
  DomTextMapper.instances += 1
  unless @id?
    @id = "d-t-m #" + DomTextMapper.instances
# Print the given values to the console, prefixed with this mapper's id.
log: (parts...) ->
  console.log.apply console, [@id, ": "].concat parts
# ===== Public methods =======
# Handler for "domChange" events on the root node (bound to the instance).
# Records the change, refreshes the data for the event target, and then
# marks the cached data as up to date.
_onChange: (event) =>
  @documentChanged()
  changedNode = event.target
  @performUpdateOnNode changedNode, false, event.data
  @lastScanned = @timestamp()
# Change the root node, and subscribe to its "domChange" events.
#
# node: the new root node; it must support addEventListener.
# Returns the node.
# Throws an Error when no node is given. The check happens before anything
# is modified, so the previous root and its listener stay in place.
_changeRootNode: (node) ->
  unless node?
    throw new Error "Called _changeRootNode with a null node!"
  @rootNode?.removeEventListener "domChange", @_onChange
  @rootNode = node
  @rootNode.addEventListener "domChange", @_onChange
  node
# Restrict all operations to the sub-tree below the given node.
#
# The node becomes both the root node and the path start node;
# the current window is used for selections.
setRootNode: (rootNode) ->
  @rootWin = window
  newRoot = @_changeRootNode rootNode
  @pathStartNode = newRoot
# Consider only the sub-tree beginning with the node whose ID was given.
#
# This will be the root node to use for all operations.
# Throws an Error when no element with that ID exists, before the
# current root is touched.
setRootId: (rootId) ->
  element = document.getElementById rootId
  unless element?
    throw new Error "Can't find element with specified ID!"
  @setRootNode element
# Map the content of the iframe with the given ID.
#
# The iframe's window is used for selections, its document becomes the
# root node, and its body becomes the path start node.
# Throws when the iframe is not found or has no content window.
setRootIframe: (iframeId) ->
  frame = window.document.getElementById iframeId
  if not frame?
    throw new Error "Can't find iframe with specified ID!"
  frameWindow = frame.contentWindow
  @rootWin = frameWindow
  if not frameWindow?
    throw new Error "Can't access contents of the specified iframe!"
  @_changeRootNode frameWindow.document
  @pathStartNode = @getBody()
# Return the default path, i.e. the path of the configured start node.
getDefaultPath: ->
  startNode = @pathStartNode
  @getPathTo startNode
# Work with the whole DOM tree of the current window.
#
# This is the default set up by the constructor; call it only to undo an
# earlier setRootNode / setRootId / setRootIframe.
setRealRoot: ->
  @rootWin = window
  @_changeRootNode document
  body = @getBody()
  @pathStartNode = body
# Notify the library that the document has changed.
# Cached data structures older than this moment are no longer re-used,
# so some calculations will be done again on the next call.
#
# Using this feature is not mandatory; without change notifications the
# library assumes that the document can change anytime, and therefore
# does not assume any stability.
documentChanged: ->
  now = @timestamp()
  @lastDOMChange = now
# Store the text that getNodeContent returns for the path start node
# instead of reading it from the browser.
setExpectedContent: (content) ->
  @expectedContent = content
  content
# Scan the document
#
# Traverses the DOM, collects various information, and
# creates mappings between the string indices
# (as appearing in the rendered text) and the DOM elements.
#
# The result is stored in @path, a map where the keys are the paths, and
# the values are objects with info about those parts of the DOM:
# path: the valid path value
# node: reference to the DOM node
# content: the text content of the node, as rendered by the browser
# length: the length of that content
# The content of the start node is also stored as the corpus.
#
# Returns null after a scan; returns nothing when the cached data is
# still valid or when the start node is not attached to the document.
scan: ->
  # A scan newer than the last registered DOM change is still valid.
  return if @domStableSince @lastScanned
  # We cannot map nodes that are not attached.
  unless @pathStartNode.ownerDocument.body.contains @pathStartNode
    return
  @saveSelection()
  try
    @path = {}
    path = @getDefaultPath()
    # Phase I: path traversal
    @traverseSubTree @pathStartNode, path
    # Phase II: offset calculation
    @collectPositions @path[path].node, path, null, 0, 0
  finally
    # The selection was saved above; give it back even when the scan
    # fails, otherwise every later saveSelection call would throw.
    @restoreSelection()
  @lastScanned = @timestamp()
  @_corpus = @path[path].content
  null
# Select the node stored for the given path (for visual identification),
# and optionally scroll to it.
# Throws when nothing is known about the path.
selectPath: (path, scroll = false) ->
  info = @path[path]
  if not info?
    throw new Error "I have no info about a node at " + path
  # Fall back to an XPath lookup when no node reference is stored.
  target = info.node or @lookUpNode info.path
  @selectNode target, scroll
# Refresh the cached data after the given node has changed.
#
# node: the changed node
# escalating: true when called recursively for an ancestor; the selection
#   is saved and restored only by the outermost (non-escalating) call.
#
# When the node is unknown, was replaced, or its overall content changed,
# the update escalates to the parent node. Does nothing before the first
# scan. Throws when node is null, when a parent has no path info, or when
# even the path start node was replaced. Returns nothing.
performUpdateOnNode: (node, escalating = false) ->
  unless node? then throw new Error "Called performUpdate with a null node!"
  unless @path? then return # We don't have data yet. Not updating.
  unless escalating then @saveSelection()
  try
    path = @getPathTo node
    pathInfo = @path[path]
    unless pathInfo?
      # Nothing is known about this node; let the parent handle it.
      @performUpdateOnNode node.parentNode, true
      return
    if pathInfo.node is node and pathInfo.content is @getNodeContent node, false
      # The node and its overall content are unchanged, so only the data
      # about its descendants is dropped and collected again.
      prefix = path + "/"
      pathsToDrop = (p for p of @path when @stringStartsWith p, prefix)
      delete @path[p] for p in pathsToDrop
      @traverseSubTree node, path
      if pathInfo.node is @pathStartNode
        # Ended up rescanning the whole doc.
        @collectPositions node, path, null, 0, 0
      else
        parentPath = @parentPath path
        parentPathInfo = @path[parentPath]
        unless parentPathInfo?
          throw new Error "While performing update on node " + path +
            ", no path info found for parent path: " + parentPath
        # Search for this node's content after the end of the previous sibling.
        oldIndex = if node is node.parentNode.firstChild
          0
        else
          @path[@getPathTo node.previousSibling].end - parentPathInfo.start
        @collectPositions node, path, parentPathInfo.content,
          parentPathInfo.start, oldIndex
    else if pathInfo.node isnt @pathStartNode
      # The node was replaced, or its content changed: go up one level.
      parentNode = node.parentNode ? @lookUpNode @parentPath path
      @performUpdateOnNode parentNode, true
    else
      throw new Error "Can not keep up with the changes, since even the node configured as path start node was replaced."
  finally
    # Restore on every exit, including errors, so that the saved selection
    # is never left behind.
    unless escalating then @restoreSelection()
  return
# Return the stored info object for a given path in the DOM.
# Throws when no scan has been done yet, or when the path is unknown.
getInfoForPath: (path) ->
  if not @path?
    throw new Error "Can't get info before running a scan() !"
  info = @path[path]
  return info if info?
  throw new Error "Found no info for path '" + path + "'!"
# Return the offset of the start of the given path in the rendered text.
# When the path has no start offset of its own, the start of the first
# following node that has one is used.
getStartPosForPath: (path) ->
  info = @getInfoForPath path
  if info.start? then info.start else @getFirstPosAfter info.node
# Return the start offset of the first node after the given one that has
# a start offset: the next sibling is tried first; without a next sibling
# the search continues from the parent.
# NOTE(review): nothing here stops the walk at the root, and a sibling
# without path info is not handled -- confirm callers never hit these.
getFirstPosAfter: (node) ->
  sibling = node.nextSibling
  unless sibling?
    # Nothing to see on this level. Move up in the tree.
    return @getFirstPosAfter node.parentNode
  info = @path[@getPathTo sibling]
  if info.start? then info.start else @getFirstPosAfter sibling
# Return the offset of the end of the given path in the rendered text.
# When the path has no end offset of its own, the end of the first
# preceding node that has one is used.
getEndPosForPath: (path) ->
  info = @getInfoForPath path
  if info.end? then info.end else @getFirstPosBefore info.node
# Return the end offset of the first node before the given one that has
# an end offset: the previous sibling is tried first; without a previous
# sibling the search continues from the parent.
# NOTE(review): nothing here stops the walk at the root, and a sibling
# without path info is not handled -- confirm callers never hit these.
getFirstPosBefore: (node) ->
  sibling = node.previousSibling
  unless sibling?
    # Nothing to see on this level. Move up in the tree.
    return @getFirstPosBefore node.parentNode
  info = @path[@getPathTo sibling]
  if info.end? then info.end else @getFirstPosBefore sibling
# Return the stored info object for a given node in the DOM.
# Throws when the node is null, or when its path is unknown.
getInfoForNode: (node) ->
  if not node?
    throw new Error "Called getInfoForNode(node) with null node!"
  nodePath = @getPathTo node
  @getInfoForPath nodePath
# Return the offset of the start of the given node in the rendered text.
# Throws when the node is null, or when its path is unknown.
getStartPosForNode: (node) ->
  unless node?
    # The message names this method, so the failing call can be found.
    throw new Error "Called getStartPosForNode(node) with null node!"
  @getStartPosForPath @getPathTo node
# Return the offset of the end of the given node in the rendered text.
# Throws when the node is null, or when its path is unknown.
getEndPosForNode: (node) ->
  unless node?
    # The message names this method, so the failing call can be found.
    throw new Error "Called getEndPosForNode(node) with null node!"
  @getEndPosForPath @getPathTo node
# Get the matching DOM elements for a list of charRanges.
# (Calls getMappingsForCharRange for each element of the given list, and
# returns the results in the same order.)
getMappingsForCharRanges: (charRanges) ->
  results = []
  for range in charRanges
    results.push @getMappingsForCharRange range.start, range.end
  results
# Return the rendered text stored for a part of the DOM.
# If path is not given, the default path is used.
getContentForPath: (path = null) ->
  path = @getDefaultPath() unless path?
  info = @path[path]
  info.content
# Return the length of the rendered text stored for a part of the DOM.
# If path is not given, the default path is used.
getLengthForPath: (path = null) ->
  path = @getDefaultPath() unless path?
  info = @path[path]
  info.length
# Length of the corpus stored by the last scan.
getDocLength: ->
  corpus = @_corpus
  corpus.length
# The corpus (rendered text of the start node) stored by the last scan.
getCorpus: ->
  @_corpus
# Get the context around the given charRange in the corpus.
#
# Returns [prefix, suffix]: up to CONTEXT_LEN characters before the start
# and after the end of the range, both trimmed.
# Throws when start is negative or end lies beyond the corpus.
getContextForCharRange: (start, end) ->
  throw Error "Negative range start is invalid!" if start < 0
  throw Error "Range end is after the end of corpus!" if end > @_corpus.length
  before = @_corpus.slice Math.max(0, start - CONTEXT_LEN), start
  after = @_corpus.slice end, end + CONTEXT_LEN
  [before.trim(), after.trim()]
# Get the matching DOM elements for a given charRange
#
# start, end: character offsets in the corpus; both are required.
# scan() is called first, so the cached data is refreshed when needed.
#
# Returns an object with a single-element "sections" list. The section has:
# mappings: one entry per atomic node overlapping the range, sorted by
#   start offset; each has element (the path info), wanted (the requested
#   part of the content) and either full: true or start / end offsets
# realRange: a DOM range spanning from the first to the last mapping
# rangeInfo: the paths and offsets used to build that range
# safeParent: the common ancestor container of the range
#
# Throws when start or end is missing, or when no mapping is found.
getMappingsForCharRange: (start, end) ->
unless (start? and end?)
throw new Error "start and end is required!"
@scan()
# Collect the matching path infos
mappings = []
for p, info of @path when info.atomic and
@regions_overlap info.start, info.end, start, end
do (info) =>
mapping =
element: info
# The node is fully covered when the range contains all of its offsets.
full = start <= info.start and info.end <= end
if full
mapping.full = true
mapping.wanted = info.content
mapping.yields = info.content
mapping.startCorrected = 0
mapping.endCorrected = 0
else
if info.node.nodeType is Node.TEXT_NODE
# Partial match: work out which part of this text node is wanted.
if start <= info.start
mapping.end = end - info.start
mapping.wanted = info.content.substr 0, mapping.end
else if info.end <= end
mapping.start = start - info.start
mapping.wanted = info.content.substr mapping.start
else
mapping.start = start - info.start
mapping.end = end - info.start
mapping.wanted = info.content.substr mapping.start,
mapping.end - mapping.start
# Convert the offsets in the rendered text to offsets in node.data.
@computeSourcePositions mapping
mapping.yields = info.node.data.substr mapping.startCorrected,
mapping.endCorrected - mapping.startCorrected
else if (info.node.nodeType is Node.ELEMENT_NODE) and
(info.node.tagName.toLowerCase() is "img")
@log "Can not select a sub-string from the title of an image.
Selecting all."
mapping.full = true
mapping.wanted = info.content
else
@log "Warning: no idea how to handle partial mappings
for node type " + info.node.nodeType
if info.node.tagName? then @log "Tag: " + info.node.tagName
@log "Selecting all."
mapping.full = true
mapping.wanted = info.content
mappings.push mapping
if mappings.length is 0
@log "Collecting nodes for [" + start + ":" + end + "]"
@log "Should be: '" + @_corpus[ start ... end ] + "'."
throw new Error "No mappings found for [" + start + ":" + end + "]!"
mappings = mappings.sort (a, b) -> a.element.start - b.element.start
# Create a DOM range object
r = @rootWin.document.createRange()
startMapping = mappings[0]
startNode = startMapping.element.node
startPath = startMapping.element.path
startOffset = startMapping.startCorrected
# Fully covered nodes are included as a whole; others use the text offset.
if startMapping.full
r.setStartBefore startNode
startInfo = startPath
else
r.setStart startNode, startOffset
startInfo = startPath + ":" + startOffset
endMapping = mappings[mappings.length - 1]
endNode = endMapping.element.node
endPath = endMapping.element.path
endOffset = endMapping.endCorrected
if endMapping.full
r.setEndAfter endNode
endInfo = endPath
else
r.setEnd endNode, endOffset
endInfo = endPath + ":" + endOffset
result = {
mappings: mappings
realRange: r
rangeInfo:
startPath: startPath
startOffset: startOffset
startInfo: startInfo
endPath: endPath
endOffset: endOffset
endInfo: endInfo
safeParent: r.commonAncestorContainer
}
# Return the result
sections: [result]
# ===== Private methods (never call from outside the module) =======
# Current time in milliseconds.
timestamp: ->
  Date.now()
# Tell whether the string begins with the given prefix.
# Throws when the prefix is empty or missing.
stringStartsWith: (string, prefix) ->
  if not prefix
    throw Error "Requires a non-empty prefix!"
  head = string.slice 0, prefix.length
  head is prefix
# Tell whether the string ends with the given suffix.
# Throws when the suffix is empty or missing.
stringEndsWith: (string, suffix) ->
  if not suffix
    throw Error "Requires a non-empty suffix!"
  tail = string.slice string.length - suffix.length, string.length
  tail is suffix
# Return the path with its last "/segment" removed.
parentPath: (path) ->
  lastSlash = path.lastIndexOf "/"
  path.substr 0, lastSlash
# Tell whether the DOM may have changed since the given timestamp.
# Without a recorded change or without a timestamp, a change is assumed.
domChangedSince: (timestamp) ->
  return true unless @lastDOMChange? and timestamp?
  @lastDOMChange > timestamp
# Opposite of domChangedSince.
domStableSince: (timestamp) ->
  changed = @domChangedSince timestamp
  not changed
# Return the name used for the node in paths: text, comment and CDATA
# nodes get their XPath-style test names, everything else its nodeName.
getProperNodeName: (node) ->
  name = node.nodeName
  if name is "#text"
    "text()"
  else if name is "#comment"
    "comment()"
  else if name is "#cdata-section"
    "cdata-section()"
  else
    name
# Return the 1-based position of the node among the siblings up to and
# including itself that share its nodeName.
getNodePosition: (node) ->
  count = 0
  sibling = node
  while sibling
    count += 1 if sibling.nodeName is node.nodeName
    sibling = sibling.previousSibling
  count
# Return the path segment for the node: its name, followed by "[n]" when
# it is not the first sibling with that name.
getPathSegment: (node) ->
  name = @getProperNodeName node
  position = @getNodePosition node
  suffix = if position > 1 then "[" + position + "]" else ""
  name + suffix
# Return the path leading from the root node to the given node.
# The path starts with "./" when the root node has an owner document,
# and with "/" otherwise; a trailing slash is removed.
# Throws when the node is not a descendant of the root node.
getPathTo: (node) ->
  segments = []
  current = node
  while current isnt @rootNode
    unless current?
      throw new Error "Called getPathTo on a node which was not a descendant of @rootNode. " + @rootNode
    segments.unshift @getPathSegment current
    current = current.parentNode
  lead = if @rootNode.ownerDocument? then './' else '/'
  tail = (segment + '/' for segment in segments).join ''
  (lead + tail).replace /\/$/, ''
# Recursively traverse a sub-tree of the DOM, storing path info (path,
# rendered content, its length, and the node) for every node visited.
#
# node: root of the sub-tree
# path: the path of that node
# invisible: true when an ancestor had no rendered content
# verbose: log every path visited
# Returns null.
traverseSubTree: (node, path, invisible = false, verbose = false) ->
  @underTraverse = path
  rendered = @getNodeContent node, false
  @path[path] = {path: path, content: rendered, length: rendered.length, node: node}
  if rendered.length
    @log "Collected info about path " + path if verbose
    if invisible
      @log "Something seems to be wrong. I see visible content @ " + path + ", while some of the ancestor nodes reported empty contents. Probably a new selection API bug...."
      @log "Anyway, text is '" + rendered + "'."
  else
    @log "Found no content at path " + path if verbose
    invisible = true
  # Children are visited even when this node had no rendered content.
  if node.hasChildNodes()
    for child in node.childNodes
      childPath = path + '/' + @getPathSegment child
      @traverseSubTree child, childPath, invisible, verbose
  null
# Return the first <body> element of the document of the root window.
getBody: ->
  bodies = @rootWin.document.getElementsByTagName "body"
  bodies[0]
# Tell whether the regions [start1, end1) and [start2, end2) share
# at least one position.
regions_overlap: (start1, end1, start2, end2) ->
  return false unless start1 < end2
  start2 < end1
# Evaluate the path as an XPath expression relative to the root node,
# and return the first matching node.
lookUpNode: (path) ->
  doc = if @rootNode.ownerDocument? then @rootNode.ownerDocument else @rootNode
  matches = doc.evaluate path, @rootNode, null, 0, null
  matches.iterateNext()
# Save the ranges of the current selection, so that they can be put back
# by restoreSelection. The stack of the call is remembered for debugging.
# Throws when a selection is already saved.
saveSelection: ->
  if @savedSelection?
    @log "Selection saved at:"
    @log @selectionSaved
    throw new Error "Selection already saved!"
  selection = @rootWin.getSelection()
  count = selection.rangeCount
  ranges = []
  index = 0
  while index < count
    ranges.push selection.getRangeAt index
    index++
  @savedSelection = ranges
  @selectionSaved = (new Error "selection was saved here").stack
# Replace the current selection with the ranges stored by saveSelection,
# and forget the stored ranges.
# Throws when nothing was saved.
restoreSelection: ->
  if not @savedSelection?
    throw new Error "No selection to restore."
  selection = @rootWin.getSelection()
  selection.removeAllRanges()
  for saved in @savedSelection
    selection.addRange saved
  delete @savedSelection
# Select the given node (for visual identification),
# and optionally scroll to it
#
# node: the node to select; throws when it is null
# scroll: when true, scroll the node (or its nearest ancestor supporting
#   scrollIntoViewIfNeeded) into view
# Returns the selection object of the root window.
selectNode: (node, scroll = false) ->
  unless node?
    throw new Error "Called selectNode with null node!"
  sel = @rootWin.getSelection()
  # clear the selection
  sel.removeAllRanges()
  # create our range, and select it
  realRange = @rootWin.document.createRange()
  # There is some weird, bogus behaviour in Chrome,
  # triggered by whitespaces between the table tag and its children.
  # See the select-tbody and the select-the-parent-when-selecting problems
  # described here:
  # https://github.com/hypothesis/h/issues/280
  # And the WebKit bug report here:
  # https://bugs.webkit.org/show_bug.cgi?id=110595
  #
  # To work around this, when told to select specific nodes, we have to
  # do various other things. See below.
  if node.nodeType is Node.ELEMENT_NODE and node.hasChildNodes() and
      node.tagName.toLowerCase() in SELECT_CHILDREN_INSTEAD
    # This is an element where direct selection sometimes fails,
    # because of the WebKit bug.
    # (Sometimes it selects nothing, sometimes it selects something wrong.)
    # So we select directly the children instead.
    children = node.childNodes
    realRange.setStartBefore children[0]
    realRange.setEndAfter children[children.length - 1]
    sel.addRange realRange
  else
    # The parent may be missing or may have no tag name; read it safely
    # so that such nodes reach the guarded selection below instead of
    # failing here.
    parentTag = node.parentNode?.tagName?.toLowerCase()
    textInTable = USE_TABLE_TEXT_WORKAROUND and
      node.nodeType is Node.TEXT_NODE and parentTag is "table"
    # A text node directly inside a table should not even be there.
    # Selecting it might select the whole table, so nothing is selected.
    unless textInTable
      # Normal element, should be selected
      try
        realRange.setStartBefore node
        realRange.setEndAfter node
        sel.addRange realRange
      catch exception
        # This might be caused by the fact that FF can't select a
        # TextNode containing only whitespace.
        # If this is the case, then it's OK.
        unless USE_EMPTY_TEXT_WORKAROUND and @isWhitespace node
          # No, this is not the case. Then this is an error.
          @log "Warning: failed to scan element @ " + @underTraverse
          @log "Content is: " + node.innerHTML
          @log "We won't be able to properly anchor to any text inside this element."
  if scroll
    sn = node
    while sn? and not sn.scrollIntoViewIfNeeded?
      sn = sn.parentNode
    if sn?
      sn.scrollIntoViewIfNeeded()
    else
      @log "Failed to scroll to element. (Browser does not support scrollIntoViewIfNeeded?)"
  sel
# Read the text of a selection (the current one when none is given):
# trimmed, with newlines turned into spaces and runs of whitespace
# collapsed into a single space.
readSelectionText: (sel) ->
  sel = @rootWin.getSelection() unless sel
  raw = sel.toString().trim()
  singleLine = raw.replace /\n/g, " "
  singleLine.replace /\s{2,}/g, " "
# Read the "text content" of a sub-tree of the DOM by selecting it and
# reading the text of the selection.
# The previous selection is saved and restored around the operation
# unless shouldRestoreSelection is false.
getNodeSelectionText: (node, shouldRestoreSelection = true) ->
  @saveSelection() if shouldRestoreSelection
  selection = @selectNode node
  text = @readSelectionText selection
  @restoreSelection() if shouldRestoreSelection
  text
# Convert "display" text indices to "source" text indices.
#
# match: a mapping with element (path info of a text node) and optional
#   start / end offsets into the displayed content of that node.
# Stores the corresponding offsets into node.data on the mapping, as
# startCorrected and endCorrected. Returns null, or nothing for the
# empty case.
# Throws when the displayed range cannot be located in the source text.
computeSourcePositions: (match) ->
  # the HTML source of the text inside a text element.
  sourceText = match.element.node.data.replace /\n/g, " "
  # what gets displayed, when the node is processed by the browser.
  displayText = match.element.content
  # The selected charRange in displayText.
  displayStart = if match.start? then match.start else 0
  displayEnd = if match.end? then match.end else displayText.length
  if displayEnd is 0
    # Handle empty text nodes
    match.startCorrected = 0
    match.endCorrected = 0
    return
  sourceStart = null
  sourceEnd = null
  sourceIndex = 0
  displayIndex = 0
  # Walk the two strings in parallel, skipping source characters that are
  # not displayed. The walk stops at the end of the source text, so a
  # displayed range that cannot be matched ends in an error, not a hang.
  while sourceIndex < sourceText.length and not (sourceStart? and sourceEnd?)
    if sourceText[sourceIndex] is displayText[displayIndex]
      sourceStart = sourceIndex if displayIndex is displayStart
      displayIndex++
      sourceEnd = sourceIndex + 1 if displayIndex is displayEnd
    sourceIndex++
  unless sourceStart? and sourceEnd?
    throw new Error "Could not map display range [" + displayStart + ":" +
      displayEnd + "] to the source text at " + match.element.path
  match.startCorrected = sourceStart
  match.endCorrected = sourceEnd
  null
# Internal function used to read the text content of a given node,
# as rendered by the browser (through the selection API).
# For the path start node, the expected content is returned instead when
# one has been set.
getNodeContent: (node, shouldRestoreSelection = true) ->
  useExpected = node is @pathStartNode and @expectedContent?
  return @expectedContent if useExpected
  @getNodeSelectionText node, shouldRestoreSelection
# Internal function to collect mapping data from a given DOM element.
# The content is taken from the path info stored by traverseSubTree;
# start, end and atomic are stored on that path info.
#
# Input parameters:
# node: the node to scan
# path: the path to the node (relative to rootNode)
# parentContent: the content of the node's parent node
#   (as rendered by the browser)
#   This is used to determine whether the given node is rendered
#   at all.
#   If not given, it will be assumed that it is rendered
# parentIndex: the starting character offset
#   of content of this node's parent node in the rendered content
# index: the first character offset position in the content of this
#   node's parent node
#   where the content of this node might start
#
# Returns:
# the first character offset position in the content of this node's
# parent node that is not accounted for by this node
collectPositions: (node, path, parentContent = null, parentIndex = 0, index = 0) ->
  pathInfo = @path[path]
  content = pathInfo?.content
  if not content
    # node has no content, not interesting
    offset = parentIndex + index
    pathInfo.start = offset
    pathInfo.end = offset
    pathInfo.atomic = false
    return index
  startIndex = index
  startIndex = parentContent.indexOf content, index if parentContent?
  if startIndex is -1
    # content of node is not present in parent's content - probably hidden,
    # or something similar
    @log "Content of this not is not present in content of parent, at path " + path
    @log "(Content: '" + content + "'.)"
    return index
  endIndex = startIndex + content.length
  isLeaf = not node.hasChildNodes()
  pathInfo.start = parentIndex + startIndex
  pathInfo.end = parentIndex + endIndex
  pathInfo.atomic = isLeaf
  unless isLeaf
    # Child paths are rebuilt here with a per-name counter.
    seen = {}
    cursor = 0
    base = parentIndex + startIndex
    children = node.childNodes
    i = 0
    while i < children.length
      child = children[i]
      childName = @getProperNodeName child
      nth = if seen[childName]? then seen[childName] + 1 else 1
      seen[childName] = nth
      suffix = if nth > 1 then "[" + nth + "]" else ""
      cursor = @collectPositions child, path + "/" + childName + suffix,
        content, base, cursor
      i++
  endIndex
# Matches strings that are empty or contain only whitespace.
WHITESPACE = /^\s*$/
# Decides whether a given node contains only whitespace: a text node with
# whitespace-only data, or an element whose children are all such nodes.
# Any other node type yields false.
isWhitespace: (node) ->
  if node.nodeType is Node.TEXT_NODE
    return WHITESPACE.test node.data
  if node.nodeType isnt Node.ELEMENT_NODE
    return false
  allEmpty = true
  for child in node.childNodes
    allEmpty = allEmpty and @isWhitespace child
  allEmpty
# Internal debug method to verify the consistency of the mapping info.
# Logs every path without position data, and every atomic path whose
# content differs from its range in the corpus. Returns null.
_testMap: ->
  @log "Verifying map info: was it all properly traversed?"
  for key, info of @path
    @log key + " is missing data." unless info.atomic?
  @log "Verifying map info: do atomic elements match?"
  for key, info of @path when info.atomic
    inCorpus = @_corpus.slice info.start, info.end
    if info.content isnt inCorpus
      @log "Mismatch on " + key + ": content is '" + info.content + "', range in corpus is '" + inCorpus + "'."
  null
# Fake two-phase / pagination support, used for HTML documents:
# the document is reported as a single page that is always mapped.
getPageIndex: ->
  0
getPageCount: ->
  1
getPageIndexForPos: ->
  0
isPageMapped: ->
  true
// Generated by CoffeeScript 1.7.1
(function() {
var __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; },
__slice = [].slice,
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
window.DomTextMapper = (function() {
var CONTEXT_LEN, SELECT_CHILDREN_INSTEAD, USE_EMPTY_TEXT_WORKAROUND, USE_TABLE_TEXT_WORKAROUND, WHITESPACE;
// Class-level check; this implementation always reports itself as applicable.
DomTextMapper.applicable = function() {
  var isApplicable = true;
  return isApplicable;
};
// Enables the workaround for text nodes placed directly inside a table.
USE_TABLE_TEXT_WORKAROUND = true;
// Enables the workaround for nodes containing only whitespace.
USE_EMPTY_TEXT_WORKAROUND = true;
// Tag names for which the child nodes are selected instead of the element.
SELECT_CHILDREN_INSTEAD = ["thead", "tbody", "tfoot", "ol", "a", "caption", "p", "span", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "li", "form"];
// Number of characters taken on each side by getContextForCharRange.
CONTEXT_LEN = 32;
// Number of instances created so far; used to build the default id.
DomTextMapper.instances = 0;
/**
 * Create a mapper working on the whole document.
 * @param {string} [id] label used as the log prefix; when omitted, a
 *   numbered default ("d-t-m #<n>") is generated from the instance counter.
 */
function DomTextMapper(id) {
  this.id = id;
  // Bind the change handler once, so the very same function can be added
  // to and removed from the root node as a listener.
  this._onChange = __bind(this._onChange, this);
  this.setRealRoot();
  DomTextMapper.instances += 1;
  if (this.id === null || this.id === undefined) {
    this.id = "d-t-m #" + DomTextMapper.instances;
  }
}
// Print the given values to the console, prefixed with this mapper's id.
DomTextMapper.prototype.log = function() {
  var parts = [this.id, ": "];
  var i;
  for (i = 0; i < arguments.length; i++) {
    parts.push(arguments[i]);
  }
  return console.log.apply(console, parts);
};
// Handler for "domChange" events on the root node. Records the change,
// refreshes the data for the event target, and then marks the cached data
// as up to date. Returns the new scan timestamp.
DomTextMapper.prototype._onChange = function(event) {
  var scannedAt;
  this.documentChanged();
  this.performUpdateOnNode(event.target, false, event.data);
  scannedAt = this.timestamp();
  this.lastScanned = scannedAt;
  return scannedAt;
};
/**
 * Change the root node, and subscribe to its "domChange" events.
 * @param {Node} node the new root node; it must support addEventListener
 * @returns {Node} the node
 * @throws {Error} when no node is given. The check happens before anything
 *   is modified, so the previous root and its listener stay in place.
 */
DomTextMapper.prototype._changeRootNode = function(node) {
  if (node == null) {
    throw new Error("Called _changeRootNode with a null node!");
  }
  if (this.rootNode != null) {
    this.rootNode.removeEventListener("domChange", this._onChange);
  }
  this.rootNode = node;
  this.rootNode.addEventListener("domChange", this._onChange);
  return node;
};
// Restrict all operations to the sub-tree below the given node. The node
// becomes both the root node and the path start node; the current window
// is used for selections. Returns the new path start node.
DomTextMapper.prototype.setRootNode = function(rootNode) {
  var newRoot;
  this.rootWin = window;
  newRoot = this._changeRootNode(rootNode);
  this.pathStartNode = newRoot;
  return newRoot;
};
/**
 * Consider only the sub-tree beginning with the node whose ID was given.
 * @param {string} rootId ID of the element to use as root
 * @returns {*} whatever setRootNode returns for the element
 * @throws {Error} when no element with that ID exists; this is checked
 *   before the current root is touched.
 */
DomTextMapper.prototype.setRootId = function(rootId) {
  var element = document.getElementById(rootId);
  if (element == null) {
    throw new Error("Can't find element with specified ID!");
  }
  return this.setRootNode(element);
};
// Map the content of the iframe with the given ID. The iframe's window is
// used for selections, its document becomes the root node, and its body
// becomes the path start node (which is returned).
// Throws when the iframe is not found or has no content window.
DomTextMapper.prototype.setRootIframe = function(iframeId) {
  var frame = window.document.getElementById(iframeId);
  var body;
  if (frame === null || frame === undefined) {
    throw new Error("Can't find iframe with specified ID!");
  }
  this.rootWin = frame.contentWindow;
  if (this.rootWin === null || this.rootWin === undefined) {
    throw new Error("Can't access contents of the specified iframe!");
  }
  this._changeRootNode(this.rootWin.document);
  body = this.getBody();
  this.pathStartNode = body;
  return body;
};
// Return the default path, i.e. the path of the configured start node.
DomTextMapper.prototype.getDefaultPath = function() {
  var startNode = this.pathStartNode;
  return this.getPathTo(startNode);
};
// Work with the whole DOM tree of the current window: the document becomes
// the root node and its body the path start node (which is returned).
DomTextMapper.prototype.setRealRoot = function() {
  var body;
  this.rootWin = window;
  this._changeRootNode(document);
  body = this.getBody();
  this.pathStartNode = body;
  return body;
};
// Record that the document has changed now, so that data cached before
// this moment is not re-used. Returns the recorded timestamp.
DomTextMapper.prototype.documentChanged = function() {
  var now = this.timestamp();
  this.lastDOMChange = now;
  return now;
};
// Store the expected content on the instance and return it.
DomTextMapper.prototype.setExpectedContent = function(content) {
  this.expectedContent = content;
  return content;
};
/**
 * Scan the document: traverse the DOM from the path start node, store the
 * path info in this.path, compute the character offsets, and store the
 * content of the start node as the corpus.
 *
 * @returns {null|undefined} null after a scan; undefined when the cached
 *   data is still valid or the start node is not attached to the document.
 */
DomTextMapper.prototype.scan = function() {
  var path;
  // A scan newer than the last registered DOM change is still valid.
  if (this.domStableSince(this.lastScanned)) {
    return;
  }
  // We cannot map nodes that are not attached.
  if (!this.pathStartNode.ownerDocument.body.contains(this.pathStartNode)) {
    return;
  }
  this.saveSelection();
  try {
    this.path = {};
    path = this.getDefaultPath();
    // Phase I: path traversal
    this.traverseSubTree(this.pathStartNode, path);
    // Phase II: offset calculation
    this.collectPositions(this.path[path].node, path, null, 0, 0);
  } finally {
    // The selection was saved above; give it back even when the scan fails,
    // otherwise every later saveSelection call would throw.
    this.restoreSelection();
  }
  this.lastScanned = this.timestamp();
  this._corpus = this.path[path].content;
  return null;
};
// Select the node stored for the given path (for visual identification),
// and optionally scroll to it. Throws when nothing is known about the path.
DomTextMapper.prototype.selectPath = function(path, scroll) {
  var info = this.path[path];
  var target;
  if (scroll === null || scroll === undefined) {
    scroll = false;
  }
  if (info === null || info === undefined) {
    throw new Error("I have no info about a node at " + path);
  }
  // Fall back to an XPath lookup when no node reference is stored.
  target = info.node ? info.node : this.lookUpNode(info.path);
  return this.selectNode(target, scroll);
};
/**
 * Refresh the cached data after the given node has changed.
 *
 * When the node is unknown, was replaced, or its overall content changed,
 * the update escalates to the parent node. Does nothing before the first
 * scan.
 *
 * @param {Node} node the changed node
 * @param {boolean} [escalating=false] true when called recursively for an
 *   ancestor; the selection is saved and restored only by the outermost
 *   (non-escalating) call.
 * @returns {undefined}
 * @throws {Error} when node is null, when a parent has no path info, or
 *   when even the path start node was replaced.
 */
DomTextMapper.prototype.performUpdateOnNode = function(node, escalating) {
  var i, oldIndex, p, parentNode, parentPath, parentPathInfo, path, pathInfo, pathsToDrop, prefix;
  if (escalating == null) {
    escalating = false;
  }
  if (node == null) {
    throw new Error("Called performUpdate with a null node!");
  }
  if (this.path == null) {
    return;
  }
  if (!escalating) {
    this.saveSelection();
  }
  try {
    path = this.getPathTo(node);
    pathInfo = this.path[path];
    if (pathInfo == null) {
      // Nothing is known about this node; let the parent handle it.
      this.performUpdateOnNode(node.parentNode, true);
      return;
    }
    if (pathInfo.node === node && pathInfo.content === this.getNodeContent(node, false)) {
      // The node and its overall content are unchanged, so only the data
      // about its descendants is dropped and collected again.
      prefix = path + "/";
      pathsToDrop = [];
      for (p in this.path) {
        if (this.stringStartsWith(p, prefix)) {
          pathsToDrop.push(p);
        }
      }
      for (i = 0; i < pathsToDrop.length; i++) {
        delete this.path[pathsToDrop[i]];
      }
      this.traverseSubTree(node, path);
      if (pathInfo.node === this.pathStartNode) {
        // Ended up rescanning the whole doc.
        this.collectPositions(node, path, null, 0, 0);
      } else {
        parentPath = this.parentPath(path);
        parentPathInfo = this.path[parentPath];
        if (parentPathInfo == null) {
          throw new Error("While performing update on node " + path + ", no path info found for parent path: " + parentPath);
        }
        // Search for this node's content after the end of the previous sibling.
        oldIndex = node === node.parentNode.firstChild ? 0 : this.path[this.getPathTo(node.previousSibling)].end - parentPathInfo.start;
        this.collectPositions(node, path, parentPathInfo.content, parentPathInfo.start, oldIndex);
      }
    } else if (pathInfo.node !== this.pathStartNode) {
      // The node was replaced, or its content changed: go up one level.
      parentNode = node.parentNode != null ? node.parentNode : this.lookUpNode(this.parentPath(path));
      this.performUpdateOnNode(parentNode, true);
    } else {
      throw new Error("Can not keep up with the changes, since even the node configured as path start node was replaced.");
    }
  } finally {
    // Restore on every exit, including errors, so that the saved selection
    // is never left behind.
    if (!escalating) {
      this.restoreSelection();
    }
  }
};
// Return the stored info object for a given path in the DOM.
// Throws when no scan has been done yet, or when the path is unknown.
DomTextMapper.prototype.getInfoForPath = function(path) {
  var info;
  if (this.path === null || this.path === undefined) {
    throw new Error("Can't get info before running a scan() !");
  }
  info = this.path[path];
  if (info !== null && info !== undefined) {
    return info;
  }
  throw new Error("Found no info for path '" + path + "'!");
};
// Return the offset of the start of the given path in the rendered text.
// When the path has no start offset of its own, the start of the first
// following node that has one is used.
DomTextMapper.prototype.getStartPosForPath = function(path) {
  var info = this.getInfoForPath(path);
  var start = info.start;
  if (start !== null && start !== undefined) {
    return start;
  }
  return this.getFirstPosAfter(info.node);
};
// Return the start offset of the first node after the given one that has a
// start offset: the next sibling is tried first; without a next sibling the
// search continues from the parent.
// NOTE(review): nothing here stops the walk at the root, and a sibling
// without path info is not handled -- confirm callers never hit these.
DomTextMapper.prototype.getFirstPosAfter = function(node) {
  var sibling = node.nextSibling;
  var info;
  if (sibling === null || sibling === undefined) {
    // Nothing to see on this level. Move up in the tree.
    return this.getFirstPosAfter(node.parentNode);
  }
  info = this.path[this.getPathTo(sibling)];
  if (info.start !== null && info.start !== undefined) {
    return info.start;
  }
  return this.getFirstPosAfter(sibling);
};
// Return the offset of the end of the given path in the rendered text.
// When the path has no end offset of its own, the end of the first
// preceding node that has one is used.
DomTextMapper.prototype.getEndPosForPath = function(path) {
  var info = this.getInfoForPath(path);
  var end = info.end;
  if (end !== null && end !== undefined) {
    return end;
  }
  return this.getFirstPosBefore(info.node);
};
// Return the end offset of the first node before the given one that has an
// end offset: the previous sibling is tried first; without a previous
// sibling the search continues from the parent.
// NOTE(review): nothing here stops the walk at the root, and a sibling
// without path info is not handled -- confirm callers never hit these.
DomTextMapper.prototype.getFirstPosBefore = function(node) {
  var sibling = node.previousSibling;
  var info;
  if (sibling === null || sibling === undefined) {
    // Nothing to see on this level. Move up in the tree.
    return this.getFirstPosBefore(node.parentNode);
  }
  info = this.path[this.getPathTo(sibling)];
  if (info.end !== null && info.end !== undefined) {
    return info.end;
  }
  return this.getFirstPosBefore(sibling);
};
// Return the stored info object for a given node in the DOM.
// Throws when the node is null, or when its path is unknown.
DomTextMapper.prototype.getInfoForNode = function(node) {
  var nodePath;
  if (node === null || node === undefined) {
    throw new Error("Called getInfoForNode(node) with null node!");
  }
  nodePath = this.getPathTo(node);
  return this.getInfoForPath(nodePath);
};
/**
 * Return the offset of the start of the given node in the rendered text.
 * @param {Node} node
 * @returns {number}
 * @throws {Error} when the node is null, or when its path is unknown
 */
DomTextMapper.prototype.getStartPosForNode = function(node) {
  if (node == null) {
    // The message names this method, so the failing call can be found.
    throw new Error("Called getStartPosForNode(node) with null node!");
  }
  return this.getStartPosForPath(this.getPathTo(node));
};
/**
 * Return the offset of the end of the given node in the rendered text.
 * @param {Node} node
 * @returns {number}
 * @throws {Error} when the node is null, or when its path is unknown
 */
DomTextMapper.prototype.getEndPosForNode = function(node) {
  if (node == null) {
    // The message names this method, so the failing call can be found.
    throw new Error("Called getEndPosForNode(node) with null node!");
  }
  return this.getEndPosForPath(this.getPathTo(node));
};
// Get the matching DOM elements for a list of charRanges: calls
// getMappingsForCharRange for each element of the list, and returns the
// results in the same order.
DomTextMapper.prototype.getMappingsForCharRanges = function(charRanges) {
  var results = [];
  var total = charRanges.length;
  var index = 0;
  var range;
  while (index < total) {
    range = charRanges[index];
    results[results.length] = this.getMappingsForCharRange(range.start, range.end);
    index += 1;
  }
  return results;
};
// Return the rendered text stored for a part of the DOM.
// If path is not given, the default path is used.
DomTextMapper.prototype.getContentForPath = function(path) {
  var effectivePath = path;
  if (effectivePath === null || effectivePath === undefined) {
    effectivePath = this.getDefaultPath();
  }
  return this.path[effectivePath].content;
};
// Return the length of the rendered text stored for a part of the DOM.
// If path is not given, the default path is used.
DomTextMapper.prototype.getLengthForPath = function(path) {
  var effectivePath = path;
  if (effectivePath === null || effectivePath === undefined) {
    effectivePath = this.getDefaultPath();
  }
  return this.path[effectivePath].length;
};
// Length of the corpus stored by the last scan.
DomTextMapper.prototype.getDocLength = function() {
  var corpus = this._corpus;
  return corpus.length;
};
// The corpus (rendered text of the start node) stored by the last scan.
DomTextMapper.prototype.getCorpus = function() {
  var corpus = this._corpus;
  return corpus;
};
// Get the context around the given charRange in the corpus.
// Returns [prefix, suffix]: up to CONTEXT_LEN characters before the start
// and after the end of the range, both trimmed.
// Throws when start is negative or end lies beyond the corpus.
DomTextMapper.prototype.getContextForCharRange = function(start, end) {
  var corpus;
  var before;
  var after;
  if (start < 0) {
    throw Error("Negative range start is invalid!");
  }
  corpus = this._corpus;
  if (end > corpus.length) {
    throw Error("Range end is after the end of corpus!");
  }
  before = corpus.slice(Math.max(0, start - CONTEXT_LEN), start).trim();
  after = corpus.slice(end, end + CONTEXT_LEN).trim();
  return [before, after];
};
// Maps the corpus character range [start, end) onto DOM nodes.
//
// Calls this.scan() first, then collects one mapping per atomic path entry
// whose [start, end) interval overlaps the requested range, builds a DOM
// Range spanning the first to the last mapped node, and returns
// { sections: [result] } where result holds the mappings, the Range
// (realRange), textual range info and the Range's common ancestor.
//
// Throws when start or end is missing, or when no atomic entry overlaps.
DomTextMapper.prototype.getMappingsForCharRange = function(start, end) {
var endInfo, endMapping, endNode, endOffset, endPath, info, mappings, p, r, result, startInfo, startMapping, startNode, startOffset, startPath, _ref;
if (!((start != null) && (end != null))) {
throw new Error("start and end is required!");
}
this.scan();
mappings = [];
_ref = this.path;
for (p in _ref) {
info = _ref[p];
// Only atomic entries that overlap the requested range contribute.
if (info.atomic && this.regions_overlap(info.start, info.end, start, end)) {
(function(_this) {
return (function(info) {
var full, mapping;
mapping = {
element: info
};
// "full" means the whole element lies inside the requested range.
full = start <= info.start && info.end <= end;
if (full) {
mapping.full = true;
mapping.wanted = info.content;
mapping.yields = info.content;
mapping.startCorrected = 0;
mapping.endCorrected = 0;
} else {
if (info.node.nodeType === Node.TEXT_NODE) {
// Partial overlap with a text node: compute element-relative offsets.
if (start <= info.start) {
// The range starts before this node and ends inside it.
mapping.end = end - info.start;
mapping.wanted = info.content.substr(0, mapping.end);
} else if (info.end <= end) {
// The range starts inside this node and runs past its end.
mapping.start = start - info.start;
mapping.wanted = info.content.substr(mapping.start);
} else {
// The range lies strictly inside this node.
mapping.start = start - info.start;
mapping.end = end - info.start;
mapping.wanted = info.content.substr(mapping.start, mapping.end - mapping.start);
}
// Translate the offsets from the stored content to the node's raw data.
_this.computeSourcePositions(mapping);
mapping.yields = info.node.data.substr(mapping.startCorrected, mapping.endCorrected - mapping.startCorrected);
} else if ((info.node.nodeType === Node.ELEMENT_NODE) && (info.node.tagName.toLowerCase() === "img")) {
_this.log("Can not select a sub-string from the title of an image. Selecting all.");
mapping.full = true;
mapping.wanted = info.content;
} else {
// Any other node type: fall back to selecting the whole element.
_this.log("Warning: no idea how to handle partial mappings for node type " + info.node.nodeType);
if (info.node.tagName != null) {
_this.log("Tag: " + info.node.tagName);
}
_this.log("Selecting all.");
mapping.full = true;
mapping.wanted = info.content;
}
}
return mappings.push(mapping);
});
})(this)(info);
}
}
if (mappings.length === 0) {
this.log("Collecting nodes for [" + start + ":" + end + "]");
this.log("Should be: '" + this._corpus.slice(start, end) + "'.");
throw new Error("No mappings found for [" + start + ":" + end + "]!");
}
// Order the mappings by the start offset of their elements.
mappings = mappings.sort(function(a, b) {
return a.element.start - b.element.start;
});
r = this.rootWin.document.createRange();
// The range starts at the first mapping: before the node when it is fully
// covered, otherwise at the corrected offset inside it.
startMapping = mappings[0];
startNode = startMapping.element.node;
startPath = startMapping.element.path;
startOffset = startMapping.startCorrected;
if (startMapping.full) {
r.setStartBefore(startNode);
startInfo = startPath;
} else {
r.setStart(startNode, startOffset);
startInfo = startPath + ":" + startOffset;
}
// The range ends at the last mapping, using the same full/partial rule.
endMapping = mappings[mappings.length - 1];
endNode = endMapping.element.node;
endPath = endMapping.element.path;
endOffset = endMapping.endCorrected;
if (endMapping.full) {
r.setEndAfter(endNode);
endInfo = endPath;
} else {
r.setEnd(endNode, endOffset);
endInfo = endPath + ":" + endOffset;
}
result = {
mappings: mappings,
realRange: r,
rangeInfo: {
startPath: startPath,
startOffset: startOffset,
startInfo: startInfo,
endPath: endPath,
endOffset: endOffset,
endInfo: endInfo
},
safeParent: r.commonAncestorContainer
};
return {
sections: [result]
};
};
// Returns the current time as milliseconds since the epoch.
DomTextMapper.prototype.timestamp = function() {
  var now = new Date();
  return now.getTime();
};
// Tells whether `string` begins with `prefix`. An empty or missing prefix
// is rejected with an error.
DomTextMapper.prototype.stringStartsWith = function(string, prefix) {
  var head;
  if (prefix) {
    head = string.slice(0, prefix.length);
    return head === prefix;
  }
  throw Error("Requires a non-empty prefix!");
};
// Tells whether `string` ends with `suffix`. An empty or missing suffix
// is rejected with an error.
DomTextMapper.prototype.stringEndsWith = function(string, suffix) {
  var total, tail;
  if (suffix) {
    total = string.length;
    tail = string.slice(total - suffix.length, total);
    return tail === suffix;
  }
  throw Error("Requires a non-empty suffix!");
};
// Returns everything before the last "/" of `path`
// (an empty string when the path contains no "/").
DomTextMapper.prototype.parentPath = function(path) {
  var lastSlash = path.lastIndexOf("/");
  return path.substring(0, lastSlash);
};
// Tells whether the last recorded DOM change is later than `timestamp`.
// When either value is unknown, a change is assumed.
DomTextMapper.prototype.domChangedSince = function(timestamp) {
  if (this.lastDOMChange == null || timestamp == null) {
    return true;
  }
  return this.lastDOMChange > timestamp;
};
// Logical negation of domChangedSince().
DomTextMapper.prototype.domStableSince = function(timestamp) {
  var changed = this.domChangedSince(timestamp);
  return changed ? false : true;
};
// Converts a node's nodeName into the form used in path segments: the
// "#text", "#comment" and "#cdata-section" names are rewritten to
// "text()", "comment()" and "cdata-section()"; any other name is
// returned unchanged.
DomTextMapper.prototype.getProperNodeName = function(node) {
  var rawName = node.nodeName;
  if (rawName === "#text") {
    return "text()";
  }
  if (rawName === "#comment") {
    return "comment()";
  }
  if (rawName === "#cdata-section") {
    return "cdata-section()";
  }
  return rawName;
};
// Counts the node itself plus all of its preceding siblings that share its
// nodeName, i.e. the 1-based position of the node among same-named siblings.
DomTextMapper.prototype.getNodePosition = function(node) {
  var count = 0;
  var sibling;
  for (sibling = node; sibling; sibling = sibling.previousSibling) {
    if (sibling.nodeName === node.nodeName) {
      count += 1;
    }
  }
  return count;
};
// Builds the path segment for a node: its proper node name, followed by
// "[n]" when it is not the first sibling with that name.
DomTextMapper.prototype.getPathSegment = function(node) {
  var segmentName = this.getProperNodeName(node);
  var position = this.getNodePosition(node);
  var indexSuffix = "";
  if (position > 1) {
    indexSuffix = "[" + position + "]";
  }
  return segmentName + indexSuffix;
};
// Builds the path from this.rootNode down to `node` by joining the path
// segments of the node and its ancestors. The path starts with "./" when
// the root node has an ownerDocument and with "/" otherwise; a trailing
// slash is stripped. Throws when `node` is not below the root node.
DomTextMapper.prototype.getPathTo = function(node) {
  var segments = [];
  var current = node;
  var prefix, joined;
  while (current !== this.rootNode) {
    if (current == null) {
      throw new Error("Called getPathTo on a node which was not a descendant of @rootNode. " + this.rootNode);
    }
    segments.unshift(this.getPathSegment(current));
    current = current.parentNode;
  }
  prefix = this.rootNode.ownerDocument != null ? './' : '/';
  joined = prefix + segments.join('/') + (segments.length > 0 ? '/' : '');
  return joined.replace(/\/$/, '');
};
// Records the content of `node` under `path` in this.path, then does the
// same recursively for every child node.
//
// `invisible` is true when an ancestor reported empty content; finding
// content below such an ancestor is logged as suspicious. `verbose`
// enables per-path progress logging. Always returns null.
DomTextMapper.prototype.traverseSubTree = function(node, path, invisible, verbose) {
  var kids, kidCount, k, kid, text, hasText;
  if (invisible == null) {
    invisible = false;
  }
  if (verbose == null) {
    verbose = false;
  }
  // Remember which path is being processed (read by selectNode's warnings).
  this.underTraverse = path;
  text = this.getNodeContent(node, false);
  this.path[path] = {
    path: path,
    content: text,
    length: text.length,
    node: node
  };
  hasText = Boolean(text.length);
  if (verbose) {
    this.log((hasText ? "Collected info about path " : "Found no content at path ") + path);
  }
  if (!hasText) {
    invisible = true;
  } else if (invisible) {
    this.log("Something seems to be wrong. I see visible content @ " + path + ", while some of the ancestor nodes reported empty contents. Probably a new selection API bug....");
    this.log("Anyway, text is '" + text + "'.");
  }
  if (!node.hasChildNodes()) {
    return null;
  }
  kids = node.childNodes;
  kidCount = kids.length;
  for (k = 0; k < kidCount; k++) {
    kid = kids[k];
    this.traverseSubTree(kid, path + '/' + this.getPathSegment(kid), invisible, verbose);
  }
  return null;
};
// Returns the first <body> element of the root window's document.
DomTextMapper.prototype.getBody = function() {
  var bodies = this.rootWin.document.getElementsByTagName("body");
  return bodies[0];
};
// Tells whether the half-open intervals [start1, end1) and [start2, end2)
// share at least one position.
DomTextMapper.prototype.regions_overlap = function(start1, end1, start2, end2) {
  if (!(start1 < end2)) {
    return false;
  }
  return start2 < end1;
};
// Evaluates `path` as an XPath expression relative to this.rootNode and
// returns the first node of the result. The evaluation is performed by the
// root node's ownerDocument, or by the root node itself when it has none.
DomTextMapper.prototype.lookUpNode = function(path) {
  var contextNode = this.rootNode;
  var owner = contextNode.ownerDocument;
  var evaluator = owner != null ? owner : contextNode;
  var matches = evaluator.evaluate(path, contextNode, null, 0, null);
  return matches.iterateNext();
};
// Stores all ranges of the root window's current selection in
// this.savedSelection, and records a stack trace of the call site in
// this.selectionSaved (which is also the return value).
// Throws when a selection has already been saved and not yet restored.
DomTextMapper.prototype.saveSelection = function() {
  var selection, ranges, count, idx;
  if (this.savedSelection != null) {
    this.log("Selection saved at:");
    this.log(this.selectionSaved);
    throw new Error("Selection already saved!");
  }
  selection = this.rootWin.getSelection();
  ranges = [];
  count = selection.rangeCount;
  for (idx = 0; idx < count; idx++) {
    ranges.push(selection.getRangeAt(idx));
  }
  this.savedSelection = ranges;
  this.selectionSaved = (new Error("selection was saved here")).stack;
  return this.selectionSaved;
};
// Replaces the root window's current selection with the ranges stored by
// saveSelection(), then forgets the stored ranges.
// Throws when there is nothing to restore.
DomTextMapper.prototype.restoreSelection = function() {
  var saved = this.savedSelection;
  var selection, idx;
  if (saved == null) {
    throw new Error("No selection to restore.");
  }
  selection = this.rootWin.getSelection();
  selection.removeAllRanges();
  idx = 0;
  while (idx < saved.length) {
    selection.addRange(saved[idx]);
    idx += 1;
  }
  return delete this.savedSelection;
};
// Makes the root window's selection cover `node` and returns the selection.
//
// - Elements with children whose tag is listed in SELECT_CHILDREN_INSTEAD
//   get their first-to-last child selected instead of the element itself.
// - Text nodes directly under a <table> are skipped when
//   USE_TABLE_TEXT_WORKAROUND is set (the selection stays empty).
// - Otherwise the node itself is selected; a failure is logged unless the
//   node is whitespace-only and USE_EMPTY_TEXT_WORKAROUND is set.
//
// When `scroll` is truthy, the nearest node (starting at `node`, walking up)
// that offers scrollIntoViewIfNeeded() is scrolled into view.
DomTextMapper.prototype.selectNode = function(node, scroll) {
  var kids, range, scrollTarget, selection, selectKids, skipTableText;
  if (scroll == null) {
    scroll = false;
  }
  if (node == null) {
    throw new Error("Called selectNode with null node!");
  }
  selection = this.rootWin.getSelection();
  selection.removeAllRanges();
  range = this.rootWin.document.createRange();
  selectKids = node.nodeType === Node.ELEMENT_NODE &&
    node.hasChildNodes() &&
    __indexOf.call(SELECT_CHILDREN_INSTEAD, node.tagName.toLowerCase()) >= 0;
  if (selectKids) {
    kids = node.childNodes;
    range.setStartBefore(kids[0]);
    range.setEndAfter(kids[kids.length - 1]);
    selection.addRange(range);
  } else {
    skipTableText = USE_TABLE_TEXT_WORKAROUND &&
      node.nodeType === Node.TEXT_NODE &&
      node.parentNode.tagName.toLowerCase() === "table";
    if (!skipTableText) {
      try {
        range.setStartBefore(node);
        range.setEndAfter(node);
        selection.addRange(range);
      } catch (selectionError) {
        if (!USE_EMPTY_TEXT_WORKAROUND || !this.isWhitespace(node)) {
          this.log("Warning: failed to scan element @ " + this.underTraverse);
          this.log("Content is: " + node.innerHTML);
          this.log("We won't be able to properly anchor to any text inside this element.");
        }
      }
    }
  }
  if (scroll) {
    scrollTarget = node;
    while (scrollTarget != null && scrollTarget.scrollIntoViewIfNeeded == null) {
      scrollTarget = scrollTarget.parentNode;
    }
    if (scrollTarget == null) {
      this.log("Failed to scroll to element. (Browser does not support scrollIntoViewIfNeeded?)");
    } else {
      scrollTarget.scrollIntoViewIfNeeded();
    }
  }
  return selection;
};
// Returns the text of `sel` (or of the root window's current selection when
// `sel` is not given), trimmed, with newlines turned into spaces and runs
// of whitespace collapsed into a single space.
DomTextMapper.prototype.readSelectionText = function(sel) {
  var selection = sel ? sel : this.rootWin.getSelection();
  var trimmed = selection.toString().trim();
  var singleLine = trimmed.replace(/\n/g, " ");
  return singleLine.replace(/\s{2,}/g, " ");
};
// Returns the text of `node` as the browser's selection API renders it:
// the node is selected and the selection's normalized text is read back.
//
// When `shouldRestoreSelection` is true (the default), the user's current
// selection is saved beforehand and restored afterwards. The restore runs
// in a `finally` block: without it, an exception thrown while selecting or
// reading would leave the saved selection behind, and every later
// saveSelection() call would fail with "Selection already saved!".
DomTextMapper.prototype.getNodeSelectionText = function(node, shouldRestoreSelection) {
  if (shouldRestoreSelection == null) {
    shouldRestoreSelection = true;
  }
  if (!shouldRestoreSelection) {
    return this.readSelectionText(this.selectNode(node));
  }
  this.saveSelection();
  try {
    return this.readSelectionText(this.selectNode(node));
  } finally {
    this.restoreSelection();
  }
};
DomTextMapper.prototype.computeSourcePositions = function(match) {
var dc, displayEnd, displayIndex, displayStart, displayText, sc, sourceEnd, sourceIndex, sourceStart, sourceText;
sourceText = match.element.node.data.replace(/\n/g, " ");
displayText = match.element.content;
displayStart = match.start != null ? match.start : 0;
displayEnd = match.end != null ? match.end : displayText.length;
if (displayEnd === 0) {
match.startCorrected = 0;
match.endCorrected = 0;
return;
}
sourceIndex = 0;
displayIndex = 0;
while (!((sourceStart != null) && (sourceEnd != null))) {
sc = sourceText[sourceIndex];
dc = displayText[displayIndex];
if (sc === dc) {
if (displayIndex === displayStart) {
sourceStart = sourceIndex;
}
displayIndex++;
if (displayIndex === displayEnd) {
sourceEnd = sourceIndex + 1;
}
}
sourceIndex++;
}
match.startCorrected = sourceStart;
match.endCorrected = sourceEnd;
return null;
};
// Returns the text content of `node`. For the path start node the expected
// content is returned when one has been set; every other node is read
// through the selection API (getNodeSelectionText).
DomTextMapper.prototype.getNodeContent = function(node, shouldRestoreSelection) {
  var restore = shouldRestoreSelection == null ? true : shouldRestoreSelection;
  var useExpected = node === this.pathStartNode && this.expectedContent != null;
  if (useExpected) {
    return this.expectedContent;
  }
  return this.getNodeSelectionText(node, restore);
};
// Computes the corpus offsets of `node` (stored at this.path[path]) and,
// recursively, of all of its children.
//
// parentContent: content string of the parent, in which this node's
//                content is searched (null for the top-level call)
// parentIndex:   corpus offset at which the parent's content starts
// index:         offset inside parentContent from which to search
//
// Sets start, end and atomic on the path info and returns the offset
// (relative to parentContent) at which this node's content ends, so the
// caller can continue searching for the next sibling from there. Nodes
// without content get an empty interval and leave `index` unchanged.
//
// Changes from the previous version: a path without recorded info is now
// logged and skipped (it used to die with a TypeError right after a
// null-safe read), the per-name child counter has no prototype so node
// names cannot collide with inherited keys, and the "not present" log
// message is spelled correctly.
DomTextMapper.prototype.collectPositions = function(node, path, parentContent, parentIndex, index) {
  var atomic, child, childPath, children, content, endIndex, i, newCount, nodeName, oldCount, pathInfo, pos, startIndex, typeCount;
  if (parentContent == null) {
    parentContent = null;
  }
  if (parentIndex == null) {
    parentIndex = 0;
  }
  if (index == null) {
    index = 0;
  }
  pathInfo = this.path[path];
  if (pathInfo == null) {
    this.log("No info has been collected for path " + path + "; skipping it.");
    return index;
  }
  content = pathInfo.content;
  if (!content) {
    pathInfo.start = parentIndex + index;
    pathInfo.end = parentIndex + index;
    pathInfo.atomic = false;
    return index;
  }
  startIndex = parentContent != null ? parentContent.indexOf(content, index) : index;
  if (startIndex === -1) {
    this.log("Content of this node is not present in content of parent, at path " + path);
    this.log("(Content: '" + content + "'.)");
    return index;
  }
  endIndex = startIndex + content.length;
  atomic = !node.hasChildNodes();
  pathInfo.start = parentIndex + startIndex;
  pathInfo.end = parentIndex + endIndex;
  pathInfo.atomic = atomic;
  if (!atomic) {
    children = node.childNodes;
    pos = 0;
    // Counts children per node name, to rebuild the "[n]" path suffixes.
    typeCount = Object.create(null);
    for (i = 0; i < children.length; i++) {
      child = children[i];
      nodeName = this.getProperNodeName(child);
      oldCount = typeCount[nodeName];
      newCount = oldCount != null ? oldCount + 1 : 1;
      typeCount[nodeName] = newCount;
      childPath = path + "/" + nodeName + (newCount > 1 ? "[" + newCount + "]" : "");
      pos = this.collectPositions(child, childPath, content, parentIndex + startIndex, pos);
    }
  }
  return endIndex;
};
// Matches strings that are empty or consist of whitespace only.
WHITESPACE = /^\s*$/;
// Tells whether `node` contains nothing but whitespace: a text node whose
// data is blank, or an element all of whose children are (recursively)
// whitespace. Any other node type is never considered whitespace.
DomTextMapper.prototype.isWhitespace = function(node) {
  var kids, kidCount, k;
  if (node.nodeType === Node.TEXT_NODE) {
    return WHITESPACE.test(node.data);
  }
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return false;
  }
  kids = node.childNodes;
  kidCount = kids.length;
  for (k = 0; k < kidCount; k++) {
    if (!this.isWhitespace(kids[k])) {
      return false;
    }
  }
  return true;
};
// Diagnostic self-check of the collected map; the findings are only logged.
// 1. Reports every path entry that has no `atomic` flag.
// 2. For every atomic entry, compares its content with the corresponding
//    slice of the corpus and reports mismatches.
// Always returns null.
DomTextMapper.prototype._testMap = function() {
  var entries = this.path;
  var key, entry, corpusSlice;
  this.log("Verifying map info: was it all properly traversed?");
  for (key in entries) {
    if (entries[key].atomic == null) {
      this.log(key + " is missing data.");
    }
  }
  this.log("Verifying map info: do atomic elements match?");
  entries = this.path;
  for (key in entries) {
    entry = entries[key];
    if (entry.atomic) {
      corpusSlice = this._corpus.slice(entry.start, entry.end);
      if (entry.content !== corpusSlice) {
        this.log("Mismatch on " + key + ": content is '" + entry.content + "', range in corpus is '" + corpusSlice + "'.");
      }
    }
  }
  return null;
};
// Page-related accessors. In this mapper they all return constants:
// page index 0, page count 1, and "mapped" is always true.

// Always returns 0.
DomTextMapper.prototype.getPageIndex = function() {
return 0;
};
// Always returns 1.
DomTextMapper.prototype.getPageCount = function() {
return 1;
};
// Always returns 0; any argument passed is ignored.
DomTextMapper.prototype.getPageIndexForPos = function() {
return 0;
};
// Always returns true.
DomTextMapper.prototype.isPageMapped = function() {
return true;
};
return DomTextMapper;
})();
}).call(this);
# Text search library
#
# The constructor stores `corpus` as @corpus. The methods of this class
# invoke it as a function (@corpus()) to obtain the text to search in.
class window.DomTextMatcher
constructor: (@corpus) ->
# Search for text using exact string matching
#
# Parameters:
# pattern: what to search for
#
# distinct: forbid overlapping matches? (defaults to true)
#
# caseSensitive: should the search be case sensitive? (defaults to false)
#
# The exact matcher (window.DTM_ExactMatcher) is created on first use
# and kept in @pm for later calls.
#
# For the details about the returned data structure,
# see the documentation of the _search() method.
searchExact: (pattern, distinct = true, caseSensitive = false) ->
if not @pm then @pm = new window.DTM_ExactMatcher
@pm.setDistinct(distinct)
@pm.setCaseSensitive(caseSensitive)
@_search @pm, pattern
# Search for text using regular expressions
#
# Parameters:
# pattern: what to search for
#
# caseSensitive: should the search be case sensitive? (defaults to false)
#
# The regex matcher (window.DTM_RegexMatcher) is created on first use
# and kept in @rm for later calls.
#
# For the details about the returned data structure,
# see the documentation of the _search() method.
searchRegex: (pattern, caseSensitive = false) ->
if not @rm then @rm = new window.DTM_RegexMatcher
@rm.setCaseSensitive(caseSensitive)
@_search @rm, pattern
# Search for text using fuzzy text matching
#
# Parameters:
# pattern: what to search for
#
# pos: where to start searching
#
# caseSensitive: should the search be case sensitive? (defaults to false)
#
# options.matchDistance (defaults to 1000) and
# options.matchThreshold (defaults to 0.5):
# fine-tuning parameters for the d-m-p library.
# See http://code.google.com/p/google-diff-match-patch/wiki/API for details.
#
# The options object is also passed on to _search().
#
# For the details about the returned data structure,
# see the documentation of the _search() method.
searchFuzzy: (pattern, pos, caseSensitive = false, options = {}) ->
@ensureDMP()
@dmp.setMatchDistance options.matchDistance ? 1000
@dmp.setMatchThreshold options.matchThreshold ? 0.5
@dmp.setCaseSensitive caseSensitive
@_search @dmp, pattern, pos, options
# Search for a text range by fuzzy-matching the text around it
#
# Parameters:
#  prefix, suffix: the text expected right before / right after the range
#                  (both are required)
#  pattern: the text expected inside the range (optional)
#  expectedStart, expectedEnd: where the range is expected (optional)
#  caseSensitive: accepted, but not used by this method
#  options:
#    contextMatchDistance (defaults to twice the corpus length),
#    contextMatchThreshold (defaults to 0.5):
#      d-m-p settings used when searching for the prefix and the suffix
#    patternMatchThreshold (defaults to 0.5):
#      highest error level at which the found text is still accepted
#    flexContext:
#      allow moving the match, if that yields the exact pattern
#
# Returns an object with a `matches` list, which holds either one match
# (start, end, found, plus the comparison data when a pattern was given)
# or nothing.
searchFuzzyWithContext: (prefix, suffix, pattern, expectedStart = null, expectedEnd = null, caseSensitive = false, options = {}) ->
  @ensureDMP()

  # No context, no joy
  unless (prefix? and suffix?)
    throw new Error "Can not do a context-based fuzzy search with missing context!"

  # Get full document length
  len = @corpus().length

  # Get a starting position for the prefix search
  expectedPrefixStart = if expectedStart?
    i = expectedStart - prefix.length
    if i < 0
      0
    else
      i
  else
    Math.floor(len / 2)

  # Do the fuzzy search for the prefix
  @dmp.setMatchDistance options.contextMatchDistance ? len * 2
  @dmp.setMatchThreshold options.contextMatchThreshold ? 0.5
  prefixResult = @dmp.search @corpus(), prefix, expectedPrefixStart

  # If the prefix is not found, give up
  unless prefixResult.length then return matches: []

  # This is where the prefix was found
  prefixStart = prefixResult[0].start
  prefixEnd = prefixResult[0].end

  # Let's find out where we expect to find the suffix!
  # We need the pattern's length.
  patternLength = if pattern?
    # If we have a pattern, use its length
    pattern.length
  else if expectedStart? and expectedEnd?
    # We don't have a pattern, but at least
    # have valid expectedStart and expectedEnd values,
    # get a length from that.
    expectedEnd - expectedStart
  else
    # We have no idea about where the suffix could be.
    # Let's just pull a number out of ... thin air.
    64

  # Get the part of text that is after the prefix
  remainingText = @corpus().substr prefixEnd

  # Calculate expected position
  expectedSuffixStart = patternLength

  # Do the fuzzy search for the suffix
  suffixResult = @dmp.search remainingText, suffix, expectedSuffixStart

  # If the suffix is not found, give up
  unless suffixResult.length then return matches: []

  # This is where the suffix was found
  suffixStart = prefixEnd + suffixResult[0].start
  suffixEnd = prefixEnd + suffixResult[0].end

  # This is the range between the prefix and the suffix
  charRange =
    start: prefixEnd
    end: suffixStart

  # Get the configured threshold for the pattern matching
  matchThreshold = options.patternMatchThreshold ? 0.5

  # See how good a match we have.
  # Without a pattern there is nothing to compare against (and
  # _analyzeMatch can not handle a missing pattern), so only report
  # the text that was found.
  analysis = if pattern?
    @_analyzeMatch pattern, charRange, true
  else
    { found: @_normalizeString @corpus()[charRange.start...charRange.end] }

  # Should we try to find a better match by moving the
  # initial match around a little bit, even if this has
  # a negative impact on the similarity of the context?
  if pattern? and options.flexContext and not analysis.exact

    # Do we have an exact match for the quote around here?
    if not @pm then @pm = new window.DTM_ExactMatcher
    @pm.setDistinct false
    @pm.setCaseSensitive false
    flexMatches = @pm.search @corpus()[prefixStart..suffixEnd], pattern

    # No candidate yet. (A local variable can not be deleted,
    # so it is reset explicitly.)
    candidate = null
    bestError = 2

    for flexMatch in flexMatches

      # Calculate the range that matched the quote
      flexRange =
        start: prefixStart + flexMatch.start
        end: prefixStart + flexMatch.end

      # Check how the prefix would fare
      prefixRange = start: prefixStart, end: flexRange.start
      a1 = @_analyzeMatch prefix, prefixRange, true
      prefixError = if a1.exact then 0 else a1.comparison.errorLevel

      # Check how the suffix would fare
      suffixRange = start: flexRange.end, end: suffixEnd
      a2 = @_analyzeMatch suffix, suffixRange, true
      suffixError = if a2.exact then 0 else a2.comparison.errorLevel

      # Did we get at least one exact match?
      if a1.exact or a2.exact
        # Yes, we did. Calculate the total error
        totalError = prefixError + suffixError

        # Is this better than our best bet?
        if totalError < bestError
          # This is our best candidate so far. Store it.
          candidate = flexRange
          bestError = totalError

    if candidate?
      console.log "flexContext adjustment: we found a better candidate!"
      charRange = candidate
      analysis = @_analyzeMatch pattern, charRange, true

  # Do we have to compare what we found to a pattern?
  # Accept the match if there is no pattern to compare to, if the found
  # text matches the pattern exactly, or if the error is still acceptable.
  if (not pattern?) or analysis.exact or (analysis.comparison.errorLevel <= matchThreshold)
    # Collect the results
    match = {}
    for obj in [charRange, analysis]
      for k, v of obj
        match[k] = v
    return matches: [match]

  # console.log "Rejecting the match, because error level is too high. (" +
  #   errorLevel + ")"
  return matches: []
# ===== Private methods (never call from outside the module) =======
# Do some normalization to get a "canonical" form of a string.
# Used to even out some browser differences.
# Every run of two or more whitespace characters is replaced with a
# single space, then the result is trimmed.
_normalizeString: (string) -> (string.replace /\s{2,}/g, " ").trim()
# Search for text with a custom matcher object
#
# Parameters:
#  matcher: the object to use for doing the plain-text part of the search
#  pattern: what to search for (surrounding whitespace is ignored)
#  pos: where do we expect to find it
#  options: passed on to the matcher; options.withFuzzyComparison
#           (defaults to false) also requests a fuzzy comparison of each
#           match with the pattern
#
# Returns an object with two fields:
#  matches: the list of matches (empty if nothing was found). Each match
#           holds the fields reported by the matcher, merged with the
#           analysis done by _analyzeMatch() ("found", "exact", ...).
#  time: how long the text matching and the analysis took
#
# Throws if the pattern is missing, or is empty after trimming.
_search: (matcher, pattern, pos, options = {}) ->
  # Prepare and check the pattern
  unless pattern? then throw new Error "Can't search for null pattern!"
  pattern = pattern.trim()
  # trim() always returns a string, so test for emptiness, not for existence
  unless pattern then throw new Error "Can't search for an empty pattern!"

  fuzzyComparison = options.withFuzzyComparison ? false

  t1 = @timestamp()

  # Do the text search
  textMatches = matcher.search @corpus(), pattern, pos, options
  t2 = @timestamp()

  matches = []
  for textMatch in textMatches
    do (textMatch) =>
      # See how good a match we have
      analysis = @_analyzeMatch pattern, textMatch, fuzzyComparison

      # Collect the results
      match = {}
      for obj in [textMatch, analysis]
        for k, v of obj
          match[k] = v

      matches.push match
      null
  t3 = @timestamp()
  result =
    matches: matches
    time:
      phase1_textMatching: t2 - t1
      phase2_matchMapping: t3 - t2
      total: t3 - t1
  result
# Current time, in milliseconds since the epoch
timestamp: -> new Date().getTime()
# Read a match returned by the matcher engine, and compare it with the pattern
#
# Parameters:
#  pattern: the text that was searched for
#  charRange: the start (inclusive) and end (exclusive) of the match
#  useFuzzy: also calculate a fuzzy comparison for non-exact matches?
#
# Returns an object with the normalized text that was found ("found"),
# whether it equals the normalized pattern ("exact"), and for non-exact
# matches "exactExceptCase", plus "comparison" if useFuzzy was requested.
_analyzeMatch: (pattern, charRange, useFuzzy = false) ->

  expected = @_normalizeString pattern

  # An exclusive range is used here on purpose: the inclusive form
  # (start .. end - 1) returns the rest of the corpus when end is 0.
  found = @_normalizeString @corpus()[charRange.start...charRange.end]

  result =
    found: found
    exact: found is expected

  # If the match is not exact, check whether the changes are
  # only case differences
  unless result.exact then result.exactExceptCase =
    expected.toLowerCase() is found.toLowerCase()

  # If we are interested in fuzzy comparison, calculate that, too
  if not result.exact and useFuzzy
    @ensureDMP()
    result.comparison = @dmp.compare expected, found

  result
# Make sure that the fuzzy matcher is available in @dmp, creating it from
# window.DTM_DMPMatcher on first use.
# Throws if window.DTM_DMPMatcher is not defined.
ensureDMP: ->
unless @dmp?
unless window.DTM_DMPMatcher?
throw new Error "DTM_DMPMatcher is not available.
Have you loaded the text match engines?"
@dmp = new window.DTM_DMPMatcher
// Generated by CoffeeScript 1.7.1
(function() {
window.DomTextMatcher = (function() {
// Constructor: stores `corpus` on the instance. The methods of this class
// invoke it as a function (this.corpus()) to obtain the text to search in.
function DomTextMatcher(corpus) {
this.corpus = corpus;
}
// Searches the corpus for `pattern` using exact string matching.
// `distinct` (default true) and `caseSensitive` (default false) are passed
// to the exact matcher, which is created on first use and kept in this.pm.
// Returns the result of _search().
DomTextMatcher.prototype.searchExact = function(pattern, distinct, caseSensitive) {
  var exactMatcher;
  if (distinct == null) {
    distinct = true;
  }
  if (caseSensitive == null) {
    caseSensitive = false;
  }
  if (!this.pm) {
    this.pm = new window.DTM_ExactMatcher;
  }
  exactMatcher = this.pm;
  exactMatcher.setDistinct(distinct);
  exactMatcher.setCaseSensitive(caseSensitive);
  return this._search(exactMatcher, pattern);
};
// Searches the corpus for `pattern` using regular expressions.
// `caseSensitive` (default false) is passed to the regex matcher, which is
// created on first use and kept in this.rm. Returns the result of _search().
DomTextMatcher.prototype.searchRegex = function(pattern, caseSensitive) {
  var regexMatcher;
  if (caseSensitive == null) {
    caseSensitive = false;
  }
  if (!this.rm) {
    this.rm = new window.DTM_RegexMatcher;
  }
  regexMatcher = this.rm;
  regexMatcher.setCaseSensitive(caseSensitive);
  return this._search(regexMatcher, pattern);
};
// Searches the corpus for `pattern` using fuzzy matching, expecting the
// match around position `pos`.
// The fuzzy matcher (this.dmp) is configured with options.matchDistance
// (default 1000), options.matchThreshold (default 0.5) and `caseSensitive`
// (default false). Returns the result of _search().
DomTextMatcher.prototype.searchFuzzy = function(pattern, pos, caseSensitive, options) {
  var distance, threshold;
  if (caseSensitive == null) {
    caseSensitive = false;
  }
  if (options == null) {
    options = {};
  }
  this.ensureDMP();
  distance = options.matchDistance;
  if (distance == null) {
    distance = 1000;
  }
  this.dmp.setMatchDistance(distance);
  threshold = options.matchThreshold;
  if (threshold == null) {
    threshold = 0.5;
  }
  this.dmp.setMatchThreshold(threshold);
  this.dmp.setCaseSensitive(caseSensitive);
  return this._search(this.dmp, pattern, pos, options);
};
// Searches for a text range by fuzzy-matching the text around it.
//
// prefix, suffix:  text expected right before / right after the range
//                  (both are required)
// pattern:         text expected inside the range (optional)
// expectedStart,
// expectedEnd:     where the range is expected (optional)
// caseSensitive:   accepted, but not used by this method
// options:
//   contextMatchDistance (default: twice the corpus length),
//   contextMatchThreshold (default 0.5): matcher settings used while
//       searching for the prefix and the suffix
//   patternMatchThreshold (default 0.5): highest error level at which the
//       found text is still accepted
//   flexContext: allow moving the match if that yields the exact pattern
//
// Returns { matches: [...] } holding either one match (start, end, found,
// plus the comparison data when a pattern was given) or nothing.
//
// Changes from the previous version: `delete candidate` (a SyntaxError in
// strict-mode code) is replaced by an explicit reset, and a missing pattern
// no longer crashes inside _analyzeMatch().
DomTextMatcher.prototype.searchFuzzyWithContext = function(prefix, suffix, pattern, expectedStart, expectedEnd, caseSensitive, options) {
  var a1, a2, analysis, bestError, candidate, charRange, corpus, expectedPrefixStart, flexMatch, flexMatches, flexRange, i, key, match, matchThreshold, part, parts, patternLength, prefixEnd, prefixError, prefixResult, prefixStart, suffixEnd, suffixError, suffixResult, suffixStart, totalError, _i, _j, _ref;
  if (expectedStart == null) {
    expectedStart = null;
  }
  if (expectedEnd == null) {
    expectedEnd = null;
  }
  if (caseSensitive == null) {
    caseSensitive = false;
  }
  if (options == null) {
    options = {};
  }
  this.ensureDMP();
  // No context, no joy.
  if (!((prefix != null) && (suffix != null))) {
    throw new Error("Can not do a context-based fuzzy search with missing context!");
  }
  corpus = this.corpus();

  // Find the prefix, starting right before the expected position
  // (or in the middle of the document when there is no expectation).
  if (expectedStart != null) {
    i = expectedStart - prefix.length;
    expectedPrefixStart = i < 0 ? 0 : i;
  } else {
    expectedPrefixStart = Math.floor(corpus.length / 2);
  }
  this.dmp.setMatchDistance((_ref = options.contextMatchDistance) != null ? _ref : corpus.length * 2);
  this.dmp.setMatchThreshold((_ref = options.contextMatchThreshold) != null ? _ref : 0.5);
  prefixResult = this.dmp.search(corpus, prefix, expectedPrefixStart);
  if (!prefixResult.length) {
    return {
      matches: []
    };
  }
  prefixStart = prefixResult[0].start;
  prefixEnd = prefixResult[0].end;

  // The suffix is expected one pattern-length after the prefix. Without a
  // pattern, the expected range gives the length; failing that, guess.
  if (pattern != null) {
    patternLength = pattern.length;
  } else if ((expectedStart != null) && (expectedEnd != null)) {
    patternLength = expectedEnd - expectedStart;
  } else {
    patternLength = 64;
  }
  suffixResult = this.dmp.search(corpus.substr(prefixEnd), suffix, patternLength);
  if (!suffixResult.length) {
    return {
      matches: []
    };
  }
  suffixStart = prefixEnd + suffixResult[0].start;
  suffixEnd = prefixEnd + suffixResult[0].end;

  // The wanted range lies between the prefix and the suffix.
  charRange = {
    start: prefixEnd,
    end: suffixStart
  };
  matchThreshold = (_ref = options.patternMatchThreshold) != null ? _ref : 0.5;

  // Without a pattern there is nothing to compare against (and
  // _analyzeMatch can not handle a missing pattern), so only report the
  // text that was found.
  if (pattern != null) {
    analysis = this._analyzeMatch(pattern, charRange, true);
  } else {
    analysis = {
      found: this._normalizeString(corpus.slice(charRange.start, charRange.end))
    };
  }

  // Try to find a better match by moving the initial match around, even
  // if that hurts the similarity of the context.
  if ((pattern != null) && options.flexContext && !analysis.exact) {
    if (!this.pm) {
      this.pm = new window.DTM_ExactMatcher;
    }
    this.pm.setDistinct(false);
    this.pm.setCaseSensitive(false);
    flexMatches = this.pm.search(corpus.slice(prefixStart, +suffixEnd + 1 || 9e9), pattern);
    candidate = null;
    bestError = 2;
    for (_i = 0; _i < flexMatches.length; _i++) {
      flexMatch = flexMatches[_i];
      // The range that matched the quote, in corpus coordinates.
      flexRange = {
        start: prefixStart + flexMatch.start,
        end: prefixStart + flexMatch.end
      };
      // How would the prefix and the suffix fare around this range?
      a1 = this._analyzeMatch(prefix, {
        start: prefixStart,
        end: flexRange.start
      }, true);
      prefixError = a1.exact ? 0 : a1.comparison.errorLevel;
      a2 = this._analyzeMatch(suffix, {
        start: flexRange.end,
        end: suffixEnd
      }, true);
      suffixError = a2.exact ? 0 : a2.comparison.errorLevel;
      // Only consider ranges where at least one side matches exactly.
      if (a1.exact || a2.exact) {
        totalError = prefixError + suffixError;
        if (totalError < bestError) {
          candidate = flexRange;
          bestError = totalError;
        }
      }
    }
    if (candidate != null) {
      console.log("flexContext adjustment: we found a better candidate!");
      charRange = candidate;
      analysis = this._analyzeMatch(pattern, charRange, true);
    }
  }

  // Accept the match when there is no pattern to compare to, when the
  // found text equals the pattern, or when the error is still acceptable.
  if ((pattern == null) || analysis.exact || (analysis.comparison.errorLevel <= matchThreshold)) {
    match = {};
    parts = [charRange, analysis];
    for (_j = 0; _j < parts.length; _j++) {
      part = parts[_j];
      for (key in part) {
        match[key] = part[key];
      }
    }
    return {
      matches: [match]
    };
  }
  return {
    matches: []
  };
};
// Brings a string to a "canonical" form: every run of two or more
// whitespace characters becomes a single space, then the result is trimmed.
DomTextMatcher.prototype._normalizeString = function(string) {
  var collapsed = string.replace(/\s{2,}/g, " ");
  return collapsed.trim();
};
// Searches the corpus with a custom matcher object.
//
// matcher: object doing the plain-text part of the search
// pattern: what to search for (surrounding whitespace is ignored)
// pos:     where the match is expected
// options: passed on to the matcher; options.withFuzzyComparison
//          (default false) also requests a fuzzy comparison of each match
//          with the pattern
//
// Returns { matches, time }: `matches` lists, for every hit reported by the
// matcher, the matcher's fields merged with the result of _analyzeMatch();
// `time` tells how long the text matching and the analysis took.
//
// Throws when the pattern is missing, or is empty after trimming. (The
// emptiness check used to test the trimmed string against null, which can
// never be true, so empty patterns slipped through to the matcher.)
DomTextMatcher.prototype._search = function(matcher, pattern, pos, options) {
  var analysis, fuzzyComparison, key, match, matches, part, parts, t1, t2, t3, textMatches, _i, _j, _ref;
  if (options == null) {
    options = {};
  }
  if (pattern == null) {
    throw new Error("Can't search for null pattern!");
  }
  pattern = pattern.trim();
  if (!pattern) {
    throw new Error("Can't search for an empty pattern!");
  }
  fuzzyComparison = (_ref = options.withFuzzyComparison) != null ? _ref : false;
  t1 = this.timestamp();
  textMatches = matcher.search(this.corpus(), pattern, pos, options);
  t2 = this.timestamp();
  matches = [];
  for (_i = 0; _i < textMatches.length; _i++) {
    // See how good each match is, and merge that with the matcher's data.
    analysis = this._analyzeMatch(pattern, textMatches[_i], fuzzyComparison);
    match = {};
    parts = [textMatches[_i], analysis];
    for (_j = 0; _j < parts.length; _j++) {
      part = parts[_j];
      for (key in part) {
        match[key] = part[key];
      }
    }
    matches.push(match);
  }
  t3 = this.timestamp();
  return {
    matches: matches,
    time: {
      phase1_textMatching: t2 - t1,
      phase2_matchMapping: t3 - t2,
      total: t3 - t1
    }
  };
};
// Returns the current time as milliseconds since the epoch.
DomTextMatcher.prototype.timestamp = function() {
  var now = new Date();
  return now.getTime();
};
// Reads the text of a match reported by a matcher engine and compares it
// with the pattern.
//
// pattern:   the text that was searched for
// charRange: start (inclusive) and end (exclusive) of the match
// useFuzzy:  also calculate a fuzzy comparison for non-exact matches?
//            (default false)
//
// Returns an object with the normalized text that was found (`found`),
// whether it equals the normalized pattern (`exact`), and for non-exact
// matches `exactExceptCase`, plus `comparison` when useFuzzy is set.
//
// The corpus is sliced with the plain exclusive end offset. The previous
// expression, `+(end - 1) + 1 || 9e9`, turned an end offset of 0 into
// "until the end of the corpus", so an empty range at the very start
// yielded the whole corpus.
DomTextMatcher.prototype._analyzeMatch = function(pattern, charRange, useFuzzy) {
  var expected, found, result;
  if (useFuzzy == null) {
    useFuzzy = false;
  }
  expected = this._normalizeString(pattern);
  found = this._normalizeString(this.corpus().slice(charRange.start, charRange.end));
  result = {
    found: found,
    exact: found === expected
  };
  if (!result.exact) {
    result.exactExceptCase = expected.toLowerCase() === found.toLowerCase();
  }
  if (!result.exact && useFuzzy) {
    this.ensureDMP();
    result.comparison = this.dmp.compare(expected, found);
  }
  return result;
};
// Makes sure the fuzzy matcher exists in this.dmp, creating it from
// window.DTM_DMPMatcher on first use. Returns the new matcher when one was
// created, and nothing when it already existed.
// Throws when window.DTM_DMPMatcher is not defined.
DomTextMatcher.prototype.ensureDMP = function() {
  if (this.dmp != null) {
    return;
  }
  if (window.DTM_DMPMatcher == null) {
    throw new Error("DTM_DMPMatcher is not available. Have you loaded the text match engines?");
  }
  this.dmp = new window.DTM_DMPMatcher;
  return this.dmp;
};
return DomTextMatcher;
})();
}).call(this);
# Common functions for all page-based document mapper modules
class window.PageTextMapperCore
CONTEXT_LEN: 32
# Get the page index for a given character position
#
# Walks @pageInfo and returns the index of an entry whose interval
# contains pos (start inclusive, end exclusive); the final statement
# returns -1.
getPageIndexForPos: (pos) ->
for info in @pageInfo
if info.start <= pos < info.end
return info.index
console.log "Not on page " + info.index
return -1
# A new page was rendered
#
# Defined with a fat arrow, so it stays bound to this instance when it is
# passed around as a callback.
_onPageRendered: (index) =>
#console.log "Allegedly rendered page #" + index
# Is it really rendered?
unless @_isPageRendered(index) and @pageInfo[index]
# console.log "Page #" + index + " is not really rendered yet."
# Not ready yet: check again in one second
setTimeout (=> @_onPageRendered index), 1000
return
# Collect info about the new DOM subtree
@_mapPage @pageInfo[index]
# Determine whether a given page has been rendered and mapped
#
# True if @pageInfo has an entry for the index, and that entry has a
# domMapper.
isPageMapped: (index) ->
return @pageInfo[index]?.domMapper?
# Create the mappings for a given page
#
# Stores the page's root node and a new DomTextMapper for it on the page
# info, scans the page, logs a warning if the rendered text differs from
# info.content, and announces the page with a "docPageMapped" event
# (carrying pageIndex) dispatched on window.
_mapPage: (info) ->
# console.log "Mapping page", info.index
info.node = @getRootNodeForPage info.index
info.domMapper = new DomTextMapper("d-t-m for page #" + info.index)
info.domMapper.setRootNode info.node
info.domMapper.documentChanged()
# Hand the extracted text to the mapper as the content to expect
if @requiresSmartStringPadding
info.domMapper.setExpectedContent info.content
info.domMapper.scan()
renderedContent = info.domMapper.getCorpus()
if renderedContent isnt info.content
console.log "Oops. Mismatch between rendered and extracted text, while mapping page #" + info.index + "!"
console.trace()
console.log "Rendered: " + renderedContent
console.log "Extracted: " + info.content
# Announce the newly available page
# (from a timer callback; no delay is passed to setTimeout)
setTimeout ->
event = document.createEvent "UIEvents"
event.initUIEvent "docPageMapped", false, false, window, 0
event.pageIndex = info.index
window.dispatchEvent event
# Update the mappings for a given page
#
# Tells the page's existing domMapper that the document has changed,
# then scans again.
_updateMap: (info) ->
#console.log "Updating mappings for page #" + info.index
info.domMapper.documentChanged()
info.domMapper.scan()
# Delete the mappings for a given page
_unmapPage: (info) ->
delete info.domMapper
# Announce the unavailable page
event = document.createEvent "UIEvents"
event.initUIEvent "docPageUnmapped", false, false, window, 0
event.pageIndex = info.index
window.dispatchEvent event
# Announce scrolling
_onScroll: ->
event = document.createEvent "UIEvents"
event.initUIEvent "docPageScrolling", false, false, window, 0
window.dispatchEvent event
# Look up info about a give DOM node, uniting page and node info
getInfoForNode: (node) ->
pageData = @getPageForNode node
# Give up if the given page is not mapped yet
return null unless pageData.domMapper
nodeData = pageData.domMapper.getInfoForNode node
# Copy info about the node
info = {}
for k,v of nodeData
info[k] = v
# Correct the chatacter offsets with that of the page
info.start += pageData.start
info.end += pageData.start
info.pageIndex = pageData.index
info
# Look up the start offset of a give DOM node, uniting page and node info
getStartPosForNode: (node) ->
pageData = @getPageForNode node
nodeStart = pageData.domMapper.getStartPosForNode node
pageData.start + nodeStart
# Look up the end offset of a give DOM node, uniting page and node info
getEndPosForNode: (node) ->
pageData = @getPageForNode node
nodeEnd = pageData.domMapper.getEndPosForNode node
pageData.start + nodeEnd
# Return some data about a given character range
getMappingsForCharRange: (start, end, pages) ->
#console.log "Get mappings for char range [" + start + "; " + end + "], for pages " + pages + "."
# Check out which pages are these on
startIndex = @getPageIndexForPos start
endIndex = @getPageIndexForPos end
#console.log "These are on pages [" + startIndex + ".." + endIndex + "]."
# Function to get the relevant section inside a given page
getSection = (index) =>
info = @pageInfo[index]
# Calculate in-page offsets
realStart = (Math.max info.start, start) - info.start
realEnd = (Math.min info.end, end) - info.start
# Get the range inside the page
mappings = info.domMapper.getMappingsForCharRange realStart, realEnd
mappings.sections[0]
# Get the section for all involved pages
sections = {}
for index in pages ? [startIndex..endIndex]
sections[index] = getSection index
# Return the data
sections: sections
getCorpus: ->
unless @_corpus
throw new Error "Hey! Called getCorpus() before corpus defined!"
@_corpus
getContextForCharRange: (start, end) ->
prefixStart = Math.max 0, start - @CONTEXT_LEN
prefixLen = start - prefixStart
prefix = @_corpus.substr prefixStart, prefixLen
suffix = @_corpus.substr end, @CONTEXT_LEN
[prefix.trim(), suffix.trim()]
# Call this in scan, when you have the page contents
_onHavePageContents: ->
# Join all the text together
@_corpus = (info.content for info in @pageInfo).join " "
# Go over the pages, and calculate some basic info
pos = 0
@pageInfo.forEach (info, i) =>
info.len = info.content.length
info.start = pos
info.end = (pos += info.len + 1)
# Call this in scan, after resolving the promise
_onAfterScan: ->
# Go over the pages again, and map the rendered ones
@pageInfo.forEach (info, i) =>
if @_isPageRendered i
@_mapPage info
# Naive text matcher
class window.DTM_ExactMatcher

  # Exact-substring search engine.
  #
  # @distinct      - when true, matches may not overlap; when false,
  #                  overlapping matches are reported too.
  # @caseSensitive - when false, text and pattern are lower-cased first.
  constructor: ->
    @distinct = true
    @caseSensitive = false

  setDistinct: (value) -> @distinct = value

  setCaseSensitive: (value) -> @caseSensitive = value

  # Find every occurrence of `pattern` in `text`.
  #
  # Returns an array of {start, end} objects (end exclusive), in ascending
  # order. An empty pattern yields no matches.
  search: (text, pattern) ->
    pLen = pattern.length
    results = []

    # "".indexOf("") is always 0, so an empty pattern would never let the
    # scan below make progress.
    return results if pLen is 0

    unless @caseSensitive
      text = text.toLowerCase()
      pattern = pattern.toLowerCase()

    # How far past a hit the next lookup starts: beyond the whole hit for
    # distinct matches, one character on when overlaps are allowed.
    step = if @distinct then pLen else 1

    # Scan with a moving start offset rather than re-slicing the text.
    pos = text.indexOf pattern
    while pos > -1
      results.push
        start: pos
        end: pos + pLen
      pos = text.indexOf pattern, pos + step
    results
# Regular-expression based text matcher
class window.DTM_RegexMatcher

  constructor: ->
    @caseSensitive = false

  setCaseSensitive: (value) -> @caseSensitive = value

  # Find every non-empty match of the regular expression source `pattern`
  # in `text`.
  #
  # Returns an array of {start, end} objects (end exclusive), in ascending
  # order. Throws a SyntaxError when `pattern` is not a valid expression.
  search: (text, pattern) ->
    re = new RegExp pattern, if @caseSensitive then "g" else "gi"
    results = []
    while (m = re.exec text)?
      if m[0].length is 0
        # A zero-length match leaves lastIndex where it was, so exec would
        # return the same match forever; step over it by hand.
        re.lastIndex += 1
        continue
      results.push
        start: m.index
        end: m.index + m[0].length
    results
# diff-match-patch - based text matcher
class window.DTM_DMPMatcher

  # Option names that make search() attach a Levenshtein distance.
  # The two misspelled variants are the names search() used to test (each
  # in a different place), so they remain accepted next to the correct one.
  LEVENSHTEIN_OPTIONS = ["withLevenshtein", "withLevenhstein", "withLevenshstein"]

  constructor: ->
    @dmp = new diff_match_patch
    @dmp.Diff_Timeout = 0
    @caseSensitive = false

  _reverse: (text) -> text.split("").reverse().join ""

  # Use this to get the max allowed pattern length.
  # Trying to use a longer pattern will give an error.
  getMaxPatternLength: -> @dmp.Match_MaxBits

  # The following example is a classic dilemma.
  # There are two potential matches, one is close to the expected location
  # but contains a one character error, the other is far from the expected
  # location but is exactly the pattern sought after:
  #
  # match_main("abc12345678901234567890abbc", "abc", 26)
  #
  # Which result is returned (0 or 24) is determined by the
  # MatchDistance property.
  #
  # An exact letter match which is 'distance' characters away
  # from the fuzzy location would score as a complete mismatch.
  # For example, a distance of '0' requires the match be at the exact
  # location specified, whereas a threshold of '1000' would require
  # a perfect match to be within 800 characters of the expected location
  # to be found using a 0.8 threshold (see below).
  #
  # The larger MatchDistance is, the slower search may take to compute.
  #
  # This variable defaults to 1000.
  setMatchDistance: (distance) -> @dmp.Match_Distance = distance

  getMatchDistance: -> @dmp.Match_Distance

  # MatchThreshold determines the cut-off value for a valid match.
  #
  # If Match_Threshold is closer to 0, the requirements for accuracy
  # increase. If Match_Threshold is closer to 1 then it is more likely
  # that a match will be found. The larger Match_Threshold is, the slower
  # search may take to compute.
  #
  # This variable defaults to 0.5.
  setMatchThreshold: (threshold) -> @dmp.Match_Threshold = threshold

  getMatchThreshold: -> @dmp.Match_Threshold

  # Reads the instance flag; a bare `caseSensitive` would be an undefined
  # variable and throw a ReferenceError.
  getCaseSensitive: -> @caseSensitive

  setCaseSensitive: (value) -> @caseSensitive = value

  # Given a text to search, a pattern to search for and an
  # expected location in the text near which to find the pattern,
  # return the location which matches closest.
  #
  # The function will search for the best match based on both the number
  # of character errors between the pattern and the potential match,
  # as well as the distance between the expected location and the
  # potential match.
  #
  # Returns an empty array when nothing is found, otherwise a one-element
  # array holding {start, end} (end exclusive).
  #
  # Options:
  #   withLevenshtein - also set `diff` and `lev` on the result
  #   withDiff        - also set `diff` and `diffHTML` on the result
  #
  # Throws an Error when expectedStartLoc is negative or not an integer.
  search: (text, pattern, expectedStartLoc = 0, options = {}) ->
    if expectedStartLoc < 0
      throw new Error "Can't search at negative indices!"
    if expectedStartLoc isnt Math.floor expectedStartLoc
      throw new Error "Expected start location must be an integer."

    unless @caseSensitive
      text = text.toLowerCase()
      pattern = pattern.toLowerCase()

    pLen = pattern.length
    maxLen = @getMaxPatternLength()

    if pLen <= maxLen
      result = @searchForSlice text, pattern, expectedStartLoc
    else
      # The pattern is too long to match in one go: locate its first and
      # last maxLen characters separately and span the text between them.
      startSlice = pattern.substr 0, maxLen
      startPos = @searchForSlice text, startSlice, expectedStartLoc
      if startPos?
        endSlice = pattern.substr pLen - maxLen, maxLen
        endLoc = startPos.start + pLen - maxLen
        endPos = @searchForSlice text, endSlice, endLoc
        if endPos?
          matchLen = endPos.end - startPos.start
          # The middle part is never compared, so only accept a span whose
          # length is within 50% of the pattern's length.
          if pLen*0.5 <= matchLen <= pLen*1.5
            result =
              start: startPos.start
              end: endPos.end

    return [] unless result?

    wantsLevenshtein = LEVENSHTEIN_OPTIONS.some (key) -> options[key]
    if wantsLevenshtein or options.withDiff
      found = text.substr result.start, result.end - result.start
      result.diff = @dmp.diff_main pattern, found
      if wantsLevenshtein
        result.lev = @dmp.diff_levenshtein result.diff
      if options.withDiff
        @dmp.diff_cleanupSemantic result.diff
        result.diffHTML = @dmp.diff_prettyHtml result.diff
    [result]

  # Compare two string slices, get Levenshtein and visual diff
  #
  # Returns {diff, lev, errorLevel, diffHTML}, where errorLevel is the
  # Levenshtein distance divided by the length of text1.
  # Throws an Error when either argument is null or undefined.
  compare: (text1, text2) ->
    unless (text1? and text2?)
      throw new Error "Can not compare non-existing strings!"
    result = {}
    result.diff = @dmp.diff_main text1, text2
    result.lev = @dmp.diff_levenshtein result.diff
    result.errorLevel = result.lev / text1.length
    @dmp.diff_cleanupSemantic result.diff
    result.diffHTML = @dmp.diff_prettyHtml result.diff
    result

  # ============= Private part ==========================================
  # You don't need to call the functions below this point manually

  # Locate `slice` near expectedStartLoc. Returns {start, end}, or null
  # when the forward lookup reports index -1.
  #
  # NOTE(review): this reads `.index` from the value match_main returns;
  # the stock diff_match_patch returns a plain number, so a build that
  # returns an object is presumably bundled - confirm.
  searchForSlice: (text, slice, expectedStartLoc) ->
    r1 = @dmp.match_main text, slice, expectedStartLoc
    startIndex = r1.index
    if startIndex is -1 then return null

    # Find where the match ends by running the same lookup on the
    # reversed text with the reversed slice.
    txet = @_reverse text
    nrettap = @_reverse slice
    expectedEndLoc = startIndex + slice.length
    expectedDneLoc = text.length - expectedEndLoc
    r2 = @dmp.match_main txet, nrettap, expectedDneLoc
    dneIndex = r2.index
    endIndex = text.length - dneIndex

    start: startIndex
    end: endIndex
// Generated by CoffeeScript 1.7.1
(function() {
// Exact-substring search engine.
//
//   distinct      - when true, matches may not overlap; when false,
//                   overlapping matches are reported too.
//   caseSensitive - when false, text and pattern are lower-cased first.
window.DTM_ExactMatcher = (function() {
  function DTM_ExactMatcher() {
    this.distinct = true;
    this.caseSensitive = false;
  }

  DTM_ExactMatcher.prototype.setDistinct = function(value) {
    return this.distinct = value;
  };

  DTM_ExactMatcher.prototype.setCaseSensitive = function(value) {
    return this.caseSensitive = value;
  };

  // Find every occurrence of `pattern` in `text`.
  //
  // Returns an array of {start, end} objects (end exclusive), in ascending
  // order. An empty pattern yields no matches.
  DTM_ExactMatcher.prototype.search = function(text, pattern) {
    var pLen, pos, results, step;
    pLen = pattern.length;
    results = [];

    // "".indexOf("") is always 0, so an empty pattern would never let the
    // scan below make progress.
    if (pLen === 0) {
      return results;
    }

    if (!this.caseSensitive) {
      text = text.toLowerCase();
      pattern = pattern.toLowerCase();
    }

    // How far past a hit the next lookup starts: beyond the whole hit for
    // distinct matches, one character on when overlaps are allowed.
    step = this.distinct ? pLen : 1;

    // Scan with a moving start offset rather than re-slicing the text.
    pos = text.indexOf(pattern);
    while (pos > -1) {
      results.push({
        start: pos,
        end: pos + pLen
      });
      pos = text.indexOf(pattern, pos + step);
    }
    return results;
  };

  return DTM_ExactMatcher;
})();
// Regular-expression based search engine.
//
//   caseSensitive - when false, the expression is compiled with the "i" flag.
window.DTM_RegexMatcher = (function() {
  function DTM_RegexMatcher() {
    this.caseSensitive = false;
  }

  DTM_RegexMatcher.prototype.setCaseSensitive = function(value) {
    return this.caseSensitive = value;
  };

  // Find every non-empty match of the regular expression source `pattern`
  // in `text`.
  //
  // Returns an array of {start, end} objects (end exclusive), in ascending
  // order. Throws a SyntaxError when `pattern` is not a valid expression.
  DTM_RegexMatcher.prototype.search = function(text, pattern) {
    var m, re, results;
    re = new RegExp(pattern, this.caseSensitive ? "g" : "gi");
    results = [];
    while ((m = re.exec(text)) !== null) {
      if (m[0].length === 0) {
        // A zero-length match leaves lastIndex where it was, so exec would
        // return the same match forever; step over it by hand.
        re.lastIndex += 1;
        continue;
      }
      results.push({
        start: m.index,
        end: m.index + m[0].length
      });
    }
    return results;
  };

  return DTM_RegexMatcher;
})();
// diff-match-patch based fuzzy search engine.
window.DTM_DMPMatcher = (function() {
  // Option names that make search() attach a Levenshtein distance.
  // The two misspelled variants are the names search() used to test (each
  // in a different place), so they remain accepted next to the correct one.
  var LEVENSHTEIN_OPTIONS = ["withLevenshtein", "withLevenhstein", "withLevenshstein"];

  function DTM_DMPMatcher() {
    this.dmp = new diff_match_patch();
    this.dmp.Diff_Timeout = 0;
    this.caseSensitive = false;
  }

  DTM_DMPMatcher.prototype._reverse = function(text) {
    return text.split("").reverse().join("");
  };

  // Longest pattern that can be matched in one go; search() splits
  // anything longer into a leading and a trailing slice.
  DTM_DMPMatcher.prototype.getMaxPatternLength = function() {
    return this.dmp.Match_MaxBits;
  };

  DTM_DMPMatcher.prototype.setMatchDistance = function(distance) {
    return this.dmp.Match_Distance = distance;
  };

  DTM_DMPMatcher.prototype.getMatchDistance = function() {
    return this.dmp.Match_Distance;
  };

  DTM_DMPMatcher.prototype.setMatchThreshold = function(threshold) {
    return this.dmp.Match_Threshold = threshold;
  };

  DTM_DMPMatcher.prototype.getMatchThreshold = function() {
    return this.dmp.Match_Threshold;
  };

  // Reads the instance flag; a bare `caseSensitive` would be an undefined
  // variable and throw a ReferenceError.
  DTM_DMPMatcher.prototype.getCaseSensitive = function() {
    return this.caseSensitive;
  };

  DTM_DMPMatcher.prototype.setCaseSensitive = function(value) {
    return this.caseSensitive = value;
  };

  // Find the best match for `pattern` in `text` near expectedStartLoc
  // (default 0).
  //
  // Returns an empty array when nothing is found, otherwise a one-element
  // array holding {start, end} (end exclusive).
  //
  // Options (default {}):
  //   withLevenshtein - also set `diff` and `lev` on the result
  //   withDiff        - also set `diff` and `diffHTML` on the result
  //
  // Throws an Error when expectedStartLoc is negative or not an integer.
  DTM_DMPMatcher.prototype.search = function(text, pattern, expectedStartLoc, options) {
    var endLoc, endPos, endSlice, found, matchLen, maxLen, pLen, result, startPos, startSlice, wantsLevenshtein;
    if (expectedStartLoc == null) {
      expectedStartLoc = 0;
    }
    if (options == null) {
      options = {};
    }
    if (expectedStartLoc < 0) {
      throw new Error("Can't search at negative indices!");
    }
    if (expectedStartLoc !== Math.floor(expectedStartLoc)) {
      throw new Error("Expected start location must be an integer.");
    }
    if (!this.caseSensitive) {
      text = text.toLowerCase();
      pattern = pattern.toLowerCase();
    }
    pLen = pattern.length;
    maxLen = this.getMaxPatternLength();
    if (pLen <= maxLen) {
      result = this.searchForSlice(text, pattern, expectedStartLoc);
    } else {
      // The pattern is too long to match in one go: locate its first and
      // last maxLen characters separately and span the text between them.
      startSlice = pattern.substr(0, maxLen);
      startPos = this.searchForSlice(text, startSlice, expectedStartLoc);
      if (startPos != null) {
        endSlice = pattern.substr(pLen - maxLen, maxLen);
        endLoc = startPos.start + pLen - maxLen;
        endPos = this.searchForSlice(text, endSlice, endLoc);
        if (endPos != null) {
          matchLen = endPos.end - startPos.start;
          // The middle part is never compared, so only accept a span whose
          // length is within 50% of the pattern's length.
          if (pLen * 0.5 <= matchLen && matchLen <= pLen * 1.5) {
            result = {
              start: startPos.start,
              end: endPos.end
            };
          }
        }
      }
    }
    if (result == null) {
      return [];
    }
    wantsLevenshtein = LEVENSHTEIN_OPTIONS.some(function(key) {
      return Boolean(options[key]);
    });
    if (wantsLevenshtein || options.withDiff) {
      found = text.substr(result.start, result.end - result.start);
      result.diff = this.dmp.diff_main(pattern, found);
      if (wantsLevenshtein) {
        result.lev = this.dmp.diff_levenshtein(result.diff);
      }
      if (options.withDiff) {
        this.dmp.diff_cleanupSemantic(result.diff);
        result.diffHTML = this.dmp.diff_prettyHtml(result.diff);
      }
    }
    return [result];
  };

  // Compare two strings.
  //
  // Returns {diff, lev, errorLevel, diffHTML}, where errorLevel is the
  // Levenshtein distance divided by the length of text1.
  // Throws an Error when either argument is null or undefined.
  DTM_DMPMatcher.prototype.compare = function(text1, text2) {
    var result;
    if (!((text1 != null) && (text2 != null))) {
      throw new Error("Can not compare non-existing strings!");
    }
    result = {};
    result.diff = this.dmp.diff_main(text1, text2);
    result.lev = this.dmp.diff_levenshtein(result.diff);
    result.errorLevel = result.lev / text1.length;
    this.dmp.diff_cleanupSemantic(result.diff);
    result.diffHTML = this.dmp.diff_prettyHtml(result.diff);
    return result;
  };

  // Private: locate `slice` near expectedStartLoc. Returns {start, end},
  // or null when the forward lookup reports index -1.
  //
  // NOTE(review): this reads `.index` from the value match_main returns;
  // the stock diff_match_patch returns a plain number, so a build that
  // returns an object is presumably bundled - confirm.
  DTM_DMPMatcher.prototype.searchForSlice = function(text, slice, expectedStartLoc) {
    var dneIndex, endIndex, expectedDneLoc, expectedEndLoc, nrettap, r1, r2, startIndex, txet;
    r1 = this.dmp.match_main(text, slice, expectedStartLoc);
    startIndex = r1.index;
    if (startIndex === -1) {
      return null;
    }
    // Find where the match ends by running the same lookup on the
    // reversed text with the reversed slice.
    txet = this._reverse(text);
    nrettap = this._reverse(slice);
    expectedEndLoc = startIndex + slice.length;
    expectedDneLoc = text.length - expectedEndLoc;
    r2 = this.dmp.match_main(txet, nrettap, expectedDneLoc);
    dneIndex = r2.index;
    endIndex = text.length - dneIndex;
    return {
      start: startIndex,
      end: endIndex
    };
  };

  return DTM_DMPMatcher;
})();
}).call(this);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment