Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
coopwire-hypothesis
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
孙灵跃 Leon Sun
coopwire-hypothesis
Commits
94465e93
Commit
94465e93
authored
Sep 02, 2014
by
RawKStar77
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Convert coffee files in scripts/vendor to js, modify scripts/vendor/.gitignore
parent
c55570c6
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
1296 additions
and
1436 deletions
+1296
-1436
.gitignore
h/static/scripts/vendor/.gitignore
+0
-2
dom_text_mapper.coffee
h/static/scripts/vendor/dom_text_mapper.coffee
+0
-786
dom_text_mapper.js
h/static/scripts/vendor/dom_text_mapper.js
+834
-0
dom_text_matcher.coffee
h/static/scripts/vendor/dom_text_matcher.coffee
+0
-282
dom_text_matcher.js
h/static/scripts/vendor/dom_text_matcher.js
+248
-0
page_text_mapper_core.coffee
h/static/scripts/vendor/page_text_mapper_core.coffee
+0
-165
text_match_engines.coffee
h/static/scripts/vendor/text_match_engines.coffee
+0
-201
text_match_engines.js
h/static/scripts/vendor/text_match_engines.js
+214
-0
No files found.
h/static/scripts/vendor/.gitignore
View file @
94465e93
*.min.js
*.min.js
dom_text.js
page_text_mapper_core.js
h/static/scripts/vendor/dom_text_mapper.coffee
deleted
100644 → 0
View file @
c55570c6
class
window
.
DomTextMapper
@
applicable
:
->
true
USE_TABLE_TEXT_WORKAROUND
=
true
USE_EMPTY_TEXT_WORKAROUND
=
true
SELECT_CHILDREN_INSTEAD
=
[
"thead"
,
"tbody"
,
"tfoot"
,
"ol"
,
"a"
,
"caption"
,
"p"
,
"span"
,
"div"
,
"h1"
,
"h2"
,
"h3"
,
"h4"
,
"h5"
,
"h6"
,
"ul"
,
"li"
,
"form"
]
CONTEXT_LEN
=
32
@
instances
:
0
constructor
:
(
@
id
)
->
@
setRealRoot
()
DomTextMapper
.
instances
+=
1
@
id
?=
"d-t-m #"
+
DomTextMapper
.
instances
log
:
(
msg
...)
->
console
.
log
@
id
,
": "
,
msg
...
# ===== Public methods =======
# Change handler
_onChange
:
(
event
)
=>
# @log "received change event", event
# @log "source", event.target
# @log "reason", event.reason ? "no reason"
# @log "data", event.data
@
documentChanged
()
@
performUpdateOnNode
event
.
target
,
false
,
event
.
data
@
lastScanned
=
@
timestamp
()
# Change the root node, and subscribe to the events
_changeRootNode
:
(
node
)
->
@
rootNode
?
.
removeEventListener
"domChange"
,
@
_onChange
@
rootNode
=
node
@
rootNode
.
addEventListener
"domChange"
,
@
_onChange
node
# Consider only the sub-tree beginning with the given node.
#
# This will be the root node to use for all operations.
setRootNode
:
(
rootNode
)
->
@
rootWin
=
window
@
pathStartNode
=
@
_changeRootNode
rootNode
# Consider only the sub-tree beginning with the node whose ID was given.
#
# This will be the root node to use for all operations.
setRootId
:
(
rootId
)
->
@
setRootNode
document
.
getElementById
rootId
# Use this iframe for operations.
#
# Call this when mapping content in an iframe.
setRootIframe
:
(
iframeId
)
->
iframe
=
window
.
document
.
getElementById
iframeId
unless
iframe
?
throw
new
Error
"Can't find iframe with specified ID!"
@
rootWin
=
iframe
.
contentWindow
unless
@
rootWin
?
throw
new
Error
"Can't access contents of the specified iframe!"
@
_changeRootNode
@
rootWin
.
document
@
pathStartNode
=
@
getBody
()
# Return the default path
getDefaultPath
:
->
@
getPathTo
@
pathStartNode
# Work with the whole DOM tree
#
# (This is the default; you only need to call this, if you have configured
# a different root earlier, and now you want to restore the default setting.)
setRealRoot
:
->
@
rootWin
=
window
@
_changeRootNode
document
@
pathStartNode
=
@
getBody
()
# Notify the library that the document has changed.
# This means that subsequent calls can not safely re-use previously cached
# data structures, so some calculations will be necessary again.
#
# The usage of this feature is not mandatory; if not receiving change
# notifications, the library will just assume that the document can change
# anytime, and therefore will not assume any stability.
documentChanged
:
->
@
lastDOMChange
=
@
timestamp
()
# @log "Registered document change."
setExpectedContent
:
(
content
)
->
@
expectedContent
=
content
# Scan the document
#
# Traverses the DOM, collects various information, and
# creates mappings between the string indices
# (as appearing in the rendered text) and the DOM elements.
#
# A map is returned, where the keys are the paths, and the
# values are objects with info about those parts of the DOM.
# path: the valid path value
# node: reference to the DOM node
# content: the text content of the node, as rendered by the browser
# length: the length of the text content
scan
:
->
if
@
domStableSince
@
lastScanned
# @log "We have a valid DOM structure cache."
return
else
# @log "Last scan time: " + @lastScanned
# @log "Last DOM change: " + @lastDOMChange
# @log "No valid DOM structure scan available, doing scan."
unless
@
pathStartNode
.
ownerDocument
.
body
.
contains
@
pathStartNode
# We cannot map nodes that are not attached.
# @log "This is not attached to dom. Exiting."
return
# @log "No valid cache, will have to do a scan."
startTime
=
@
timestamp
()
@
saveSelection
()
@
path
=
{}
@
traverseSubTree
@
pathStartNode
,
@
getDefaultPath
()
t1
=
@
timestamp
()
# @log "Phase I (Path traversal) took " + (t1 - startTime) + " ms."
path
=
@
getPathTo
@
pathStartNode
node
=
@
path
[
path
].
node
@
collectPositions
node
,
path
,
null
,
0
,
0
@
restoreSelection
()
@
lastScanned
=
@
timestamp
()
@
_corpus
=
@
path
[
path
].
content
# @log "Corpus is: " + @_corpus
t2
=
@
timestamp
()
# @log "Phase II (offset calculation) took " + (t2 - t1) + " ms."
null
# Select the given path (for visual identification),
# and optionally scroll to it
selectPath
:
(
path
,
scroll
=
false
)
->
info
=
@
path
[
path
]
unless
info
?
then
throw
new
Error
"I have no info about a node at "
+
path
node
=
info
?
.
node
node
or=
@
lookUpNode
info
.
path
@
selectNode
node
,
scroll
performUpdateOnNode
:
(
node
,
escalating
=
false
)
->
unless
node
?
then
throw
new
Error
"Called performUpdate with a null node!"
unless
@
path
?
then
return
#We don't have data yet. Not updating.
startTime
=
@
timestamp
()
unless
escalating
then
@
saveSelection
()
path
=
@
getPathTo
node
pathInfo
=
@
path
[
path
]
unless
pathInfo
?
@
performUpdateOnNode
node
.
parentNode
,
true
unless
escalating
then
@
restoreSelection
()
return
# @log "Performing update on node @ path " + path
# if escalating then @log "(Escalated)"
# @log "Updating data about " + path + ": "
if
pathInfo
.
node
is
node
and
pathInfo
.
content
is
@
getNodeContent
node
,
false
# @log "Good, the node and the overall content is still the same"
# @log "Dropping obsolete path info for children..."
prefix
=
path
+
"/"
pathsToDrop
=
p
# FIXME: There must be a more elegant way to do this.
pathsToDrop
=
[]
for
p
,
data
of
@
path
when
@
stringStartsWith
p
,
prefix
pathsToDrop
.
push
p
for
p
in
pathsToDrop
delete
@
path
[
p
]
# @log "Done. Collecting new path info..."
@
traverseSubTree
node
,
path
# @log "Done. Updating mappings..."
if
pathInfo
.
node
is
@
pathStartNode
# @log "Ended up rescanning the whole doc."
@
collectPositions
node
,
path
,
null
,
0
,
0
else
parentPath
=
@
parentPath
path
parentPathInfo
=
@
path
[
parentPath
]
unless
parentPathInfo
?
throw
new
Error
"While performing update on node "
+
path
+
", no path info found for parent path: "
+
parentPath
oldIndex
=
if
node
is
node
.
parentNode
.
firstChild
0
else
@
path
[
@
getPathTo
node
.
previousSibling
].
end
-
parentPathInfo
.
start
@
collectPositions
node
,
path
,
parentPathInfo
.
content
,
parentPathInfo
.
start
,
oldIndex
# @log "Data update took " + (@timestamp() - startTime) + " ms."
else
# @log "Hm..node has been replaced, or overall content has changed!"
if
pathInfo
.
node
isnt
@
pathStartNode
# @log "I guess I must go up one level."
parentNode
=
if
node
.
parentNode
?
# @log "Node has parent, using that."
node
.
parentNode
else
parentPath
=
@
parentPath
path
# @log "Node has no parent, will look up " + parentPath
@
lookUpNode
parentPath
@
performUpdateOnNode
parentNode
,
true
else
throw
new
Error
"Can not keep up with the changes,
since even the node configured as path start node was replaced."
unless
escalating
then
@
restoreSelection
()
# Return info for a given path in the DOM
getInfoForPath
:
(
path
)
->
unless
@
path
?
throw
new
Error
"Can't get info before running a scan() !"
result
=
@
path
[
path
]
unless
result
?
throw
new
Error
"Found no info for path '"
+
path
+
"'!"
result
# Return the offset of the start of given path in the DOM
getStartPosForPath
:
(
path
)
->
info
=
@
getInfoForPath
path
info
.
start
?
@
getFirstPosAfter
info
.
node
getFirstPosAfter
:
(
node
)
->
if
node
.
nextSibling
?
# Do we have a next sibling?
# Check the sibling
node
=
node
.
nextSibling
path
=
@
getPathTo
node
info
=
@
path
[
path
]
info
.
start
?
@
getFirstPosAfter
node
else
# Nothing to see on this level. Move up in the tree.
@
getFirstPosAfter
node
.
parentNode
# Return the offset of the end of given path in the DOM
getEndPosForPath
:
(
path
)
->
info
=
@
getInfoForPath
path
info
.
end
?
@
getFirstPosBefore
info
.
node
getFirstPosBefore
:
(
node
)
->
if
node
.
previousSibling
?
# Do we have a previous sibling?
# Check the sibling
node
=
node
.
previousSibling
path
=
@
getPathTo
node
info
=
@
path
[
path
]
info
.
end
?
@
getFirstPosBefore
node
else
# Nothing to see on this level. Move up in the tree.
@
getFirstPosBefore
node
.
parentNode
# Return info for a given node in the DOM
getInfoForNode
:
(
node
)
->
unless
node
?
throw
new
Error
"Called getInfoForNode(node) with null node!"
@
getInfoForPath
@
getPathTo
node
# Return the offset of the start of given node in the DOM
getStartPosForNode
:
(
node
)
->
unless
node
?
throw
new
Error
"Called getStartInfoForNode(node) with null node!"
@
getStartPosForPath
@
getPathTo
node
# Return the offset of the end of a given node in the DOM
getEndPosForNode
:
(
node
)
->
unless
node
?
throw
new
Error
"Called getInfoForNode(node) with null node!"
@
getEndPosForPath
@
getPathTo
node
# Get the matching DOM elements for a given set of charRanges
# (Calls getMappingsForCharRange for each element in the given list)
getMappingsForCharRanges
:
(
charRanges
)
->
(
@
getMappingsForCharRange
charRange
.
start
,
charRange
.
end
)
for
charRange
in
charRanges
# Return the rendered value of a part of the dom.
# If path is not given, the default path is used.
getContentForPath
:
(
path
=
null
)
->
path
?=
@
getDefaultPath
()
@
path
[
path
].
content
# Return the length of the rendered value of a part of the dom.
# If path is not given, the default path is used.
getLengthForPath
:
(
path
=
null
)
->
path
?=
@
getDefaultPath
()
@
path
[
path
].
length
getDocLength
:
->
@
_corpus
.
length
getCorpus
:
->
@
_corpus
# Get the context that encompasses the given charRange
# in the rendered text of the document
getContextForCharRange
:
(
start
,
end
)
->
if
start
<
0
throw
Error
"Negative range start is invalid!"
if
end
>
@
_corpus
.
length
throw
Error
"Range end is after the end of corpus!"
prefixStart
=
Math
.
max
0
,
start
-
CONTEXT_LEN
prefix
=
@
_corpus
[
prefixStart
...
start
]
suffix
=
@
_corpus
[
end
...
end
+
CONTEXT_LEN
]
[
prefix
.
trim
(),
suffix
.
trim
()]
# Get the matching DOM elements for a given charRange
#
# If the "path" argument is supplied, scan is called automatically.
# (Except if the supplied path is the same as the last scanned path.)
getMappingsForCharRange
:
(
start
,
end
)
->
unless
(
start
?
and
end
?
)
throw
new
Error
"start and end is required!"
# @log "Collecting nodes for [" + start + ":" + end + "]"
@
scan
()
# Collect the matching path infos
# @log "Collecting mappings"
mappings
=
[]
for
p
,
info
of
@
path
when
info
.
atomic
and
@
regions_overlap
info
.
start
,
info
.
end
,
start
,
end
do
(
info
)
=>
# @log "Checking " + info.path
# @log info
mapping
=
element
:
info
full
=
start
<=
info
.
start
and
info
.
end
<=
end
if
full
mapping
.
full
=
true
mapping
.
wanted
=
info
.
content
mapping
.
yields
=
info
.
content
mapping
.
startCorrected
=
0
mapping
.
endCorrected
=
0
else
if
info
.
node
.
nodeType
is
Node
.
TEXT_NODE
if
start
<=
info
.
start
mapping
.
end
=
end
-
info
.
start
mapping
.
wanted
=
info
.
content
.
substr
0
,
mapping
.
end
else
if
info
.
end
<=
end
mapping
.
start
=
start
-
info
.
start
mapping
.
wanted
=
info
.
content
.
substr
mapping
.
start
else
mapping
.
start
=
start
-
info
.
start
mapping
.
end
=
end
-
info
.
start
mapping
.
wanted
=
info
.
content
.
substr
mapping
.
start
,
mapping
.
end
-
mapping
.
start
@
computeSourcePositions
mapping
mapping
.
yields
=
info
.
node
.
data
.
substr
mapping
.
startCorrected
,
mapping
.
endCorrected
-
mapping
.
startCorrected
else
if
(
info
.
node
.
nodeType
is
Node
.
ELEMENT_NODE
)
and
(
info
.
node
.
tagName
.
toLowerCase
()
is
"img"
)
@
log
"Can not select a sub-string from the title of an image.
Selecting all."
mapping
.
full
=
true
mapping
.
wanted
=
info
.
content
else
@
log
"Warning: no idea how to handle partial mappings
for node type "
+
info
.
node
.
nodeType
if
info
.
node
.
tagName
?
then
@
log
"Tag: "
+
info
.
node
.
tagName
@
log
"Selecting all."
mapping
.
full
=
true
mapping
.
wanted
=
info
.
content
mappings
.
push
mapping
# @log "Done with " + info.path
if
mappings
.
length
is
0
@
log
"Collecting nodes for ["
+
start
+
":"
+
end
+
"]"
@
log
"Should be: '"
+
@
_corpus
[
start
...
end
]
+
"'."
throw
new
Error
"No mappings found for ["
+
start
+
":"
+
end
+
"]!"
mappings
=
mappings
.
sort
(
a
,
b
)
->
a
.
element
.
start
-
b
.
element
.
start
# Create a DOM range object
# @log "Building range..."
r
=
@
rootWin
.
document
.
createRange
()
startMapping
=
mappings
[
0
]
startNode
=
startMapping
.
element
.
node
startPath
=
startMapping
.
element
.
path
startOffset
=
startMapping
.
startCorrected
if
startMapping
.
full
r
.
setStartBefore
startNode
startInfo
=
startPath
else
r
.
setStart
startNode
,
startOffset
startInfo
=
startPath
+
":"
+
startOffset
endMapping
=
mappings
[
mappings
.
length
-
1
]
endNode
=
endMapping
.
element
.
node
endPath
=
endMapping
.
element
.
path
endOffset
=
endMapping
.
endCorrected
if
endMapping
.
full
r
.
setEndAfter
endNode
endInfo
=
endPath
else
r
.
setEnd
endNode
,
endOffset
endInfo
=
endPath
+
":"
+
endOffset
result
=
{
mappings
:
mappings
realRange
:
r
rangeInfo
:
startPath
:
startPath
startOffset
:
startOffset
startInfo
:
startInfo
endPath
:
endPath
endOffset
:
endOffset
endInfo
:
endInfo
safeParent
:
r
.
commonAncestorContainer
}
# Return the result
sections
:
[
result
]
# ===== Private methods (never call from outside the module) =======
timestamp
:
->
new
Date
().
getTime
()
stringStartsWith
:
(
string
,
prefix
)
->
unless
prefix
throw
Error
"Requires a non-empty prefix!"
string
[
0
...
prefix
.
length
]
is
prefix
stringEndsWith
:
(
string
,
suffix
)
->
unless
suffix
throw
Error
"Requires a non-empty suffix!"
string
[
string
.
length
-
suffix
.
length
...
string
.
length
]
is
suffix
parentPath
:
(
path
)
->
path
.
substr
0
,
path
.
lastIndexOf
"/"
domChangedSince
:
(
timestamp
)
->
if
@
lastDOMChange
?
and
timestamp
?
@
lastDOMChange
>
timestamp
else
true
domStableSince
:
(
timestamp
)
->
not
@
domChangedSince
timestamp
getProperNodeName
:
(
node
)
->
nodeName
=
node
.
nodeName
switch
nodeName
when
"#text"
then
return
"text()"
when
"#comment"
then
return
"comment()"
when
"#cdata-section"
then
return
"cdata-section()"
else
return
nodeName
getNodePosition
:
(
node
)
->
pos
=
0
tmp
=
node
while
tmp
if
tmp
.
nodeName
is
node
.
nodeName
pos
++
tmp
=
tmp
.
previousSibling
pos
getPathSegment
:
(
node
)
->
name
=
@
getProperNodeName
node
pos
=
@
getNodePosition
node
name
+
(
if
pos
>
1
then
"[
#{
pos
}
]"
else
""
)
getPathTo
:
(
node
)
->
xpath
=
''
;
while
node
!=
@
rootNode
unless
node
?
throw
new
Error
"Called getPathTo on a node which was not a descendant of @rootNode. "
+
@
rootNode
xpath
=
(
@
getPathSegment
node
)
+
'/'
+
xpath
node
=
node
.
parentNode
xpath
=
(
if
@
rootNode
.
ownerDocument
?
then
'./'
else
'/'
)
+
xpath
xpath
=
xpath
.
replace
/\/$/
,
''
xpath
# This method is called recursively, to traverse a given sub-tree of the DOM.
traverseSubTree
:
(
node
,
path
,
invisible
=
false
,
verbose
=
false
)
->
# Step one: get rendered node content, and store path info,
# if there is valuable content
@
underTraverse
=
path
cont
=
@
getNodeContent
node
,
false
@
path
[
path
]
=
path
:
path
content
:
cont
length
:
cont
.
length
node
:
node
if
cont
.
length
if
verbose
then
@
log
"Collected info about path "
+
path
if
invisible
@
log
"Something seems to be wrong. I see visible content @ "
+
path
+
", while some of the ancestor nodes reported empty contents.
Probably a new selection API bug...."
@
log
"Anyway, text is '"
+
cont
+
"'."
else
if
verbose
then
@
log
"Found no content at path "
+
path
invisible
=
true
# Step two: cover all children.
# Q: should we check children even if
# the given node had no rendered content?
# A: I seem to remember that the answer is yes, but I don't remember why.
if
node
.
hasChildNodes
()
for
child
in
node
.
childNodes
subpath
=
path
+
'/'
+
(
@
getPathSegment
child
)
@
traverseSubTree
child
,
subpath
,
invisible
,
verbose
null
getBody
:
->
(
@
rootWin
.
document
.
getElementsByTagName
"body"
)[
0
]
regions_overlap
:
(
start1
,
end1
,
start2
,
end2
)
->
start1
<
end2
and
start2
<
end1
lookUpNode
:
(
path
)
->
doc
=
@
rootNode
.
ownerDocument
?
@
rootNode
results
=
doc
.
evaluate
path
,
@
rootNode
,
null
,
0
,
null
node
=
results
.
iterateNext
()
# save the original selection
saveSelection
:
->
if
@
savedSelection
?
@
log
"Selection saved at:"
@
log
@
selectionSaved
throw
new
Error
"Selection already saved!"
sel
=
@
rootWin
.
getSelection
()
# @log "Saving selection: " + sel.rangeCount + " ranges."
@
savedSelection
=
((
sel
.
getRangeAt
i
)
for
i
in
[
0
...
sel
.
rangeCount
])
@
selectionSaved
=
(
new
Error
"selection was saved here"
).
stack
# restore selection
restoreSelection
:
->
# @log "Restoring selection: " + @savedSelection.length + " ranges."
unless
@
savedSelection
?
then
throw
new
Error
"No selection to restore."
sel
=
@
rootWin
.
getSelection
()
sel
.
removeAllRanges
()
sel
.
addRange
range
for
range
in
@
savedSelection
delete
@
savedSelection
# Select the given node (for visual identification),
# and optionally scroll to it
selectNode
:
(
node
,
scroll
=
false
)
->
unless
node
?
throw
new
Error
"Called selectNode with null node!"
sel
=
@
rootWin
.
getSelection
()
# clear the selection
sel
.
removeAllRanges
()
# create our range, and select it
realRange
=
@
rootWin
.
document
.
createRange
()
# There is some weird, bogus behaviour in Chrome,
# triggered by whitespaces between the table tag and its children.
# See the select-tbody and the select-the-parent-when-selecting problems
# described here:
# https://github.com/hypothesis/h/issues/280
# And the WebKit bug report here:
# https://bugs.webkit.org/show_bug.cgi?id=110595
#
# To work around this, when told to select specific nodes, we have to
# do various other things. See below.
if
node
.
nodeType
is
Node
.
ELEMENT_NODE
and
node
.
hasChildNodes
()
and
node
.
tagName
.
toLowerCase
()
in
SELECT_CHILDREN_INSTEAD
# This is an element where direct selection sometimes fails,
# because of the WebKit bug.
# (Sometimes it selects nothing, sometimes it selects something wrong.)
# So we select directly the children instead.
children
=
node
.
childNodes
realRange
.
setStartBefore
children
[
0
]
realRange
.
setEndAfter
children
[
children
.
length
-
1
]
sel
.
addRange
realRange
else
if
USE_TABLE_TEXT_WORKAROUND
and
node
.
nodeType
is
Node
.
TEXT_NODE
and
node
.
parentNode
.
tagName
.
toLowerCase
()
is
"table"
# This is a text element that should not even be here.
# Selecting it might select the whole table,
# so we don't select anything
else
# Normal element, should be selected
try
realRange
.
setStartBefore
node
realRange
.
setEndAfter
node
sel
.
addRange
realRange
catch
exception
# This might be caused by the fact that FF can't select a
# TextNode containing only whitespace.
# If this is the case, then it's OK.
unless
USE_EMPTY_TEXT_WORKAROUND
and
@
isWhitespace
node
# No, this is not the case. Then this is an error.
@
log
"Warning: failed to scan element @ "
+
@
underTraverse
@
log
"Content is: "
+
node
.
innerHTML
@
log
"We won't be able to properly anchor to any text inside this element."
# throw exception
if
scroll
sn
=
node
while
sn
?
and
not
sn
.
scrollIntoViewIfNeeded
?
sn
=
sn
.
parentNode
if
sn
?
sn
.
scrollIntoViewIfNeeded
()
else
@
log
"Failed to scroll to element. (Browser does not support scrollIntoViewIfNeeded?)"
sel
# Read and convert the text of the current selection.
readSelectionText
:
(
sel
)
->
sel
or=
@
rootWin
.
getSelection
()
sel
.
toString
().
trim
().
replace
(
/\n/g
,
" "
).
replace
/\s{2,}/g
,
" "
# Read the "text content" of a sub-tree of the DOM by
# creating a selection from it
getNodeSelectionText
:
(
node
,
shouldRestoreSelection
=
true
)
->
if
shouldRestoreSelection
then
@
saveSelection
()
sel
=
@
selectNode
node
text
=
@
readSelectionText
sel
if
shouldRestoreSelection
then
@
restoreSelection
()
text
# Convert "display" text indices to "source" text indices.
computeSourcePositions
:
(
match
)
->
# @log "In computeSourcePosition"
# @log match.element.path
# @log match.element.node.data
# the HTML source of the text inside a text element.
# @log "Calculating source position at " + match.element.path
sourceText
=
match
.
element
.
node
.
data
.
replace
/\n/g
,
" "
# @log "sourceText is '" + sourceText + "'"
# what gets displayed, when the node is processed by the browser.
displayText
=
match
.
element
.
content
# @log "displayText is '" + displayText + "'"
# The selected charRange in displayText.
displayStart
=
if
match
.
start
?
then
match
.
start
else
0
displayEnd
=
if
match
.
end
?
then
match
.
end
else
displayText
.
length
# @log "Display charRange is: " + displayStart + "-" + displayEnd
if
displayEnd
is
0
# Handle empty text nodes
match
.
startCorrected
=
0
match
.
endCorrected
=
0
return
sourceIndex
=
0
displayIndex
=
0
until
sourceStart
?
and
sourceEnd
?
sc
=
sourceText
[
sourceIndex
]
dc
=
displayText
[
displayIndex
]
if
sc
is
dc
if
displayIndex
is
displayStart
sourceStart
=
sourceIndex
displayIndex
++
if
displayIndex
is
displayEnd
sourceEnd
=
sourceIndex
+
1
sourceIndex
++
match
.
startCorrected
=
sourceStart
match
.
endCorrected
=
sourceEnd
# @log "computeSourcePosition done. Corrected charRange is: " +
# match.startCorrected + "-" + match.endCorrected
null
# Internal function used to read out the text content of a given node,
# as rendered by the browser.
# The current implementation uses the browser selection API to do so.
getNodeContent
:
(
node
,
shouldRestoreSelection
=
true
)
->
if
node
is
@
pathStartNode
and
@
expectedContent
?
# @log "Returning fake expectedContent for getNodeContent"
@
expectedContent
else
@
getNodeSelectionText
node
,
shouldRestoreSelection
# Internal function to collect mapping data from a given DOM element.
#
# Input parameters:
# node: the node to scan
# path: the path to the node (relative to rootNode)
# parentContent: the content of the node's parent node
# (as rendered by the browser)
# This is used to determine whether the given node is rendered
# at all.
# If not given, it will be assumed that it is rendered
# parentIndex: the starting character offset
# of content of this node's parent node in the rendered content
# index: the first character offset position in the content of this
# node's parent node
# where the content of this node might start
#
# Returns:
# the first character offset position in the content of this node's
# parent node that is not accounted for by this node
collectPositions
:
(
node
,
path
,
parentContent
=
null
,
parentIndex
=
0
,
index
=
0
)
->
# @log "Scanning path " + path
# content = @getNodeContent node, false
pathInfo
=
@
path
[
path
]
content
=
pathInfo
?
.
content
unless
content
# node has no content, not interesting
pathInfo
.
start
=
parentIndex
+
index
pathInfo
.
end
=
parentIndex
+
index
pathInfo
.
atomic
=
false
return
index
startIndex
=
if
parentContent
?
parentContent
.
indexOf
content
,
index
else
index
if
startIndex
is
-
1
# content of node is not present in parent's content - probably hidden,
# or something similar
@
log
"Content of this not is not present in content of parent, at path "
+
path
@
log
"(Content: '"
+
content
+
"'.)"
return
index
endIndex
=
startIndex
+
content
.
length
atomic
=
not
node
.
hasChildNodes
()
pathInfo
.
start
=
parentIndex
+
startIndex
pathInfo
.
end
=
parentIndex
+
endIndex
pathInfo
.
atomic
=
atomic
if
not
atomic
children
=
node
.
childNodes
i
=
0
pos
=
0
typeCount
=
Object
()
while
i
<
children
.
length
child
=
children
[
i
]
nodeName
=
@
getProperNodeName
child
oldCount
=
typeCount
[
nodeName
]
newCount
=
if
oldCount
?
then
oldCount
+
1
else
1
typeCount
[
nodeName
]
=
newCount
childPath
=
path
+
"/"
+
nodeName
+
(
if
newCount
>
1
"["
+
newCount
+
"]"
else
""
)
pos
=
@
collectPositions
child
,
childPath
,
content
,
parentIndex
+
startIndex
,
pos
i
++
endIndex
WHITESPACE
=
/^\s*$/
# Decides whether a given node is a text node that only contains whitespace
isWhitespace
:
(
node
)
->
result
=
switch
node
.
nodeType
when
Node
.
TEXT_NODE
WHITESPACE
.
test
node
.
data
when
Node
.
ELEMENT_NODE
mightBeEmpty
=
true
for
child
in
node
.
childNodes
mightBeEmpty
=
mightBeEmpty
and
@
isWhitespace
child
mightBeEmpty
else
false
result
# Internal debug method to verify the consistency of mapping info
_testMap
:
->
@
log
"Verifying map info: was it all properly traversed?"
for
i
,
p
of
@
path
unless
p
.
atomic
?
then
@
log
i
+
" is missing data."
@
log
"Verifying map info: do atomic elements match?"
for
i
,
p
of
@
path
when
p
.
atomic
expected
=
@
_corpus
[
p
.
start
...
p
.
end
]
ok
=
p
.
content
is
expected
unless
ok
then
@
log
"Mismatch on "
+
i
+
": content is '"
+
p
.
content
+
"', range in corpus is '"
+
expected
+
"'."
ok
null
# Fake two-phase / pagination support, used for HTML documents
getPageIndex
:
->
0
getPageCount
:
->
1
getPageIndexForPos
:
->
0
isPageMapped
:
->
true
h/static/scripts/vendor/dom_text_mapper.js
0 → 100644
View file @
94465e93
// Generated by CoffeeScript 1.7.1
(
function
()
{
// CoffeeScript runtime helpers shared by the rest of the module.
// NOTE: every `return` keeps its operand on the same line; a line break
// there would make the engine insert a semicolon and drop the value.

// Returns a wrapper that always invokes `fn` with `this` set to `me`,
// forwarding whatever arguments the wrapper receives.
var __bind = function(fn, me) {
  return function() {
    return fn.apply(me, arguments);
  };
};

// Borrowed Array#slice, used to copy array-likes such as `arguments`.
var __slice = [].slice;

// Array#indexOf, with a fallback for engines that lack it. The fallback
// skips holes in sparse arrays (`i in this`) and compares with `===`.
var __indexOf = [].indexOf || function(item) {
  for (var i = 0, l = this.length; i < l; i++) {
    if (i in this && this[i] === item) {
      return i;
    }
  }
  return -1;
};
window
.
DomTextMapper
=
(
function
()
{
// Module-private settings. WHITESPACE is only declared here; this block
// does not assign it.
var CONTEXT_LEN,
  SELECT_CHILDREN_INSTEAD,
  USE_EMPTY_TEXT_WORKAROUND,
  USE_TABLE_TEXT_WORKAROUND,
  WHITESPACE;

/**
 * Static probe telling callers whether this mapper can be used.
 *
 * @returns {boolean} always `true`.
 */
DomTextMapper.applicable = function() {
  return true;
};

// Feature switches for the browser-specific selection workarounds.
USE_TABLE_TEXT_WORKAROUND = true;
USE_EMPTY_TEXT_WORKAROUND = true;

// Lower-case tag names that get special treatment when a node is selected.
SELECT_CHILDREN_INSTEAD = [
  "thead", "tbody", "tfoot", "ol", "a", "caption", "p", "span", "div",
  "h1", "h2", "h3", "h4", "h5", "h6", "ul", "li", "form"
];

// Number of characters of surrounding text used as context.
CONTEXT_LEN = 32;

// Count of mappers created so far; the constructor uses it for default ids.
DomTextMapper.instances = 0;
/**
 * Builds a mapper and points it at the whole document.
 *
 * @param {string} [id] - Label used as a prefix in log output. When it is
 *   null or undefined, an id of the form "d-t-m #<n>" is generated from the
 *   running instance counter.
 */
function DomTextMapper(id) {
  var ordinal;

  this.id = id;
  // Pin the change handler to this instance so the very same function
  // object can be passed to addEventListener / removeEventListener.
  this._onChange = __bind(this._onChange, this);
  this.setRealRoot();

  ordinal = (DomTextMapper.instances += 1);
  if (this.id === null || this.id === undefined) {
    this.id = "d-t-m #" + ordinal;
  }
}
/**
 * Writes a message to the console, prefixed with this mapper's id.
 *
 * @param {...*} msg - Values to print after the "<id>: " prefix.
 * @returns {*} whatever `console.log` returns.
 */
DomTextMapper.prototype.log = function() {
  var parts = [this.id, ": "];
  // Append every argument as-is so console.log formats each one itself.
  parts.push.apply(parts, arguments);
  // The call must stay on the `return` line, otherwise it is never reached.
  return console.log.apply(console, parts);
};
/**
 * Handler for "domChange" events: records the change, refreshes the cached
 * data for the affected node and marks the cache as freshly scanned.
 *
 * @param {Object} event - Event whose `target` is the changed node.
 *   `event.data` is forwarded as a third argument to performUpdateOnNode.
 * @returns {number} the new `lastScanned` timestamp.
 */
DomTextMapper.prototype._onChange = function(event) {
  this.documentChanged();
  this.performUpdateOnNode(event.target, false, event.data);
  // Stamp after the update so the cache counts as newer than the change.
  this.lastScanned = this.timestamp();
  return this.lastScanned;
};
/**
 * Switches the root node and moves the "domChange" subscription from the
 * previous root (if any) to the new one.
 *
 * @param {Object} node - New root; must provide addEventListener.
 * @returns {Object} the node that was passed in.
 */
DomTextMapper.prototype._changeRootNode = function(node) {
  var previousRoot = this.rootNode;
  if (previousRoot != null) {
    previousRoot.removeEventListener("domChange", this._onChange);
  }
  this.rootNode = node;
  this.rootNode.addEventListener("domChange", this._onChange);
  // Callers store this return value as their path start node.
  return node;
};
/**
 * Restricts all operations to the sub-tree that starts at `rootNode`.
 * The node is used both as the root and as the path start node, and the
 * top-level `window` becomes the working window.
 *
 * @param {Object} rootNode - Node to use as the new root.
 * @returns {Object} the new path start node (the same node).
 */
DomTextMapper.prototype.setRootNode = function(rootNode) {
  this.rootWin = window;
  this.pathStartNode = this._changeRootNode(rootNode);
  return this.pathStartNode;
};
/**
 * Restricts all operations to the sub-tree of the element with the given id
 * in the top-level document.
 *
 * @param {string} rootId - Id of the element to use as root.
 * @returns {Object} the new path start node.
 * @throws {Error} if no element with that id exists. Previously this failed
 *   later with a TypeError, after the old root had already been dropped.
 */
DomTextMapper.prototype.setRootId = function(rootId) {
  var element = document.getElementById(rootId);
  if (element == null) {
    throw new Error("Can't find element with specified ID!");
  }
  return this.setRootNode(element);
};
/**
 * Makes the document inside the given iframe the root for all operations.
 *
 * @param {string} iframeId - Id of the iframe element in the top document.
 * @returns {Object} the new path start node (the value of `getBody()`).
 * @throws {Error} if the iframe cannot be found, or if it has no
 *   `contentWindow`.
 */
DomTextMapper.prototype.setRootIframe = function(iframeId) {
  var iframe = window.document.getElementById(iframeId);
  if (iframe == null) {
    throw new Error("Can't find iframe with specified ID!");
  }
  // NOTE: rootWin is overwritten before the check below, so a failed call
  // leaves it null/undefined (unchanged from the original behavior).
  this.rootWin = iframe.contentWindow;
  if (this.rootWin == null) {
    throw new Error("Can't access contents of the specified iframe!");
  }
  this._changeRootNode(this.rootWin.document);
  this.pathStartNode = this.getBody();
  return this.pathStartNode;
};
/**
 * Returns the path of the current path start node.
 *
 * @returns {string} the result of `getPathTo(this.pathStartNode)`.
 */
DomTextMapper.prototype.getDefaultPath = function() {
  return this.getPathTo(this.pathStartNode);
};
/**
 * Restores the default configuration: the top-level `window` is the working
 * window, `document` is the root node, and the path start node is whatever
 * `getBody()` returns.
 *
 * @returns {Object} the new path start node.
 */
DomTextMapper.prototype.setRealRoot = function() {
  this.rootWin = window;
  this._changeRootNode(document);
  // getBody() reads this.rootWin, so it must run after the line above.
  this.pathStartNode = this.getBody();
  return this.pathStartNode;
};
/**
 * Records that the document has changed, by storing the current timestamp
 * in `lastDOMChange`.
 *
 * @returns {number} the stored timestamp.
 */
DomTextMapper.prototype.documentChanged = function() {
  this.lastDOMChange = this.timestamp();
  return this.lastDOMChange;
};
/**
 * Stores the given content in `expectedContent`.
 *
 * @param {string} content - Value to remember.
 * @returns {string} the stored content.
 */
DomTextMapper.prototype.setExpectedContent = function(content) {
  this.expectedContent = content;
  return this.expectedContent;
};
/**
 * Rebuilds the path map (`this.path`) and the corpus (`this._corpus`) for
 * the sub-tree below the path start node.
 *
 * Does nothing when `domStableSince(lastScanned)` reports that the cache is
 * still valid, or when the path start node is not contained in its
 * document's body.
 *
 * @returns {null|undefined} `null` after a completed scan, `undefined` when
 *   the scan was skipped.
 */
DomTextMapper.prototype.scan = function() {
  var node, path;

  if (this.domStableSince(this.lastScanned)) {
    return;
  }
  if (!this.pathStartNode.ownerDocument.body.contains(this.pathStartNode)) {
    // Start node is not attached to its document: skip the scan.
    return;
  }

  this.saveSelection();
  try {
    // Phase I: walk the tree and record every path.
    this.path = {};
    this.traverseSubTree(this.pathStartNode, this.getDefaultPath());

    // Phase II: compute character offsets for the recorded paths.
    path = this.getPathTo(this.pathStartNode);
    node = this.path[path].node;
    this.collectPositions(node, path, null, 0, 0);
  } finally {
    // Always hand the selection back, so a failed scan does not leave a
    // saved selection behind.
    this.restoreSelection();
  }

  this.lastScanned = this.timestamp();
  this._corpus = this.path[path].content;
  return null;
};
/**
 * Selects the node recorded for the given path and optionally scrolls to it.
 *
 * @param {string} path - Key into the path map built by scan().
 * @param {boolean} [scroll=false] - Forwarded to selectNode.
 * @returns {*} whatever `selectNode` returns.
 * @throws {Error} if the path map has no entry for `path`.
 */
DomTextMapper.prototype.selectPath = function(path, scroll) {
  var info, node;
  if (scroll == null) {
    scroll = false;
  }
  info = this.path[path];
  if (info == null) {
    throw new Error("I have no info about a node at " + path);
  }
  // Use the recorded node; look it up by path only if none was recorded.
  node = info.node || this.lookUpNode(info.path);
  return this.selectNode(node, scroll);
};
/**
 * Refreshes the cached path and position data after `node` has changed.
 *
 * If the node is unknown, or it was replaced or its content changed, the
 * update escalates to the parent node. Otherwise the cached entries below
 * the node are dropped, the sub-tree is traversed again and its positions
 * are recomputed.
 *
 * @param {Object} node - The node that changed.
 * @param {boolean} [escalating=false] - True for the recursive calls made
 *   while walking up the tree; only the outermost call saves and restores
 *   the selection.
 * @returns {*} `undefined`, or the result of `restoreSelection()` when the
 *   outermost call runs to the end.
 * @throws {Error} if `node` is null, if the parent of the node has no path
 *   info, or if the path start node itself was replaced or changed.
 */
DomTextMapper.prototype.performUpdateOnNode = function(node, escalating) {
  var oldIndex, p, parentNode, parentPath, parentPathInfo, path, pathInfo,
    pathsToDrop, prefix, i;

  if (escalating == null) {
    escalating = false;
  }
  if (node == null) {
    throw new Error("Called performUpdate with a null node!");
  }
  if (this.path == null) {
    // No scan has been done yet, so there is nothing to update.
    return;
  }

  if (!escalating) {
    this.saveSelection();
  }

  path = this.getPathTo(node);
  pathInfo = this.path[path];

  if (pathInfo == null) {
    // Unknown node: update the parent instead.
    this.performUpdateOnNode(node.parentNode, true);
    if (!escalating) {
      this.restoreSelection();
    }
    return;
  }

  if (pathInfo.node === node &&
      pathInfo.content === this.getNodeContent(node, false)) {
    // Same node, same overall content: only the sub-tree needs refreshing.
    // Collect the keys first, then delete them.
    prefix = path + "/";
    pathsToDrop = [];
    for (p in this.path) {
      if (this.stringStartsWith(p, prefix)) {
        pathsToDrop.push(p);
      }
    }
    for (i = 0; i < pathsToDrop.length; i++) {
      delete this.path[pathsToDrop[i]];
    }

    this.traverseSubTree(node, path);

    if (pathInfo.node === this.pathStartNode) {
      // The whole mapped document ended up being rescanned.
      this.collectPositions(node, path, null, 0, 0);
    } else {
      parentPath = this.parentPath(path);
      parentPathInfo = this.path[parentPath];
      if (parentPathInfo == null) {
        throw new Error("While performing update on node " + path +
          ", no path info found for parent path: " + parentPath);
      }
      // Offset inside the parent's content where this node may start:
      // 0 for a first child, otherwise where the previous sibling ended.
      oldIndex = node === node.parentNode.firstChild
        ? 0
        : this.path[this.getPathTo(node.previousSibling)].end -
          parentPathInfo.start;
      this.collectPositions(node, path, parentPathInfo.content,
        parentPathInfo.start, oldIndex);
    }
  } else {
    // The node was replaced, or its overall content changed.
    if (pathInfo.node !== this.pathStartNode) {
      if (node.parentNode != null) {
        parentNode = node.parentNode;
      } else {
        // No parent on the node: resolve the parent through its path.
        parentPath = this.parentPath(path);
        parentNode = this.lookUpNode(parentPath);
      }
      this.performUpdateOnNode(parentNode, true);
    } else {
      throw new Error("Can not keep up with the changes, since even the node configured as path start node was replaced.");
    }
  }

  if (!escalating) {
    return this.restoreSelection();
  }
};
/**
 * Return the cached entry for a path.
 *
 * @param {string} path - key into this.path.
 * @returns {Object} the entry stored under the path.
 * @throws {Error} when this.path is not set yet, or holds no such entry.
 */
DomTextMapper.prototype.getInfoForPath = function(path) {
  var result;
  if (this.path == null) {
    throw new Error("Can't get info before running a scan() !");
  }
  result = this.path[path];
  if (result == null) {
    throw new Error("Found no info for path '" + path + "'!");
  }
  return result;
};
/**
 * Return the start position recorded for a path.
 *
 * @param {string} path - key into this.path.
 * @returns the entry's `start`, or - when the entry has none - the result
 *   of getFirstPosAfter() for the entry's node.
 */
DomTextMapper.prototype.getStartPosForPath = function(path) {
  var info = this.getInfoForPath(path);
  if (info.start != null) {
    return info.start;
  }
  return this.getFirstPosAfter(info.node);
};
/**
 * Find the first recorded start position that follows a node.
 *
 * Looks at the next sibling; when its entry has no `start`, the search
 * continues from that sibling. When there is no next sibling, the search
 * continues from the parent node.
 *
 * @param {Node} node - the node to start from.
 * @returns the first `start` value found.
 */
DomTextMapper.prototype.getFirstPosAfter = function(node) {
  var info, next;
  next = node.nextSibling;
  if (next == null) {
    return this.getFirstPosAfter(node.parentNode);
  }
  info = this.path[this.getPathTo(next)];
  if (info.start != null) {
    return info.start;
  }
  return this.getFirstPosAfter(next);
};
/**
 * Return the end position recorded for a path.
 *
 * @param {string} path - key into this.path.
 * @returns the entry's `end`, or - when the entry has none - the result of
 *   getFirstPosBefore() for the entry's node.
 */
DomTextMapper.prototype.getEndPosForPath = function(path) {
  var info = this.getInfoForPath(path);
  if (info.end != null) {
    return info.end;
  }
  return this.getFirstPosBefore(info.node);
};
/**
 * Find the first recorded end position that precedes a node.
 *
 * Looks at the previous sibling; when its entry has no `end`, the search
 * continues from that sibling. When there is no previous sibling, the
 * search continues from the parent node.
 *
 * @param {Node} node - the node to start from.
 * @returns the first `end` value found.
 */
DomTextMapper.prototype.getFirstPosBefore = function(node) {
  var info, previous;
  previous = node.previousSibling;
  if (previous == null) {
    return this.getFirstPosBefore(node.parentNode);
  }
  info = this.path[this.getPathTo(previous)];
  if (info.end != null) {
    return info.end;
  }
  return this.getFirstPosBefore(previous);
};
/**
 * Return the cached entry that belongs to a node.
 *
 * @param {Node} node - must not be null.
 * @returns {Object} the result of getInfoForPath() for the node's path.
 * @throws {Error} when node is null or undefined.
 */
DomTextMapper.prototype.getInfoForNode = function(node) {
  if (node == null) {
    throw new Error("Called getInfoForNode(node) with null node!");
  }
  return this.getInfoForPath(this.getPathTo(node));
};
/**
 * Return the start position that belongs to a node.
 *
 * @param {Node} node - must not be null.
 * @returns the result of getStartPosForPath() for the node's path.
 * @throws {Error} when node is null or undefined.
 */
DomTextMapper.prototype.getStartPosForNode = function(node) {
  if (node == null) {
    throw new Error("Called getStartPosForNode(node) with null node!");
  }
  return this.getStartPosForPath(this.getPathTo(node));
};
/**
 * Return the end position that belongs to a node.
 *
 * @param {Node} node - must not be null.
 * @returns the result of getEndPosForPath() for the node's path.
 * @throws {Error} when node is null or undefined.
 */
DomTextMapper.prototype.getEndPosForNode = function(node) {
  if (node == null) {
    throw new Error("Called getEndPosForNode(node) with null node!");
  }
  return this.getEndPosForPath(this.getPathTo(node));
};
/**
 * Compute the mappings for several character ranges.
 *
 * @param {Array<{start: number, end: number}>} charRanges - the ranges.
 * @returns {Array} one getMappingsForCharRange() result per range, in the
 *   order of the input.
 */
DomTextMapper.prototype.getMappingsForCharRanges = function(charRanges) {
  var charRange, i, count, results;
  results = [];
  for (i = 0, count = charRanges.length; i < count; i++) {
    charRange = charRanges[i];
    results.push(this.getMappingsForCharRange(charRange.start, charRange.end));
  }
  return results;
};
/**
 * Return the content recorded for a path.
 *
 * @param {string} [path] - key into this.path; getDefaultPath() is used
 *   when it is omitted or null.
 * @returns the `content` of the entry.
 */
DomTextMapper.prototype.getContentForPath = function(path) {
  if (path == null) {
    path = this.getDefaultPath();
  }
  return this.path[path].content;
};
/**
 * Return the length recorded for a path.
 *
 * @param {string} [path] - key into this.path; getDefaultPath() is used
 *   when it is omitted or null.
 * @returns the `length` of the entry.
 */
DomTextMapper.prototype.getLengthForPath = function(path) {
  if (path == null) {
    path = this.getDefaultPath();
  }
  return this.path[path].length;
};
/**
 * @returns {number} the length of the stored corpus (this._corpus).
 */
DomTextMapper.prototype.getDocLength = function() {
  return this._corpus.length;
};
/**
 * @returns the stored corpus (this._corpus).
 */
DomTextMapper.prototype.getCorpus = function() {
  return this._corpus;
};
DomTextMapper
.
prototype
.
getContextForCharRange
=
function
(
start
,
end
)
{
var
prefix
,
prefixStart
,
suffix
;
if
(
start
<
0
)
{
throw
Error
(
"Negative range start is invalid!"
);
}
if
(
end
>
this
.
_corpus
.
length
)
{
throw
Error
(
"Range end is after the end of corpus!"
);
}
prefixStart
=
Math
.
max
(
0
,
start
-
CONTEXT_LEN
);
prefix
=
this
.
_corpus
.
slice
(
prefixStart
,
start
);
suffix
=
this
.
_corpus
.
slice
(
end
,
end
+
CONTEXT_LEN
);
return
[
prefix
.
trim
(),
suffix
.
trim
()];
};
/**
 * Map a character range of the corpus to the nodes that hold it.
 *
 * Calls scan() first, then builds one mapping for every atomic entry of
 * this.path whose [start, end) region overlaps the requested range, and
 * creates a DOM range that spans from the first to the last mapping.
 *
 * Each mapping has an `element` (the path entry) and a `wanted` string.
 * Fully covered entries get `full: true`; partially covered text nodes get
 * `start` and/or `end` offsets inside the entry's content, plus the
 * `startCorrected` / `endCorrected` / `yields` values derived through
 * computeSourcePositions(). Partially covered non-text nodes are selected
 * as a whole.
 *
 * @param {number} start - start of the wanted range in the corpus.
 * @param {number} end - end of the wanted range in the corpus.
 * @returns {{sections: Array}} a single section holding `mappings`,
 *   `realRange`, `rangeInfo` and `safeParent`.
 * @throws {Error} when start or end is missing, or nothing overlaps.
 */
DomTextMapper.prototype.getMappingsForCharRange = function(start, end) {
  var endInfo, endMapping, endNode, endOffset, endPath, info, mappings, p, r,
    result, self, startInfo, startMapping, startNode, startOffset, startPath,
    _ref;
  if (!((start != null) && (end != null))) {
    throw new Error("start and end is required!");
  }
  this.scan();
  self = this;
  mappings = [];

  // Describe how one atomic entry contributes to the wanted range.
  function buildMapping(info) {
    var mapping, node;
    mapping = {
      element: info
    };
    node = info.node;
    if (start <= info.start && info.end <= end) {
      // The whole entry lies inside the wanted range.
      mapping.full = true;
      mapping.wanted = info.content;
      mapping.yields = info.content;
      mapping.startCorrected = 0;
      mapping.endCorrected = 0;
    } else if (node.nodeType === Node.TEXT_NODE) {
      if (start <= info.start) {
        // The range ends inside this entry.
        mapping.end = end - info.start;
        mapping.wanted = info.content.substr(0, mapping.end);
      } else if (info.end <= end) {
        // The range starts inside this entry.
        mapping.start = start - info.start;
        mapping.wanted = info.content.substr(mapping.start);
      } else {
        // The range starts and ends inside this entry.
        mapping.start = start - info.start;
        mapping.end = end - info.start;
        mapping.wanted = info.content.substr(mapping.start, mapping.end - mapping.start);
      }
      self.computeSourcePositions(mapping);
      mapping.yields = node.data.substr(mapping.startCorrected,
        mapping.endCorrected - mapping.startCorrected);
    } else if ((node.nodeType === Node.ELEMENT_NODE) &&
        (node.tagName.toLowerCase() === "img")) {
      self.log("Can not select a sub-string from the title of an image. Selecting all.");
      mapping.full = true;
      mapping.wanted = info.content;
    } else {
      self.log("Warning: no idea how to handle partial mappings for node type " + node.nodeType);
      if (node.tagName != null) {
        self.log("Tag: " + node.tagName);
      }
      self.log("Selecting all.");
      mapping.full = true;
      mapping.wanted = info.content;
    }
    return mapping;
  }

  _ref = this.path;
  for (p in _ref) {
    info = _ref[p];
    if (info.atomic && this.regions_overlap(info.start, info.end, start, end)) {
      mappings.push(buildMapping(info));
    }
  }
  if (mappings.length === 0) {
    this.log("Collecting nodes for [" + start + ":" + end + "]");
    this.log("Should be: '" + this._corpus.slice(start, end) + "'.");
    throw new Error("No mappings found for [" + start + ":" + end + "]!");
  }
  mappings = mappings.sort(function(a, b) {
    return a.element.start - b.element.start;
  });

  r = this.rootWin.document.createRange();

  startMapping = mappings[0];
  startNode = startMapping.element.node;
  startPath = startMapping.element.path;
  startOffset = startMapping.startCorrected;
  if (startMapping.full) {
    r.setStartBefore(startNode);
    startInfo = startPath;
  } else {
    r.setStart(startNode, startOffset);
    startInfo = startPath + ":" + startOffset;
  }

  endMapping = mappings[mappings.length - 1];
  endNode = endMapping.element.node;
  endPath = endMapping.element.path;
  endOffset = endMapping.endCorrected;
  if (endMapping.full) {
    r.setEndAfter(endNode);
    endInfo = endPath;
  } else {
    r.setEnd(endNode, endOffset);
    endInfo = endPath + ":" + endOffset;
  }

  result = {
    mappings: mappings,
    realRange: r,
    rangeInfo: {
      startPath: startPath,
      startOffset: startOffset,
      startInfo: startInfo,
      endPath: endPath,
      endOffset: endOffset,
      endInfo: endInfo
    },
    safeParent: r.commonAncestorContainer
  };
  return {
    sections: [result]
  };
};
/**
 * @returns {number} the current time, as returned by Date#getTime().
 */
DomTextMapper.prototype.timestamp = function() {
  return new Date().getTime();
};
/**
 * Tell whether a string begins with a prefix.
 *
 * @param {string} string - the string to inspect.
 * @param {string} prefix - must be non-empty.
 * @returns {boolean}
 * @throws {Error} when the prefix is empty or missing.
 */
DomTextMapper.prototype.stringStartsWith = function(string, prefix) {
  if (!prefix) {
    throw new Error("Requires a non-empty prefix!");
  }
  return string.slice(0, prefix.length) === prefix;
};
/**
 * Tell whether a string ends with a suffix.
 *
 * @param {string} string - the string to inspect.
 * @param {string} suffix - must be non-empty.
 * @returns {boolean}
 * @throws {Error} when the suffix is empty or missing.
 */
DomTextMapper.prototype.stringEndsWith = function(string, suffix) {
  if (!suffix) {
    throw new Error("Requires a non-empty suffix!");
  }
  return string.slice(string.length - suffix.length, string.length) === suffix;
};
/**
 * Return the part of a path that precedes its last "/".
 *
 * @param {string} path
 * @returns {string} the leading part; "" when the path contains no "/".
 */
DomTextMapper.prototype.parentPath = function(path) {
  return path.substr(0, path.lastIndexOf("/"));
};
/**
 * Tell whether the DOM changed after a given time.
 *
 * @param {number} timestamp - the time to compare with.
 * @returns {boolean} true when this.lastDOMChange is later than the
 *   timestamp; also true when either value is missing.
 */
DomTextMapper.prototype.domChangedSince = function(timestamp) {
  if (this.lastDOMChange == null || timestamp == null) {
    return true;
  }
  return this.lastDOMChange > timestamp;
};
/**
 * Negation of domChangedSince().
 *
 * @param {number} timestamp - the time to compare with.
 * @returns {boolean}
 */
DomTextMapper.prototype.domStableSince = function(timestamp) {
  return !this.domChangedSince(timestamp);
};
/**
 * Return the name used for a node inside path strings.
 *
 * @param {Node} node
 * @returns {string} "text()", "comment()" or "cdata-section()" for the
 *   "#text", "#comment" and "#cdata-section" node names; the unchanged
 *   nodeName for everything else.
 */
DomTextMapper.prototype.getProperNodeName = function(node) {
  var nodeName = node.nodeName;
  switch (nodeName) {
    case "#text":
      return "text()";
    case "#comment":
      return "comment()";
    case "#cdata-section":
      return "cdata-section()";
    default:
      return nodeName;
  }
};
/**
 * Return the 1-based position of a node among the siblings that share its
 * node name.
 *
 * @param {Node} node
 * @returns {number} 1 plus the number of preceding siblings with the same
 *   nodeName.
 */
DomTextMapper.prototype.getNodePosition = function(node) {
  var current, pos;
  pos = 0;
  // Walk backwards from the node itself, so the node is counted as well.
  for (current = node; current; current = current.previousSibling) {
    if (current.nodeName === node.nodeName) {
      pos++;
    }
  }
  return pos;
};
/**
 * Build the path segment for a node.
 *
 * @param {Node} node
 * @returns {string} the proper node name, followed by "[pos]" when the
 *   node's position is greater than 1.
 */
DomTextMapper.prototype.getPathSegment = function(node) {
  var name, pos;
  name = this.getProperNodeName(node);
  pos = this.getNodePosition(node);
  if (pos > 1) {
    return name + "[" + pos + "]";
  }
  return name + "";
};
/**
 * Build the path string that leads from this.rootNode to a node.
 *
 * The segments of the node and its ancestors are joined with "/". The
 * result starts with "./" when the root node has an ownerDocument and with
 * "/" otherwise; a trailing "/" is removed.
 *
 * @param {Node} node - this.rootNode or one of its descendants.
 * @returns {string} the path.
 * @throws {Error} when the parent chain ends before reaching this.rootNode.
 */
DomTextMapper.prototype.getPathTo = function(node) {
  var xpath = '';
  while (node !== this.rootNode) {
    if (node == null) {
      throw new Error("Called getPathTo on a node which was not a descendant of @rootNode. " + this.rootNode);
    }
    xpath = this.getPathSegment(node) + '/' + xpath;
    node = node.parentNode;
  }
  xpath = (this.rootNode.ownerDocument != null ? './' : '/') + xpath;
  return xpath.replace(/\/$/, '');
};
/**
 * Record the content of a node and of all its descendants in this.path.
 *
 * For every visited node an entry {path, content, length, node} is stored
 * under the node's path. The path being processed is kept in
 * this.underTraverse.
 *
 * @param {Node} node - root of the sub-tree to read.
 * @param {string} path - path of that node.
 * @param {boolean} [invisible=false] - true when an ancestor reported empty
 *   content; content found below such an ancestor is logged as suspicious.
 * @param {boolean} [verbose=false] - log every visited path.
 * @returns {null}
 */
DomTextMapper.prototype.traverseSubTree = function(node, path, invisible, verbose) {
  var children, content, count, i, subpath;
  if (invisible == null) {
    invisible = false;
  }
  if (verbose == null) {
    verbose = false;
  }
  this.underTraverse = path;
  content = this.getNodeContent(node, false);
  this.path[path] = {
    path: path,
    content: content,
    length: content.length,
    node: node
  };
  if (content.length) {
    if (verbose) {
      this.log("Collected info about path " + path);
    }
    if (invisible) {
      this.log("Something seems to be wrong. I see visible content @ " + path + ", while some of the ancestor nodes reported empty contents. Probably a new selection API bug....");
      this.log("Anyway, text is '" + content + "'.");
    }
  } else {
    if (verbose) {
      this.log("Found no content at path " + path);
    }
    // Everything below a node without content is expected to be empty too.
    invisible = true;
  }
  if (node.hasChildNodes()) {
    children = node.childNodes;
    for (i = 0, count = children.length; i < count; i++) {
      subpath = path + '/' + this.getPathSegment(children[i]);
      this.traverseSubTree(children[i], subpath, invisible, verbose);
    }
  }
  return null;
};
/**
 * @returns the first element with the tag name "body" in the document of
 *   this.rootWin.
 */
DomTextMapper.prototype.getBody = function() {
  var bodies = this.rootWin.document.getElementsByTagName("body");
  return bodies[0];
};
/**
 * Tell whether two regions share at least one position.
 *
 * Regions that merely touch (one ends where the other starts) do not
 * overlap.
 *
 * @param {number} start1 - start of the first region.
 * @param {number} end1 - end of the first region.
 * @param {number} start2 - start of the second region.
 * @param {number} end2 - end of the second region.
 * @returns {boolean}
 */
DomTextMapper.prototype.regions_overlap = function(start1, end1, start2, end2) {
  return start1 < end2 && start2 < end1;
};
/**
 * Resolve a path string to a node.
 *
 * The path is evaluated with the document's evaluate() method, using
 * this.rootNode as context node.
 *
 * @param {string} path - the path expression to evaluate.
 * @returns the first node yielded by the evaluation result.
 */
DomTextMapper.prototype.lookUpNode = function(path) {
  var doc, results;
  // A root node without ownerDocument is used as the document itself.
  doc = this.rootNode.ownerDocument != null
    ? this.rootNode.ownerDocument
    : this.rootNode;
  // Result type 0 is XPathResult.ANY_TYPE.
  results = doc.evaluate(path, this.rootNode, null, 0, null);
  return results.iterateNext();
};
/**
 * Remember the ranges of the current selection of this.rootWin.
 *
 * The ranges are stored in this.savedSelection. A stack trace of the call
 * is stored in this.selectionSaved, so that a later conflicting call can
 * report where the pending save was made.
 *
 * @returns {string} the stored stack trace.
 * @throws {Error} when a saved selection is still pending.
 */
DomTextMapper.prototype.saveSelection = function() {
  var count, i, ranges, sel;
  if (this.savedSelection != null) {
    this.log("Selection saved at:");
    this.log(this.selectionSaved);
    throw new Error("Selection already saved!");
  }
  sel = this.rootWin.getSelection();
  ranges = [];
  for (i = 0, count = sel.rangeCount; i < count; i++) {
    ranges.push(sel.getRangeAt(i));
  }
  this.savedSelection = ranges;
  this.selectionSaved = (new Error("selection was saved here")).stack;
  return this.selectionSaved;
};
/**
 * Re-apply the ranges stored by saveSelection() and forget them.
 *
 * The current selection of this.rootWin is cleared first.
 *
 * @returns {boolean} the result of deleting this.savedSelection.
 * @throws {Error} when no selection has been saved.
 */
DomTextMapper.prototype.restoreSelection = function() {
  var count, i, ranges, sel;
  if (this.savedSelection == null) {
    throw new Error("No selection to restore.");
  }
  sel = this.rootWin.getSelection();
  sel.removeAllRanges();
  ranges = this.savedSelection;
  for (i = 0, count = ranges.length; i < count; i++) {
    sel.addRange(ranges[i]);
  }
  return delete this.savedSelection;
};
/**
 * Make a node the only content of the selection of this.rootWin.
 *
 * - Elements that have children and whose tag is listed in
 *   SELECT_CHILDREN_INSTEAD are selected from their first to their last
 *   child.
 * - Text nodes directly inside a table are left unselected while
 *   USE_TABLE_TEXT_WORKAROUND is on.
 * - Every other node is selected as a whole; a failure to do so is logged,
 *   unless USE_EMPTY_TEXT_WORKAROUND is on and the node is whitespace.
 *
 * @param {Node} node - the node to select; must not be null.
 * @param {boolean} [scroll=false] - scroll the node (or its closest
 *   ancestor that has scrollIntoViewIfNeeded) into view.
 * @returns {Selection} the selection object of this.rootWin.
 * @throws {Error} when node is null or undefined.
 */
DomTextMapper.prototype.selectNode = function(node, scroll) {
  var children, realRange, sel, sn;
  if (scroll == null) {
    scroll = false;
  }
  if (node == null) {
    throw new Error("Called selectNode with null node!");
  }
  sel = this.rootWin.getSelection();
  sel.removeAllRanges();
  realRange = this.rootWin.document.createRange();

  if (node.nodeType === Node.ELEMENT_NODE && node.hasChildNodes() &&
      __indexOf.call(SELECT_CHILDREN_INSTEAD, node.tagName.toLowerCase()) >= 0) {
    children = node.childNodes;
    realRange.setStartBefore(children[0]);
    realRange.setEndAfter(children[children.length - 1]);
    sel.addRange(realRange);
  } else if (USE_TABLE_TEXT_WORKAROUND && node.nodeType === Node.TEXT_NODE &&
      node.parentNode.tagName.toLowerCase() === "table") {
    // Deliberately nothing: such text nodes stay unselected.
  } else {
    try {
      realRange.setStartBefore(node);
      realRange.setEndAfter(node);
      sel.addRange(realRange);
    } catch (exception) {
      if (!(USE_EMPTY_TEXT_WORKAROUND && this.isWhitespace(node))) {
        this.log("Warning: failed to scan element @ " + this.underTraverse);
        this.log("Content is: " + node.innerHTML);
        this.log("We won't be able to properly anchor to any text inside this element.");
      }
    }
  }

  if (scroll) {
    // Climb up to the closest node that offers scrollIntoViewIfNeeded.
    sn = node;
    while ((sn != null) && (sn.scrollIntoViewIfNeeded == null)) {
      sn = sn.parentNode;
    }
    if (sn != null) {
      sn.scrollIntoViewIfNeeded();
    } else {
      this.log("Failed to scroll to element. (Browser does not support scrollIntoViewIfNeeded?)");
    }
  }
  return sel;
};
/**
 * Read the text of a selection in normalised form.
 *
 * The text is trimmed, every newline becomes a space, and every run of two
 * or more whitespace characters is collapsed into a single space.
 *
 * @param {Selection} [sel] - the selection to read; defaults to the current
 *   selection of this.rootWin.
 * @returns {string} the normalised text.
 */
DomTextMapper.prototype.readSelectionText = function(sel) {
  var text;
  if (!sel) {
    sel = this.rootWin.getSelection();
  }
  text = sel.toString().trim();
  return text.replace(/\n/g, " ").replace(/\s{2,}/g, " ");
};
/**
 * Return the text obtained by selecting a node and reading the selection.
 *
 * @param {Node} node - the node to read.
 * @param {boolean} [shouldRestoreSelection=true] - save the selection
 *   before, and restore it after, reading the node.
 * @returns {string} the result of readSelectionText() for the node.
 */
DomTextMapper.prototype.getNodeSelectionText = function(node, shouldRestoreSelection) {
  var text;
  if (shouldRestoreSelection == null) {
    shouldRestoreSelection = true;
  }
  if (shouldRestoreSelection) {
    this.saveSelection();
  }
  text = this.readSelectionText(this.selectNode(node));
  if (shouldRestoreSelection) {
    this.restoreSelection();
  }
  return text;
};
/**
 * Translate offsets in the displayed content of a text node into offsets
 * in the node's raw data.
 *
 * The raw data (with newlines turned into spaces) and the displayed content
 * are walked in parallel; characters of the raw data that do not equal the
 * next displayed character are skipped. The offsets found are stored in
 * match.startCorrected and match.endCorrected.
 *
 * @param {Object} match - a mapping with `element` (a path entry that has
 *   `node.data` and `content`) and optional `start` / `end` offsets inside
 *   the displayed content (defaults: 0 and the content length).
 * @returns {null|undefined} undefined for an empty range (end offset 0),
 *   null otherwise.
 * @throws {Error} when the raw data is exhausted before both offsets were
 *   found, i.e. the displayed content can not be aligned with the raw data.
 */
DomTextMapper.prototype.computeSourcePositions = function(match) {
  var displayEnd, displayIndex, displayStart, displayText, sourceEnd,
    sourceIndex, sourceStart, sourceText;
  sourceText = match.element.node.data.replace(/\n/g, " ");
  displayText = match.element.content;
  displayStart = match.start != null ? match.start : 0;
  displayEnd = match.end != null ? match.end : displayText.length;
  if (displayEnd === 0) {
    match.startCorrected = 0;
    match.endCorrected = 0;
    return;
  }
  sourceIndex = 0;
  displayIndex = 0;
  while (!((sourceStart != null) && (sourceEnd != null))) {
    if (sourceIndex >= sourceText.length) {
      // Without this bound the scan would never end for texts that differ.
      throw new Error("Could not map display range [" + displayStart + ":" +
        displayEnd + "] back to the source text of the node.");
    }
    if (sourceText[sourceIndex] === displayText[displayIndex]) {
      if (displayIndex === displayStart) {
        sourceStart = sourceIndex;
      }
      displayIndex++;
      if (displayIndex === displayEnd) {
        sourceEnd = sourceIndex + 1;
      }
    }
    sourceIndex++;
  }
  match.startCorrected = sourceStart;
  match.endCorrected = sourceEnd;
  return null;
};
/**
 * Return the text content of a node.
 *
 * For the path start node, this.expectedContent is returned when it is
 * set; in every other case the content is read through
 * getNodeSelectionText().
 *
 * @param {Node} node - the node to read.
 * @param {boolean} [shouldRestoreSelection=true] - forwarded to
 *   getNodeSelectionText().
 * @returns {string} the content.
 */
DomTextMapper.prototype.getNodeContent = function(node, shouldRestoreSelection) {
  if (shouldRestoreSelection == null) {
    shouldRestoreSelection = true;
  }
  if (node === this.pathStartNode && (this.expectedContent != null)) {
    return this.expectedContent;
  }
  return this.getNodeSelectionText(node, shouldRestoreSelection);
};
/**
 * Compute where the content of a node - and of all its descendants - is
 * located, and store the result in the matching entries of this.path.
 *
 * Every visited entry receives `start`, `end` and `atomic`. The content of
 * a node is searched inside the content of its parent, starting at
 * `index`; the positions stored are offset by `parentIndex`.
 *
 * @param {Node} node - the node to process.
 * @param {string} path - path of the node; this.path must hold its entry.
 * @param {?string} [parentContent=null] - content of the parent node; when
 *   missing, `index` itself is used as the start offset.
 * @param {number} [parentIndex=0] - position at which the parent's content
 *   starts.
 * @param {number} [index=0] - offset inside the parent's content from which
 *   the node's content is searched.
 * @returns {number} the offset, inside the parent's content, at which the
 *   node's content ends; the unchanged `index` when the node has no content
 *   or its content is not found in the parent's content.
 */
DomTextMapper.prototype.collectPositions = function(node, path, parentContent, parentIndex, index) {
  var atomic, child, childPath, children, content, endIndex, i, newCount,
    nodeName, oldCount, pathInfo, pos, startIndex, typeCount;
  if (parentIndex == null) {
    parentIndex = 0;
  }
  if (index == null) {
    index = 0;
  }
  pathInfo = this.path[path];
  content = pathInfo != null ? pathInfo.content : void 0;
  if (!content) {
    // Nodes without content occupy an empty region at the current offset.
    pathInfo.start = parentIndex + index;
    pathInfo.end = parentIndex + index;
    pathInfo.atomic = false;
    return index;
  }
  startIndex = parentContent != null ? parentContent.indexOf(content, index) : index;
  if (startIndex === -1) {
    this.log("Content of this not is not present in content of parent, at path " + path);
    this.log("(Content: '" + content + "'.)");
    return index;
  }
  endIndex = startIndex + content.length;
  atomic = !node.hasChildNodes();
  pathInfo.start = parentIndex + startIndex;
  pathInfo.end = parentIndex + endIndex;
  pathInfo.atomic = atomic;
  if (!atomic) {
    children = node.childNodes;
    pos = 0;
    // Number of children seen so far, per node name; used to build the
    // "[n]" suffix of the child paths.
    typeCount = {};
    for (i = 0; i < children.length; i++) {
      child = children[i];
      nodeName = this.getProperNodeName(child);
      oldCount = typeCount[nodeName];
      newCount = oldCount != null ? oldCount + 1 : 1;
      typeCount[nodeName] = newCount;
      childPath = path + "/" + nodeName + (newCount > 1 ? "[" + newCount + "]" : "");
      pos = this.collectPositions(child, childPath, content, parentIndex + startIndex, pos);
    }
  }
  return endIndex;
};
// Matches strings that are empty or consist of whitespace only.
WHITESPACE = /^\s*$/;
/**
 * Tell whether a node holds nothing but whitespace.
 *
 * @param {Node} node
 * @returns {boolean} for text nodes: whether the data matches WHITESPACE;
 *   for elements: whether every child is whitespace (true for an element
 *   without children); false for every other node type.
 */
DomTextMapper.prototype.isWhitespace = function(node) {
  var children, i;
  switch (node.nodeType) {
    case Node.TEXT_NODE:
      return WHITESPACE.test(node.data);
    case Node.ELEMENT_NODE:
      children = node.childNodes;
      for (i = 0; i < children.length; i++) {
        if (!this.isWhitespace(children[i])) {
          return false;
        }
      }
      return true;
    default:
      return false;
  }
};
/**
 * Log inconsistencies found in this.path.
 *
 * Two checks are made: entries that lack the `atomic` flag are reported as
 * missing data, and atomic entries whose content differs from the matching
 * slice of the corpus are reported as mismatches.
 *
 * @returns {null}
 */
DomTextMapper.prototype._testMap = function() {
  var entries, entry, expected, key;
  entries = this.path;
  this.log("Verifying map info: was it all properly traversed?");
  for (key in entries) {
    if (entries[key].atomic == null) {
      this.log(key + " is missing data.");
    }
  }
  this.log("Verifying map info: do atomic elements match?");
  for (key in entries) {
    entry = entries[key];
    if (!entry.atomic) {
      continue;
    }
    expected = this._corpus.slice(entry.start, entry.end);
    if (entry.content !== expected) {
      this.log("Mismatch on " + key + ": content is '" + entry.content + "', range in corpus is '" + expected + "'.");
    }
  }
  return null;
};
/**
 * @returns {number} always 0.
 */
DomTextMapper.prototype.getPageIndex = function() {
  return 0;
};
/**
 * @returns {number} always 1.
 */
DomTextMapper.prototype.getPageCount = function() {
  return 1;
};
/**
 * @returns {number} always 0; arguments are ignored.
 */
DomTextMapper.prototype.getPageIndexForPos = function() {
  return 0;
};
/**
 * @returns {boolean} always true; arguments are ignored.
 */
DomTextMapper.prototype.isPageMapped = function() {
  return true;
};
return
DomTextMapper
;
})();
}).
call
(
this
);
h/static/scripts/vendor/dom_text_matcher.coffee
deleted
100644 → 0
View file @
c55570c6
# Text search library
class window.DomTextMatcher

  # Parameters:
  #  corpus: a function that returns the text to search in.
  #          (It is called every time the text is needed.)
  constructor: (@corpus) ->

  # Search for text using exact string matching
  #
  # Parameters:
  #  pattern: what to search for
  #
  #  distinct: forbid overlapping matches? (defaults to true)
  #
  #  caseSensitive: should the search be case sensitive? (defaults to false)
  #
  #
  # For the details about the returned data structure,
  # see the documentation of the _search() method.
  searchExact: (pattern, distinct = true, caseSensitive = false) ->
    if not @pm then @pm = new window.DTM_ExactMatcher
    @pm.setDistinct(distinct)
    @pm.setCaseSensitive(caseSensitive)
    @_search @pm, pattern

  # Search for text using regular expressions
  #
  # Parameters:
  #  pattern: what to search for
  #
  #  caseSensitive: should the search be case sensitive? (defaults to false)
  #
  # For the details about the returned data structure,
  # see the documentation of the _search() method.
  searchRegex: (pattern, caseSensitive = false) ->
    if not @rm then @rm = new window.DTM_RegexMatcher
    @rm.setCaseSensitive(caseSensitive)
    @_search @rm, pattern

  # Search for text using fuzzy text matching
  #
  # Parameters:
  #  pattern: what to search for
  #
  #  pos: where to start searching
  #
  #  caseSensitive: should the search be case sensitive? (defaults to false)
  #
  #  matchDistance and
  #  matchThreshold:
  #     fine-tuning parameters for the d-m-p library.
  #     See http://code.google.com/p/google-diff-match-patch/wiki/API for details.
  #
  # For the details about the returned data structure,
  # see the documentation of the _search() method.
  searchFuzzy: (pattern, pos, caseSensitive = false, options = {}) ->
    @ensureDMP()
    @dmp.setMatchDistance options.matchDistance ? 1000
    @dmp.setMatchThreshold options.matchThreshold ? 0.5
    @dmp.setCaseSensitive caseSensitive
    @_search @dmp, pattern, pos, options

  # Search for the text between a known prefix and suffix, using fuzzy
  # matching for the context.
  #
  # Parameters:
  #  prefix, suffix: the text expected before and after the wanted range
  #
  #  pattern: the text expected in the range (may be missing)
  #
  #  expectedStart, expectedEnd: where the range is expected (may be missing)
  #
  #  options: contextMatchDistance, contextMatchThreshold,
  #           patternMatchThreshold and flexContext are read from here
  #
  # Returns an object with a "matches" list, which holds either one match
  # or nothing.
  searchFuzzyWithContext: (prefix, suffix, pattern, expectedStart = null, expectedEnd = null, caseSensitive = false, options = {}) ->
    @ensureDMP()

    # No context, no joy
    unless (prefix? and suffix?)
      throw new Error "Can not do a context-based fuzzy search with missing context!"

    # Get full document length
    len = @corpus().length

    # Get a starting position for the prefix search
    expectedPrefixStart = if expectedStart?
      i = expectedStart - prefix.length
      if i < 0
        0
      else
        i
    else
      Math.floor(len / 2)

    # Do the fuzzy search for the prefix
    @dmp.setMatchDistance options.contextMatchDistance ? len * 2
    @dmp.setMatchThreshold options.contextMatchThreshold ? 0.5
    prefixResult = @dmp.search @corpus(), prefix, expectedPrefixStart

    # If the prefix is not found, give up
    unless prefixResult.length then return matches: []

    # This is where the prefix was found
    prefixStart = prefixResult[0].start
    prefixEnd = prefixResult[0].end

    # Let's find out where we expect to find the suffix!
    # We need the pattern's length.
    patternLength = if pattern?
      # If we have a pattern, use its length
      pattern.length
    else
      if expectedStart? and expectedEnd?
        # We don't have a pattern, but at least
        # have valid expectedStart and expectedEnd values,
        # get a length from that.
        expectedEnd - expectedStart
      else
        # We have no idea about where the suffix could be.
        # Let's just pull a number out of ... thin air.
        64

    # Get the part of text that is after the prefix
    remainingText = @corpus().substr prefixEnd

    # Calculate expected position
    expectedSuffixStart = patternLength

    # Do the fuzzy search for the suffix
    suffixResult = @dmp.search remainingText, suffix, expectedSuffixStart

    # If the suffix is not found, give up
    unless suffixResult.length then return matches: []

    # This is where the suffix was found
    suffixStart = prefixEnd + suffixResult[0].start
    suffixEnd = prefixEnd + suffixResult[0].end

    # This is the range between the prefix and the suffix
    charRange =
      start: prefixEnd
      end: suffixStart

    # Get the configured threshold for the pattern matching
    matchThreshold = options.patternMatchThreshold ? 0.5

    # See how good a match we have
    analysis = @_analyzeMatch pattern, charRange, true

    # Should we try to find a better match by moving the
    # initial match around a little bit, even if this has
    # a negative impact on the similarity of the context?
    if pattern? and options.flexContext and not analysis.exact

      # Do we have an exact match for the quote around here?

      if not @pm then @pm = new window.DTM_ExactMatcher
      @pm.setDistinct false
      @pm.setCaseSensitive false

      flexMatches = @pm.search @corpus()[prefixStart..suffixEnd], pattern

      candidate = null
      bestError = 2

      for flexMatch in flexMatches

        # Calculate the range that matched the quote
        flexRange =
          start: prefixStart + flexMatch.start
          end: prefixStart + flexMatch.end

        # Check how the prefix would fare
        prefixRange = start: prefixStart, end: flexRange.start
        a1 = @_analyzeMatch prefix, prefixRange, true
        prefixError = if a1.exact then 0 else a1.comparison.errorLevel

        # Check how the suffix would fare
        suffixRange = start: flexRange.end, end: suffixEnd
        a2 = @_analyzeMatch suffix, suffixRange, true
        suffixError = if a2.exact then 0 else a2.comparison.errorLevel

        # Did we get at least one exact match?
        if a1.exact or a2.exact
          # Yes, we did. Calculate the total error
          totalError = prefixError + suffixError

          # Is this better than our best bet?
          if totalError < bestError
            # This is our best candidate so far. Store it.
            candidate = flexRange
            bestError = totalError

      if candidate?
        console.log "flexContext adjustment: we found a better candidate!"
        charRange = candidate
        analysis = @_analyzeMatch pattern, charRange, true

    # Do we have to compare what we found to a pattern?
    # The match is accepted when there is no pattern to compare with,
    # when the found text matches the pattern exactly, or when the
    # error level is still acceptable.
    acceptable = (not pattern?) or analysis.exact or
      (analysis.comparison.errorLevel <= matchThreshold)

    if acceptable
      # Collect the results
      match = {}
      for obj in [charRange, analysis]
        for k, v of obj
          match[k] = v
      return matches: [match]

    # The match is rejected, because the error level is too high.
    return matches: []

  # ===== Private methods (never call from outside the module) =======

  # Do some normalization to get a "canonical" form of a string.
  # Used to even out some browser differences.
  _normalizeString: (string) -> (string.replace /\s{2,}/g, " ").trim()

  # Search for text with a custom matcher object
  #
  # Parameters:
  #  matcher: the object to use for doing the plain-text part of the search
  #  pattern: what to search for
  #  pos: where do we expect to find it
  #
  # An object is returned, with a "matches" list and a "time" report.
  #
  # Each match carries the fields of the text match returned by the
  # matcher (like "start" and "end"), plus the fields of the analysis:
  # "found" is the matching slice, "exact" tells whether it equals the
  # pattern.
  #
  # If no match is found, the "matches" list is empty.
  _search: (matcher, pattern, pos, options = {}) ->
    # Prepare and check the pattern
    unless pattern? then throw new Error "Can't search for null pattern!"
    pattern = pattern.trim()
    # After trimming, the pattern is never null; check for emptiness.
    unless pattern.length then throw new Error "Can't search for an empty pattern!"

    fuzzyComparison = options.withFuzzyComparison ? false

    t1 = @timestamp()

    # Do the text search
    textMatches = matcher.search @corpus(), pattern, pos, options
    t2 = @timestamp()

    matches = []
    for textMatch in textMatches
      do (textMatch) =>
        # See how good a match we have
        analysis = @_analyzeMatch pattern, textMatch, fuzzyComparison

        # Collect the results
        match = {}
        for obj in [textMatch, analysis]
          for k, v of obj
            match[k] = v

        matches.push match
        null
    t3 = @timestamp()
    result =
      matches: matches
      time:
        phase1_textMatching: t2 - t1
        phase2_matchMapping: t3 - t2
        total: t3 - t1
    result

  timestamp: -> new Date().getTime()

  # Read a match returned by the matcher engine, and compare it with the pattern
  _analyzeMatch: (pattern, charRange, useFuzzy = false) ->
    expected = @_normalizeString pattern
    found = @_normalizeString @corpus()[charRange.start .. charRange.end - 1]
    result =
      found: found
      exact: found is expected

    # If the match is not exact, check whether the changes are
    # only case differences
    unless result.exact then result.exactExceptCase = expected.toLowerCase() is found.toLowerCase()

    # if we are interested in fuzzy comparison, calculate that, too
    if not result.exact and useFuzzy
      @ensureDMP()
      result.comparison = @dmp.compare expected, found

    result

  # Create the fuzzy matcher (@dmp) on first use.
  ensureDMP: ->
    unless @dmp?
      unless window.DTM_DMPMatcher?
        throw new Error "DTM_DMPMatcher is not available. Have you loaded the text match engines?"
      @dmp = new window.DTM_DMPMatcher
h/static/scripts/vendor/dom_text_matcher.js
0 → 100644
View file @
94465e93
// Generated by CoffeeScript 1.7.1
(function() {
  /**
   * DomTextMatcher runs exact, regex and fuzzy text searches over a text
   * corpus and annotates every hit with how well it matches the pattern.
   *
   * The engines (window.DTM_ExactMatcher, window.DTM_RegexMatcher and
   * window.DTM_DMPMatcher) are looked up lazily, when first needed.
   */
  window.DomTextMatcher = (function() {
    /**
     * @param {function(): string} corpus Getter for the text to search; it is
     *   invoked again for every search.
     */
    function DomTextMatcher(corpus) {
      this.corpus = corpus;
    }

    // Copy the enumerable properties of every source onto one fresh object.
    function merge(sources) {
      var merged = {};
      var i, key;
      for (i = 0; i < sources.length; i++) {
        for (key in sources[i]) {
          merged[key] = sources[i][key];
        }
      }
      return merged;
    }

    /**
     * Search for literal occurrences of `pattern`.
     * @param {string} pattern
     * @param {boolean} [distinct=true] Forwarded to the exact matcher.
     * @param {boolean} [caseSensitive=false]
     * @returns {{matches: Object[], time: Object}} See _search().
     */
    DomTextMatcher.prototype.searchExact = function(pattern, distinct, caseSensitive) {
      if (distinct == null) {
        distinct = true;
      }
      if (caseSensitive == null) {
        caseSensitive = false;
      }
      if (!this.pm) {
        this.pm = new window.DTM_ExactMatcher();
      }
      this.pm.setDistinct(distinct);
      this.pm.setCaseSensitive(caseSensitive);
      return this._search(this.pm, pattern);
    };

    /**
     * Search with a regular expression source string.
     * @param {string} pattern
     * @param {boolean} [caseSensitive=false]
     * @returns {{matches: Object[], time: Object}} See _search().
     */
    DomTextMatcher.prototype.searchRegex = function(pattern, caseSensitive) {
      if (caseSensitive == null) {
        caseSensitive = false;
      }
      if (!this.rm) {
        this.rm = new window.DTM_RegexMatcher();
      }
      this.rm.setCaseSensitive(caseSensitive);
      return this._search(this.rm, pattern);
    };

    /**
     * Fuzzy search for `pattern` near the expected position `pos`.
     * @param {string} pattern
     * @param {number} pos Expected start position, forwarded to the engine.
     * @param {boolean} [caseSensitive=false]
     * @param {Object} [options] `matchDistance` (default 1000) and
     *   `matchThreshold` (default 0.5) configure the engine; the whole object
     *   is also forwarded to it.
     * @returns {{matches: Object[], time: Object}} See _search().
     */
    DomTextMatcher.prototype.searchFuzzy = function(pattern, pos, caseSensitive, options) {
      if (caseSensitive == null) {
        caseSensitive = false;
      }
      if (options == null) {
        options = {};
      }
      this.ensureDMP();
      this.dmp.setMatchDistance(options.matchDistance != null ? options.matchDistance : 1000);
      this.dmp.setMatchThreshold(options.matchThreshold != null ? options.matchThreshold : 0.5);
      this.dmp.setCaseSensitive(caseSensitive);
      return this._search(this.dmp, pattern, pos, options);
    };

    /**
     * Locate a passage by the text around it: fuzzy-find `prefix`, then
     * fuzzy-find `suffix` in the text after it, and report what lies between.
     *
     * @param {string} prefix Text expected right before the passage.
     * @param {string} suffix Text expected right after the passage.
     * @param {?string} pattern Expected passage; when null, whatever sits
     *   between the two contexts is returned without being rated.
     * @param {?number} [expectedStart] Expected start offset of the passage.
     * @param {?number} [expectedEnd] Expected end offset of the passage.
     * @param {boolean} [caseSensitive=false]
     * @param {Object} [options] contextMatchDistance (default: twice the
     *   corpus length), contextMatchThreshold (default 0.5),
     *   patternMatchThreshold (default 0.5), flexContext.
     * @returns {{matches: Object[]}} Zero or one match.
     * @throws {Error} When prefix or suffix is missing.
     */
    DomTextMatcher.prototype.searchFuzzyWithContext = function(prefix, suffix, pattern, expectedStart, expectedEnd, caseSensitive, options) {
      if (caseSensitive == null) {
        caseSensitive = false;
      }
      if (options == null) {
        options = {};
      }
      this.ensureDMP();
      if (prefix == null || suffix == null) {
        throw new Error("Can not do a context-based fuzzy search with missing context!");
      }

      var corpus = this.corpus();
      var len = corpus.length;

      // Without a hint, start looking for the prefix in the middle.
      var expectedPrefixStart = expectedStart != null ?
        Math.max(0, expectedStart - prefix.length) :
        Math.floor(len / 2);

      this.dmp.setMatchDistance(options.contextMatchDistance != null ? options.contextMatchDistance : len * 2);
      this.dmp.setMatchThreshold(options.contextMatchThreshold != null ? options.contextMatchThreshold : 0.5);
      // Honor the caseSensitive argument instead of inheriting whatever an
      // earlier searchFuzzy() call left configured on the shared engine.
      this.dmp.setCaseSensitive(caseSensitive);

      var prefixResult = this.dmp.search(corpus, prefix, expectedPrefixStart);
      if (!prefixResult.length) {
        return { matches: [] };
      }
      var prefixStart = prefixResult[0].start;
      var prefixEnd = prefixResult[0].end;

      // Estimate how far after the prefix the suffix should begin.
      var patternLength;
      if (pattern != null) {
        patternLength = pattern.length;
      } else if (expectedStart != null && expectedEnd != null) {
        patternLength = expectedEnd - expectedStart;
      } else {
        patternLength = 64;
      }

      var suffixResult = this.dmp.search(corpus.substr(prefixEnd), suffix, patternLength);
      if (!suffixResult.length) {
        return { matches: [] };
      }
      // Suffix offsets are relative to the text after the prefix.
      var suffixStart = prefixEnd + suffixResult[0].start;
      var suffixEnd = prefixEnd + suffixResult[0].end;

      var charRange = { start: prefixEnd, end: suffixStart };
      var matchThreshold = options.patternMatchThreshold != null ? options.patternMatchThreshold : 0.5;

      // With no pattern there is nothing to rate; _analyzeMatch() cannot
      // normalize null, so report the enclosed text directly.
      if (pattern == null) {
        var enclosed = this._normalizeString(corpus.slice(charRange.start, charRange.end));
        return { matches: [merge([charRange, { found: enclosed }])] };
      }

      var analysis = this._analyzeMatch(pattern, charRange, true);

      // Optionally look for an exact occurrence of the pattern anywhere
      // between the two contexts, accepting a worse fit of one context.
      if (options.flexContext && !analysis.exact) {
        if (!this.pm) {
          this.pm = new window.DTM_ExactMatcher();
        }
        this.pm.setDistinct(false);
        this.pm.setCaseSensitive(false);
        var flexMatches = this.pm.search(corpus.slice(prefixStart, suffixEnd + 1), pattern);

        var candidate; // stays undefined until a usable occurrence is found
        var bestError = 2;
        var i, flexRange, prefixAnalysis, suffixAnalysis, prefixError, suffixError, totalError;
        for (i = 0; i < flexMatches.length; i++) {
          flexRange = {
            start: prefixStart + flexMatches[i].start,
            end: prefixStart + flexMatches[i].end
          };
          prefixAnalysis = this._analyzeMatch(prefix, { start: prefixStart, end: flexRange.start }, true);
          prefixError = prefixAnalysis.exact ? 0 : prefixAnalysis.comparison.errorLevel;
          suffixAnalysis = this._analyzeMatch(suffix, { start: flexRange.end, end: suffixEnd }, true);
          suffixError = suffixAnalysis.exact ? 0 : suffixAnalysis.comparison.errorLevel;

          // At least one of the contexts must still fit exactly.
          if (prefixAnalysis.exact || suffixAnalysis.exact) {
            totalError = prefixError + suffixError;
            if (totalError < bestError) {
              candidate = flexRange;
              bestError = totalError;
            }
          }
        }

        if (candidate != null) {
          console.log("flexContext adjustment: we found a better candidate!");
          charRange = candidate;
          analysis = this._analyzeMatch(pattern, charRange, true);
        }
      }

      if (analysis.exact || analysis.comparison.errorLevel <= matchThreshold) {
        return { matches: [merge([charRange, analysis])] };
      }
      return { matches: [] };
    };

    /**
     * Collapse every run of two or more whitespace characters into a single
     * space and trim both ends.
     * @param {string} string
     * @returns {string}
     */
    DomTextMatcher.prototype._normalizeString = function(string) {
      return string.replace(/\s{2,}/g, " ").trim();
    };

    /**
     * Run `matcher` over the corpus and annotate every hit.
     * @param {Object} matcher Engine exposing search(text, pattern, pos, options).
     * @param {string} pattern Trimmed before use.
     * @param {number} [pos] Forwarded to the engine.
     * @param {Object} [options] Forwarded to the engine; `withFuzzyComparison`
     *   additionally requests a fuzzy comparison of inexact hits.
     * @returns {{matches: Object[], time: Object}} Each match merges the
     *   engine's fields with those of _analyzeMatch(); `time` holds the
     *   duration of both phases in milliseconds.
     * @throws {Error} For a null or empty (whitespace-only) pattern.
     */
    DomTextMatcher.prototype._search = function(matcher, pattern, pos, options) {
      if (options == null) {
        options = {};
      }
      if (pattern == null) {
        throw new Error("Can't search for null pattern!");
      }
      pattern = pattern.trim();
      // trim() never returns null, so emptiness must be tested on the length.
      if (pattern.length === 0) {
        throw new Error("Can't search for an empty pattern!");
      }
      var fuzzyComparison = options.withFuzzyComparison != null ? options.withFuzzyComparison : false;

      var t1 = this.timestamp();
      var textMatches = matcher.search(this.corpus(), pattern, pos, options);
      var t2 = this.timestamp();

      var matches = [];
      var i, analysis;
      for (i = 0; i < textMatches.length; i++) {
        analysis = this._analyzeMatch(pattern, textMatches[i], fuzzyComparison);
        matches.push(merge([textMatches[i], analysis]));
      }
      var t3 = this.timestamp();

      return {
        matches: matches,
        time: {
          phase1_textMatching: t2 - t1,
          phase2_matchMapping: t3 - t2,
          total: t3 - t1
        }
      };
    };

    /** @returns {number} Current time in milliseconds since the epoch. */
    DomTextMatcher.prototype.timestamp = function() {
      return new Date().getTime();
    };

    /**
     * Compare the corpus slice described by `charRange` with `pattern`.
     * @param {string} pattern
     * @param {{start: number, end: number}} charRange `end` is exclusive.
     * @param {boolean} [useFuzzy=false] Also compute `comparison` for an
     *   inexact match, using the DMP engine.
     * @returns {Object} {found, exact}, plus `exactExceptCase` (and
     *   optionally `comparison`) when the match is not exact.
     */
    DomTextMatcher.prototype._analyzeMatch = function(pattern, charRange, useFuzzy) {
      if (useFuzzy == null) {
        useFuzzy = false;
      }
      var expected = this._normalizeString(pattern);
      // Plain exclusive slice: the generated `+(end - 1) + 1 || 9e9` form
      // selected the rest of the corpus whenever end was 0.
      var found = this._normalizeString(this.corpus().slice(charRange.start, charRange.end));
      var result = {
        found: found,
        exact: found === expected
      };
      if (!result.exact) {
        // Do the two strings differ only in letter case?
        result.exactExceptCase = expected.toLowerCase() === found.toLowerCase();
        if (useFuzzy) {
          this.ensureDMP();
          result.comparison = this.dmp.compare(expected, found);
        }
      }
      return result;
    };

    /**
     * Lazily create the fuzzy matching engine in this.dmp.
     * @throws {Error} When window.DTM_DMPMatcher has not been loaded.
     */
    DomTextMatcher.prototype.ensureDMP = function() {
      if (this.dmp == null) {
        if (window.DTM_DMPMatcher == null) {
          throw new Error("DTM_DMPMatcher is not available. Have you loaded the text match engines?");
        }
        return this.dmp = new window.DTM_DMPMatcher();
      }
    };

    return DomTextMatcher;

  })();

}).call(this);
h/static/scripts/vendor/page_text_mapper_core.coffee
deleted
100644 → 0
View file @
c55570c6
# Common functions for all page-based document mapper modules.
#
# This class reads @pageInfo (a list of per-page records with `index` and
# `content`) and calls @_isPageRendered, @getRootNodeForPage and
# @getPageForNode; none of these are defined here.
# NOTE(review): presumably the concrete mapper modules provide them - confirm.
class window.PageTextMapperCore

  # Number of characters taken on each side by getContextForCharRange
  CONTEXT_LEN: 32

  # Get the page index for a given character position.
  # Returns -1 when no page contains the position.
  getPageIndexForPos: (pos) ->
    for info in @pageInfo
      return info.index if info.start <= pos < info.end
    -1

  # A new page was rendered
  _onPageRendered: (index) =>
    # Is it really rendered? If not, check again in a second.
    unless @_isPageRendered(index) and @pageInfo[index]
      setTimeout (=> @_onPageRendered index), 1000
      return

    # Collect info about the new DOM subtree
    @_mapPage @pageInfo[index]

  # Determine whether a given page has been rendered and mapped
  isPageMapped: (index) ->
    return @pageInfo[index]?.domMapper?

  # Create the mappings for a given page
  _mapPage: (info) ->
    info.node = @getRootNodeForPage info.index
    info.domMapper = new DomTextMapper("d-t-m for page #" + info.index)
    info.domMapper.setRootNode info.node
    info.domMapper.documentChanged()
    if @requiresSmartStringPadding
      info.domMapper.setExpectedContent info.content
    info.domMapper.scan()

    # Report (but tolerate) a difference between the rendered DOM text
    # and the text extracted for this page
    renderedContent = info.domMapper.getCorpus()
    if renderedContent isnt info.content
      console.log "Oops. Mismatch between rendered and extracted text, while mapping page #" + info.index + "!"
      console.trace()
      console.log "Rendered: " + renderedContent
      console.log "Extracted: " + info.content

    # Announce the newly available page, asynchronously
    setTimeout ->
      event = document.createEvent "UIEvents"
      event.initUIEvent "docPageMapped", false, false, window, 0
      event.pageIndex = info.index
      window.dispatchEvent event

  # Update the mappings for a given page
  _updateMap: (info) ->
    info.domMapper.documentChanged()
    info.domMapper.scan()

  # Delete the mappings for a given page
  _unmapPage: (info) ->
    delete info.domMapper

    # Announce the unavailable page
    event = document.createEvent "UIEvents"
    event.initUIEvent "docPageUnmapped", false, false, window, 0
    event.pageIndex = info.index
    window.dispatchEvent event

  # Announce scrolling
  _onScroll: ->
    event = document.createEvent "UIEvents"
    event.initUIEvent "docPageScrolling", false, false, window, 0
    window.dispatchEvent event

  # Look up info about a given DOM node, uniting page and node info.
  # Returns null when the node's page is not mapped yet.
  getInfoForNode: (node) ->
    pageData = @getPageForNode node

    # Give up if the given page is not mapped yet
    return null unless pageData.domMapper

    nodeData = pageData.domMapper.getInfoForNode node

    # Copy info about the node
    info = {}
    info[k] = v for k, v of nodeData

    # Correct the character offsets with that of the page
    info.start += pageData.start
    info.end += pageData.start
    info.pageIndex = pageData.index
    info

  # Look up the start offset of a given DOM node, uniting page and node info
  getStartPosForNode: (node) ->
    pageData = @getPageForNode node
    nodeStart = pageData.domMapper.getStartPosForNode node
    pageData.start + nodeStart

  # Look up the end offset of a given DOM node, uniting page and node info
  getEndPosForNode: (node) ->
    pageData = @getPageForNode node
    nodeEnd = pageData.domMapper.getEndPosForNode node
    pageData.start + nodeEnd

  # Return some data about a given character range.
  #
  # `pages` optionally lists the page indices to consult; by default every
  # page from the one holding `start` to the one holding `end` is used.
  # Returns {sections}, keyed by page index.
  getMappingsForCharRange: (start, end, pages) ->
    # Check out which pages are these on
    startIndex = @getPageIndexForPos start
    endIndex = @getPageIndexForPos end

    # Function to get the relevant section inside a given page
    getSection = (index) =>
      info = @pageInfo[index]

      # Calculate in-page offsets
      realStart = (Math.max info.start, start) - info.start
      realEnd = (Math.min info.end, end) - info.start

      # Get the range inside the page
      mappings = info.domMapper.getMappingsForCharRange realStart, realEnd
      mappings.sections[0]

    # Get the section for all involved pages
    sections = {}
    for index in pages ? [startIndex..endIndex]
      sections[index] = getSection index

    # Return the data
    sections: sections

  # Return the joined text of all pages.
  # Throws if called before _onHavePageContents has stored the corpus.
  getCorpus: ->
    # Existence check: an empty document is a valid (empty string) corpus
    unless @_corpus?
      throw new Error "Hey! Called getCorpus() before corpus defined!"
    @_corpus

  # Return [prefix, suffix]: up to CONTEXT_LEN characters before `start`
  # and after `end`, both trimmed.
  getContextForCharRange: (start, end) ->
    prefixStart = Math.max 0, start - @CONTEXT_LEN
    prefixLen = start - prefixStart
    prefix = @_corpus.substr prefixStart, prefixLen
    suffix = @_corpus.substr end, @CONTEXT_LEN
    [prefix.trim(), suffix.trim()]

  # Call this in scan, when you have the page contents
  _onHavePageContents: ->
    # Join all the text together
    @_corpus = (info.content for info in @pageInfo).join " "

    # Go over the pages, and calculate some basic info
    pos = 0
    @pageInfo.forEach (info, i) =>
      info.len = info.content.length
      info.start = pos
      # The extra character accounts for the joining space
      info.end = (pos += info.len + 1)

  # Call this in scan, after resolving the promise
  _onAfterScan: ->
    # Go over the pages again, and map the rendered ones
    @pageInfo.forEach (info, i) =>
      if @_isPageRendered i
        @_mapPage info
h/static/scripts/vendor/text_match_engines.coffee
deleted
100644 → 0
View file @
c55570c6
# Naive text matcher: finds the literal occurrences of a pattern
class window.DTM_ExactMatcher

  constructor: ->
    @distinct = true
    @caseSensitive = false

  # When true, the search continues after the end of each hit, so the
  # reported matches never overlap; when false, it continues one character
  # after the start of each hit.
  setDistinct: (value) -> @distinct = value

  setCaseSensitive: (value) -> @caseSensitive = value

  # Find every occurrence of `pattern` in `text`.
  # Returns a list of {start, end} offsets; `end` is exclusive.
  search: (text, pattern) ->
    pLen = pattern.length
    results = []

    # indexOf "" always answers with the current position, so an empty
    # pattern would never advance; it has no meaningful occurrences.
    return results if pLen is 0

    unless @caseSensitive
      text = text.toLowerCase()
      pattern = pattern.toLowerCase()

    step = if @distinct then pLen else 1
    from = 0
    # Search from a moving offset instead of re-slicing the text on each hit
    while (i = text.indexOf pattern, from) > -1
      results.push {start: i, end: i + pLen}
      from = i + step

    results
# Regular expression based text matcher
class window.DTM_RegexMatcher

  constructor: ->
    @caseSensitive = false

  setCaseSensitive: (value) -> @caseSensitive = value

  # Find every match of the regular expression source `pattern` in `text`.
  # Returns a list of {start, end} offsets; `end` is exclusive.
  search: (text, pattern) ->
    re = new RegExp pattern, if @caseSensitive then "g" else "gi"
    results = []
    while m = re.exec text
      results.push {start: m.index, end: m.index + m[0].length}
      # A zero-length match leaves lastIndex where it was; step over it,
      # or exec would return the same match forever.
      re.lastIndex++ if m[0].length is 0
    results
# diff-match-patch - based text matcher
class window.DTM_DMPMatcher

  constructor: ->
    @dmp = new diff_match_patch
    @dmp.Diff_Timeout = 0
    @caseSensitive = false

  _reverse: (text) -> text.split("").reverse().join ""

  # Use this to get the max allowed pattern length.
  # Trying to use a longer pattern will give an error.
  getMaxPatternLength: -> @dmp.Match_MaxBits

  # The following example is a classic dilemma.
  # There are two potential matches, one is close to the expected location
  # but contains a one character error, the other is far from the expected
  # location but is exactly the pattern sought after:
  #
  # match_main("abc12345678901234567890abbc", "abc", 26)
  #
  # Which result is returned (0 or 24) is determined by the
  # MatchDistance property.
  #
  # An exact letter match which is 'distance' characters away
  # from the fuzzy location would score as a complete mismatch.
  # For example, a distance of '0' requires the match be at the exact
  # location specified, whereas a distance of '1000' would require
  # a perfect match to be within 800 characters of the expected location
  # to be found using a 0.8 threshold (see below).
  #
  # The larger MatchDistance is, the slower search may take to compute.
  #
  # This variable defaults to 1000.
  setMatchDistance: (distance) -> @dmp.Match_Distance = distance
  getMatchDistance: -> @dmp.Match_Distance

  # MatchThreshold determines the cut-off value for a valid match.
  #
  # If Match_Threshold is closer to 0, the requirements for accuracy
  # increase. If Match_Threshold is closer to 1 then it is more likely
  # that a match will be found. The larger Match_Threshold is, the slower
  # search may take to compute.
  #
  # This variable defaults to 0.5.
  setMatchThreshold: (threshold) -> @dmp.Match_Threshold = threshold
  getMatchThreshold: -> @dmp.Match_Threshold

  # Read the instance field (a bare `caseSensitive` is an undeclared global)
  getCaseSensitive: -> @caseSensitive
  setCaseSensitive: (value) -> @caseSensitive = value

  # Given a text to search, a pattern to search for and an
  # expected location in the text near which to find the pattern,
  # return the location which matches closest.
  #
  # The function will search for the best match based on both the number
  # of character errors between the pattern and the potential match,
  # as well as the distance between the expected location and the
  # potential match.
  #
  # Patterns longer than getMaxPatternLength() are located by their first
  # and last slices only; the middle section is not checked.
  #
  # Options:
  #   withLevenshtein - add `lev`, the Levenshtein distance, to the result
  #                     (the historical misspellings `withLevenhstein` and
  #                     `withLevenshstein` are accepted, too)
  #   withDiff        - add `diff` and `diffHTML` to the result
  #
  # Returns a list holding the single best match as {start, end},
  # or an empty list if no match is found.
  search: (text, pattern, expectedStartLoc = 0, options = {}) ->
    if expectedStartLoc < 0
      throw new Error "Can't search at negative indices!"
    if expectedStartLoc isnt Math.floor expectedStartLoc
      throw new Error "Expected start location must be an integer."

    unless @caseSensitive
      text = text.toLowerCase()
      pattern = pattern.toLowerCase()

    pLen = pattern.length
    maxLen = @getMaxPatternLength()

    if pLen <= maxLen
      result = @searchForSlice text, pattern, expectedStartLoc
    else
      startSlice = pattern.substr 0, maxLen
      startPos = @searchForSlice text, startSlice, expectedStartLoc
      if startPos?
        endSlice = pattern.substr pLen - maxLen, maxLen
        endLoc = startPos.start + pLen - maxLen
        endPos = @searchForSlice text, endSlice, endLoc
        if endPos?
          matchLen = endPos.end - startPos.start
          # Accept only if the overall length is plausible for the pattern
          if pLen * 0.5 <= matchLen <= pLen * 1.5
            result =
              start: startPos.start
              end: endPos.end

    return [] unless result?

    wantLev = options.withLevenshtein or options.withLevenhstein or
      options.withLevenshstein

    if wantLev or options.withDiff
      found = text.substr result.start, result.end - result.start
      result.diff = @dmp.diff_main pattern, found
      if wantLev
        result.lev = @dmp.diff_levenshtein result.diff
      if options.withDiff
        @dmp.diff_cleanupSemantic result.diff
        result.diffHTML = @dmp.diff_prettyHtml result.diff

    [result]

  # Compare two string slices, get Levenshtein distance and visual diff.
  # `errorLevel` is the distance relative to the length of text1.
  compare: (text1, text2) ->
    unless (text1? and text2?)
      throw new Error "Can not compare non-existing strings!"

    result = {}
    result.diff = @dmp.diff_main text1, text2
    result.lev = @dmp.diff_levenshtein result.diff
    result.errorLevel = result.lev / text1.length
    @dmp.diff_cleanupSemantic result.diff
    result.diffHTML = @dmp.diff_prettyHtml result.diff
    result

  # ============= Private part ==========================================
  # You don't need to call the functions below this point manually

  # Locate `slice` near expectedStartLoc: the start comes from a forward
  # match, the end from matching the reversed slice in the reversed text.
  # Returns {start, end}, or null when the forward match fails.
  # NOTE(review): this reads `.index` from the value returned by
  # match_main - presumably a patched diff_match_patch build; confirm.
  searchForSlice: (text, slice, expectedStartLoc) ->
    r1 = @dmp.match_main text, slice, expectedStartLoc
    startIndex = r1.index
    return null if startIndex is -1

    txet = @_reverse text
    nrettap = @_reverse slice
    expectedEndLoc = startIndex + slice.length
    expectedDneLoc = text.length - expectedEndLoc
    r2 = @dmp.match_main txet, nrettap, expectedDneLoc
    dneIndex = r2.index
    endIndex = text.length - dneIndex

    result =
      start: startIndex
      end: endIndex
h/static/scripts/vendor/text_match_engines.js
0 → 100644
View file @
94465e93
// Generated by CoffeeScript 1.7.1
(
function
()
{
window
.
DTM_ExactMatcher
=
(
function
()
{
function
DTM_ExactMatcher
()
{
this
.
distinct
=
true
;
this
.
caseSensitive
=
false
;
}
DTM_ExactMatcher
.
prototype
.
setDistinct
=
function
(
value
)
{
return
this
.
distinct
=
value
;
};
DTM_ExactMatcher
.
prototype
.
setCaseSensitive
=
function
(
value
)
{
return
this
.
caseSensitive
=
value
;
};
DTM_ExactMatcher
.
prototype
.
search
=
function
(
text
,
pattern
)
{
var
i
,
index
,
pLen
,
results
;
pLen
=
pattern
.
length
;
results
=
[];
index
=
0
;
if
(
!
this
.
caseSensitive
)
{
text
=
text
.
toLowerCase
();
pattern
=
pattern
.
toLowerCase
();
}
while
((
i
=
text
.
indexOf
(
pattern
))
>
-
1
)
{
(
function
(
_this
)
{
return
(
function
()
{
results
.
push
({
start
:
index
+
i
,
end
:
index
+
i
+
pLen
});
if
(
_this
.
distinct
)
{
text
=
text
.
substr
(
i
+
pLen
);
return
index
+=
i
+
pLen
;
}
else
{
text
=
text
.
substr
(
i
+
1
);
return
index
+=
i
+
1
;
}
});
})(
this
)();
}
return
results
;
};
return
DTM_ExactMatcher
;
})();
window
.
DTM_RegexMatcher
=
(
function
()
{
function
DTM_RegexMatcher
()
{
this
.
caseSensitive
=
false
;
}
DTM_RegexMatcher
.
prototype
.
setCaseSensitive
=
function
(
value
)
{
return
this
.
caseSensitive
=
value
;
};
DTM_RegexMatcher
.
prototype
.
search
=
function
(
text
,
pattern
)
{
var
m
,
re
,
_results
;
re
=
new
RegExp
(
pattern
,
this
.
caseSensitive
?
"g"
:
"gi"
);
_results
=
[];
while
(
m
=
re
.
exec
(
text
))
{
_results
.
push
({
start
:
m
.
index
,
end
:
m
.
index
+
m
[
0
].
length
});
}
return
_results
;
};
return
DTM_RegexMatcher
;
})();
window
.
DTM_DMPMatcher
=
(
function
()
{
function
DTM_DMPMatcher
()
{
this
.
dmp
=
new
diff_match_patch
;
this
.
dmp
.
Diff_Timeout
=
0
;
this
.
caseSensitive
=
false
;
}
DTM_DMPMatcher
.
prototype
.
_reverse
=
function
(
text
)
{
return
text
.
split
(
""
).
reverse
().
join
(
""
);
};
DTM_DMPMatcher
.
prototype
.
getMaxPatternLength
=
function
()
{
return
this
.
dmp
.
Match_MaxBits
;
};
DTM_DMPMatcher
.
prototype
.
setMatchDistance
=
function
(
distance
)
{
return
this
.
dmp
.
Match_Distance
=
distance
;
};
DTM_DMPMatcher
.
prototype
.
getMatchDistance
=
function
()
{
return
this
.
dmp
.
Match_Distance
;
};
DTM_DMPMatcher
.
prototype
.
setMatchThreshold
=
function
(
threshold
)
{
return
this
.
dmp
.
Match_Threshold
=
threshold
;
};
DTM_DMPMatcher
.
prototype
.
getMatchThreshold
=
function
()
{
return
this
.
dmp
.
Match_Threshold
;
};
DTM_DMPMatcher
.
prototype
.
getCaseSensitive
=
function
()
{
return
caseSensitive
;
};
DTM_DMPMatcher
.
prototype
.
setCaseSensitive
=
function
(
value
)
{
return
this
.
caseSensitive
=
value
;
};
DTM_DMPMatcher
.
prototype
.
search
=
function
(
text
,
pattern
,
expectedStartLoc
,
options
)
{
var
endIndex
,
endLen
,
endLoc
,
endPos
,
endSlice
,
found
,
matchLen
,
maxLen
,
pLen
,
result
,
startIndex
,
startLen
,
startPos
,
startSlice
;
if
(
expectedStartLoc
==
null
)
{
expectedStartLoc
=
0
;
}
if
(
options
==
null
)
{
options
=
{};
}
if
(
expectedStartLoc
<
0
)
{
throw
new
Error
(
"Can't search at negative indices!"
);
}
if
(
expectedStartLoc
!==
Math
.
floor
(
expectedStartLoc
))
{
throw
new
Error
(
"Expected start location must be an integer."
);
}
if
(
!
this
.
caseSensitive
)
{
text
=
text
.
toLowerCase
();
pattern
=
pattern
.
toLowerCase
();
}
pLen
=
pattern
.
length
;
maxLen
=
this
.
getMaxPatternLength
();
if
(
pLen
<=
maxLen
)
{
result
=
this
.
searchForSlice
(
text
,
pattern
,
expectedStartLoc
);
}
else
{
startSlice
=
pattern
.
substr
(
0
,
maxLen
);
startPos
=
this
.
searchForSlice
(
text
,
startSlice
,
expectedStartLoc
);
if
(
startPos
!=
null
)
{
startLen
=
startPos
.
end
-
startPos
.
start
;
endSlice
=
pattern
.
substr
(
pLen
-
maxLen
,
maxLen
);
endLoc
=
startPos
.
start
+
pLen
-
maxLen
;
endPos
=
this
.
searchForSlice
(
text
,
endSlice
,
endLoc
);
if
(
endPos
!=
null
)
{
endLen
=
endPos
.
end
-
endPos
.
start
;
matchLen
=
endPos
.
end
-
startPos
.
start
;
startIndex
=
startPos
.
start
;
endIndex
=
endPos
.
end
;
if
((
pLen
*
0.5
<=
matchLen
&&
matchLen
<=
pLen
*
1.5
))
{
result
=
{
start
:
startIndex
,
end
:
endPos
.
end
};
}
}
}
}
if
(
result
==
null
)
{
return
[];
}
if
(
options
.
withLevenhstein
||
options
.
withDiff
)
{
found
=
text
.
substr
(
result
.
start
,
result
.
end
-
result
.
start
);
result
.
diff
=
this
.
dmp
.
diff_main
(
pattern
,
found
);
if
(
options
.
withLevenshstein
)
{
result
.
lev
=
this
.
dmp
.
diff_levenshtein
(
result
.
diff
);
}
if
(
options
.
withDiff
)
{
this
.
dmp
.
diff_cleanupSemantic
(
result
.
diff
);
result
.
diffHTML
=
this
.
dmp
.
diff_prettyHtml
(
result
.
diff
);
}
}
return
[
result
];
};
DTM_DMPMatcher
.
prototype
.
compare
=
function
(
text1
,
text2
)
{
var
result
;
if
(
!
((
text1
!=
null
)
&&
(
text2
!=
null
)))
{
throw
new
Error
(
"Can not compare non-existing strings!"
);
}
result
=
{};
result
.
diff
=
this
.
dmp
.
diff_main
(
text1
,
text2
);
result
.
lev
=
this
.
dmp
.
diff_levenshtein
(
result
.
diff
);
result
.
errorLevel
=
result
.
lev
/
text1
.
length
;
this
.
dmp
.
diff_cleanupSemantic
(
result
.
diff
);
result
.
diffHTML
=
this
.
dmp
.
diff_prettyHtml
(
result
.
diff
);
return
result
;
};
DTM_DMPMatcher
.
prototype
.
searchForSlice
=
function
(
text
,
slice
,
expectedStartLoc
)
{
var
dneIndex
,
endIndex
,
expectedDneLoc
,
expectedEndLoc
,
nrettap
,
r1
,
r2
,
result
,
startIndex
,
txet
;
r1
=
this
.
dmp
.
match_main
(
text
,
slice
,
expectedStartLoc
);
startIndex
=
r1
.
index
;
if
(
startIndex
===
-
1
)
{
return
null
;
}
txet
=
this
.
_reverse
(
text
);
nrettap
=
this
.
_reverse
(
slice
);
expectedEndLoc
=
startIndex
+
slice
.
length
;
expectedDneLoc
=
text
.
length
-
expectedEndLoc
;
r2
=
this
.
dmp
.
match_main
(
txet
,
nrettap
,
expectedDneLoc
);
dneIndex
=
r2
.
index
;
endIndex
=
text
.
length
-
dneIndex
;
return
result
=
{
start
:
startIndex
,
end
:
endIndex
};
};
return
DTM_DMPMatcher
;
})();
}).
call
(
this
);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment