Commit 109c1b97 authored by csillag's avatar csillag

Use the 'flexContext' option for nicer fuzzy anchoring

* Updated dom-text-matcher to c93abc20
* Updated Annotator to 41b88f9f (maintenance branch)

This version of Annotator uses a new method for adjusting the results of the two-phase fuzzy matching algorithm (used for anchoring). For details, see: https://github.com/hypothesis/dom-text-matcher/commit/c93abc20bedd4450fd4eefe7d7d487aba324a088

Fixes #681.
parent 12795901
/* /*
** Annotator 1.2.6-dev-5fafbdf ** Annotator 1.2.6-dev-f85315e
** https://github.com/okfn/annotator/ ** https://github.com/okfn/annotator/
** **
** Copyright 2012 Aron Carroll, Rufus Pollock, and Nick Stenning. ** Copyright 2012 Aron Carroll, Rufus Pollock, and Nick Stenning.
** Dual licensed under the MIT and GPLv3 licenses. ** Dual licensed under the MIT and GPLv3 licenses.
** https://github.com/okfn/annotator/blob/master/LICENSE ** https://github.com/okfn/annotator/blob/master/LICENSE
** **
** Built at: 2013-08-28 00:28:44Z ** Built at: 2013-08-28 02:11:19Z
*/ */
...@@ -1102,7 +1102,8 @@ ...@@ -1102,7 +1102,8 @@
options = { options = {
contextMatchDistance: this.domMapper.getDocLength() * 2, contextMatchDistance: this.domMapper.getDocLength() * 2,
contextMatchThreshold: 0.5, contextMatchThreshold: 0.5,
patternMatchThreshold: 0.5 patternMatchThreshold: 0.5,
flexContext: true
}; };
result = this.domMatcher.searchFuzzyWithContext(prefix, suffix, quote, expectedStart, expectedEnd, false, null, options); result = this.domMatcher.searchFuzzyWithContext(prefix, suffix, quote, expectedStart, expectedEnd, false, null, options);
if (!result.matches.length) { if (!result.matches.length) {
...@@ -1110,7 +1111,7 @@ ...@@ -1110,7 +1111,7 @@
return null; return null;
} }
match = result.matches[0]; match = result.matches[0];
console.log("Fuzzy found match:"); console.log("2-phase fuzzy found match:");
console.log(match); console.log(match);
browserRange = new Range.BrowserRange(match.realRange); browserRange = new Range.BrowserRange(match.realRange);
normalizedRange = browserRange.normalize(this.wrapper[0]); normalizedRange = browserRange.normalize(this.wrapper[0]);
...@@ -1146,7 +1147,7 @@ ...@@ -1146,7 +1147,7 @@
return null; return null;
} }
match = result.matches[0]; match = result.matches[0];
console.log("Fuzzy found match:"); console.log("1-phase fuzzy found match:");
console.log(match); console.log(match);
browserRange = new Range.BrowserRange(match.realRange); browserRange = new Range.BrowserRange(match.realRange);
normalizedRange = browserRange.normalize(this.wrapper[0]); normalizedRange = browserRange.normalize(this.wrapper[0]);
......
...@@ -146,7 +146,8 @@ class window.DomTextMatcher ...@@ -146,7 +146,8 @@ class window.DomTextMatcher
# If the prefix is not found, give up # If the prefix is not found, give up
unless prefixResult.length then return matches: [] unless prefixResult.length then return matches: []
# This is where the prefix ends # This is where the prefix was found
prefixStart = prefixResult[0].start
prefixEnd = prefixResult[0].end prefixEnd = prefixResult[0].end
# Let's find out where do we expect to find the suffix! # Let's find out where do we expect to find the suffix!
...@@ -176,9 +177,11 @@ class window.DomTextMatcher ...@@ -176,9 +177,11 @@ class window.DomTextMatcher
# If the suffix is not found, give up # If the suffix is not found, give up
unless suffixResult.length then return matches: [] unless suffixResult.length then return matches: []
# This is where the suffix starts # This is where the suffix was found
suffixStart = prefixEnd + suffixResult[0].start suffixStart = prefixEnd + suffixResult[0].start
suffixEnd = prefixEnd + suffixResult[0].end
# This is the range between the prefix and the suffix
charRange = charRange =
start: prefixEnd start: prefixEnd
end: suffixStart end: suffixStart
...@@ -189,11 +192,58 @@ class window.DomTextMatcher ...@@ -189,11 +192,58 @@ class window.DomTextMatcher
# See how good a match we have # See how good a match we have
analysis = @analyzeMatch pattern, charRange, true analysis = @analyzeMatch pattern, charRange, true
# Should we try to find a better match by moving the
# initial match around a little bit, even if this has
# a negative impact on the similarity of the context?
if pattern? and options.flexContext and not analysis.exact
# Do we have an exact match for the quote around here?
if not @pm then @pm = new window.DTM_ExactMatcher
@pm.setDistinct false
@pm.setCaseSensitive false
flexMatches = @pm.search @mapper.corpus[prefixStart..suffixEnd], pattern
delete candidate
bestError = 2
for flexMatch in flexMatches
# Calculate the range that matched the quote
flexRange =
start: prefixStart + flexMatch.start
end: prefixStart + flexMatch.end
# Check how the prefix would fare
prefixRange = start: prefixStart, end: flexRange.start
a1 = @analyzeMatch prefix, prefixRange, true
prefixError = if a1.exact then 0 else a1.comparison.errorLevel
# Check how the suffix would fare
suffixRange = start: flexRange.end, end: suffixEnd
a2 = @analyzeMatch suffix, suffixRange, true
suffixError = if a2.exact then 0 else a2.comparison.errorLevel
# Did at least one of the two contexts (prefix or suffix) match exactly?
if a1.exact or a2.exact
# Yes, we did. Calculate the total error
totalError = prefixError + suffixError
# Is this better than our best bet?
if totalError < bestError
# This is our best candidate so far. Store it.
candidate = flexRange
bestError = totalError
if candidate?
console.log "flexContext adjustment: we found a better candidate!"
charRange = candidate
analysis = @analyzeMatch pattern, charRange, true
# Do we have to compare what we found to a pattern? # Do we have to compare what we found to a pattern?
if (not pattern?) or # "No pattern, nothing to compare. Assume it's OK." if (not pattern?) or # "No pattern, nothing to compare. Assume it's OK."
analysis.exact or # "Found text matches exactly to pattern" analysis.exact or # "Found text matches exactly to pattern"
(analysis.comparison.errorLevel <= matchThreshold) # still acceptable (analysis.comparison.errorLevel <= matchThreshold) # still acceptable
mappings = @mapper.getMappingsForCharRange prefixEnd, suffixStart mappings = @mapper.getMappingsForCharRange charRange.start, charRange.end
# Collect the results # Collect the results
match = {} match = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment