Use the 'flexContext' option for nicer fuzzy anchoring

* Updated dom-text-matcher to c93abc20 * Updated Annotator to 41b88f9f (maintenance branch) This version of Annotator uses a new method for adjusting the results of the two-phase fuzzy matching algorithm (used for anchoring). For details, see here: https://github.com/hypothesis/dom-text-matcher/commit/c93abc20bedd4450fd4eefe7d7d487aba324a088 Fixes #681.

Use the 'flexContext' option for nicer fuzzy anchoring
* Updated dom-text-matcher to c93abc20 * Updated Annotator to 41b88f9f (maintenance branch) This version of Annotator uses a new method for adjusting the results of the two-phase fuzzy matching algorithm (used for anchoring). For details, see here: https://github.com/hypothesis/dom-text-matcher/commit/c93abc20bedd4450fd4eefe7d7d487aba324a088 Fixes #681.
109c1b97 · csillag · 12795901 · 109c1b97 · 109c1b97
Commit 109c1b97 authored Aug 28, 2013 by csillag
Hide whitespace changes
Inline Side-by-side

Showing with 59 additions and 8 deletions

annotator.js h/lib/annotator.js +6 -5

dom_text_matcher.coffee h/lib/dom_text_matcher.coffee +53 -3

No files found.
--- a/h/lib/annotator.js
+++ b/h/lib/annotator.js
 /*
-** Annotator 1.2.6-dev-5fafbdf
+** Annotator 1.2.6-dev-f85315e
 ** https://github.com/okfn/annotator/
 **
 ** Copyright 2012 Aron Carroll, Rufus Pollock, and Nick Stenning.
 ** Dual licensed under the MIT and GPLv3 licenses.
 ** https://github.com/okfn/annotator/blob/master/LICENSE
 **
-** Built at: 2013-08-28 00:28:44Z
+** Built at: 2013-08-28 02:11:19Z
 */


@@ -1102,7 +1102,8 @@
      options = {
        contextMatchDistance: this.domMapper.getDocLength() * 2,
        contextMatchThreshold: 0.5,
-        patternMatchThreshold: 0.5
+        patternMatchThreshold: 0.5,
+        flexContext: true
      };
      result = this.domMatcher.searchFuzzyWithContext(prefix, suffix, quote, expectedStart, expectedEnd, false, null, options);
      if (!result.matches.length) {
@@ -1110,7 +1111,7 @@
        return null;
      }
      match = result.matches[0];
-      console.log("Fuzzy found match:");
+      console.log("2-phase fuzzy found match:");
      console.log(match);
      browserRange = new Range.BrowserRange(match.realRange);
      normalizedRange = browserRange.normalize(this.wrapper[0]);
@@ -1146,7 +1147,7 @@
        return null;
      }
      match = result.matches[0];
-      console.log("Fuzzy found match:");
+      console.log("1-phase fuzzy found match:");
      console.log(match);
      browserRange = new Range.BrowserRange(match.realRange);
      normalizedRange = browserRange.normalize(this.wrapper[0]);

--- a/h/lib/dom_text_matcher.coffee
+++ b/h/lib/dom_text_matcher.coffee
@@ -146,7 +146,8 @@ class window.DomTextMatcher
    # If the prefix is not found, give up
    unless prefixResult.length then return matches: []

-    # This is where the prefix ends
+    # This is where the prefix was found
+    prefixStart = prefixResult[0].start
    prefixEnd = prefixResult[0].end

    # Let's find out where do we expect to find the suffix!
@@ -176,9 +177,11 @@ class window.DomTextMatcher
    # If the suffix is not found, give up
    unless suffixResult.length then return matches: []

-    # This is where the suffix starts
+    # This is where the suffix was found
    suffixStart = prefixEnd + suffixResult[0].start
+    suffixEnd = prefixEnd + suffixResult[0].end

+    # This is the range between the prefix and the suffix
    charRange =
      start: prefixEnd
      end: suffixStart
@@ -189,11 +192,58 @@ class window.DomTextMatcher
    # See how good a match we have
    analysis = @analyzeMatch pattern, charRange, true

+    # Should we try to find a better match by moving the
+    # initial match around a little bit, even if this has
+    # a negative impact on the similarity of the context?
+    if pattern? and options.flexContext and not analysis.exact
+      # Do we have an exact match for the quote around here?
+
+      if not @pm then @pm = new window.DTM_ExactMatcher
+      @pm.setDistinct false
+      @pm.setCaseSensitive false
+
+      flexMatches = @pm.search @mapper.corpus[prefixStart..suffixEnd], pattern
+      delete candidate
+      bestError = 2
+
+      for flexMatch in flexMatches
+
+        # Calculate the range that matched the quote
+        flexRange =
+          start: prefixStart + flexMatch.start
+          end: prefixStart + flexMatch.end
+
+        # Check how the prefix would fare
+        prefixRange = start: prefixStart, end: flexRange.start
+        a1 = @analyzeMatch prefix, prefixRange, true
+        prefixError = if a1.exact then 0 else a1.comparison.errorLevel
+
+        # Check how the suffix would fare
+        suffixRange = start: flexRange.end, end: suffixEnd
+        a2 = @analyzeMatch suffix, suffixRange, true
+        suffixError = if a2.exact then 0 else a2.comparison.errorLevel
+
+        # Did we get at least one exact match (prefix or suffix)?
+        if a1.exact or a2.exact
+          # Yes, we did. Calculate the total error
+          totalError = prefixError + suffixError
+
+          # Is this better than our best bet?
+          if totalError < bestError
+            # This is our best candidate so far. Store it.
+            candidate = flexRange
+            bestError = totalError
+
+      if candidate?
+        console.log "flexContext adjustment: we found a better candidate!"
+        charRange = candidate
+        analysis = @analyzeMatch pattern, charRange, true
+
    # Do we have to compare what we found to a pattern?
    if (not pattern?) or # "No pattern, nothing to compare. Assume it's OK."
        analysis.exact or # "Found text matches exactly to pattern"
        (analysis.comparison.errorLevel <= matchThreshold) # still acceptable
-      mappings = @mapper.getMappingsForCharRange prefixEnd, suffixStart
+      mappings = @mapper.getMappingsForCharRange charRange.start, charRange.end

      # Collect the results
      match = {}