Commit d4d8c0bf authored by Robert Knight

Refactor annotation filtering to improve typing and documentation

Replace the loosely typed `Checker` interface in `view-filter.js`, which
used `any` for field values, with a `Matcher` interface which is
parametrized by the type of parsed query terms/field values. In order to
do this it was necessary to make normalization of query terms/field
values part of the `Matcher` interface.

In the process, some additional simplifications/clean-ups have been done:

 - Remove the `autofalse` check. This was a very minor optimization, and many of
   these checks were ineffective in the sense that they could never trigger the
   early `false` result that short-circuits filtering, given the current types
   of `Annotation` object fields.

 - Fix a very minor bug where user query terms that contained a suffix
   of the username followed by a prefix of the display name could match.
   The query term should be tested against the username and display name
   separately.

 - Eliminate unused `field` argument to TermFilter constructor
parent 90cb2511
......@@ -80,11 +80,17 @@ describe('sidebar/helpers/view-filter', () => {
describe('"any" field', () => {
it('finds matches in any field', () => {
const defaultFields = {
text: '',
tags: [],
uri: 'https://example.com',
target: [{}],
};
const annotations = [
{ id: 1, text: poem.tiger, target: [{}] },
{ id: 4, user: 'lion', target: [{}] },
{ id: 2, user: 'Tyger', target: [{}] },
{ id: 3, tags: ['Tyger'], target: [{}] },
{ id: 1, ...defaultFields, text: poem.tiger },
{ id: 4, ...defaultFields, user: 'lion' },
{ id: 2, ...defaultFields, user: 'Tyger' },
{ id: 3, ...defaultFields, tags: ['Tyger'] },
];
const filters = { any: { terms: ['Tyger'], operator: 'and' } };
......@@ -187,6 +193,12 @@ describe('sidebar/helpers/view-filter', () => {
assert.deepEqual(result, [anns[1].id, anns[2].id, anns[3].id]);
});
it('matches username and display name independently', () => {
const anns = [annotationWithUser('dean31', 'James Dean')];
const result = filterAnnotations(anns, userQuery('dean31 james'));
assert.equal(result.length, 0);
});
it('ignores display name if not set', () => {
const anns = [annotationWithUser('msmith')];
const result = filterAnnotations(anns, userQuery('null'));
......@@ -231,6 +243,8 @@ describe('sidebar/helpers/view-filter', () => {
id: 1,
tags: ['foo'],
target: [{}],
text: '',
url: 'https://example.com',
};
const filters = {
any: {
......
......@@ -21,52 +21,43 @@ import * as unicodeUtils from '../util/unicode';
*/
/**
* @typedef Checker
* @prop {(ann: Annotation) => boolean} autofalse
* @prop {(ann: Annotation) => any|any[]} value
* @prop {(term: any, value: any) => boolean} match
* A Matcher specifies how to test whether an annotation matches a query term
* for a specific field.
*
* @template [T=string] - Type of parsed query terms and field values
* @typedef Matcher
* @prop {(ann: Annotation) => T[]} fieldValues - Extract the field values to be
* matched against a query term
* @prop {(value: T, term: T) => boolean} matches - Test whether a query term
* matches a field value. Both value and term will have been normalized using
* `normalize`.
* @prop {(val: T) => T} normalize - Normalize a parsed term or field value for
* comparison
*/
/**
 * Return the display name stored in an annotation's `user_info`.
 *
 * Yields an empty string when `user_info` is absent or when its
 * `display_name` is missing or empty.
 *
 * @param {Annotation} ann
 */
function displayName(ann) {
  const name = ann.user_info?.display_name;
  return name ? name : '';
}
/**
* Normalize a field value or query term for comparison.
* Normalize a string query term or field value.
*
* @template {string|number} T
* @param {T} val
* @param {string} val
*/
function normalize(val) {
if (typeof val !== 'string') {
return val;
}
function normalizeStr(val) {
  // Pipeline: `unicodeUtils.normalize`, then `unicodeUtils.fold`, then
  // lower-casing, so that terms and field values compare on equal footing.
  const normalized = unicodeUtils.normalize(val);
  const folded = unicodeUtils.fold(normalized);
  return folded.toLowerCase();
}
/**
* Filter that matches annotations against a single field & term.
*
* eg. "quote:foo" or "text:bar"
* Filter that matches annotations against a single query term.
*
* @template TermType
* @implements {Filter}
*/
class TermFilter {
/**
* @param {string} field - Name of field to match
* @param {string|number} term - Query term or value. Most fields use string
* terms, but eg. `since` is parsed into a number first.
* @param {Checker} checker - Functions for extracting term values from
* an annotation and checking whether they match a query term.
* @param {TermType} term
* @param {Matcher<TermType>} matcher
*/
constructor(field, term, checker) {
this.field = field;
this.term = term;
this.checker = checker;
constructor(term, matcher) {
this.term = matcher.normalize(term);
this.matcher = matcher;
}
/**
......@@ -75,18 +66,10 @@ class TermFilter {
* @param {Annotation} ann
*/
matches(ann) {
const checker = this.checker;
if (checker.autofalse && checker.autofalse(ann)) {
return false;
}
let value = checker.value(ann);
if (Array.isArray(value)) {
value = value.map(normalize);
} else {
value = normalize(value);
}
return checker.match(this.term, value);
const matcher = this.matcher;
return matcher
.fieldValues(ann)
.some(value => matcher.matches(matcher.normalize(value), this.term));
}
}
......@@ -120,50 +103,45 @@ class BinaryOpFilter {
}
/**
* Functions for extracting field values from annotations and testing whether
* they match a query term.
* Create a matcher that tests whether a query term appears anywhere in a
* string field value.
*
* [facet_name]:
* autofalse: a function for a preliminary false match result
* value: a function to extract to facet value for the annotation.
* match: a function to check if the extracted value matches the facet value
* @param {(ann: Annotation) => string[]} fieldValues
* @return {Matcher}
*/
function stringFieldMatcher(fieldValues) {
  // A field value matches when the query term occurs anywhere inside it
  // (substring test).
  const matches = (value, term) => value.includes(term);

  return { fieldValues, matches, normalize: normalizeStr };
}
/**
* Map of field name (from a parsed query) to matcher for that field.
*
* @type {Record<string,Checker>}
* @type {Record<string, Matcher|Matcher<number>>}
*/
const fieldMatchers = {
quote: {
autofalse: ann => (ann.references || []).length > 0,
value: ann => quote(ann) || '',
match: (term, value) => value.indexOf(term) > -1,
},
quote: stringFieldMatcher(ann => [quote(ann) ?? '']),
/** @type {Matcher<number>} */
since: {
autofalse: ann => typeof ann.updated !== 'string',
value: ann => new Date(ann.updated),
match(term, value) {
const delta = (Date.now() - value) / 1000;
return delta <= term;
},
},
tag: {
autofalse: ann => !Array.isArray(ann.tags),
value: ann => ann.tags,
match: (term, value) => value.includes(term),
fieldValues: ann => [new Date(ann.updated).valueOf()],
matches: (updatedTime, age) => {
const delta = (Date.now() - updatedTime) / 1000;
return delta <= age;
},
text: {
autofalse: ann => typeof ann.text !== 'string',
value: ann => ann.text,
match: (term, value) => value.indexOf(term) > -1,
},
uri: {
autofalse: ann => typeof ann.uri !== 'string',
value: ann => ann.uri,
match: (term, value) => value.indexOf(term) > -1,
},
user: {
autofalse: ann => typeof ann.user !== 'string',
value: ann => ann.user + ' ' + displayName(ann),
match: (term, value) => value.indexOf(term) > -1,
normalize: timestamp => timestamp,
},
tag: stringFieldMatcher(ann => ann.tags),
text: stringFieldMatcher(ann => [ann.text]),
uri: stringFieldMatcher(ann => [ann.uri]),
user: stringFieldMatcher(ann => [
ann.user,
ann.user_info?.display_name ?? '',
]),
};
/**
......@@ -174,28 +152,36 @@ const fieldMatchers = {
* @return {string[]} IDs of matching annotations.
*/
export function filterAnnotations(annotations, filters) {
/**
 * Build a `TermFilter` for `term` using the matcher registered for `field`
 * in `fieldMatchers`.
 *
 * @template TermType
 * @param {string} field
 * @param {TermType} term
 */
const makeTermFilter = (field, term) => {
  // The cast suppresses the type error about a possible mismatch between the
  // query term type and the type the matcher expects. We assume they agree.
  const matcher = /** @type {Matcher<any>} */ (fieldMatchers[field]);
  return new TermFilter(term, matcher);
};
// Convert the input filter object into a filter tree, expanding "any"
// filters.
const fieldFilters = Object.entries(filters)
.filter(([, filter]) => filter.terms.length > 0)
.map(([field, filter]) => {
const terms = filter.terms.map(normalize);
let termFilters;
if (field === 'any') {
const anyFields = ['quote', 'text', 'tag', 'user'];
termFilters = terms.map(
termFilters = filter.terms.map(
term =>
new BinaryOpFilter(
'or',
anyFields.map(
field => new TermFilter(field, term, fieldMatchers[field])
)
anyFields.map(field => makeTermFilter(field, term))
)
);
} else {
termFilters = terms.map(
term => new TermFilter(field, term, fieldMatchers[field])
);
termFilters = filter.terms.map(term => makeTermFilter(field, term));
}
return new BinaryOpFilter(filter.operator, termFilters);
});
......
......@@ -7,33 +7,42 @@ import { useRootThread } from '../../components/hooks/use-root-thread';
import { ServiceContext } from '../../service-context';
import { createSidebarStore } from '../../store';
// Baseline annotation fields that the fixture annotations below spread in
// (`...defaultFields`); individual fixtures may override any of them.
const defaultFields = {
$orphan: false,
tags: [],
target: [{ source: 'https://example.com' }],
references: [],
uri: 'https://example.com',
user: 'acct:foo@hypothes.is',
};
const fixtures = {
annotations: [
{
$orphan: false,
...defaultFields,
created: 50,
id: '1',
references: [],
target: [{ selector: [] }],
text: 'first annotation',
updated: 300,
},
{
$orphan: false,
...defaultFields,
created: 200,
id: '2',
references: [],
text: 'second annotation',
target: [{ selector: [] }],
updated: 200,
},
{
$orphan: false,
...defaultFields,
created: 100,
id: '3',
references: ['2'],
text: 'reply to first annotation',
updated: 100,
user: 'acct:bar@hypothes.is',
},
],
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment