Commit 160feeb8 authored by Robert Knight

Split backend search / filter fields and support more focus filters

Refactor query parsing functions in preparation for adding new focus
modes:

 - Use different sets of field names when parsing queries for the sidebar's
   filter bar and backend search. The supported filters are different.

 - Make the `focusFilters` argument to `parseFilterQuery` more general so we can
   support adding in additional filters. The immediate use case is adding in
   `cfi` and `page` filters to support new focus modes.
parent 11f47be6
/** /**
* Parse annotation filter queries into structured representations. * Names of supported fields that can be specified via `{field}:{term}`
* * in {@link parseFilterQuery}.
* Provides methods to parse Lucene-style queries ("foo tag: bar")
* into structured representations which are then used by other services to
* filter annotations displayed to the user or fetched from the API.
*/ */
// Field names accepted by `parseFilterQuery`, listed alphabetically.
export type FilterField =
  | 'cfi'
  | 'page'
  | 'quote'
  | 'since'
  | 'tag'
  | 'text'
  | 'uri'
  | 'user';
const filterFields = [ const filterFields: FilterField[] = [
'cfi', 'cfi',
'group',
'quote', 'quote',
'page', 'page',
'since', 'since',
...@@ -18,20 +23,41 @@ const filterFields = [ ...@@ -18,20 +23,41 @@ const filterFields = [
'user', 'user',
]; ];
/**
 * Field names recognized in `{field}:{term}` terms by
 * {@link parseHypothesisSearchQuery}.
 */
export type SearchField =
  | 'group'
  | 'quote'
  | 'tag'
  | 'text'
  | 'uri'
  | 'user';

/**
 * Runtime list of every {@link SearchField}; handed to the tokenizer and term
 * splitter when parsing backend search queries.
 */
const searchFields: Array<SearchField> = [
  'group', 'quote', 'tag',
  'text', 'uri', 'user',
];
/** /**
* Splits a search term into filter and data. * Splits a search term into filter and data.
* *
* ie. 'user:johndoe' -> ['user', 'johndoe'] * eg. 'user:johndoe' -> ['user', 'johndoe']
* 'example:text' -> [null, 'example:text'] * 'example:text' -> [null, 'example:text']
*
* @param term - The query term to parse
* @param fieldNames - The set of recognized field names
*/ */
function splitTerm(term: string): [null | string, string] { function splitTerm(
term: string,
fieldNames: string[],
): [null | string, string] {
const filter = term.slice(0, term.indexOf(':')); const filter = term.slice(0, term.indexOf(':'));
if (!filter) { if (!filter) {
// The whole term is data // The whole term is data
return [null, term]; return [null, term];
} }
if (filterFields.includes(filter)) { if (fieldNames.includes(filter)) {
const data = term.slice(filter.length + 1); const data = term.slice(filter.length + 1);
return [filter, data]; return [filter, data];
} else { } else {
...@@ -61,12 +87,12 @@ function removeSurroundingQuotes(text: string) { ...@@ -61,12 +87,12 @@ function removeSurroundingQuotes(text: string) {
/** /**
* Tokenize a search query. * Tokenize a search query.
* *
* Split `searchText` into an array of non-empty tokens. Terms not contained * Split `query` into an array of non-empty tokens. Terms not contained
* within quotes are split on whitespace. Terms inside single or double quotes * within quotes are split on whitespace. Terms inside single or double quotes
* are returned as whole tokens, with the surrounding quotes removed. * are returned as whole tokens, with the surrounding quotes removed.
*/ */
function tokenize(searchText: string): string[] { function tokenize(query: string, fieldNames: string[]): string[] {
const tokenMatches = searchText.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g); const tokenMatches = query.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g);
if (!tokenMatches) { if (!tokenMatches) {
return []; return [];
} }
...@@ -77,7 +103,7 @@ function tokenize(searchText: string): string[] { ...@@ -77,7 +103,7 @@ function tokenize(searchText: string): string[] {
.map(token => { .map(token => {
// Strip quotes from field values. // Strip quotes from field values.
// eg. `tag:"foo bar"` => `tag:foo bar`. // eg. `tag:"foo bar"` => `tag:foo bar`.
const [filter, data] = splitTerm(token); const [filter, data] = splitTerm(token, fieldNames);
if (filter) { if (filter) {
return filter + ':' + removeSurroundingQuotes(data); return filter + ':' + removeSurroundingQuotes(data);
} else { } else {
...@@ -91,15 +117,15 @@ function tokenize(searchText: string): string[] { ...@@ -91,15 +117,15 @@ function tokenize(searchText: string): string[] {
* be used when constructing queries to the Hypothesis search API. * be used when constructing queries to the Hypothesis search API.
*/ */
export function parseHypothesisSearchQuery( export function parseHypothesisSearchQuery(
searchText: string, query: string,
): Record<string, string[]> { ): Record<string, string[]> {
const obj = {} as Record<string, string[]>; const obj = {} as Record<string, string[]>;
const backendFilter = (field: string) => (field === 'tag' ? 'tags' : field); const backendFilter = (field: string) => (field === 'tag' ? 'tags' : field);
const terms = tokenize(searchText); const terms = tokenize(query, searchFields);
for (const term of terms) { for (const term of terms) {
let [field, data] = splitTerm(term); let [field, data] = splitTerm(term, searchFields);
if (!field) { if (!field) {
field = 'any'; field = 'any';
data = term; data = term;
...@@ -117,13 +143,15 @@ export function parseHypothesisSearchQuery( ...@@ -117,13 +143,15 @@ export function parseHypothesisSearchQuery(
} }
export type Facet = { export type Facet = {
/**
* Whether to require annotations match all values in `terms` ("and") or at
* least one value in `terms` ("or").
*/
operator: 'and' | 'or'; operator: 'and' | 'or';
terms: string[] | number[]; terms: string[] | number[];
}; };
export type FocusFilter = { export type ParsedQuery = Record<FilterField | 'any', Facet>;
user?: string;
};
/** /**
* Parse a user-provided query into a map of filter field to term. * Parse a user-provided query into a map of filter field to term.
...@@ -131,25 +159,35 @@ export type FocusFilter = { ...@@ -131,25 +159,35 @@ export type FocusFilter = {
* Terms that are not associated with any particular field are stored under the * Terms that are not associated with any particular field are stored under the
* "any" property. * "any" property.
* *
* @param searchText - Filter query to parse * @param query - Filter query to parse
* @param focusFilters - Additional query terms to merge with the results of * @param addedFilters - Additional query terms to merge with the results of
* parsing `searchText`. * parsing `query`.
*/ */
export function parseFilterQuery( export function parseFilterQuery(
searchText: string, query: string,
focusFilters: FocusFilter = {}, addedFilters: Partial<Record<FilterField, string>> = {},
): Record<string, Facet> { ): ParsedQuery {
const initialTerms = (field: FilterField) => {
const init = addedFilters[field];
if (typeof init === 'string') {
return [init];
} else {
return [];
}
};
const cfi = initialTerms('cfi');
const page = initialTerms('page');
const quote = initialTerms('quote');
const tag = initialTerms('tag');
const text = initialTerms('text');
const uri = initialTerms('uri');
const user = initialTerms('user');
const any = []; const any = [];
const cfi = []; const since: number[] = [];
const page = [];
const quote = []; const terms = tokenize(query, filterFields);
const since = [];
const tag = [];
const text = [];
const uri = [];
const user = focusFilters.user ? [focusFilters.user] : [];
const terms = tokenize(searchText);
for (const term of terms) { for (const term of terms) {
const filter = term.slice(0, term.indexOf(':')); const filter = term.slice(0, term.indexOf(':'));
......
...@@ -180,6 +180,26 @@ describe('sidebar/helpers/query-parser', () => { ...@@ -180,6 +180,26 @@ describe('sidebar/helpers/query-parser', () => {
}, },
}, },
}, },
// "group:" and "any:" are currently not treated as search fields in
// the sidebar filter.
{
query: 'group:abcd',
expectedFilter: {
any: {
operator: 'and',
terms: ['group:abcd'],
},
},
},
{
query: 'any:foo',
expectedFilter: {
any: {
operator: 'and',
terms: ['any:foo'],
},
},
},
].forEach(({ query, expectedFilter }) => { ].forEach(({ query, expectedFilter }) => {
it('parses a query', () => { it('parses a query', () => {
const filter = parseFilterQuery(query); const filter = parseFilterQuery(query);
...@@ -241,21 +261,90 @@ describe('sidebar/helpers/query-parser', () => { ...@@ -241,21 +261,90 @@ describe('sidebar/helpers/query-parser', () => {
}); });
}); });
it('adds additional filters to result', () => { [
const filter = parseFilterQuery('', { // Standalone filters
user: 'fakeusername', {
}); addedFilters: { user: 'fakeusername' },
// Remove empty facets. expected: {
Object.keys(filter).forEach(k => { user: {
if (filter[k].terms.length === 0) { operator: 'or',
delete filter[k]; terms: ['fakeusername'],
} },
}); },
assert.deepEqual(filter, { },
user: { {
operator: 'or', addedFilters: { page: '2-4' },
terms: ['fakeusername'], expected: {
page: {
operator: 'or',
terms: ['2-4'],
},
},
},
{
addedFilters: { cfi: '/2/2-/2/6' },
expected: {
cfi: {
operator: 'or',
terms: ['/2/2-/2/6'],
},
}, },
},
// Combined filters
{
addedFilters: { user: 'fakeusername' },
query: 'user:otheruser',
expected: {
user: {
operator: 'or',
terms: ['fakeusername', 'otheruser'],
},
},
},
{
addedFilters: { user: 'fakeusername' },
query: 'foo',
expected: {
any: {
operator: 'and',
terms: ['foo'],
},
user: {
operator: 'or',
terms: ['fakeusername'],
},
},
},
{
addedFilters: { page: '2-4' },
query: 'page:6',
expected: {
page: {
operator: 'or',
terms: ['2-4', '6'],
},
},
},
{
addedFilters: { cfi: '/2/2-/2/6' },
query: 'cfi:/4/2-/4/4',
expected: {
cfi: {
operator: 'or',
terms: ['/2/2-/2/6', '/4/2-/4/4'],
},
},
},
].forEach(({ addedFilters, query, expected }) => {
it('adds additional filters to result', () => {
const filter = parseFilterQuery(query ?? '', addedFilters);
// Remove empty facets.
Object.keys(filter).forEach(k => {
if (filter[k].terms.length === 0) {
delete filter[k];
}
});
assert.deepEqual(filter, expected);
}); });
}); });
}); });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment