Commit 160feeb8 authored by Robert Knight's avatar Robert Knight

Split backend search / filter fields and support more focus filters

Refactor query parsing functions in preparation for adding new focus
modes:

 - Use different sets of field names when parsing queries for the sidebar's
   filter bar and backend search. The supported filters are different.

 - Make the `focusFilters` argument to `parseFilterQuery` more general so we can
   support adding in additional filters. The immediate use case is adding in
   `cfi` and `page` filters to support new focus modes.
parent 11f47be6
/**
* Parse annotation filter queries into structured representations.
*
* Provides methods to parse Lucene-style queries ("foo tag: bar")
* into structured representations which are then used by other services to
* filter annotations displayed to the user or fetched from the API.
* Names of supported fields that can be specified via `{filename}:{term}`
* in {@link parseFilterQuery}.
*/
export type FilterField =
| 'cfi'
| 'quote'
| 'page'
| 'since'
| 'tag'
| 'text'
| 'uri'
| 'user';
const filterFields = [
const filterFields: FilterField[] = [
'cfi',
'group',
'quote',
'page',
'since',
......@@ -18,20 +23,41 @@ const filterFields = [
'user',
];
/**
* Names of fields that can be used in `{field}:{term}` queries with
* {@link parseHypothesisSearchQuery}.
*/
export type SearchField = 'group' | 'quote' | 'tag' | 'text' | 'uri' | 'user';
const searchFields: SearchField[] = [
'group',
'quote',
'tag',
'text',
'uri',
'user',
];
/**
* Splits a search term into filter and data.
*
* ie. 'user:johndoe' -> ['user', 'johndoe']
* eg. 'user:johndoe' -> ['user', 'johndoe']
* 'example:text' -> [null, 'example:text']
*
* @param term - The query term to parse
* @param fieldNames - The set of recognized field names
*/
function splitTerm(term: string): [null | string, string] {
function splitTerm(
term: string,
fieldNames: string[],
): [null | string, string] {
const filter = term.slice(0, term.indexOf(':'));
if (!filter) {
// The whole term is data
return [null, term];
}
if (filterFields.includes(filter)) {
if (fieldNames.includes(filter)) {
const data = term.slice(filter.length + 1);
return [filter, data];
} else {
......@@ -61,12 +87,12 @@ function removeSurroundingQuotes(text: string) {
/**
* Tokenize a search query.
*
* Split `searchText` into an array of non-empty tokens. Terms not contained
* Split `query` into an array of non-empty tokens. Terms not contained
* within quotes are split on whitespace. Terms inside single or double quotes
* are returned as whole tokens, with the surrounding quotes removed.
*/
function tokenize(searchText: string): string[] {
const tokenMatches = searchText.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g);
function tokenize(query: string, fieldNames: string[]): string[] {
const tokenMatches = query.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g);
if (!tokenMatches) {
return [];
}
......@@ -77,7 +103,7 @@ function tokenize(searchText: string): string[] {
.map(token => {
// Strip quotes from field values.
// eg. `tag:"foo bar"` => `tag:foo bar`.
const [filter, data] = splitTerm(token);
const [filter, data] = splitTerm(token, fieldNames);
if (filter) {
return filter + ':' + removeSurroundingQuotes(data);
} else {
......@@ -91,15 +117,15 @@ function tokenize(searchText: string): string[] {
* be used when constructing queries to the Hypothesis search API.
*/
export function parseHypothesisSearchQuery(
searchText: string,
query: string,
): Record<string, string[]> {
const obj = {} as Record<string, string[]>;
const backendFilter = (field: string) => (field === 'tag' ? 'tags' : field);
const terms = tokenize(searchText);
const terms = tokenize(query, searchFields);
for (const term of terms) {
let [field, data] = splitTerm(term);
let [field, data] = splitTerm(term, searchFields);
if (!field) {
field = 'any';
data = term;
......@@ -117,13 +143,15 @@ export function parseHypothesisSearchQuery(
}
export type Facet = {
/**
* Whether to require annotations match all values in `terms` ("and") or at
* least one value in `terms` ("or").
*/
operator: 'and' | 'or';
terms: string[] | number[];
};
export type FocusFilter = {
user?: string;
};
export type ParsedQuery = Record<FilterField | 'any', Facet>;
/**
* Parse a user-provided query into a map of filter field to term.
......@@ -131,25 +159,35 @@ export type FocusFilter = {
* Terms that are not associated with any particular field are stored under the
* "any" property.
*
* @param searchText - Filter query to parse
* @param focusFilters - Additional query terms to merge with the results of
* parsing `searchText`.
* @param query - Filter query to parse
* @param addedFilters - Additional query terms to merge with the results of
* parsing `query`.
*/
export function parseFilterQuery(
searchText: string,
focusFilters: FocusFilter = {},
): Record<string, Facet> {
query: string,
addedFilters: Partial<Record<FilterField, string>> = {},
): ParsedQuery {
const initialTerms = (field: FilterField) => {
const init = addedFilters[field];
if (typeof init === 'string') {
return [init];
} else {
return [];
}
};
const cfi = initialTerms('cfi');
const page = initialTerms('page');
const quote = initialTerms('quote');
const tag = initialTerms('tag');
const text = initialTerms('text');
const uri = initialTerms('uri');
const user = initialTerms('user');
const any = [];
const cfi = [];
const page = [];
const quote = [];
const since = [];
const tag = [];
const text = [];
const uri = [];
const user = focusFilters.user ? [focusFilters.user] : [];
const terms = tokenize(searchText);
const since: number[] = [];
const terms = tokenize(query, filterFields);
for (const term of terms) {
const filter = term.slice(0, term.indexOf(':'));
......
......@@ -180,6 +180,26 @@ describe('sidebar/helpers/query-parser', () => {
},
},
},
// "group:" and "any:" are currently not treated as search fields in
// the sidebar filter.
{
query: 'group:abcd',
expectedFilter: {
any: {
operator: 'and',
terms: ['group:abcd'],
},
},
},
{
query: 'any:foo',
expectedFilter: {
any: {
operator: 'and',
terms: ['any:foo'],
},
},
},
].forEach(({ query, expectedFilter }) => {
it('parses a query', () => {
const filter = parseFilterQuery(query);
......@@ -241,21 +261,90 @@ describe('sidebar/helpers/query-parser', () => {
});
});
it('adds additional filters to result', () => {
const filter = parseFilterQuery('', {
user: 'fakeusername',
});
// Remove empty facets.
Object.keys(filter).forEach(k => {
if (filter[k].terms.length === 0) {
delete filter[k];
}
});
assert.deepEqual(filter, {
user: {
operator: 'or',
terms: ['fakeusername'],
[
// Standalone filters
{
addedFilters: { user: 'fakeusername' },
expected: {
user: {
operator: 'or',
terms: ['fakeusername'],
},
},
},
{
addedFilters: { page: '2-4' },
expected: {
page: {
operator: 'or',
terms: ['2-4'],
},
},
},
{
addedFilters: { cfi: '/2/2-/2/6' },
expected: {
cfi: {
operator: 'or',
terms: ['/2/2-/2/6'],
},
},
},
// Combined filters
{
addedFilters: { user: 'fakeusername' },
query: 'user:otheruser',
expected: {
user: {
operator: 'or',
terms: ['fakeusername', 'otheruser'],
},
},
},
{
addedFilters: { user: 'fakeusername' },
query: 'foo',
expected: {
any: {
operator: 'and',
terms: ['foo'],
},
user: {
operator: 'or',
terms: ['fakeusername'],
},
},
},
{
addedFilters: { page: '2-4' },
query: 'page:6',
expected: {
page: {
operator: 'or',
terms: ['2-4', '6'],
},
},
},
{
addedFilters: { cfi: '/2/2-/2/6' },
query: 'cfi:/4/2-/4/4',
expected: {
cfi: {
operator: 'or',
terms: ['/2/2-/2/6', '/4/2-/4/4'],
},
},
},
].forEach(({ addedFilters, query, expected }) => {
it('adds additional filters to result', () => {
const filter = parseFilterQuery(query ?? '', addedFilters);
// Remove empty facets.
Object.keys(filter).forEach(k => {
if (filter[k].terms.length === 0) {
delete filter[k];
}
});
assert.deepEqual(filter, expected);
});
});
});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment