Commit b68dae47 authored by Robert Knight's avatar Robert Knight

Use cursor-based pagination through search API results

Optimize fetching pages of results from the backend by using
cursor-based paging (via `search_after`) rather than offset-based paging
(via `offset`, `limit`). Cursor-based paging is more efficient to handle in
the backend as the starting position of a page within the overall result set increases.

This also fixes a race condition where results at the boundary between
pages might be fetched twice or not fetched if new matches for the query
become available while results are fetched.
parent d6820c4b
......@@ -70,12 +70,12 @@ export default class SearchClient extends TinyEmitter {
* Fetch a batch of annotations starting from `offset`.
*
* @param {SearchQuery} query
* @param {number} offset
* @param {string} [searchAfter]
*/
async _getBatch(query, offset) {
async _getBatch(query, searchAfter) {
/** @type {SearchQuery} */
const searchQuery = {
limit: this._chunkSize,
offset: offset,
sort: this._sortBy,
order: this._sortOrder,
_separate_replies: this._separateReplies,
......@@ -83,6 +83,10 @@ export default class SearchClient extends TinyEmitter {
...query,
};
if (searchAfter) {
searchQuery.search_after = searchAfter;
}
try {
const results = await this._searchFn(searchQuery);
if (this._canceled) {
......@@ -122,15 +126,15 @@ export default class SearchClient extends TinyEmitter {
this._results = this._results.concat(chunk);
}
// Check if there are additional pages of results to fetch. In addition to
// checking the `total` figure from the server, we also require that at
// least one result was returned in the current page, otherwise we would
// end up repeating the same query for the next page. If the server's
// `total` count is incorrect for any reason, that will lead to the client
// polling the server indefinitely.
const nextOffset = offset + results.rows.length;
if (results.total > nextOffset && chunk.length > 0) {
this._getBatch(query, nextOffset);
// If the current batch was full, there might be additional batches available.
const nextBatchAvailable = chunk.length === this._chunkSize;
// Get the cursor for the start of the next batch.
const nextSearchAfter =
chunk.length > 0 ? chunk[chunk.length - 1][this._sortBy] : null;
if (nextBatchAvailable && nextSearchAfter) {
this._getBatch(query, nextSearchAfter);
} else {
if (!this._incremental) {
this.emit('results', this._results);
......@@ -161,7 +165,7 @@ export default class SearchClient extends TinyEmitter {
get(query) {
this._results = [];
this._resultCount = null;
this._getBatch(query, 0);
this._getBatch(query);
}
/**
......
......@@ -10,23 +10,59 @@ function delay(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
describe('SearchClient', () => {
const RESULTS = [
{ id: 'one' },
{ id: 'two' },
{ id: 'three' },
{ id: 'four' },
];
const RESULTS = [
{ id: 'one', created: '2020-01-01', updated: '2020-01-01' },
{ id: 'two', created: '2020-01-02', updated: '2020-01-02' },
{ id: 'three', created: '2020-01-03', updated: '2020-01-03' },
{ id: 'four', created: '2020-01-04', updated: '2020-01-04' },
];
/**
* Fake implementation of the `/api/search` API
*/
async function executeSearch(params) {
assert.isTrue(['asc', 'desc'].includes(params.order));
assert.isTrue(['created', 'updated'].includes(params.sort));
assert.typeOf(params.limit, 'number');
let rows = params.search_after
? RESULTS.filter(ann => {
if (params.order === 'asc') {
return ann[params.sort] > params.search_after;
} else {
return ann[params.sort] < params.search_after;
}
})
: RESULTS.slice();
rows = rows
.sort((a, b) => {
const keyA = a[params.sort];
const keyB = b[params.sort];
if (keyA === keyB) {
return 0;
}
if (params.order === 'asc') {
return keyA < keyB ? -1 : 1;
} else {
return keyA > keyB ? -1 : 1;
}
})
.slice(0, params.limit);
return {
rows,
total: RESULTS.length,
};
}
describe('SearchClient', () => {
let fakeSearchFn;
beforeEach(() => {
fakeSearchFn = sinon.spy(function (params) {
return Promise.resolve({
rows: RESULTS.slice(params.offset, params.offset + params.limit),
total: RESULTS.length,
});
});
fakeSearchFn = sinon.spy(executeSearch);
});
it('emits "results"', () => {
......@@ -82,30 +118,6 @@ describe('SearchClient', () => {
});
});
it('stops fetching chunks if the results array is empty', () => {
// Simulate a situation where the `total` count for the server is incorrect
// and we appear to have reached the end of the result list even though
// `total` implies that there should be more results available.
//
// In that case the client should stop trying to fetch additional pages.
fakeSearchFn = sinon.spy(() => {
return Promise.resolve({
rows: [],
total: 1000,
});
});
const client = new SearchClient(fakeSearchFn, { chunkSize: 2 });
const onResults = sinon.stub();
client.on('results', onResults);
client.get({ uri: 'http://example.com' });
return awaitEvent(client, 'end').then(() => {
assert.calledWith(onResults, []);
assert.calledOnce(fakeSearchFn);
});
});
it('emits "results" once in non-incremental mode', () => {
const client = new SearchClient(fakeSearchFn, {
chunkSize: 2,
......
......@@ -145,12 +145,13 @@
* for the complete list and usage of each.
*
* @typedef SearchQuery
* @prop {number} [limit]
* @prop {number} [offset]
* @prop {string[]} [uri]
* @prop {string} [group]
* @prop {string} [references]
* @prop {number} [offset]
* @prop {number} [limit]
* @prop {string} [order]
* @prop {string} [references]
* @prop {string} [search_after]
* @prop {string} [sort]
* @prop {boolean} [_separate_replies] - Unofficial param that causes replies
* to be returned in a separate `replies` field
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment