[search-index] Limit search index size

Summary:
This diff modifies the SearchIndexer class to handle limiting the search
index size. It does this by periodically re-evaluating the window of
the n most recent items in a particular index where n is the max size of
the index. It then unindexes the items which are marked as indexed but
are no longer in the window and indexes the things that are in the window
but aren't marked as indexed.

Test Plan:
Run locally with a reduced thread index size and verify that the index
includes the most recent items and is the correct size. Also verify that
the queries make proper use of the fast SQLite indices.

Reviewers: evan, juan

Reviewed By: evan

Differential Revision: https://phab.nylas.com/D3741
This commit is contained in:
Mark Hahnenberg 2017-01-19 11:43:43 -08:00
parent 3710438296
commit 5730d23da8
10 changed files with 151 additions and 84 deletions

View file

@ -8,6 +8,10 @@ const INDEX_VERSION = 1;
class ContactSearchIndexer extends ModelSearchIndexer {
get MaxIndexSize() {
return 5000;
}
get ModelClass() {
return Contact;
}

View file

@ -5,6 +5,10 @@ const INDEX_VERSION = 1
class EventSearchIndexer extends ModelSearchIndexer {
get MaxIndexSize() {
return 5000;
}
get ConfigKey() {
return 'eventSearchIndexVersion';
}

View file

@ -1,11 +1,11 @@
import _ from 'underscore';
import {
DatabaseStore,
} from 'nylas-exports'
// Page size applied with `.limit(CHUNK_SIZE)` to every per-model indexing query.
const CHUNK_SIZE = 10;
// NOTE(review): presumably the share of CPU time the indexer may consume when the
// next timeout is computed; that computation is outside this view — confirm.
const FRACTION_CPU_AVAILABLE = 0.05;
// Time-slice budget in milliseconds; kept because a time-slice check in `run`
// may still reference it.
const MAX_TIME_SLICE_MILLIS = 100;
// Each name is declared exactly once: a repeated `const` is a SyntaxError.
// MIN_TIMEOUT keeps the later of the two values that were present (1000, not 100).
const MIN_TIMEOUT = 1000;
const MAX_TIMEOUT = 5 * 60 * 1000; // 5 minutes
export default class SearchIndexer {
@ -16,31 +16,78 @@ export default class SearchIndexer {
this._lastTimeStop = null;
}
registerSearchableModel(klass, indexCallback) {
this._searchableModels[klass.name] = {klass, cb: indexCallback};
registerSearchableModel({modelClass, indexSize, indexCallback, unindexCallback}) {
this._searchableModels[modelClass.name] = {modelClass, indexSize, indexCallback, unindexCallback};
}
unregisterSearchableModel(klass) {
delete this._searchableModels[klass.name];
unregisterSearchableModel(modelClass) {
delete this._searchableModels[modelClass.name];
}
async _getNewItemsToIndex() {
const results = await Promise.all(Object.keys(this._searchableModels).map((modelName) => {
const modelClass = this._searchableModels[modelName].klass;
const query = DatabaseStore.findAll(modelClass)
.where(modelClass.attributes.isSearchIndexed.equal(false))
.order(modelClass.attributes.id.ascending())
.limit(CHUNK_SIZE);
// console.info(query.sql());
return query;
async _getIndexCutoff(modelClass, indexSize) {
const query = DatabaseStore.findAll(modelClass)
.order(modelClass.naturalSortOrder())
.offset(indexSize)
.limit(1)
// console.info('SearchIndexer: _getIndexCutoff query', query.sql());
const models = await query;
return models[0];
}
_getNewUnindexed(modelClass, indexSize, cutoff) {
const whereConds = [modelClass.attributes.isSearchIndexed.equal(false)];
if (cutoff) {
whereConds.push(modelClass.sortOrderAttribute().greaterThan(cutoff[modelClass.sortOrderAttribute().modelKey]));
}
const query = DatabaseStore.findAll(modelClass)
.where(whereConds)
.limit(CHUNK_SIZE)
.order(modelClass.naturalSortOrder())
// console.info('SearchIndexer: _getNewUnindexed query', query.sql());
return query;
}
_getOldIndexed(modelClass, cutoff) {
// If there's no cutoff then that means we haven't reached the max index size yet.
if (!cutoff) {
return Promise.resolve([]);
}
const whereConds = [
modelClass.attributes.isSearchIndexed.equal(true),
modelClass.sortOrderAttribute().lessThanOrEqualTo(cutoff[modelClass.sortOrderAttribute().modelKey]),
];
const query = DatabaseStore.findAll(modelClass)
.where(whereConds)
.limit(CHUNK_SIZE)
.order(modelClass.naturalSortOrder())
// console.info('SearchIndexer: _getOldIndexed query', query.sql());
return query;
}
async _getIndexDiff() {
const results = await Promise.all(Object.keys(this._searchableModels).map(async (modelName) => {
const {modelClass, indexSize} = this._searchableModels[modelName];
const cutoff = await this._getIndexCutoff(modelClass, indexSize);
const [toIndex, toUnindex] = await Promise.all([
this._getNewUnindexed(modelClass, indexSize, cutoff),
this._getOldIndexed(modelClass, cutoff),
]);
// console.info('SearchIndexer: ', modelClass.name);
// console.info('SearchIndexer: _getIndexCutoff cutoff', cutoff);
// console.info('SearchIndexer: _getIndexDiff toIndex', toIndex.map((model) => [model.isSearchIndexed, model.subject]));
// console.info('SearchIndexer: _getIndexDiff toUnindex', toUnindex.map((model) => [model.isSearchIndexed, model.subject]));
return [toIndex, toUnindex];
}));
return results.reduce((acc, curr) => acc.concat(curr), []);
const [toIndex, toUnindex] = _.unzip(results).map((l) => _.flatten(l))
return {toIndex, toUnindex};
}
_indexItems(items) {
for (const item of items) {
this._searchableModels[item.constructor.name].cb(item);
}
return Promise.all([items.map((item) => this._searchableModels[item.constructor.name].indexCallback(item))]);
}
_unindexItems(items) {
return Promise.all([items.map((item) => this._searchableModels[item.constructor.name].unindexCallback(item))]);
}
notifyHasIndexingToDo() {
@ -65,42 +112,26 @@ export default class SearchIndexer {
setTimeout(() => this.run(), this._computeNextTimeout());
}
run() {
async run() {
if (!this._hasIndexingToDo) {
return;
}
const start = new Date();
let current = new Date();
let firstIter = true;
let numItemsIndexed = 0;
const indexNextChunk = (unindexedItems) => {
if (firstIter) {
this._lastTimeStart = start;
firstIter = false;
}
if (unindexedItems.length === 0) {
this._hasIndexingToDo = false;
this._lastTimeStop = new Date();
// console.info(`Finished indexing ${numItemsIndexed} items, took ${current.getTime() - start.getTime()} ms`);
return;
}
this._indexItems(unindexedItems);
numItemsIndexed += unindexedItems.length;
current = new Date();
if (current.getTime() - start.getTime() <= MAX_TIME_SLICE_MILLIS) {
this._getNewItemsToIndex().then(indexNextChunk);
return;
}
const {toIndex, toUnindex} = await this._getIndexDiff();
if (toIndex.length !== 0 || toUnindex.length !== 0) {
await Promise.all([
this._indexItems(toIndex),
this._unindexItems(toUnindex),
]);
this._lastTimeStart = start;
this._lastTimeStop = new Date();
// console.info(`SearchIndexer: Finished indexing ${numItemsIndexed} items, took ${current.getTime() - start.getTime()} ms`);
// console.info(`SearchIndexer: ${toIndex.length} items indexed, ${toUnindex.length} items unindexed, took ${this._lastTimeStop.getTime() - this._lastTimeStart.getTime()} ms`);
this._scheduleRun();
};
this._getNewItemsToIndex().then(indexNextChunk);
} else {
// const stop = new Date();
// console.info(`SearchIndexer: No changes to index, took ${stop.getTime() - start.getTime()} ms`);
this._hasIndexingToDo = false;
}
}
}

View file

@ -6,7 +6,6 @@ import {
DatabaseStore,
} from 'nylas-exports'
// Overall thread budget for a full index build; any per-account share is derived from it.
const INDEX_SIZE = 10000
// Upper bound on indexed threads; passed as `indexSize` when Thread is registered with the indexer.
const MAX_INDEX_SIZE = 30000
// Number of pages an account's thread indexing is split into.
const CHUNKS_PER_ACCOUNT = 10
// Delay in milliseconds awaited after each page of threads has been indexed.
const INDEXING_WAIT = 1000
@ -22,7 +21,12 @@ class ThreadSearchIndexStore {
activate(indexer) {
this.indexer = indexer;
this.indexer.registerSearchableModel(Thread, (model) => this.updateThreadIndex(model));
this.indexer.registerSearchableModel({
modelClass: Thread,
indexSize: MAX_INDEX_SIZE,
indexCallback: (model) => this.updateThreadIndex(model),
unindexCallback: (model) => this.unindexThread(model),
});
const date = Date.now();
console.log('Thread Search: Initializing thread search index...')
@ -141,11 +145,7 @@ class ThreadSearchIndexStore {
/**
 * Kicks off index building for the given accounts.
 *
 * The work is delegated to the shared indexer: this method only flags that
 * there is indexing to do. A promise is returned on every path so callers can
 * chain on the result whether or not any accounts were supplied.
 *
 * @param {Array} accountIds - accounts whose threads need indexing; nothing is
 *   flagged when this is missing or empty.
 * @returns {Promise<void>}
 */
buildIndex = (accountIds) => {
  if (accountIds && accountIds.length > 0) {
    this.indexer.notifyHasIndexingToDo();
  }
  return Promise.resolve();
}
clearIndex() {
@ -160,27 +160,6 @@ class ThreadSearchIndexStore {
.filter((accId) => DatabaseStore.isIndexEmptyForAccount(accId, Thread))
}
/**
 * Indexes one account's threads in CHUNKS_PER_ACCOUNT pages, ordered by
 * lastMessageReceivedTimestamp descending.
 *
 * Each page holds Math.floor(indexSize / CHUNKS_PER_ACCOUNT) threads, so the
 * total indexed can fall short of `indexSize` by the rounding remainder.
 *
 * @param accountId - account whose threads are queried (`where({accountId})`).
 * @param indexSize - requested number of threads to index for this account.
 * @returns the promise produced by `chunks.each(...)`.
 */
indexThreadsForAccount(accountId, indexSize) {
const chunkSize = Math.floor(indexSize / CHUNKS_PER_ACCOUNT)
// One entry per page, all of the same size. `.each` is not a native Promise
// method, so this presumably relies on a Bluebird-style Promise that visits the
// entries one at a time — TODO confirm.
const chunks = Promise.resolve(_.times(CHUNKS_PER_ACCOUNT, () => chunkSize))
return chunks.each((size, idx) => {
// Page `idx` covers rows [size * idx, size * idx + size).
return DatabaseStore.findAll(Thread)
.where({accountId})
.limit(size)
.offset(size * idx)
.order(Thread.attributes.lastMessageReceivedTimestamp.descending())
// NOTE(review): `.background()` semantics are not visible here — confirm.
.background()
.then((threads) => {
return Promise.all(
threads.map(this.indexThread)
).then(() => {
// Wait INDEXING_WAIT ms after a page has been indexed before this page's promise resolves.
return new Promise((resolve) => setTimeout(resolve, INDEXING_WAIT))
})
})
})
}
indexThread = (thread) => {
return (
this.getIndexData(thread)

View file

@ -31,4 +31,24 @@ export default class AttributeString extends Attribute {
this._assertPresentAndQueryable('like', val);
return new Matcher(this, 'like', val);
}
lessThan(val) {
this._assertPresentAndQueryable('lessThanOrEqualTo', val);
return new Matcher(this, '<', val);
}
lessThanOrEqualTo(val) {
this._assertPresentAndQueryable('lessThanOrEqualTo', val);
return new Matcher(this, '<=', val);
}
greaterThan(val) {
this._assertPresentAndQueryable('greaterThanOrEqualTo', val);
return new Matcher(this, '>', val);
}
greaterThanOrEqualTo(val) {
this._assertPresentAndQueryable('greaterThanOrEqualTo', val);
return new Matcher(this, '>=', val);
}
}

View file

@ -98,6 +98,14 @@ export default class Contact extends Model {
static searchFields = ['content'];
static sortOrderAttribute = () => {
return Contact.attributes.id
}
static naturalSortOrder = () => {
return Contact.sortOrderAttribute().descending()
}
static fromString(string, {accountId} = {}) {
const emailRegex = RegExpUtils.emailRegex();
const match = emailRegex.exec(string);

View file

@ -132,6 +132,14 @@ export default class Event extends Model {
static searchFields = ['title', 'description', 'location', 'participants']
static sortOrderAttribute = () => {
return Event.attributes.id
}
static naturalSortOrder = () => {
return Event.sortOrderAttribute().descending()
}
// We use moment to parse the date so we can more easily pick up the
// current timezone of the current locale.
// We also create a start and end times that span the full day without

View file

@ -115,8 +115,12 @@ class Thread extends ModelWithMetadata {
}),
})
static sortOrderAttribute = () => {
return Thread.attributes.lastMessageReceivedTimestamp
}
static naturalSortOrder = () => {
return Thread.attributes.lastMessageReceivedTimestamp.descending()
return Thread.sortOrderAttribute().descending()
}
static additionalSQLiteConfig = {
@ -145,6 +149,7 @@ class Thread extends ModelWithMetadata {
'CREATE INDEX IF NOT EXISTS ThreadUnifiedStarredIndex ON `Thread` (last_message_received_timestamp DESC) WHERE starred = 1 AND in_all_mail = 1',
'CREATE INDEX IF NOT EXISTS ThreadIsSearchIndexedIndex ON `Thread` (is_search_indexed, id)',
'CREATE INDEX IF NOT EXISTS ThreadIsSearchIndexedLastMessageReceivedIndex ON `Thread` (is_search_indexed, last_message_received_timestamp)',
],
}

View file

@ -741,11 +741,10 @@ class DatabaseStore extends NylasStore {
isIndexEmptyForAccount(accountId, modelKlass) {
const modelTable = modelKlass.name
const searchTable = `${modelTable}Search`
const sql = (
`SELECT \`${searchTable}\`.\`content_id\` FROM \`${searchTable}\` INNER JOIN \`${modelTable}\`
ON \`${modelTable}\`.id = \`${searchTable}\`.\`content_id\` WHERE \`${modelTable}\`.\`account_id\` = ?
LIMIT 1`
`SELECT \`${modelTable}\`.\`id\` FROM \`${modelTable}\` WHERE
\`${modelTable}\`.is_search_indexed = 1 AND
\`${modelTable}\`.\`account_id\` = ? LIMIT 1`
);
return this._query(sql, [accountId]).then(result => result.length === 0);
}

View file

@ -9,6 +9,10 @@ export default class ModelSearchIndexer {
this.indexer = null;
}
get MaxIndexSize() {
throw new Error("Override me and return a number")
}
get ConfigKey() {
throw new Error("Override me and return a string config key")
}
@ -27,7 +31,12 @@ export default class ModelSearchIndexer {
activate(indexer) {
this.indexer = indexer;
this.indexer.registerSearchableModel(this.ModelClass, (model) => this._indexModel(model));
this.indexer.registerSearchableModel({
modelClass: this.ModelClass,
indexSize: this.MaxIndexSize,
indexCallback: (model) => this._indexModel(model),
unindexCallback: (model) => this._unindexModel(model),
});
this._initializeIndex();
this.unsubscribers = [