FTS5 search indexing is moving to C++

This commit is contained in:
Ben Gotow 2017-06-25 10:45:13 -07:00
parent ddb7d8d793
commit 29e7afce51
10 changed files with 0 additions and 607 deletions

View file

@ -2465,8 +2465,6 @@
94875052: src/searchable-components/search-constants.js
6217392: internal_packages/screenshot-mode/lib/main.js
3672501: keymaps/base.json
38810714: internal_packages/thread-search-index/lib/main.js
38818707: internal_packages/thread-search-index/lib/search-index-store.js
6218172: internal_packages/deltas/lib/main.js
6218486: internal_packages/deltas/lib/account-delta-connection-pool.js
6226311: internal_packages/deltas/lib/nylas-long-connection.js

View file

@ -1,38 +0,0 @@
import {
Contact,
ModelSearchIndexer,
} from 'nylas-exports';
const INDEX_VERSION = 1;

/**
 * Search indexer for `Contact` models.
 *
 * Supplies the configuration getters that `ModelSearchIndexer` requires and
 * builds the searchable text for a single contact.
 */
class ContactSearchIndexer extends ModelSearchIndexer {
  /** Config key under which the built index version is stored. */
  get ConfigKey() {
    return "contactSearchIndexVersion";
  }

  /** Version compared against the stored config value to decide on a rebuild. */
  get IndexVersion() {
    return INDEX_VERSION;
  }

  /** Model class handled by this indexer. */
  get ModelClass() {
    return Contact;
  }

  /** Index size handed to the scheduler when this indexer is activated. */
  get MaxIndexSize() {
    return 100000;
  }

  /**
   * Builds the index payload for one contact.
   *
   * @param {Contact} contact - model to index
   * @returns {{content: string}} the name, the email and the email with its
   *   first '@' replaced by a space, separated by single spaces. Missing
   *   values contribute an empty string.
   */
  getIndexDataForModel(contact) {
    const name = contact.name || '';
    const email = contact.email || '';
    // Presumably lets the local part and the domain match as separate words.
    const emailWords = email.replace('@', ' ');
    return {content: `${name} ${email} ${emailWords}`};
  }
}

export default new ContactSearchIndexer()

View file

@ -1,37 +0,0 @@
import {Event, ModelSearchIndexer} from 'nylas-exports'
const INDEX_VERSION = 1

/**
 * Search indexer for `Event` models.
 *
 * Supplies the configuration getters that `ModelSearchIndexer` requires and
 * builds the searchable fields for a single event.
 */
class EventSearchIndexer extends ModelSearchIndexer {
  /** Model class handled by this indexer. */
  get ModelClass() {
    return Event;
  }

  /** Version compared against the stored config value to decide on a rebuild. */
  get IndexVersion() {
    return INDEX_VERSION;
  }

  /** Config key under which the built index version is stored. */
  get ConfigKey() {
    return 'eventSearchIndexVersion';
  }

  /** Index size handed to the scheduler when this indexer is activated. */
  get MaxIndexSize() {
    return 5000;
  }

  /**
   * Builds the index payload for one event.
   *
   * @param {Event} event - model to index
   * @returns {{title, location, description, participants: string}} the
   *   event's text fields as-is, plus every participant rendered as
   *   "name email" (missing parts become empty strings) joined by spaces.
   */
  getIndexDataForModel(event) {
    const describeParticipant = (person) => `${person.name || ''} ${person.email || ''}`;
    const participantText = event.participants.map(describeParticipant).join(' ');
    return {
      title: event.title,
      location: event.location,
      description: event.description,
      participants: participantText,
    };
  }
}

export default new EventSearchIndexer()

View file

@ -1,17 +0,0 @@
import ThreadSearchIndexStore from './thread-search-index-store'
import ContactSearchIndexer from './contact-search-indexer'
// import EventSearchIndexer from './event-search-indexer'
/**
 * Package entry point: starts the thread index store first, then the contact
 * indexer.
 */
export function activate() {
  // TODO Calendar feature has been punted, so EventSearchIndexer is
  // intentionally left out of this list for now.
  const indexers = [ThreadSearchIndexStore, ContactSearchIndexer];
  for (const indexer of indexers) {
    indexer.activate();
  }
}
/**
 * Package teardown: stops the thread index store first, then the contact
 * indexer. The event indexer is never activated, so it is not stopped here.
 */
export function deactivate() {
  const indexers = [ThreadSearchIndexStore, ContactSearchIndexer];
  for (const indexer of indexers) {
    indexer.deactivate();
  }
}

View file

@ -1,229 +0,0 @@
import _ from 'underscore'
import {
Utils,
Thread,
AccountStore,
DatabaseStore,
SearchIndexScheduler,
} from 'nylas-exports'
const MAX_INDEX_SIZE = 100000;
const MESSAGE_BODY_LENGTH = 50000;
const INDEX_VERSION = 2;

/**
 * Keeps the Thread search index up to date.
 *
 * The store registers Thread with the search index scheduler, rebuilds the
 * index when the stored index version differs from INDEX_VERSION, and reacts
 * to account and database changes by flagging threads for (re)indexing or by
 * removing them from the index.
 */
class ThreadSearchIndexStore {
  constructor() {
    this.unsubscribers = [];
    this.indexer = SearchIndexScheduler;
    // Ids of threads this store has flagged as un-indexed and not yet seen
    // come back with `isSearchIndexed` set (see onDataChanged).
    this.threadsWaitingToBeIndexed = new Set();
  }

  /**
   * Registers Thread with the scheduler, initializes the index and, once that
   * has finished, records the index version and starts listening for account
   * and database changes.
   */
  activate() {
    this.indexer.registerSearchableModel({
      modelClass: Thread,
      indexSize: MAX_INDEX_SIZE,
      indexCallback: (model) => this.updateThreadIndex(model),
      unindexCallback: (model) => this.unindexThread(model),
    });

    const date = Date.now();
    console.log('Thread Search: Initializing thread search index...');
    this.accountIds = _.pluck(AccountStore.accounts(), 'id');

    this.initializeIndex().then(() => {
      NylasEnv.config.set('threadSearchIndexVersion', INDEX_VERSION);
      console.log(`Thread Search: Index built successfully in ${((Date.now() - date) / 1000)}s`);
      this.unsubscribers = [
        AccountStore.listen(this.onAccountsChanged),
        DatabaseStore.listen(this.onDataChanged),
      ];
    });
  }

  /**
   * @param {number} size - number of indexed items
   * @returns {boolean} true when size is missing, zero or above MAX_INDEX_SIZE
   */
  _isInvalidSize(size) {
    // `!size` already covers 0, undefined and null.
    return !size || size > MAX_INDEX_SIZE;
  }

  /**
   * Drops and recreates the index when the stored index version differs from
   * INDEX_VERSION, then asks the scheduler to index the current accounts.
   * When the version matches, only the scheduler notification happens.
   *
   * @returns {Promise} resolves once the index has been (re)initialized
   */
  initializeIndex() {
    if (NylasEnv.config.get('threadSearchIndexVersion') !== INDEX_VERSION) {
      return this.clearIndex()
        .then(() => this.buildIndex(this.accountIds));
    }
    return this.buildIndex(this.accountIds);
  }

  /**
   * When accounts change, we are only interested in knowing if an account has
   * been added or removed:
   *
   * - Added accounts are handed to `buildIndex`.
   * - Removed accounts have their threads removed from the index.
   *
   * The work is deferred with `_.defer`, and nothing happens when the list of
   * account ids is unchanged.
   */
  onAccountsChanged = () => {
    _.defer(() => {
      const latestIds = _.pluck(AccountStore.accounts(), 'id');
      if (_.isEqual(this.accountIds, latestIds)) {
        return;
      }
      const date = Date.now();
      console.log(`Thread Search: Updating thread search index for accounts ${latestIds}`);

      const newIds = _.difference(latestIds, this.accountIds);
      const removedIds = _.difference(this.accountIds, latestIds);
      const promises = [];
      if (newIds.length > 0) {
        promises.push(this.buildIndex(newIds));
      }
      if (removedIds.length > 0) {
        promises.push(
          Promise.all(removedIds.map(id => DatabaseStore.unindexModelsForAccount(id, Thread)))
        );
      }
      this.accountIds = latestIds;
      Promise.all(promises)
        .then(() => {
          console.log(`Thread Search: Index updated successfully in ${((Date.now() - date) / 1000)}s`);
        });
    });
  }

  /**
   * Reacts to database changes for Thread objects only.
   *
   * - 'persist': threads not already waiting are flagged `isSearchIndexed =
   *   false`, remembered in `threadsWaitingToBeIndexed`, persisted silently
   *   and the scheduler is notified. Threads that come back indexed are
   *   removed from the waiting set.
   * - 'unpersist': every thread is removed from the index.
   *
   * @param {{objectClass: string, objects: Thread[], type: string}} change
   */
  onDataChanged = (change) => {
    if (change.objectClass !== Thread.name) {
      return;
    }
    _.defer(async () => {
      const {objects: threads, type} = change;

      if (type === 'persist') {
        const threadsToIndex = _.uniq(
          threads.filter(t => !this.threadsWaitingToBeIndexed.has(t.id)),
          false /* isSorted */,
          t => t.id
        );
        const threadsIndexed = threads.filter(t => (
          t.isSearchIndexed && this.threadsWaitingToBeIndexed.has(t.id)
        ));
        for (const thread of threadsIndexed) {
          this.threadsWaitingToBeIndexed.delete(thread.id);
        }

        if (threadsToIndex.length > 0) {
          threadsToIndex.forEach(thread => {
            // Mark already indexed threads as unindexed so that we re-index
            // them with updates
            thread.isSearchIndexed = false;
            this.threadsWaitingToBeIndexed.add(thread.id);
          });
          await DatabaseStore.inTransaction(t => (
            t.persistModels(threadsToIndex, {silent: true, affectsJoins: false})
          ));
          this.indexer.notifyHasIndexingToDo();
        }
      } else if (type === 'unpersist') {
        // Awaited so the removals belong to this async task instead of
        // floating as a detached promise.
        await Promise.all(threads.map(thread => (
          this.unindexThread(thread, {isBeingUnpersisted: true})
        )));
      }
    });
  }

  /**
   * Asks the scheduler to start indexing when at least one account id is given.
   *
   * @param {string[]} accountIds
   * @returns {Promise} always resolved; the indexing itself runs in the scheduler
   */
  buildIndex = (accountIds) => {
    if (!accountIds || accountIds.length === 0) { return Promise.resolve(); }
    this.indexer.notifyHasIndexingToDo();
    return Promise.resolve();
  }

  /**
   * Drops the Thread search index and creates it again.
   *
   * @returns {Promise}
   */
  clearIndex() {
    return (
      DatabaseStore.dropSearchIndex(Thread)
        .then(() => DatabaseStore.createSearchIndex(Thread))
    );
  }

  /**
   * Adds a thread to the index.
   *
   * @param {Thread} thread
   * @returns {Promise} result of DatabaseStore.indexModel
   */
  indexThread = (thread) => {
    return this.getIndexData(thread)
      .then((indexData) => DatabaseStore.indexModel(thread, indexData));
  }

  /**
   * Updates the index entry of a thread.
   *
   * @param {Thread} thread
   * @returns {Promise} result of DatabaseStore.updateModelIndex
   */
  updateThreadIndex = (thread) => {
    return this.getIndexData(thread)
      .then((indexData) => DatabaseStore.updateModelIndex(thread, indexData));
  }

  /**
   * Removes a thread from the index.
   *
   * @param {Thread} thread
   * @param {Object} [opts] - forwarded untouched to DatabaseStore.unindexModel
   * @returns {Promise} result of DatabaseStore.unindexModel
   */
  unindexThread = (thread, opts) => {
    return DatabaseStore.unindexModel(thread, opts);
  }

  /**
   * Collects the searchable text of a thread from its messages.
   *
   * @param {Thread} thread
   * @returns {Promise<{categories: string, to_: string, from_: string,
   *   body: string, subject: string}>}
   *   - body: per message, the snippet when the body is not a string,
   *     otherwise the body text extracted from HTML with whitespace collapsed
   *   - to_: per message, the de-duplicated to/cc/bcc participants
   *   - from_: the senders of every message
   *   - categories: display names of the thread's categories
   */
  getIndexData(thread) {
    // A missing name or email must not end up in the index as the literal
    // text "undefined" / "null".
    const formatParticipant = ({name, email}) => `${name || ''} ${email || ''}`.trim();
    const joinNonEmpty = (parts) => parts.filter((part) => part).join(' ');

    return thread.messages().then((messages) => {
      const bodies = messages.map(({body, snippet}) => {
        if (!_.isString(body)) {
          return snippet || '';
        }
        return Utils
          .extractTextFromHtml(body, {maxLength: MESSAGE_BODY_LENGTH})
          .replace(/(\s)+/g, ' ');
      });

      // Each message is joined with spaces explicitly: joining the nested
      // arrays directly would glue participants together with commas.
      const recipients = messages.map(({to, cc, bcc}) => (
        joinNonEmpty(_.uniq([].concat(to || [], cc || [], bcc || []).map(formatParticipant)))
      ));
      const senders = messages.map(({from}) => (
        joinNonEmpty((from || []).map(formatParticipant))
      ));

      return {
        categories: thread.categories.map(({displayName}) => displayName).join(' '),
        to_: joinNonEmpty(recipients),
        from_: joinNonEmpty(senders),
        body: bodies.join(' '),
        subject: thread.subject,
      };
    });
  }

  /**
   * Stops listening for changes and unregisters Thread from the scheduler.
   * Safe to call more than once.
   */
  deactivate() {
    this.unsubscribers.forEach(unsub => unsub());
    // Forget the listeners so a repeated call does not invoke them again.
    this.unsubscribers = [];
    this.indexer.unregisterSearchableModel(Thread);
  }
}
export default new ThreadSearchIndexStore()

View file

@ -1,14 +0,0 @@
{
"name": "search-index",
"version": "0.1.0",
"main": "./lib/main",
"description": "Keeps search index up to date",
"license": "GPL-3.0",
"private": true,
"engines": {
"nylas": "*"
},
"windowTypes": {
"work": true
}
}

View file

@ -112,20 +112,6 @@ class Thread extends ModelWithMetadata {
queryable: true,
modelKey: 'inAllMail',
}),
isSearchIndexed: Attributes.Boolean({
queryable: true,
modelKey: 'isSearchIndexed',
defaultValue: false,
loadFromColumn: true,
}),
// This corresponds to the rowid in the FTS table. We need to use the FTS
// rowid when updating and deleting items in the FTS table because otherwise
// these operations would be way too slow on large FTS tables.
searchIndexId: Attributes.Number({
modelKey: 'searchIndexId',
}),
})
static sortOrderAttribute = () => {
@ -136,10 +122,6 @@ class Thread extends ModelWithMetadata {
return Thread.sortOrderAttribute().descending()
}
static searchable = true
static searchFields = ['subject', 'to_', 'from_', 'categories', 'body']
async messages({includeHidden} = {}) {
const messages = await DatabaseStore.findAll(Message)
.where({threadId: this.id})

View file

@ -199,8 +199,6 @@ lazyLoad(`MailRulesProcessor`, 'mail-rules-processor');
// Each call pairs an export name (first argument) with the module path it is
// loaded from (second argument).
// NOTE(review): lazyLoad is defined outside this view — presumably the module
// is only required when the export is first accessed; confirm.
lazyLoad(`MailboxPerspective`, 'mailbox-perspective');
lazyLoad(`DeltaProcessor`, 'services/delta-processor');
lazyLoad(`NativeNotifications`, 'native-notifications');
lazyLoad(`ModelSearchIndexer`, 'services/model-search-indexer');
lazyLoad(`SearchIndexScheduler`, 'services/search-index-scheduler');
lazyLoad(`SanitizeTransformer`, 'services/sanitize-transformer');
lazyLoad(`QuotedHTMLTransformer`, 'services/quoted-html-transformer');
lazyLoad(`InlineStyleTransformer`, 'services/inline-style-transformer');

View file

@ -1,110 +0,0 @@
import DatabaseStore from '../flux/stores/database-store'
import SearchIndexScheduler from './search-index-scheduler'
const INDEXING_PAGE_SIZE = 1000;
const INDEXING_PAGE_DELAY = 1000;

/**
 * Base class for per-model search indexers.
 *
 * Subclasses override the `MaxIndexSize`, `ConfigKey`, `IndexVersion` and
 * `ModelClass` getters and `getIndexDataForModel`. The base class registers
 * the model with the search index scheduler, rebuilds the index when the
 * stored version differs from `IndexVersion`, and reacts to database changes.
 */
export default class ModelSearchIndexer {
  constructor() {
    this.unsubscribers = [];
    this.indexer = SearchIndexScheduler;
  }

  /** @returns {number} index size passed to the scheduler on activation */
  get MaxIndexSize() {
    throw new Error("Override me and return a number")
  }

  /** @returns {string} config key under which the index version is stored */
  get ConfigKey() {
    throw new Error("Override me and return a string config key")
  }

  /** @returns {*} version compared against the stored config value */
  get IndexVersion() {
    throw new Error("Override me and return an IndexVersion")
  }

  /** @returns {Function} model class handled by this indexer */
  get ModelClass() {
    throw new Error("Override me and return a class constructor")
  }

  /** @returns {Object} data passed to DatabaseStore.indexModel for a model */
  getIndexDataForModel() {
    throw new Error("Override me and return a hash with a `content` array")
  }

  /**
   * Registers the model with the scheduler, starts index initialization and
   * subscribes to database changes. `deactivate` undoes the subscription and
   * the registration.
   */
  activate() {
    this.indexer.registerSearchableModel({
      modelClass: this.ModelClass,
      indexSize: this.MaxIndexSize,
      indexCallback: (model) => this._indexModel(model),
      unindexCallback: (model) => this._unindexModel(model),
    });

    this._initializeIndex();

    this.unsubscribers = [
      // TODO listen for changes in AccountStore
      DatabaseStore.listen(this._onDataChanged),
      () => this.indexer.unregisterSearchableModel(this.ModelClass),
    ];
  }

  /** Runs every teardown callback collected by `activate`. */
  deactivate() {
    this.unsubscribers.forEach(unsub => unsub());
  }

  /**
   * Drops, recreates and rebuilds the index when the stored version differs
   * from `IndexVersion`; does nothing otherwise.
   *
   * @returns {Promise} resolves when initialization has finished
   */
  _initializeIndex() {
    if (NylasEnv.config.get(this.ConfigKey) !== this.IndexVersion) {
      return DatabaseStore.dropSearchIndex(this.ModelClass)
        .then(() => DatabaseStore.createSearchIndex(this.ModelClass))
        .then(() => this._buildIndex());
    }
    return Promise.resolve();
  }

  /**
   * Indexes models one page at a time, pausing INDEXING_PAGE_DELAY ms between
   * pages, and stores `IndexVersion` under `ConfigKey` once a page comes back
   * empty.
   *
   * @param {number} [offset=0] - number of models already indexed
   * @returns {Promise} resolves after the last page has been indexed and
   *   rejects if any page fails
   */
  _buildIndex(offset = 0) {
    const {ModelClass, IndexVersion, ConfigKey} = this;
    return DatabaseStore.findAll(ModelClass)
      .limit(INDEXING_PAGE_SIZE)
      .offset(offset)
      .background()
      .then((models) => {
        if (models.length === 0) {
          NylasEnv.config.set(ConfigKey, IndexVersion);
          return undefined;
        }
        // NOTE(review): Promise.each is not part of the native Promise API —
        // presumably the global Promise is Bluebird here; confirm.
        // The chain is returned so that callers see completion and failures
        // of every page instead of a detached background task.
        return Promise.each(models, (model) => (
          DatabaseStore.indexModel(model, this.getIndexDataForModel(model))
        ))
          .then(() => new Promise((resolve) => setTimeout(resolve, INDEXING_PAGE_DELAY)))
          .then(() => this._buildIndex(offset + models.length));
      });
  }

  /**
   * Scheduler callback: indexes one model.
   *
   * @param {Object} model
   * @returns {Promise} result of DatabaseStore.indexModel, so the scheduler
   *   can wait for it
   */
  _indexModel(model) {
    return DatabaseStore.indexModel(model, this.getIndexDataForModel(model));
  }

  /**
   * Scheduler callback: removes one model from the index.
   *
   * @param {Object} model
   * @returns {Promise} result of DatabaseStore.unindexModel
   */
  _unindexModel(model) {
    return DatabaseStore.unindexModel(model);
  }

  /**
   * Reacts to database changes for this indexer's model class only: persisted
   * models make the scheduler look for indexing work, any other change type
   * removes the affected models from the index.
   *
   * @param {{objectClass: string, objects: Object[], type: string}} change
   */
  _onDataChanged = (change) => {
    if (change.objectClass !== this.ModelClass.name) {
      return;
    }

    change.objects.forEach((model) => {
      if (change.type === 'persist') {
        this.indexer.notifyHasIndexingToDo();
      } else {
        this._unindexModel(model);
      }
    });
  }
}

View file

@ -1,140 +0,0 @@
import _ from 'underscore';
import {
DatabaseStore,
} from 'nylas-exports'
const CHUNK_SIZE = 10;
const FRACTION_CPU_AVAILABLE = 0.05;
const MIN_TIMEOUT = 1000;
const MAX_TIMEOUT = 5 * 60 * 1000; // 5 minutes

/**
 * Indexes and unindexes registered models in small chunks, spacing the runs
 * so that indexing takes roughly FRACTION_CPU_AVAILABLE of wall-clock time.
 */
class SearchIndexScheduler {
  constructor() {
    // Registered models keyed by class name.
    this._searchableModels = {};
    this._hasIndexingToDo = false;
    // Start / stop times of the last run that did work; they drive the delay
    // before the next run.
    this._lastTimeStart = null;
    this._lastTimeStop = null;
  }

  /**
   * @param {Object} registration
   * @param {Function} registration.modelClass - class whose `name` keys the registration
   * @param {number} registration.indexSize - number of models to keep indexed
   * @param {Function} registration.indexCallback - called with each model to index
   * @param {Function} registration.unindexCallback - called with each model to unindex
   */
  registerSearchableModel({modelClass, indexSize, indexCallback, unindexCallback}) {
    this._searchableModels[modelClass.name] = {modelClass, indexSize, indexCallback, unindexCallback};
  }

  /** @param {Function} modelClass - class passed to registerSearchableModel */
  unregisterSearchableModel(modelClass) {
    delete this._searchableModels[modelClass.name];
  }

  /**
   * Finds the model sitting at position `indexSize` in natural sort order.
   *
   * @returns {Promise<Object|undefined>} that model, or undefined when fewer
   *   models exist
   */
  async _getIndexCutoff(modelClass, indexSize) {
    const query = DatabaseStore.findAll(modelClass)
      .order(modelClass.naturalSortOrder())
      .offset(indexSize)
      .limit(1)
      .silenceQueryPlanDebugOutput();
    const models = await query;
    return models[0];
  }

  /**
   * Queries up to CHUNK_SIZE models that are not indexed yet and, when a
   * cutoff exists, sort after it.
   *
   * @param {Function} modelClass
   * @param {number} indexSize - unused; kept for signature compatibility
   * @param {Object} [cutoff] - model returned by _getIndexCutoff
   */
  _getNewUnindexed(modelClass, indexSize, cutoff) {
    const whereConds = [modelClass.attributes.isSearchIndexed.equal(false)];
    if (cutoff) {
      const sortAttr = modelClass.sortOrderAttribute();
      whereConds.push(sortAttr.greaterThan(cutoff[sortAttr.modelKey]));
    }
    return DatabaseStore.findAll(modelClass)
      .where(whereConds)
      .limit(CHUNK_SIZE)
      .order(modelClass.naturalSortOrder());
  }

  /**
   * Queries up to CHUNK_SIZE indexed models that sort at or before the cutoff.
   *
   * @param {Function} modelClass
   * @param {Object} [cutoff] - model returned by _getIndexCutoff
   */
  _getOldIndexed(modelClass, cutoff) {
    // If there's no cutoff then that means we haven't reached the max index size yet.
    if (!cutoff) {
      return Promise.resolve([]);
    }
    const sortAttr = modelClass.sortOrderAttribute();
    const whereConds = [
      modelClass.attributes.isSearchIndexed.equal(true),
      sortAttr.lessThanOrEqualTo(cutoff[sortAttr.modelKey]),
    ];
    return DatabaseStore.findAll(modelClass)
      .where(whereConds)
      .limit(CHUNK_SIZE)
      .order(modelClass.naturalSortOrder());
  }

  /**
   * Collects the next chunk of work across every registered model.
   *
   * @returns {Promise<{toIndex: Object[], toUnindex: Object[]}>} both lists
   *   are empty arrays when nothing is registered
   */
  async _getIndexDiff() {
    const results = await Promise.all(Object.keys(this._searchableModels).map(async (modelName) => {
      const {modelClass, indexSize} = this._searchableModels[modelName];
      const cutoff = await this._getIndexCutoff(modelClass, indexSize);
      return Promise.all([
        this._getNewUnindexed(modelClass, indexSize, cutoff),
        this._getOldIndexed(modelClass, cutoff),
      ]);
    }));
    // With no registered models the unzip yields an empty array, so default
    // both lists to [] rather than undefined.
    const [toIndex = [], toUnindex = []] = _.unzip(results).map((l) => _.flatten(l));
    return {toIndex, toUnindex};
  }

  /**
   * @param {Object[]} items - models to index
   * @returns {Promise<Array>} settles once every index callback has settled
   */
  _indexItems(items) {
    // Pass the promises themselves: wrapping the array in another array makes
    // Promise.all resolve immediately without waiting for any callback.
    return Promise.all(items.map((item) => (
      this._searchableModels[item.constructor.name].indexCallback(item)
    )));
  }

  /**
   * @param {Object[]} items - models to unindex
   * @returns {Promise<Array>} settles once every unindex callback has settled
   */
  _unindexItems(items) {
    return Promise.all(items.map((item) => (
      this._searchableModels[item.constructor.name].unindexCallback(item)
    )));
  }

  /** Schedules a run unless one is already pending. */
  notifyHasIndexingToDo() {
    if (this._hasIndexingToDo) {
      return;
    }
    this._hasIndexingToDo = true;
    this._scheduleRun();
  }

  /**
   * @returns {number} delay in ms before the next run: the duration of the
   *   last run divided by FRACTION_CPU_AVAILABLE, clamped to
   *   [MIN_TIMEOUT, MAX_TIMEOUT]; MIN_TIMEOUT before any run has completed
   */
  _computeNextTimeout() {
    if (!this._lastTimeStop || !this._lastTimeStart) {
      return MIN_TIMEOUT;
    }
    const spanMillis = this._lastTimeStop.getTime() - this._lastTimeStart.getTime();
    const multiplier = 1.0 / FRACTION_CPU_AVAILABLE;
    return Math.min(Math.max(spanMillis * multiplier, MIN_TIMEOUT), MAX_TIMEOUT);
  }

  _scheduleRun() {
    setTimeout(() => this.run(), this._computeNextTimeout());
  }

  /**
   * Processes one chunk of indexing work and schedules the next run. When
   * there is nothing left to do the pending flag is cleared instead.
   *
   * @returns {Promise<void>} rejects with the original error when a query or
   *   callback fails
   */
  async run() {
    if (!this._hasIndexingToDo) {
      return;
    }

    const start = new Date();
    try {
      const {toIndex, toUnindex} = await this._getIndexDiff();
      if (toIndex.length === 0 && toUnindex.length === 0) {
        this._hasIndexingToDo = false;
        return;
      }
      await Promise.all([
        this._indexItems(toIndex),
        this._unindexItems(toUnindex),
      ]);
    } catch (err) {
      // Clear the flag so a later notifyHasIndexingToDo() can schedule a new
      // run; otherwise one failure would wedge the scheduler for good.
      this._hasIndexingToDo = false;
      throw err;
    }

    this._lastTimeStart = start;
    this._lastTimeStop = new Date();
    this._scheduleRun();
  }
}
export default new SearchIndexScheduler()