From fe0ccb11bd3192c224615fe09efe47d04c29cfe9 Mon Sep 17 00:00:00 2001 From: mdecimus Date: Thu, 22 Aug 2024 12:33:23 +0200 Subject: [PATCH] Do not insert empty keywords in FTS index --- crates/store/src/fts/index.rs | 30 ++++++++++++++++++++---------- crates/store/src/write/key.rs | 2 +- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/crates/store/src/fts/index.rs b/crates/store/src/fts/index.rs index 4b0c6c63..6795a80a 100644 --- a/crates/store/src/fts/index.rs +++ b/crates/store/src/fts/index.rs @@ -95,11 +95,14 @@ impl<'x, T: Into + Display + Clone + std::fmt::Debug> FtsDocument<'x, T> { } pub fn index_keyword(&mut self, field: Field, text: impl Into>) { - self.parts.push(Text { - field, - text: text.into(), - typ: Type::Keyword, - }); + let text = text.into(); + if !text.is_empty() { + self.parts.push(Text { + field, + text, + typ: Type::Keyword, + }); + } } } @@ -146,11 +149,14 @@ impl Store { position += 10; } Type::Keyword => { - let field = u8::from(text.field); - tokens - .entry(BitmapHash::new(text.text.as_ref())) - .or_default() - .insert_keyword(TokenType::word(field)); + let value = text.text.as_ref(); + if !value.is_empty() { + let field = u8::from(text.field); + tokens + .entry(BitmapHash::new(value)) + .or_default() + .insert_keyword(TokenType::word(field)); + } } } } @@ -268,6 +274,10 @@ impl Store { hash[..len].copy_from_slice(&key[U32_LEN..U32_LEN + len]); (hash, len as u8) } + 0 => { + // Temporary fix for empty keywords + (hash, 0) + } invalid => { return Err(trc::Error::corrupted_key(key, None, trc::location!()) .ctx(trc::Key::Reason, "Invalid bitmap key length") diff --git a/crates/store/src/write/key.rs b/crates/store/src/write/key.rs index d045d987..86649255 100644 --- a/crates/store/src/write/key.rs +++ b/crates/store/src/write/key.rs @@ -250,7 +250,7 @@ impl ValueClass { let serializer = serializer.write(account_id).write( hash.hash .get(0..std::cmp::min(hash.len as usize, 8)) - .unwrap(), + .unwrap_or_default(), ); if hash.len >= 8 {