Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UBER-921: Improve full text search #3848

Merged
merged 1 commit into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions models/recruit/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,15 @@ export class TApplicant extends TTask implements Applicant {
startDate!: Timestamp | null

@Prop(TypeRef(contact.mixin.Employee), recruit.string.AssignedRecruiter)
@Index(IndexKind.Indexed)
declare assignee: Ref<Employee> | null

@Prop(TypeRef(task.class.State), task.string.TaskState, { _id: recruit.attribute.State })
@Index(IndexKind.Indexed)
declare status: Ref<State>

@Prop(TypeRef(task.class.DoneState), task.string.TaskStateDone, { _id: recruit.attribute.DoneState })
@Index(IndexKind.Indexed)
declare doneState: Ref<DoneState>
}

Expand Down
2 changes: 2 additions & 0 deletions models/task/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,11 @@ export class TLostState extends TDoneState implements LostState {}
@UX(task.string.Task, task.icon.Task, task.string.Task)
export class TTask extends TAttachedDoc implements Task {
@Prop(TypeRef(core.class.Status), task.string.TaskState, { _id: task.attribute.State })
@Index(IndexKind.Indexed)
status!: Ref<Status>

@Prop(TypeRef(task.class.DoneState), task.string.TaskStateDone, { _id: task.attribute.DoneState })
@Index(IndexKind.Indexed)
doneState!: Ref<DoneState> | null

@Prop(TypeString(), task.string.TaskNumber)
Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/classes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ export enum IndexKind {
FullText,
/**
* An index should be created in the Mongo database for attributes with this annotation.
*
* It also means the attribute is included in Elasticsearch.
*/
Indexed
}
Expand Down
7 changes: 7 additions & 0 deletions packages/core/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ export function isFullTextAttribute (attr: AnyAttribute): boolean {
)
}

/**
 * Checks whether the given attribute carries the `IndexKind.Indexed` annotation.
 *
 * @param attr - attribute whose `index` property is inspected
 * @returns `true` only when `attr.index` is strictly equal to `IndexKind.Indexed`
 * @public
 */
export function isIndexedAttribute (attr: AnyAttribute): boolean {
  const { index } = attr
  return index === IndexKind.Indexed
}

/**
* @public
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
<span class="font-medium">Summary:</span>
{#each summary.split('\n') as line}
{@const hl = search.length > 0 && line.toLowerCase().includes(search.toLowerCase())}
<span class:text-md={!hl} class:highlight={hl}>{line}</span>
<span class="select-text" class:text-md={!hl} class:highlight={hl}>{line}</span>
{/each}
{:else if indexDoc}
{#each attributes as attr}
Expand All @@ -77,13 +77,13 @@
{#if search.length > 0}
<span class="font-medium">Result:</span>
{#each doc.filter((line) => line.toLowerCase().includes(search.toLowerCase())) as line}
<span class:highlight={true}>{line}</span>
<span class="select-text" class:highlight={true}>{line}</span>
{/each}
<br />
{/if}
{#each doc as line}
{@const hl = search.length > 0 && line.toLowerCase().includes(search.toLowerCase())}
<span class:text-md={!hl} class:highlight={hl}>{line}</span>
<span class="select-text" class:text-md={!hl} class:highlight={hl}>{line}</span>
{/each}
</div>
{/each}
Expand Down
24 changes: 19 additions & 5 deletions server/core/src/fulltext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ import core, {
Class,
Doc,
DocIndexState,
docKey,
DocumentQuery,
FindOptions,
FindResult,
Hierarchy,
IndexKind,
isFullTextAttribute,
isIndexedAttribute,
MeasureContext,
ObjQueryType,
Ref,
Expand Down Expand Up @@ -135,10 +137,22 @@ export class FullTextIndex implements WithFind {
}
try {
for (const [k, attr] of attrs) {
if (attr.index === IndexKind.FullText) {
if (isFullTextAttribute(attr) || isIndexedAttribute(attr)) {
const vv = (query as any)[k]
if (vv != null) {
findQuery[k] = vv
if (
k === '_class' ||
k === 'modifiedBy' ||
k === 'modifiedOn' ||
k === 'space' ||
k === 'attachedTo' ||
k === 'attachedToClass'
) {
findQuery[k] = vv
} else {
const docKeyValue = docKey(attr.name, { _class: attr.attributeOf })
findQuery[docKeyValue] = vv
}
}
}
if (attr.type._class === core.class.Collection) {
Expand All @@ -165,12 +179,12 @@ export class FullTextIndex implements WithFind {
return true
})

const fullTextLimit = options?.limit ?? 200
const fullTextLimit = Math.min(5000, (options?.limit ?? 200) * 100)
BykhovDenis marked this conversation as resolved.
Show resolved Hide resolved

let { docs, pass } = await this.indexer.search(classes, findQuery, fullTextLimit)

if (docs.length === 0 && pass) {
docs = await this.adapter.search(classes, query, fullTextLimit)
docs = await this.adapter.search(classes, findQuery, fullTextLimit)
}
const indexedDocMap = new Map<Ref<Doc>, IndexedDoc>()

Expand Down
4 changes: 2 additions & 2 deletions server/core/src/indexer/content.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import core, {
import { MinioService } from '@hcengineering/minio'
import { ContentTextAdapter, IndexedDoc } from '../types'
import { contentStageId, DocUpdateHandler, fieldStateId, FullTextPipeline, FullTextPipelineStage } from './types'
import { docKey, docUpdKey, getFullTextAttributes } from './utils'
import { docKey, docUpdKey, getFullTextIndexableAttributes } from './utils'

/**
* @public
Expand Down Expand Up @@ -80,7 +80,7 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
}

async updateContent (doc: DocIndexState, pipeline: FullTextPipeline): Promise<void> {
const attributes = getFullTextAttributes(pipeline.hierarchy, doc.objectClass)
const attributes = getFullTextIndexableAttributes(pipeline.hierarchy, doc.objectClass)
// Copy content attributes as well.
const update: DocumentUpdate<DocIndexState> = {}

Expand Down
4 changes: 2 additions & 2 deletions server/core/src/indexer/field.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import {
docKey,
docUpdKey,
getContent,
getFullTextAttributes,
getFullTextIndexableAttributes,
getFullTextContext,
isFullTextAttribute,
loadIndexStageStage
Expand Down Expand Up @@ -112,7 +112,7 @@ export class IndexedFieldStage implements FullTextPipelineStage {
const docs = await this.dbStorage.findAll(metrics, objClass, {
_id: { $in: Array.from(valueIds.keys()) }
})
const attributes = getFullTextAttributes(pipeline.hierarchy, objClass)
const attributes = getFullTextIndexableAttributes(pipeline.hierarchy, objClass)

// Child docs.

Expand Down
5 changes: 3 additions & 2 deletions server/core/src/indexer/fulltextPush.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ import core, {
DocumentQuery,
DocumentUpdate,
extractDocKey,
IndexKind,
isFullTextAttribute,
isIndexedAttribute,
MeasureContext,
Ref,
ServerStorage,
Expand Down Expand Up @@ -110,7 +111,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
if (
attrObj !== null &&
attrObj !== undefined &&
attrObj.index === IndexKind.FullText &&
(isFullTextAttribute(attrObj) || isIndexedAttribute(attrObj)) &&
(attrObj.type._class === core.class.RefTo ||
(attrObj.type._class === core.class.ArrOf && (attrObj.type as ArrOf<any>).of._class === core.class.RefTo))
) {
Expand Down
2 changes: 1 addition & 1 deletion server/core/src/indexer/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ export const contentStageId = 'cnt-v2b'
/**
* @public
*/
export const fieldStateId = 'fld-v5'
export const fieldStateId = 'fld-v6'

/**
* @public
Expand Down
11 changes: 6 additions & 5 deletions server/core/src/indexer/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import core, {
Hierarchy,
IndexStageState,
isFullTextAttribute,
isIndexedAttribute,
Obj,
Ref,
Space,
Expand All @@ -45,11 +46,11 @@ import { FullTextPipeline } from './types'
/**
* @public
*/
export function getFullTextAttributes (hierarchy: Hierarchy, clazz: Ref<Class<Obj>>): AnyAttribute[] {
export function getFullTextIndexableAttributes (hierarchy: Hierarchy, clazz: Ref<Class<Obj>>): AnyAttribute[] {
const allAttributes = hierarchy.getAllAttributes(clazz)
const result: AnyAttribute[] = []
for (const [, attr] of allAttributes) {
if (isFullTextAttribute(attr)) {
if (isFullTextAttribute(attr) || isIndexedAttribute(attr)) {
result.push(attr)
}
}
Expand All @@ -59,7 +60,7 @@ export function getFullTextAttributes (hierarchy: Hierarchy, clazz: Ref<Class<Ob
.filter((m) => hierarchy.getClass(m).kind === ClassifierKind.MIXIN)
.forEach((m) => {
for (const [, v] of hierarchy.getAllAttributes(m, clazz)) {
if (isFullTextAttribute(v)) {
if (isFullTextAttribute(v) || isIndexedAttribute(v)) {
result.push(v)
}
}
Expand Down Expand Up @@ -119,10 +120,10 @@ export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boo
hierarchy.setClassifierProp(c, 'class_indexed', false)
return false
}
const attrs = getFullTextAttributes(hierarchy, c)
const attrs = getFullTextIndexableAttributes(hierarchy, c)
for (const d of hierarchy.getDescendants(c)) {
if (hierarchy.isMixin(d)) {
attrs.push(...getFullTextAttributes(hierarchy, d))
attrs.push(...getFullTextIndexableAttributes(hierarchy, d))
}
}

Expand Down
34 changes: 18 additions & 16 deletions server/elastic/src/adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,26 +133,28 @@ class ElasticAdapter implements FullTextAdapter {
}
}

if (query.space != null) {
if (typeof query.space === 'object') {
if (query.space.$in !== undefined) {
for (const [q, v] of Object.entries(query)) {
if (!q.startsWith('$')) {
if (typeof v === 'object') {
if (v.$in !== undefined) {
request.bool.should.push({
terms: {
[q]: v.$in,
boost: 100.0
}
})
}
} else {
request.bool.should.push({
terms: {
space: query.space.$in.map((c) => c.toLowerCase()),
boost: 2.0
term: {
[q]: {
value: v,
boost: 100.0,
case_insensitive: true
}
}
})
}
} else {
request.bool.should.push({
term: {
space: {
value: query.space,
boost: 2.0,
case_insensitive: true
}
}
})
}
}

Expand Down