ylliX - Online Advertising Network
Vertex AI - Anthropic and Mistral models: Why does it require Imagen access?

Full Text Search Improve Query Performance


Description:

I have a data set of around 4–5 million documents, for which I need to configure full-text search with minimal response time.
I configured the FTS index as below.

{
 "name": "full_text_index",
 "type": "fulltext-index",
 "params": {
  "mapping": {
   "types": {
    "_default.native": {
     "enabled": true,
     "dynamic": true,
     "default_analyzer": "standard",
     "properties": {
      "text": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "text",
         "type": "text",
         "analyzer": "simple",
         "store": false,
         "index": true,
         "include_term_vectors": true,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "tenant": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "tenant",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "status": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "status",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "locale": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "locale",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "lastUpdateTime": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "lastUpdateTime",
         "type": "number",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": true
        }
       ]
      },
      "productIds": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "productIds",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "id": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "id",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "summary": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "summary",
         "type": "text",
         "analyzer": "simple",
         "store": false,
         "index": true,
         "include_term_vectors": true,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      }
     }
    }
   },
   "default_mapping": {
    "enabled": false,
    "dynamic": true
   },
   "default_type": "_default",
   "default_analyzer": "standard",
   "default_datetime_parser": "dateTimeOptional",
   "default_field": "",
   "store_dynamic": false,
   "index_dynamic": false,
   "docvalues_dynamic": false
  },
  "store": {
   "indexType": "scorch",
   "kvStoreName": ""
  },
  "doc_config": {
   "docid_prefix_delim": "",
   "docid_regexp": "",
   "mode": "scope.collection.type_field",
   "type_field": "type"
  }
 },
 "sourceType": "couchbase",
 "sourceName": "Sample",
 "sourceUUID": "be04daad7edfa09f20ecf781c0817483",
 "sourceParams": {},
 "planParams": {
  "maxPartitionsPerPIndex": 1024,
  "numReplicas": 0,
  "indexPartitions": 12
 },
 "uuid": ""
}

Document Description:
tenant, status, and locale are string attributes where I need a full match, hence I used the keyword analyzer.
productIds is a list of IDs where I need a full match, hence I used the keyword analyzer.
lastUpdateTime is a long value that I need to query by range and sort in descending order.
id is a string that I need to query for either a full match or a partial match using a wildcard, such as a suffix match (e.g. *documentId).
text and summary are text attributes where I need to match phrases or individual words.

I created the index as shown above, with 12 index partitions and without any custom analyzer or filter.

Search Query:

{
    "query": {
        "conjuncts": [
            {
                "disjuncts": [
                    {
                        "wildcard": "*{{searchText}}",
                        "field": "id"
                    },
                    {
                        "match_phrase": "{{searchText}}",
                        "field": "text"
                    },
                    {
                        "match_phrase": "{{searchText}}",
                        "field": "summary"
                    },
                    {
                        "match": "{{searchText}}",
                        "field": "prod"
                    }
                ]
            },
            {
                "term": "abc-123",
                "field": "tenant"
            },
            {
                "disjuncts": [
                    {
                        "term": "en",
                        "field": "locale"
                    }
                ]
            },
            {
                "disjuncts": [
                    {
                        "term": "Approved",
                        "field": "status"
                    },
                    {
                        "term": "Rejected",
                        "field": "status"
                    }
                ]
            },
            {
                "field": "lastUpdateTime",
                "min": 1603799414000,
                "max": 1730029814000,
                "inclusive_min": true,
                "inclusive_max": true
            }
        ]
    },
    "sort": [
        "-lastUpdateTime"
    ],
    "size": 10,
    "from": 0
}

My query looks like the one above. The {{searchText}} placeholder is replaced with dynamic input from the UI, and the other query attributes are filled in based on the user type and filter parameters.

Problem:
Currently, with the above index configuration and 4–5 million documents, queries return in 400–500 ms, even though I do not store any field data in the index for retrieval (storing it would increase the index size on disk).

I need to get a response within 50 ms. Is it possible to achieve such low latency? If so, can anyone help me get faster query results?



Source link

Leave a Reply

Your email address will not be published. Required fields are marked *