Search This Blog

Sunday 10 January 2021

Elasticsearch - Boost, aggs

Boosting is the process by which you can modify the relevance of a document. You can boost a document while you are indexing it or when you query for it. We definitely recommend query-time boosting because it is the most flexible and allows you to change your mind about which fields or terms are important.

Create Elasticsearch Index

# Create the product index.
# "lowercaseNormalizer" is a custom normalizer (lowercase + asciifolding) applied to the
# keyword fields ProductName, ProductCode and ProductFamily.
# ProductDescription is an analyzed text field; Color is a keyword with no normalizer.
PUT product-index
{
  "settings": {
    "analysis": {
      "normalizer": {
        "lowercaseNormalizer": {
          "type": "custom",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "ProductName": {
        "type": "keyword",
        "normalizer": "lowercaseNormalizer"
      },
     "ProductCode": {
        "type": "keyword",
        "normalizer": "lowercaseNormalizer"
      },
      "ProductDescription": {
        "type": "text"
      },
      "ProductFamily": {
        "type": "keyword",
        "normalizer": "lowercaseNormalizer"
      },
      "Color": {
        "type": "keyword"
      }
    }
  }
}
       

Adding sample data/records

        
# Add the first document to the index (all five mapped fields are set).
PUT /product-index/_doc/1
{
  "ProductName": "CBC Anti Virus",
  "ProductCode": "Network",
  "ProductDescription": "APAC",
  "ProductFamily": "Sales Cloud",
  "Color": "Silver"
}


# Document 2 has no ProductCode.
PUT /product-index/_doc/2
{
  "ProductName": "CBC Esoteric",
  "ProductDescription": "CBC Panel - 1",
  "ProductFamily": "Hardware",
  "Color": "BLUE"
}

# Document 3 has no ProductDescription and no Color.
PUT /product-index/_doc/3
{
  "ProductName": "CBC Oncology",
  "ProductCode": "CBC Health",
  "ProductFamily": "Software"
}

# Document 4 has no ProductFamily and no Color.
PUT /product-index/_doc/4
{
  "ProductName": "CBC Panel",
  "ProductCode": "Secure Wifi",
  "ProductDescription": "Only One Charge Type"
}

# Document 5 sets all five fields.
PUT /product-index/_doc/5
{
  "ProductName": "CBC Panel (Invitro)",
  "ProductCode": "Secure Wifi",
  "ProductDescription": "CBC Panel",
  "ProductFamily": "Hardware",
  "Color": "GREEN"
}


# Document 6 sets all five fields.
PUT /product-index/_doc/6
{
  "ProductName": "CBC Panel - Beijing",
  "ProductCode": "CBC Panel",
  "ProductDescription": "Only One Charge Type",
  "ProductFamily": "Software",
  "Color": "BLUE"
}


# Document 7 has no ProductDescription.
PUT /product-index/_doc/7
{
  "ProductName": "Esoteric",
  "ProductCode": "CBC",
  "ProductFamily": "Marketing Cloud",
  "Color": "WHITE"
}


# Document 8 sets all five fields.
PUT /product-index/_doc/8
{
  "ProductName": "Oncology",
  "ProductCode": "Network",
  "ProductDescription": "CBC",
  "ProductFamily": "Sales Cloud",
  "Color": "WHITE"
}

        

View data/records

# Search with no request body to list the indexed documents.
GET /product-index/_search


Searching with relevancy

Boost the score of a particular query or field as shown below.

# Try 1: match on ProductFamily with the clause's score multiplied by a boost of 2.
# The mixed-case term "hArdware" relies on the field's lowercaseNormalizer to match "Hardware".
POST /product-index/_search
{
  "query": {
    "match": {
      "ProductFamily": {
        "query": "hArdware",
        "boost": 2
      }
    }
  }
}


# Try 2: two optional (should) clauses with different weights.
# The ProductFamily match (boost 4) is weighted above the ProductCode wildcard (boost 3).
POST /product-index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "query_string": {
            "query": "*Wifi*",
            "default_field": "ProductCode",
            "boost": 3
          }
        },
        {
          "match": {
            "ProductFamily": {
              "query": "hArdware",
              "boost": 4
            }
          }
        }
      ]
    }
  }
}

# Try 3: wildcard OR query against ProductCode.
# A boost of 1 is the default value, so it leaves the scores unchanged.
GET /product-index/_search
{
  "query": {
    "query_string": {
      "query": "*Secure* OR *panel*",
      "default_field": "ProductCode",
      "boost": 1
    }
  }
}

# Try 4: five optional (should) clauses, one per field, each weighted by its own boost.
# Color is mapped as a plain keyword without a normalizer, so its wildcard term is
# matched case-sensitively and must be written in the stored case ("BLUE").
POST /product-index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "query_string": {
            "query": "*CBC*",
            "default_field": "ProductName",
            "boost": 100
          }
        },
        {
          "query_string": {
            "query": "*hard*",
            "default_field": "ProductFamily",
            "boost": 91
          }
        },
        {
          "query_string": {
            "query": "*one*",
            "default_field": "ProductDescription",
            "boost": 92
          }
        },
        {
          "query_string": {
            "query": "*BLUE*",
            "default_field": "Color",
            "boost": 90
          }
        },
        {
          "query_string": {
            "query": "*Health*",
            "default_field": "ProductCode",
            "boost": 93
          }
        }
      ]
    }
  }
}


# Try 5: search *cbc* with per-field weights.
# ProductName has boost 49 and ProductCode has boost 7; ProductDescription and
# ProductFamily are searched with no explicit boost.
POST /product-index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "query_string": {
            "query": "*cbc*",
            "default_field": "ProductName",
            "boost": 49
          }
        },
        {
          "query_string": {
            "query": "*cbc*",
            "default_field": "ProductCode",
            "boost": 7
          }
        },
        {
          "query_string": {
            "query": "*cbc*",
            "fields": [ "ProductDescription", "ProductFamily"]
          }
        }
      ]
    }
  }
}

# Try 6: search *cbc* across every field (the "*" field pattern), with no boosts.
GET /product-index/_search
{
  "query": {
    "query_string": {
      "query": "*cbc*",
      "fields": ["*"]
    }
  }
}


# Try 7: boosted ProductName (49) and ProductCode (7) clauses combined with an
# unboosted *cbc* search across every field ("*").
POST /product-index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "query_string": {
            "query": "*cbc*",
            "default_field": "ProductName",
            "boost": 49
          }
        },
        {
          "query_string": {
            "query": "*cbc*",
            "default_field": "ProductCode",
            "boost": 7
          }
        },
        {
          "query_string": {
            "query": "*cbc*",
            "fields": [ "*"]
          }
        }
      ]
    }
  }
}



# Aggregation demo: keep only documents whose ProductFamily matches "hArdware",
# then bucket the hits by ProductFamily with a terms aggregation named "Family".
GET /product-index/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "match": {
            "ProductFamily": "hArdware"
          }
        }
      ]
    }
  },
  "aggs": {
    "Family": {
      "terms": {
        "field": "ProductFamily"
      }
    }
  }
}

# Aggregation demo 2: keep only "hArdware" products that also match *cbc* or *network*
# in any of the listed fields, then bucket the hits by ProductFamily and by Color.
# Both conditions sit in filter context, so they restrict the hits without scoring them.
GET /product-index/_search
{
  "_source": [
    "ProductName",
    "ProductCode",
    "ProductDescription",
    "ProductFamily",
    "Color"
  ],
  "query": {
    "bool": {
      "filter": [
        {
          "match": {
            "ProductFamily": "hArdware"
          }
        },
        {
          "query_string": {
            "query": "*cbc* OR *network*",
            "fields": [
              "ProductName",
              "ProductCode",
              "ProductDescription",
              "ProductFamily",
              "Color"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "Family": {
      "terms": {
        "field": "ProductFamily"
      }
    },
    "Color": {
      "terms": {
        "field": "Color"
      }
    }
  }
}

       


Reference

Tuesday 5 January 2021

Elasticsearch case-insensitive searches

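A simple normalizer called lowercase ships with Elasticsearch and can be used. The example below instead defines a custom normalizer, lowercaseNormalizer, that also applies the asciifolding filter.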

Create Elasticsearch Index

# Create the product index.
# "lowercaseNormalizer" is a custom normalizer (lowercase + asciifolding) applied to the
# keyword fields ProductName, ProductCode and ProductFamily.
# ProductDescription is an analyzed text field; Color is a keyword with no normalizer.
PUT product-index
{
  "settings": {
    "analysis": {
      "normalizer": {
        "lowercaseNormalizer": {
          "type": "custom",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "ProductName": {
        "type": "keyword",
        "normalizer": "lowercaseNormalizer"
      },
     "ProductCode": {
        "type": "keyword",
        "normalizer": "lowercaseNormalizer"
      },
      "ProductDescription": {
        "type": "text"
      },
      "ProductFamily": {
        "type": "keyword",
        "normalizer": "lowercaseNormalizer"
      },
      "Color": {
        "type": "keyword"
      }
    }
  }
}
       

Adding sample data/records

        
# Add the first document to the index (all five mapped fields are set).
PUT /product-index/_doc/1
{
  "ProductName": "CBC Anti Virus",
  "ProductCode": "Network",
  "ProductDescription": "APAC",
  "ProductFamily": "Sales Cloud",
  "Color": "Silver"
}


# Document 2 has no ProductCode.
PUT /product-index/_doc/2
{
  "ProductName": "CBC Esoteric",
  "ProductDescription": "CBC Panel - 1",
  "ProductFamily": "Hardware",
  "Color": "BLUE"
}

# Document 3 has no ProductDescription and no Color.
PUT /product-index/_doc/3
{
  "ProductName": "CBC Oncology",
  "ProductCode": "CBC Health",
  "ProductFamily": "Software"
}

# Document 4 has no ProductFamily and no Color.
PUT /product-index/_doc/4
{
  "ProductName": "CBC Panel",
  "ProductCode": "Secure Wifi",
  "ProductDescription": "Only One Charge Type"
}

# Document 5 sets all five fields.
PUT /product-index/_doc/5
{
  "ProductName": "CBC Panel (Invitro)",
  "ProductCode": "Secure Wifi",
  "ProductDescription": "CBC Panel",
  "ProductFamily": "Hardware",
  "Color": "GREEN"
}


# Document 6 sets all five fields.
PUT /product-index/_doc/6
{
  "ProductName": "CBC Panel - Beijing",
  "ProductCode": "CBC Panel",
  "ProductDescription": "Only One Charge Type",
  "ProductFamily": "Software",
  "Color": "BLUE"
}


# Document 7 has no ProductDescription.
PUT /product-index/_doc/7
{
  "ProductName": "Esoteric",
  "ProductCode": "CBC",
  "ProductFamily": "Marketing Cloud",
  "Color": "WHITE"
}


# Document 8 sets all five fields.
PUT /product-index/_doc/8
{
  "ProductName": "Oncology",
  "ProductCode": "Network",
  "ProductDescription": "CBC",
  "ProductFamily": "Sales Cloud",
  "Color": "WHITE"
}

        

View data/records

# Search with no request body to list the indexed documents.
GET /product-index/_search


Search case-insensitive records

# Try 1: match on the normalized keyword field ProductFamily.
# The mixed-case term "hArdware" relies on lowercaseNormalizer to match "Hardware".
GET /product-index/_search
{
  "query": {
    "match": {
      "ProductFamily": "hArdware"
    }
  }
}

# Try 2: lowercase wildcard *bei* against the normalized keyword field ProductName
# (document 6 is named "CBC Panel - Beijing").
GET /product-index/_search
{
  "query": {
    "query_string": {
      "query": "*bei*",
      "fields": ["ProductName"]
    }
  }
}

# Try 3: two wildcard terms combined with OR against the normalized keyword field ProductCode.
GET /product-index/_search
{
  "query": {
    "query_string": {
      "query": "*Secure* OR *panel*",
      "default_field": "ProductCode"
    }
  }
}

# Try 4: mixed-case wildcard *cHarge* against the analyzed text field ProductDescription.
GET /product-index/_search
{
  "query": {
    "query_string": {
      "query": "*cHarge*",
      "fields": ["ProductDescription"]
    }
  }
}
       


Note

  • Text fields are analyzed.
  • For keyword fields, we need to add a custom normalizer to the index's analysis settings and reference it in the field mapping.


Reference

Elasticsearch - Nodes, clusters, and shards

Elastic Stack Video - Load your gun in short time.   Beginner's Crash Course to Ela...

Recent Post