搜索支持繁简搜索
镜像:
FROM docker.elastic.co/elasticsearch/elasticsearch:5.5.3
RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.5.3/elasticsearch-analysis-ik-5.5.3.zip
RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-smartcn/analysis-smartcn-5.5.3.zip
RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch https://github.com/medcl/elasticsearch-analysis-stconvert/releases/download/v5.5.3/elasticsearch-analysis-stconvert-5.5.3.zip
docker run --rm --network host -e "discovery.type=single-node" -e "xpack.security.enabled=false" yzs/myes
创建自定义的分词器,繁简和ik
curl --location --request PUT 'http://localhost:9200/ts_analysis_ik' \
--header 'Content-Type: application/json' \
--data-raw '{
"settings": {
"analysis": {
"analyzer": {
"custom_stconvert_analyzer": {
"type": "custom",
"tokenizer": "ik_max_word",
"filter": "tsconvert",
"char_filter": "tsconvert"
}
},
"filter": {
"tsconvert" : {
"type" : "stconvert",
"delimiter" : "#",
"keep_both" : false,
"convert_type" : "t2s"
}
},
"char_filter" : {
"tsconvert" : {
"type" : "stconvert",
"convert_type" : "t2s"
}
}
}
},
"mappings": {
"data": {
"properties": {
"slogan": {
"type": "text",
"analyzer": "custom_stconvert_analyzer",
"fields": {
"raw": {
"type": "keyword"
}
}
}
}
}
}
}
分词结果:
curl --location --request GET 'http://localhost:9200/ts_analysis_ik/_analyze' \
--header 'Content-Type: application/json' \
--header 'Content-Type: text/plain' \
--data-raw '{
"analyzer": "custom_stconvert_analyzer",
"text": "覺醒後的神關羽果然沒讓我失望!變得更帥更強了😍 #一招制敵"
}'
分词结果:
{
"tokens": [
{
"token": "觉醒",
"start_offset": 0,
"end_offset": 2,
"type": "CN_WORD",
"position": 0
},
{
"token": "后",
"start_offset": 2,
"end_offset": 3,
"type": "CN_CHAR",
"position": 1
},
{
"token": "的",
"start_offset": 3,
"end_offset": 4,
"type": "CN_CHAR",
"position": 2
},
{
"token": "神",
"start_offset": 4,
"end_offset": 5,
"type": "CN_CHAR",
"position": 3
},
{
"token": "关羽",
"start_offset": 5,
"end_offset": 7,
"type": "CN_WORD",
"position": 4
},
{
"token": "果然",
"start_offset": 7,
"end_offset": 9,
"type": "CN_WORD",
"position": 5
},
{
"token": "没让",
"start_offset": 9,
"end_offset": 11,
"type": "CN_WORD",
"position": 6
},
{
"token": "我",
"start_offset": 11,
"end_offset": 12,
"type": "CN_CHAR",
"position": 7
},
{
"token": "失望",
"start_offset": 12,
"end_offset": 14,
"type": "CN_WORD",
"position": 8
},
{
"token": "变得",
"start_offset": 15,
"end_offset": 17,
"type": "CN_WORD",
"position": 9
},
{
"token": "更",
"start_offset": 17,
"end_offset": 18,
"type": "CN_CHAR",
"position": 10
},
{
"token": "帅",
"start_offset": 18,
"end_offset": 19,
"type": "CN_CHAR",
"position": 11
},
{
"token": "更强",
"start_offset": 19,
"end_offset": 21,
"type": "CN_WORD",
"position": 12
},
{
"token": "强了",
"start_offset": 20,
"end_offset": 22,
"type": "CN_WORD",
"position": 13
},
{
"token": "一招",
"start_offset": 26,
"end_offset": 28,
"type": "CN_WORD",
"position": 14
},
{
"token": "一",
"start_offset": 26,
"end_offset": 27,
"type": "TYPE_CNUM",
"position": 15
},
{
"token": "招",
"start_offset": 27,
"end_offset": 28,
"type": "COUNT",
"position": 16
},
{
"token": "制",
"start_offset": 28,
"end_offset": 29,
"type": "CN_CHAR",
"position": 17
},
{
"token": "敌",
"start_offset": 29,
"end_offset": 30,
"type": "CN_CHAR",
"position": 18
}
]
}
创建文档
curl --location --request POST 'http://localhost:9200/ts_analysis_ik/data/1' \curl --location --request POST 'http://localhost:9200/ts_analysis_ik/data/1' \--header 'Content-Type: application/json' \--header 'Content-Type: text/plain' \--data-raw '{ "slogan": "覺醒後的神關羽果然沒讓我失望!變得更帥更強了 #一招制敵"}'
搜索
curl --location --request GET 'http://localhost:9200/ts_analysis_ik/data/_search' \ --header 'Content-Type: application/json' \ --header 'Content-Type: text/plain' \ --data-raw '{ "query": { "multi_match": { "query": "关羽", "fields": ["slogan"] } } }'
结果:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.27638745,
"hits": [
{
"_index": "ts_analysis_ik",
"_type": "data",
"_id": "1",
"_score": 0.27638745,
"_source": {
"slogan": "覺醒後的神關羽果然沒讓我失望!變得更帥更強了😍 #一招制敵"
}
}
]
}
}