ES-安装指引与中文分词

Ubuntu 安装

使用 APT 安装, 步骤参考官网文档: https://www.elastic.co/guide/en/elasticsearch/reference/8.17/deb.html
安装后, 会在终端展示初始密码, 注意保存

需要注意的事情

通过 APT 安装时默认会开启 ssl, 这会导致无法通过 http 直接连接, 需要关闭 ssl 才行(仅建议在本地开发环境这样做)
编辑 /etc/elasticsearch/elasticsearch.yml, 将其中两处 ssl 配置的 enabled 改为 false, 保存后重启服务(sudo systemctl restart elasticsearch)使配置生效

# Enable encryption for HTTP API client connections, such as Kibana, Logstash, and Agents
xpack.security.http.ssl:
  # Set to false so that clients can connect over plain http://localhost:9200.
  # NOTE(review): presumably intended for a local development node only - confirm
  # before reusing this setting anywhere else.
  enabled: false
  keystore.path: certs/http.p12

# Enable encryption and mutual authentication between cluster nodes
xpack.security.transport.ssl:
  # Also switched to false, following the instruction to set the ssl options
  # in this file to false.
  enabled: false
  verification_mode: certificate
  keystore.path: certs/transport.p12
  truststore.path: certs/transport.p12
# Create a new cluster with the current node only
# Additional nodes can still join the cluster later
# "walkerjun" is the node name the server reports in its root endpoint response.
cluster.initial_master_nodes: ["walkerjun"]

简单的验证

浏览器

访问 http://localhost:9200, 按提示输入用户名 elastic 和安装时保存的初始密码, 能看到包含版本信息的 JSON 即表示服务正常

python

依赖:pip install "elasticsearch>=8,<9"(客户端主版本需要与服务端 ES 8.x 保持一致)

import ssl
from elasticsearch import Elasticsearch

# Credentials for the built-in "elastic" user. Replace the password with the
# initial one printed in the terminal when Elasticsearch was installed.
username = "elastic"
password = "Q9n6cbaeXGTUos2ppzWo"

# Name of the demo index used by every step below.
index_name = 'my_index'

# Connect over plain HTTP with basic authentication.
es = Elasticsearch(
    [{'host': 'localhost', 'port': 9200, 'scheme': 'http'}],
    basic_auth=(username, password),
)

# Check that the node is reachable before doing any work.
if es.ping():
    print('Connected to Elasticsearch')
else:
    print('Could not connect to Elasticsearch')
    # Exit with a non-zero status so a shell or CI job can detect the failure
    # (the bare exit() helper reports success and is only provided by `site`).
    raise SystemExit(1)

# Create the index only when it is missing, so the script can be re-run
# without failing because the index already exists.
if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name)

# Add a document. refresh="wait_for" makes the call return only once the
# document is visible to search; without it the search below can run before
# the next index refresh and come back empty.
doc = {
    'title': 'Elasticsearch Tutorial',
    'content': 'This is a tutorial about Elasticsearch.',
}
es.index(index=index_name, document=doc, refresh="wait_for")

# Search for the document by a term in its title.
res = es.search(
    index=index_name,
    query={"match": {"title": "Elasticsearch"}},
)
print(res)

中文分词支持

未安装中文分词插件时, 只能使用内置的 standard 分词器, 它会把中文按单个字切分, 效果如下

from elasticsearch import Elasticsearch


        
class EsDemoHelper:
    """Demo helper that opens a client connection to the local Elasticsearch node."""

    # Basic-auth credentials sent with every request; replace the password
    # with the one of your own installation.
    username = "elastic"
    password = "Q9n6cbaeXGTUos2ppzWo"

    @classmethod
    def create_es_connection(cls):
        """Return an ``Elasticsearch`` client for http://localhost:9200.

        The client authenticates with ``cls.username`` / ``cls.password`` and
        is pinged once before being returned.

        Raises:
            SystemExit: with exit status 1 when the ping fails, so the calling
                shell sees a failure instead of a successful exit.
        """
        es = Elasticsearch(
            [{'host': 'localhost', 'port': 9200, 'scheme': 'http'}],
            basic_auth=(cls.username, cls.password),
        )
        # Guard clause: stop right away when the node is unreachable.
        if not es.ping():
            print('Could not connect to Elasticsearch')
            raise SystemExit(1)
        print('Connected to Elasticsearch')
        return es
        
    
if __name__ == "__main__":
    es = EsDemoHelper.create_es_connection()

    # Chinese sentence to tokenize.
    chinese_text = "这是一个关于中文分词的测试"

    # Run the analyze API with the built-in "standard" analyzer, passing the
    # request fields as explicit keyword arguments.
    response = es.indices.analyze(analyzer="standard", text=chinese_text)

    # Print one token per line.
    print("分词结果:")
    for token in response['tokens']:
        print(token['token'])

输出

分词结果:
这
是
一
个
关
于
中
文
分
词
的
测
试

安装分词插件 ik

  1. 先查看当前es版本
walkerjun@walkerjun:/usr/share/elasticsearch/plugins$ curl -u elastic:Q9n6cbaeXGTUos2ppzWo -X GET "localhost:9200"
{
  "name" : "walkerjun",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "A97Glxz2RdWt0HSGmP7BaA",
  "version" : {
    "number" : "8.17.4",
    "build_flavor" : "default",
    "build_type" : "deb",
    "build_hash" : "c63c7f5f8ce7d2e4805b7b3d842e7e792d84dda1",
    "build_date" : "2025-03-20T15:39:59.811110136Z",
    "build_snapshot" : false,
    "lucene_version" : "9.12.0",
    "minimum_wire_compatibility_version" : "7.17.0",
    "minimum_index_compatibility_version" : "7.0.0"
  },
  "tagline" : "You Know, for Search"
}
  2. 在 IK 分词器的 GitHub 仓库中找到与 ES 版本一致的插件包并下载
    上面查到的 ES 版本是 8.17.4, 所以这里下载 8.17.4 的插件包
cd /usr/share/elasticsearch/plugins
sudo wget https://release.infinilabs.com/analysis-ik/stable/elasticsearch-analysis-ik-8.17.4.zip
# The archive name must match the file downloaded above (same version as the ES server).
sudo unzip elasticsearch-analysis-ik-8.17.4.zip -d ik
# Remove the zip afterwards; if it stays in the plugins directory, Elasticsearch cannot restart.
sudo rm elasticsearch-analysis-ik-8.17.4.zip
  3. 授权
    确保运行 Elasticsearch 的 elasticsearch 用户对插件目录有足够的访问权限
# Handing the directory over to another user requires root privileges.
sudo chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/plugins/ik
  4. 重启 es, 使插件生效
sudo systemctl restart elasticsearch
  5. 重新验证: 把分词器从 standard 换成 ik_max_word 后重新运行
from elasticsearch import Elasticsearch


        
class EsDemoHelper:
    """Demo helper that opens a client connection to the local Elasticsearch node."""

    # Basic-auth credentials sent with every request; replace the password
    # with the one of your own installation.
    username = "elastic"
    password = "Q9n6cbaeXGTUos2ppzWo"

    @classmethod
    def create_es_connection(cls):
        """Return an ``Elasticsearch`` client for http://localhost:9200.

        The client authenticates with ``cls.username`` / ``cls.password`` and
        is pinged once before being returned.

        Raises:
            SystemExit: with exit status 1 when the ping fails, so the calling
                shell sees a failure instead of a successful exit.
        """
        es = Elasticsearch(
            [{'host': 'localhost', 'port': 9200, 'scheme': 'http'}],
            basic_auth=(cls.username, cls.password),
        )
        # Guard clause: stop right away when the node is unreachable.
        if not es.ping():
            print('Could not connect to Elasticsearch')
            raise SystemExit(1)
        print('Connected to Elasticsearch')
        return es
        
    
if __name__ == "__main__":
    es = EsDemoHelper.create_es_connection()

    # Chinese sentence to tokenize.
    chinese_text = "这是一个关于中文分词的测试"

    # Run the analyze API with the "ik_max_word" analyzer (not the default
    # one), passing the request fields as explicit keyword arguments.
    response = es.indices.analyze(analyzer="ik_max_word", text=chinese_text)

    # Print one token per line.
    print("分词结果:")
    for token in response['tokens']:
        print(token['token'])

输出

分词结果:
这是
一个
一
个
关于
中文
分词
的
测试