修改数据结构

This commit is contained in:
2025-09-28 21:45:02 +08:00
parent 0ee24252ac
commit 657365f9de
4 changed files with 207 additions and 82 deletions

View File

@@ -15,7 +15,7 @@ AUTH = None # 如需认证则改为("用户名","密码")
# Elasticsearch client connected to the node at http://localhost:9200.
es = Elasticsearch(["http://localhost:9200"])
# Index name (the original comment also mentions a "type name", but no
# type name is defined here).
index_name = "wordsearch2"
# This second assignment overrides the value above; "wordsearch2666" is the
# name that is in effect after both lines run.
index_name = "wordsearch2666"
def create_index_with_mapping():
"""修正后的索引映射配置"""
@@ -23,22 +23,12 @@ def create_index_with_mapping():
mapping = {
"mappings": {
"properties": {
"id": {
"type": "text", # 改为text类型支持分词
"data": {
"type": "text", # 存储转换后的字符串,支持分词搜索
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"name": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"students": {"type": "keyword"}, # 仅保留type参数
"teacher": {"type": "keyword"}, # 仅保留type参数
"timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
}
"image": {"type": "keyword"}, # 存储图片路径或标识
}
}
}
@@ -54,8 +44,7 @@ def create_index_with_mapping():
def get_doc_id(data):
    """
    Build a deterministic document ID from the document content, for de-duplication.

    Only the ``data`` and ``image`` entries are read; a missing entry is
    treated as an empty string. The previous key set (``id``, ``name``,
    ``students``, ``teacher``) is no longer accessed, so documents that carry
    only the new fields do not raise ``KeyError``.

    Args:
        data (dict): Dictionary holding the document fields.

    Returns:
        str: Hexadecimal MD5 digest of the UTF-8 encoded concatenation of the
        ``data`` and ``image`` values.
    """
    data_str = data.get('data', '')
    image_str = data.get('image', '')
    # NOTE(review): the two values are concatenated without a separator, so
    # ("ab", "c") and ("a", "bc") produce the same ID. Left as-is because
    # changing the scheme would change the IDs of already-indexed documents.
    unique_str = f"{data_str}{image_str}"
    # MD5 serves as a content fingerprint here, not as a security measure.
    return hashlib.md5(unique_str.encode('utf-8')).hexdigest()
@@ -184,47 +175,3 @@ def batch_write_data(data):
except requests.exceptions.HTTPError as e:
print(f"文档写入失败: {e.response.text}, 数据: {data}")
return False
def update_mapping():
    """
    Send field mappings to the index's ``_mapping`` endpoint and print the outcome.

    Issues a PUT to ``{ES_URL}/{index_name}/_mapping`` with definitions for
    the ``id``, ``name``, ``students`` and ``teacher`` fields, then reads the
    mapping back with a GET and prints it. HTTP error responses from either
    request are caught and printed; other request failures (for example a
    refused connection) are not caught here and propagate to the caller.

    Returns:
        None
    """
    # NOTE(review): Elasticsearch generally rejects changes to the type or
    # analyzer of a field that already exists; this call is expected to
    # succeed only for new fields or unchanged settings -- confirm against
    # the target index.
    new_mapping = {
        "properties": {
            "id": {
                "type": "text",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_smart"
            },
            "name": {
                "type": "text",
                "analyzer": "ik_max_word"
            },
            "students": {
                "type": "keyword"
            },
            "teacher": {
                "type": "keyword"
            }
        }
    }

    mapping_url = f"{ES_URL}/{index_name}/_mapping"
    try:
        # Apply the mapping.
        response = requests.put(
            mapping_url,
            auth=AUTH,
            json=new_mapping,
            headers={"Content-Type": "application/json"}
        )
        response.raise_for_status()
        print("索引映射更新成功")
        print(response.json())

        # Read the mapping back. The status is checked so that an error body
        # is reported as a failure instead of being printed as the result.
        verify = requests.get(mapping_url, auth=AUTH)
        verify.raise_for_status()
        print("\n验证结果:")
        print(verify.json())
    except requests.exceptions.HTTPError as e:
        print(f"请求失败: {e.response.text}")