修改数据结构
This commit is contained in:
		
							
								
								
									
										71
									
								
								ESConnect.py
									
									
									
									
									
								
							
							
						
						
									
										71
									
								
								ESConnect.py
									
									
									
									
									
								
							@@ -15,7 +15,7 @@ AUTH = None  # 如需认证则改为("用户名","密码")
 | 
			
		||||
es = Elasticsearch(["http://localhost:9200"])
 | 
			
		||||
 | 
			
		||||
# 定义索引名称和类型名称
 | 
			
		||||
index_name = "wordsearch2"
 | 
			
		||||
index_name = "wordsearch2666"
 | 
			
		||||
 | 
			
		||||
def create_index_with_mapping():
 | 
			
		||||
    """修正后的索引映射配置"""
 | 
			
		||||
@@ -23,22 +23,12 @@ def create_index_with_mapping():
 | 
			
		||||
    mapping = {
 | 
			
		||||
        "mappings": {
 | 
			
		||||
            "properties": {
 | 
			
		||||
                "id": {
 | 
			
		||||
                    "type": "text",  # 改为text类型支持分词
 | 
			
		||||
                "data": {
 | 
			
		||||
                    "type": "text",  # 存储转换后的字符串,支持分词搜索
 | 
			
		||||
                    "analyzer": "ik_max_word",
 | 
			
		||||
                    "search_analyzer": "ik_smart"
 | 
			
		||||
                },
 | 
			
		||||
                "name": {
 | 
			
		||||
                    "type": "text",
 | 
			
		||||
                    "analyzer": "ik_max_word",
 | 
			
		||||
                    "search_analyzer": "ik_smart"
 | 
			
		||||
                },
 | 
			
		||||
                "students": {"type": "keyword"},  # 仅保留type参数
 | 
			
		||||
                "teacher": {"type": "keyword"},    # 仅保留type参数
 | 
			
		||||
                "timestamp": {
 | 
			
		||||
                    "type": "date",
 | 
			
		||||
                    "format": "strict_date_optional_time||epoch_millis"
 | 
			
		||||
                }
 | 
			
		||||
                "image": {"type": "keyword"},  # 存储图片路径或标识
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
@@ -54,8 +44,7 @@ def create_index_with_mapping():
 | 
			
		||||
 | 
			
		||||
def get_doc_id(data):
 | 
			
		||||
    """
 | 
			
		||||
    根据关键字段生成唯一ID(用于去重)
 | 
			
		||||
    可以根据实际需求调整字段组合
 | 
			
		||||
    根据数据内容生成唯一ID(用于去重)
 | 
			
		||||
    
 | 
			
		||||
    参数:
 | 
			
		||||
        data (dict): 包含文档数据的字典
 | 
			
		||||
@@ -63,8 +52,10 @@ def get_doc_id(data):
 | 
			
		||||
    返回:
 | 
			
		||||
        str: 基于数据内容生成的MD5哈希值作为唯一ID
 | 
			
		||||
    """
 | 
			
		||||
    # 组合关键字段生成唯一字符串
 | 
			
		||||
    unique_str = f"{data['id']}{data['name']}{data['students']}{data['teacher']}"
 | 
			
		||||
    # 使用data字段的内容生成唯一字符串
 | 
			
		||||
    data_str = data.get('data', '')
 | 
			
		||||
    image_str = data.get('image', '')
 | 
			
		||||
    unique_str = f"{data_str}{image_str}"
 | 
			
		||||
    # 使用MD5哈希生成唯一ID
 | 
			
		||||
    return hashlib.md5(unique_str.encode('utf-8')).hexdigest()
 | 
			
		||||
 | 
			
		||||
@@ -184,47 +175,3 @@ def batch_write_data(data):
 | 
			
		||||
    except requests.exceptions.HTTPError as e:
 | 
			
		||||
        print(f"文档写入失败: {e.response.text}, 数据: {data}")
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
def update_mapping():
 | 
			
		||||
    # 定义新的映射配置
 | 
			
		||||
    new_mapping = {
 | 
			
		||||
        "properties": {
 | 
			
		||||
            "id": {
 | 
			
		||||
                "type": "text",
 | 
			
		||||
                "analyzer": "ik_max_word",
 | 
			
		||||
                "search_analyzer": "ik_smart"
 | 
			
		||||
            },
 | 
			
		||||
            "name": {
 | 
			
		||||
                "type": "text",
 | 
			
		||||
                "analyzer": "ik_max_word"
 | 
			
		||||
            },
 | 
			
		||||
            "students": {
 | 
			
		||||
                "type": "keyword"
 | 
			
		||||
            },
 | 
			
		||||
            "teacher": {
 | 
			
		||||
                "type": "keyword"
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    # 执行PUT请求更新映射
 | 
			
		||||
    try:
 | 
			
		||||
        response = requests.put(
 | 
			
		||||
            f"{ES_URL}/{index_name}/_mapping",
 | 
			
		||||
            auth=AUTH,
 | 
			
		||||
            json=new_mapping,
 | 
			
		||||
            headers={"Content-Type": "application/json"}
 | 
			
		||||
        )
 | 
			
		||||
        response.raise_for_status()
 | 
			
		||||
        print("索引映射更新成功")
 | 
			
		||||
        print(response.json())
 | 
			
		||||
 | 
			
		||||
        # 验证映射更新结果
 | 
			
		||||
        verify = requests.get(
 | 
			
		||||
            f"{ES_URL}/{index_name}/_mapping",
 | 
			
		||||
            auth=AUTH
 | 
			
		||||
        )
 | 
			
		||||
        print("\n验证结果:")
 | 
			
		||||
        print(verify.json())
 | 
			
		||||
    except requests.exceptions.HTTPError as e:
 | 
			
		||||
        print(f"请求失败: {e.response.text}")
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user