上传文件至 /

2025-09-24 19:46:17 +08:00
commit 2fb1c74f7b
3 changed files with 468 additions and 0 deletions
--- a/app.py
+++ b/app.py
@@ -0,0 +1,228 @@
+import base64
+from flask import Flask, request, render_template, redirect, url_for, jsonify
+import os
+import uuid
+from PIL import Image
+import re
+import json
+from ESConnect import *
+from openai import OpenAI
+# import config
+
+# Create the Flask application instance
+app = Flask(__name__)
+# app.config.from_object(config.Config)
+
+# OCR and information extraction: send the image to a vision LLM and parse its JSON reply
+def ocr_and_extract_info(image_path):
+    """
+    Ask the AI Studio multimodal model to read an image and return structured fields.
+
+    The image is base64-encoded into a data URL and sent with a prompt that
+    requests a JSON object. The reply is parsed leniently because the model may
+    wrap the JSON in a markdown fence, surround it with prose, or answer with a
+    single-quoted, Python-style dict.
+
+    Parameters:
+        image_path (str): Path of the image file to analyse.
+
+    Returns:
+        dict or None: The decoded reply, requested from the model in the shape
+        {'id': '', 'name': '', 'students': '', 'teacher': ''}; None when no
+        parsing strategy yields a non-empty value.
+
+    Raises:
+        RuntimeError: If the AI_STUDIO_API_KEY environment variable is not set.
+    """
+    # Function-scope imports: only this function needs these stdlib modules.
+    import ast
+    import mimetypes
+
+    def encode_image(image_path):
+        """Return the content of the file at image_path as base64 text."""
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+
+    def parse_respound(text):
+        """
+        Extract the structured payload from the model's reply text.
+
+        Parameters:
+            text (str): Reply text returned by the API.
+
+        Returns:
+            The first non-empty value that any strategy decodes (normally a
+            dict), or None when every strategy fails.
+        """
+        # Substrings worth decoding, tried in this order: the whole reply, the
+        # body of a ``` or ```json fence, and the outermost {...} span.
+        candidates = [text]
+        fenced = re.search(r'```(?:json)?\s*(.*?)```', text, re.DOTALL)
+        if fenced:
+            candidates.append(fenced.group(1))
+        braces = re.search(r'\{.*\}', text, re.DOTALL)
+        if braces:
+            candidates.append(braces.group(0))
+
+        # Per candidate: strict JSON, then a Python literal (reads single-quoted
+        # dicts without corrupting apostrophes inside values), then the blunt
+        # quote swap, which still accepts true/false/null.
+        decoders = (
+            json.loads,
+            ast.literal_eval,
+            lambda raw: json.loads(raw.replace("'", "\"")),
+        )
+        for candidate in candidates:
+            for decode in decoders:
+                try:
+                    result = decode(candidate.strip())
+                except (ValueError, SyntaxError, TypeError, RecursionError):
+                    continue
+                if result:
+                    print("success")
+                    return result
+        print("unable to parse model response")
+        return None
+
+    # The token is read from the environment so that no credential is stored
+    # in source control. Tokens: https://aistudio.baidu.com/account/accessToken
+    api_key = os.environ.get("AI_STUDIO_API_KEY")
+    if not api_key:
+        raise RuntimeError("AI_STUDIO_API_KEY environment variable is not set")
+
+    # Advertise the file's real media type in the data URL; anything that is
+    # not recognisably an image falls back to PNG.
+    mime_type = mimetypes.guess_type(image_path)[0] or ""
+    if not mime_type.startswith("image/"):
+        mime_type = "image/png"
+    base64_image = encode_image(image_path)
+
+    # OpenAI-compatible client pointed at the Baidu AI Studio LLM API endpoint.
+    client = OpenAI(
+        api_key=api_key,
+        base_url="https://aistudio.baidu.com/llm/lmapi/v3",
+    )
+
+    # One system message plus a user message carrying the prompt and the image.
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {'role': 'system', 'content': '你是一个能理解图片和文本的助手，请根据用户提供的信息进行回答。'},
+            {'role': 'user', "content": [
+                {"type": "text", "text": "请识别这张图片中的信息，只显示json不显示其它信息便于解析"
+                                         "以JSON格式返回（id对应比赛名称或论文名称，name对应项目名称，students对应参赛学生，teacher对应指导老师,出现多个名字用列表存储）"
+                                         "：{'id':'', 'name':'','students':'','teacher':''}"},
+                {"type": "image_url",
+                 "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
+            ]}
+        ],
+        model="ernie-4.5-turbo-vl-32k",  # Baidu ERNIE (Wenxin) model
+    )
+
+    # Parse the text content of the first returned choice.
+    return parse_respound(chat_completion.choices[0].message.content)
+
+
+# Home page route
+@app.route('/')
+def index():
+    """Render the home page template.
+
+    Returns:
+        str: The rendered HTML page.
+    """
+    home_page = render_template('index.html')
+    return home_page
+
+# Image upload route
+@app.route('/upload', methods=['POST'])
+def upload_image():
+    """Store an uploaded image, extract its fields via the model and save them.
+
+    Returns:
+        JSON: 400 without a file, 500 plus error text on failure, else the result.
+    """
+    file = request.files.get('file')
+    if not file:
+        return jsonify({"error": "No file uploaded"}), 400
+    # file.filename is client-controlled: keep only its last path component and
+    # replace unexpected characters so the saved file cannot leave "image/".
+    client_name = os.path.basename((file.filename or "").replace("\\", "/"))
+    filename = f"{uuid.uuid4()}_" + re.sub(r'[^\w.-]', '_', client_name)
+    image_path = os.path.join("image", filename)
+    try:
+        os.makedirs("image", exist_ok=True)  # otherwise only created under __main__
+        file.save(image_path)
+        data = ocr_and_extract_info(image_path)
+        if not data:
+            raise ValueError("no structured data could be extracted from the image")
+        insert_data(data)  # store in ES
+        return jsonify({"message": "成功录入", "data": data})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+# Search route
+@app.route('/search')
+def search():
+    """
+    Handle a search request for the "q" query parameter.
+
+    Returns:
+        JSON: What search_by_any_field returns, or [] when "q" is missing or empty.
+    """
+    query = request.args.get('q')
+    if query:
+        matches = search_by_any_field(query)
+        print(matches)
+        return jsonify(matches)
+    return jsonify([])
+
+# Results page route
+@app.route('/results')
+def results_page():
+    """Render the search results page template.
+
+    Returns:
+        str: The rendered HTML page.
+    """
+    results_html = render_template('results.html')
+    return results_html
+
+# Route that lists every stored record
+@app.route('/all')
+def show_all():
+    """
+    Render every record returned by search_all().
+    The records are handed to the all.html template as "data".
+
+    Returns:
+        str: The rendered HTML page.
+    """
+    return render_template('all.html', data=search_all())
+
+# Delete record route
+@app.route('/delete/<doc_id>', methods=['POST'])
+def delete_entry(doc_id):
+    """
+    Delete one record by its document ID.
+
+    Parameters:
+        doc_id (str): ID of the document to delete.
+
+    Returns:
+        A redirect to the show_all page on success, else an error text with 500.
+    """
+    deleted = delete_by_id(doc_id)
+    if not deleted:
+        return "删除失败", 500
+    return redirect(url_for('show_all'))
+
+
+
+# Script entry point
+if __name__ == '__main__':
+    # Create the Elasticsearch index
+    create_index_with_mapping()  
+    # Create the image storage directory
+    os.makedirs("image", exist_ok=True)
+    # Start the Flask application (reloader disabled)
+    app.run(use_reloader=False)