# -*- coding: utf-8 -*-
# @Time : 2023/3/20 11:43
# @Author : bruxellse_li
# @File : app_run.py
# @Project : 从word中提取指定表格
from docx import Document
from flask import Flask, send_file, jsonify
from flask import request, Response
import requests
from extract_table import get_choose_table, get_other_table, get_other1_table
from extract_factor import get_text_from_docx, Extract, get_cover_content_from_docx, Other_Extract
import json, re
from utils.log import logger
import subprocess
from pathlib import Path
import traceback
from fdfs_client.client import *
from generate.gen_user_report_auto_generated import main_process
from generate.platform_generated import pl_process
from TextRewriting import get_list_result
from sentence_split import qx_correct, ner_correct
from utils.database_mysql import DatabaseMySQL

# Database connection settings.
# SECURITY: credentials should not be kept in source control. Every value can
# be overridden through an environment variable; the literals below are only
# the legacy defaults, kept for backward compatibility, and should be rotated
# and removed once the deployment provides the variables.
import os

database_config = {
    'host': os.environ.get('CLB_DB_HOST', '114.115.205.50'),
    'port': int(os.environ.get('CLB_DB_PORT', 3306)),
    'user': os.environ.get('CLB_DB_USER', 'root'),
    'password': os.environ.get('CLB_DB_PASSWORD', 'yotop@123456'),
    'database': os.environ.get('CLB_DB_NAME', 'clb_project'),
}
dbm = DatabaseMySQL(database_config=database_config)
# Base URL that template file paths read from the database are appended to.
temp_url = os.environ.get('CLB_TEMPLATE_BASE_URL', "http://114.115.215.96/")


app = Flask(__name__)

# Working directory for downloaded and generated files; created at import time.
UPLOAD_FOLDER = r'data/'
Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)
# Directory containing this source file.
abs_path = os.path.dirname(os.path.realpath(__file__))
# File extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'doc', 'docx'}

# File-upload (FastDFS) client, configured from the tracker config file.
tracker_conf = get_tracker_conf("data/fdfs_client.conf")
client = Fdfs_client(tracker_conf)

# Cross-origin (CORS) support.
from flask_cors import CORS

CORS(app, supports_credentials=True)


def allowed_file(filename):
    """Return ``filename`` if its extension is allowed, otherwise ``None``.

    The name must contain a dot, and the text after the last dot must be a
    member of ``ALLOWED_EXTENSIONS`` (the comparison is case-sensitive).
    """
    _, dot, extension = filename.rpartition('.')
    if dot and extension in ALLOWED_EXTENSIONS:
        return filename
    return None


def doc2docx(doc_path, docx_path):
    """Convert a Word document to ``.docx`` using headless LibreOffice.

    Args:
        doc_path: Path of the source document.
        docx_path: Path the converted document should end up at.

    Raises:
        subprocess.CalledProcessError: If LibreOffice exits with a non-zero
            status.
        OSError: If LibreOffice cannot be started or the converted file
            cannot be moved to ``docx_path``.
    """
    # An empty dirname means "current directory"; pass that explicitly.
    out_dir = os.path.dirname(docx_path) or os.curdir
    subprocess.run(
        ['libreoffice', '--headless', '--convert-to', 'docx', doc_path, '--outdir', out_dir],
        check=True,
    )
    # LibreOffice writes "<source stem>.docx" into out_dir, which is not
    # necessarily the directory the source file lives in.
    stem = os.path.splitext(os.path.basename(doc_path))[0]
    converted_path = os.path.join(out_dir, stem + '.docx')
    if os.path.abspath(converted_path) != os.path.abspath(docx_path):
        # Move the converted file to the requested target name.
        os.replace(converted_path, docx_path)


@app.route('/extract_special_table', methods=['POST'])
def extract_special_table():
    """Extract the "assets measured at nominal amount" table from a Word file.

    The JSON request body must contain ``path``: a URL whose file name ends in
    ``.doc`` or ``.docx``. The file is downloaded into ``UPLOAD_FOLDER``,
    converted to ``.docx`` when necessary and parsed; every temporary file is
    removed afterwards, even when parsing fails.

    Returns:
        A JSON ``Response`` holding the dict produced by ``get_other_table``,
        or a plain-text error message when the file type is not supported.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    data = request.get_json()
    file_request = data["path"]
    # Take the file name from the URL path only, so a query string or fragment
    # cannot leak into the local file name or confuse the extension check.
    filename = file_request.split("?", 1)[0].split("#", 1)[0].rsplit("/", 1)[-1]
    extension = os.path.splitext(filename)[1].lower()
    # Compare the real extension: a substring test for ".doc" also matches
    # ".docx" and any URL that merely contains ".doc" somewhere.
    if extension not in (".doc", ".docx"):
        return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"

    doc_path = os.path.join(UPLOAD_FOLDER, filename)
    if extension == ".docx":
        docx_path = doc_path
    else:
        docx_path = os.path.splitext(doc_path)[0] + '.docx'

    primary_name = "以名义金额计量的资产名称、数量等情况，以及以名义金额计量理由的说明"
    try:
        with requests.get(file_request, stream=True) as r:
            r.raise_for_status()
            with open(doc_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        if docx_path != doc_path:
            # Only legacy .doc files need the LibreOffice conversion.
            doc2docx(doc_path, docx_path)
        document = Document(docx_path)

        data_result = get_other_table(document, [primary_name])
        if not data_result[primary_name]:
            # Nothing found under the primary heading: look for the
            # "no such assets" statement instead and report that entry.
            temp_table_result = get_other_table(document, ["本单位无以名义金额计量的资产"])
            del data_result[primary_name]
            data_result.update(temp_table_result)
    finally:
        # Remove the download and the converted copy (the same file for .docx).
        for leftover in {doc_path, docx_path}:
            if os.path.exists(leftover):
                os.remove(leftover)
    return Response(json.dumps(data_result, ensure_ascii=False), content_type='application/json')


@app.route('/extract_other_table1', methods=['POST'])
def extract_special_table1():
    """Extract the monetary-funds detail table from a remote Word file.

    The JSON request body must contain ``path``: a URL whose file name ends in
    ``.doc`` or ``.docx``. The file is downloaded into ``UPLOAD_FOLDER``,
    converted to ``.docx`` when necessary and parsed; every temporary file is
    removed afterwards, even when parsing fails.

    Returns:
        A JSON ``Response`` holding the result of ``get_other1_table``, or a
        plain-text error message when the file type is not supported.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    data = request.get_json()
    file_request = data["path"]
    # Take the file name from the URL path only, so a query string or fragment
    # cannot leak into the local file name or confuse the extension check.
    filename = file_request.split("?", 1)[0].split("#", 1)[0].rsplit("/", 1)[-1]
    extension = os.path.splitext(filename)[1].lower()
    # Compare the real extension: a substring test for ".doc" also matches
    # ".docx" and any URL that merely contains ".doc" somewhere.
    if extension not in (".doc", ".docx"):
        return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"

    doc_path = os.path.join(UPLOAD_FOLDER, filename)
    if extension == ".docx":
        docx_path = doc_path
    else:
        docx_path = os.path.splitext(doc_path)[0] + '.docx'

    try:
        with requests.get(file_request, stream=True) as r:
            r.raise_for_status()
            with open(doc_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        if docx_path != doc_path:
            # Only legacy .doc files need the LibreOffice conversion.
            doc2docx(doc_path, docx_path)
        document = Document(docx_path)

        table_names = ['货币资金明细信息如下']
        data_result = get_other1_table(document, table_names)
    finally:
        # Remove the download and the converted copy (the same file for .docx).
        for leftover in {doc_path, docx_path}:
            if os.path.exists(leftover):
                os.remove(leftover)
    return Response(json.dumps(data_result, ensure_ascii=False), content_type='application/json')


def _attach_balance_sheet_parents(rows):
    """Tag every balance-sheet row with its section header, in place.

    A row whose item text ends with a full-width colon is a section header;
    the rows after it receive that header (colons stripped) as their parent.
    A single trailing digit on an item name is dropped.
    """
    parent = ""  # rows that appear before any header get an empty parent
    for row in rows:
        text = row["项目"].replace(":", "：")
        last_char = text[-1:]  # slicing keeps an empty item from raising
        if last_char == "：":
            parent = text.strip("：")
            continue
        if last_char.isdigit():
            row["项目"] = text[:-1]
        row["上级项目"] = parent


def _attach_income_parents(rows):
    """Tag every income/expense row with its total row, in place.

    Rows up to the revenue-total row belong to it; rows after it belong to
    the expense total. The total rows themselves and the annual-surplus row
    get no parent. A single trailing digit on an item name is dropped.
    """
    parent = "收入合计"
    for row in rows:
        text = row["项目"]
        if text[-1:].isdigit():  # slicing keeps an empty item from raising
            text = text[:-1]
        row["项目"] = text
        name = text.strip()
        if name == "收入合计":
            parent = "费用合计"
        elif name not in ("费用合计", "本年盈余"):
            row["上级项目"] = parent


@app.route('/extract_table', methods=['POST'])
def extract_table():
    """Extract the balance sheet and both income/expense tables from a Word file.

    The JSON request body must contain ``path``: a URL whose file name ends in
    ``.doc`` or ``.docx``. The file is downloaded into ``UPLOAD_FOLDER``,
    converted to ``.docx`` when necessary and parsed; every temporary file is
    removed afterwards, even when parsing fails. Each extracted row is then
    annotated with its parent item.

    Returns:
        A JSON ``Response`` holding the annotated result of
        ``get_choose_table``, or a plain-text error message when the file
        type is not supported.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    data = request.get_json()
    file_request = data["path"]
    # Take the file name from the URL path only, so a query string or fragment
    # cannot leak into the local file name or confuse the extension check.
    filename = file_request.split("?", 1)[0].split("#", 1)[0].rsplit("/", 1)[-1]
    extension = os.path.splitext(filename)[1].lower()
    # Compare the real extension: a substring test for ".doc" also matches
    # ".docx" and any URL that merely contains ".doc" somewhere.
    if extension not in (".doc", ".docx"):
        return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"

    doc_path = os.path.join(UPLOAD_FOLDER, filename)
    if extension == ".docx":
        docx_path = doc_path
    else:
        docx_path = os.path.splitext(doc_path)[0] + '.docx'

    try:
        with requests.get(file_request, stream=True) as r:
            r.raise_for_status()
            with open(doc_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        if docx_path != doc_path:
            # Only legacy .doc files need the LibreOffice conversion.
            doc2docx(doc_path, docx_path)
        document = Document(docx_path)

        table_params = ['资产负债表', '收入费用表（1）', '收入费用表（2）']
        tables = get_choose_table(document, table_params)
    finally:
        # Remove the download and the converted copy (the same file for .docx).
        for leftover in {doc_path, docx_path}:
            if os.path.exists(leftover):
                os.remove(leftover)

    # Resolve the parent item of every row.
    _attach_balance_sheet_parents(tables["资产负债表"])
    _attach_income_parents(tables["收入费用表（1）"])
    _attach_income_parents(tables["收入费用表（2）"])

    return Response(json.dumps(tables, ensure_ascii=False), content_type='application/json')


@app.route('/extract_factor', methods=['POST'])
def extract_factor():
    """Extract report fields from the text of a remote Word file.

    The JSON request body must contain ``path``: a URL whose file name ends in
    ``.doc`` or ``.docx``. The file is downloaded into ``UPLOAD_FOLDER``,
    converted to ``.docx`` when necessary and read with
    ``get_text_from_docx``; every temporary file is removed afterwards, even
    when extraction fails.

    Returns:
        A JSON ``Response`` holding the result of ``Extract().extract_result``,
        or a plain-text error message when the file type is not supported.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    data = request.get_json()
    file_request = data["path"]
    # Take the file name from the URL path only, so a query string or fragment
    # cannot leak into the local file name or confuse the extension check.
    filename = file_request.split("?", 1)[0].split("#", 1)[0].rsplit("/", 1)[-1]
    extension = os.path.splitext(filename)[1].lower()
    # Compare the real extension: a substring test for ".doc" also matches
    # ".docx" and any URL that merely contains ".doc" somewhere.
    if extension not in (".doc", ".docx"):
        return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"

    doc_path = os.path.join(UPLOAD_FOLDER, filename)
    if extension == ".docx":
        docx_path = doc_path
    else:
        docx_path = os.path.splitext(doc_path)[0] + '.docx'

    try:
        with requests.get(file_request, stream=True) as r:
            r.raise_for_status()
            with open(doc_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        if docx_path != doc_path:
            # Only legacy .doc files need the LibreOffice conversion.
            doc2docx(doc_path, docx_path)
        document = get_text_from_docx(docx_path)

        result = Extract().extract_result(document)
    finally:
        # Remove the download and the converted copy (the same file for .docx).
        for leftover in {doc_path, docx_path}:
            if os.path.exists(leftover):
                os.remove(leftover)
    return Response(json.dumps(result, ensure_ascii=False), content_type='application/json')


def _fetch_report_docx(url, cleanup):
    """Download ``url`` into ``UPLOAD_FOLDER`` and return a ``.docx`` path for it.

    Every file this helper creates is added to the ``cleanup`` set before it
    is written, so the caller can delete it even if a later step fails.

    Returns:
        The path of the ``.docx`` file, or ``None`` when the URL's file name
        does not end in ``.doc`` or ``.docx``.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    # Use the URL path only, so a query string or fragment cannot leak into
    # the local file name or confuse the extension check.
    filename = url.split("?", 1)[0].split("#", 1)[0].rsplit("/", 1)[-1]
    extension = os.path.splitext(filename)[1].lower()
    # Compare the real extension: a substring test for ".doc" also matches
    # ".docx" and any URL that merely contains ".doc" somewhere.
    if extension not in (".doc", ".docx"):
        return None

    downloaded_path = os.path.join(UPLOAD_FOLDER, filename)
    cleanup.add(downloaded_path)
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        with open(downloaded_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)
    if extension == ".docx":
        return downloaded_path

    # Only legacy .doc files need the LibreOffice conversion.
    docx_path = os.path.splitext(downloaded_path)[0] + '.docx'
    cleanup.add(docx_path)
    doc2docx(downloaded_path, docx_path)
    return docx_path


@app.route('/generate_report', methods=['POST'])
def generate_report():
    """Fill a report template and upload the generated report.

    Expected JSON request body:
        template_path: download URL of the template file (.doc/.docx)
        document_path: download URL of the half-finished document (.doc/.docx)
        report_name: name of the report, without extension
        object: dict with ``data_object`` (data to fill in) and ``tables_dict``

    Returns:
        The ``"Remote file_id"`` reported by the file server for the uploaded
        report, or a plain-text error message when one of the two files has
        an unsupported type.

    Raises:
        requests.HTTPError: If a download answers with an error status.
    """
    data = request.get_json()
    template_request = data["template_path"]
    doc_request = data["document_path"]
    report_name = data["report_name"] + ".docx"
    data_object = data["object"]["data_object"]
    tables_dict = data["object"]["tables_dict"]
    current_filename = time.strftime('%Y_%m_%d-%H_%M_%S') + ".docx"
    save_path = os.path.join(UPLOAD_FOLDER, current_filename)
    # The report is uploaded from this path after main_process has run.
    send_path = os.path.join(UPLOAD_FOLDER, report_name)

    cleanup = set()
    try:
        template_path = _fetch_report_docx(template_request, cleanup)
        if template_path is None:
            return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"
        half_work_path = _fetch_report_docx(doc_request, cleanup)
        if half_work_path is None:
            return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"
        main_process(half_work_path, tables_dict, template_path, report_name, data_object, save_path)
    finally:
        # Delete downloads and converted copies, but never the file that is
        # about to be uploaded, even if an input happened to share its name.
        for leftover in cleanup:
            if leftover != send_path and os.path.exists(leftover):
                os.remove(leftover)

    # NOTE(review): the generated report (and save_path, if main_process
    # writes it) is left in UPLOAD_FOLDER after the upload, as before.
    ret_upload = client.upload_by_filename(send_path)
    return ret_upload["Remote file_id"]


@app.route('/gx_app', methods=['POST'])
def gx_app():
    """Run ``get_list_result`` on the posted ``content`` and return JSON.

    A missing ``content`` key is treated as an empty string. Any exception is
    printed to stderr and reported in the response body with code 500.
    """
    try:
        payload = request.get_json()
        text = ""
        if "content" in payload:
            text = payload["content"]
        dict_result = dict(
            code=300,
            msg='success',
            isHandleSuccess=True,
            result_data=get_list_result(text),
        )
    except Exception as e:
        traceback.print_exc()
        dict_result = dict(
            code=500,
            msg='fail' + str(e),
            isHandleSuccess=False,
            result_data=None,
        )
    return jsonify(dict_result)


@app.route('/ner_error', methods=['POST'])
def ner_error():
    """Run ``ner_correct`` on the posted ``content`` with the given ``param``.

    Returns:
        A JSON string. On success ``code`` is ``'200'`` and ``resultData``
        carries the corrected content and the correction details; on any
        failure, including a missing request field, ``code`` is ``'500'`` and
        ``message`` carries the error text.
    """
    data = request.get_json()
    try:
        # Read the fields inside the try block so that a malformed request
        # yields the JSON failure payload instead of an unhandled error.
        text = data['content']
        param = data["param"].strip()
        content, right_result = ner_correct(text, param)
    except Exception as e:
        traceback.print_exc()
        return json.dumps({'code': '500',
                           'message': 'failure' + str(e),
                           'resultData': None})
    return json.dumps({'code': '200',
                       'message': 'success',
                       'resultData': {'contentDetails': right_result,
                                      'correctContent': content}})


@app.route('/qx_error', methods=['POST'])
def qx_error():
    """Run ``qx_correct`` on the posted ``content``.

    Returns:
        A JSON string. On success ``code`` is ``'200'`` and ``resultData``
        carries the corrected content and the correction details; on any
        failure, including a missing request field, ``code`` is ``'500'`` and
        ``message`` carries the error text.
    """
    data = request.get_json()
    try:
        # Read the field inside the try block so that a malformed request
        # yields the JSON failure payload instead of an unhandled error.
        text = data['content']
        content, right_result = qx_correct(text)
    except Exception as e:
        traceback.print_exc()
        return json.dumps({'code': '500',
                           'message': 'failure' + str(e),
                           'resultData': None})
    return json.dumps({'code': '200',
                       'message': 'success',
                       'resultData': {'contentDetails': right_result,
                                      'correctContent': content}})


@app.route('/platform_report', methods=['POST'])
def platform_report():
    """Generate the platform report of a task and upload it.

    The JSON request body must contain ``task_id``. The task's template path
    and data-set id are read from the database, the template is downloaded
    (and converted to ``.docx`` when necessary), every data source mapped to
    the data set is queried over HTTP, and ``pl_process`` builds the report.

    Returns:
        The ``"Remote file_id"`` reported by the file server for the uploaded
        report, or a plain-text error message when the template has an
        unsupported file type.

    Raises:
        ValueError: If ``task_id`` is not an integer.
        requests.HTTPError: If the template download answers with an error
            status.
    """
    data = request.get_json()
    # The id is interpolated into the SQL text below; forcing it to an integer
    # first makes it impossible to inject SQL through this field.
    task_id = int(str(data["task_id"]).strip())

    # Step 0: look up the data-set id and template path of the task.
    dataset_sql = """SELECT ds.id,te.file_path FROM clb_report_task t inner join clb_report_template te on t.template_id = te.id 
                            inner join clb_report_data_set ds on te.data_set_id = ds.id
                            where t.id  = {};""".format(task_id)
    # NOTE(review): a new connection object is created per request and never
    # closed here; confirm how DatabaseMySQL manages its connection.
    dbm = DatabaseMySQL(database_config=database_config)
    dataset_result = dbm.query(sql=dataset_sql)[0]
    dataset_id, temp_path = dataset_result["id"], dataset_result["file_path"]
    # Then fetch URL, parameters, type and result name of every data source
    # mapped to that data set (dataset_id comes from the database, not the
    # request).
    data_source_sql = """select ds.url,m.param_value,ds.type,m.return_object from clb_report_data_source ds inner join clb_report_data_set_source_map m on ds.id = m.data_source_id
        where m.data_set_id = {};""".format(dataset_id)
    datasource_result_list = dbm.query(sql=data_source_sql)

    # Step 1: build the template download URL, joined with exactly one slash.
    template_request = temp_url.rstrip("/") + "/" + temp_path.lstrip("/")
    # Use the URL path only, so a query string or fragment cannot leak into
    # the local file name or confuse the extension check.
    template_filename = template_request.split("?", 1)[0].split("#", 1)[0].rsplit("/", 1)[-1]
    extension = os.path.splitext(template_filename)[1].lower()
    # Compare the real extension: a substring test for ".doc" also matches
    # ".docx" and any URL that merely contains ".doc" somewhere.
    if extension not in (".doc", ".docx"):
        return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"

    downloaded_path = os.path.join(UPLOAD_FOLDER, template_filename)
    if extension == ".docx":
        template_path = downloaded_path
    else:
        template_path = os.path.splitext(downloaded_path)[0] + '.docx'

    # Temporary name of the generated platform report.
    report_name = "平台模板样例报告.docx"
    send_path = os.path.join(UPLOAD_FOLDER, report_name)
    try:
        with requests.get(template_request, stream=True) as r:
            r.raise_for_status()
            with open(downloaded_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        if template_path != downloaded_path:
            # Only legacy .doc files need the LibreOffice conversion.
            doc2docx(downloaded_path, template_path)

        # Step 2: query every data source and collect its result object.
        dict_data = dict()
        for datasource_result in datasource_result_list:
            dataset_url = datasource_result["url"]
            # param_value is a JSON object, e.g. {"id": 2, "name": "nihao"}.
            dict_param = json.loads(datasource_result["param_value"])
            # Let requests build the query string so that keys and values are
            # URL-encoded instead of being concatenated verbatim.
            query = {str(key): str(value) for key, value in dict_param.items()}
            str_dataset_info = requests.get(dataset_url, params=query)
            logger.info(str_dataset_info.content)
            dataset_info = json.loads(str_dataset_info.content)
            data_name = datasource_result["return_object"]
            dict_data[data_name] = dataset_info["result"]
        logger.info(dict_data)

        pl_process(template_path, dict_data, report_name)
    finally:
        # Delete the download and the converted copy, but never the file that
        # is about to be uploaded.
        for leftover in {downloaded_path, template_path}:
            if leftover != send_path and os.path.exists(leftover):
                os.remove(leftover)

    ret_upload = client.upload_by_filename(send_path)
    return ret_upload["Remote file_id"]


@app.route('/extract_cover_factor', methods=['POST'])
def extract_cover_factor():
    """Extract the cover title and other fields from a remote Word file.

    The JSON request body must contain ``path``: a URL whose file name ends in
    ``.doc`` or ``.docx``. The file is downloaded into ``UPLOAD_FOLDER``,
    converted to ``.docx`` when necessary and read with
    ``get_cover_content_from_docx``; every temporary file is removed
    afterwards, even when extraction fails.

    Returns:
        A JSON ``Response`` with the fields found by
        ``Other_Extract().extract_other_result`` plus ``reportTitle`` (empty
        when no title is matched on the cover), or a plain-text error message
        when the file type is not supported.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    data = request.get_json()
    file_request = data["path"]
    # Take the file name from the URL path only, so a query string or fragment
    # cannot leak into the local file name or confuse the extension check.
    filename = file_request.split("?", 1)[0].split("#", 1)[0].rsplit("/", 1)[-1]
    extension = os.path.splitext(filename)[1].lower()
    # Compare the real extension: a substring test for ".doc" also matches
    # ".docx" and any URL that merely contains ".doc" somewhere.
    if extension not in (".doc", ".docx"):
        return "上传文件格式有误，当前仅支持doc 和 docx 格式，请选择正确文件重新上传！"

    doc_path = os.path.join(UPLOAD_FOLDER, filename)
    if extension == ".docx":
        docx_path = doc_path
    else:
        docx_path = os.path.splitext(doc_path)[0] + '.docx'

    try:
        with requests.get(file_request, stream=True) as r:
            r.raise_for_status()
            with open(doc_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        if docx_path != doc_path:
            # Only legacy .doc files need the LibreOffice conversion.
            doc2docx(doc_path, docx_path)
        cover_contents, other_contents = get_cover_content_from_docx(docx_path)

        # The title is the text in front of the words "财务报告" on the cover.
        cover_pattern = re.compile(r"([0-9]{0,4}).*(?=(财务报告))")
        cover_group = cover_pattern.search(cover_contents)
        cover_text = cover_group.group().strip() if cover_group else ""

        # Extract the remaining fields from the non-cover content.
        other_data = Other_Extract().extract_other_result(other_contents)
        other_data["reportTitle"] = cover_text
    finally:
        # Remove the download and the converted copy (the same file for .docx).
        for leftover in {doc_path, docx_path}:
            if os.path.exists(leftover):
                os.remove(leftover)
    return Response(json.dumps(other_data, ensure_ascii=False), content_type='application/json')


if __name__ == '__main__':
    # These JSON settings are applied only when the file is run as a script.
    app.config.update(
        JSON_AS_ASCII=False,
        JSONIFY_MIMETYPE="application/json;charset=utf-8",
    )
    # Listen on all interfaces, port 4000, with the debugger switched off.
    app.run(host='0.0.0.0', port=4000, debug=False)
