import configparser
import csv
import glob
import os
import re

import pandas as pd


def combinFiles(inFileName,outFileName):
    """Merge every CSV matching a glob pattern into one GBK-encoded CSV.

    Each matching file is read as GBK text with every column kept as a
    string (``keep_default_na=False`` leaves empty cells as ``''``), its last
    column is dropped, and the remaining rows are stacked in the order
    ``glob`` returns the files.

    A file that cannot be read is reported on stdout and skipped, so it can
    neither abort the merge nor cause a previously read file to be appended
    a second time.

    Args:
        inFileName: Glob pattern selecting the input files; ``**`` is
            honoured because the search is recursive.
        outFileName: Path of the merged CSV to write.

    Returns:
        ``outFileName``, unchanged.
    """
    frames = []
    for file in glob.glob(inFileName, recursive=True):
        print('------'+file)
        try:
            data = pd.read_csv(file, encoding='gbk', dtype=str, keep_default_na=False)
        except Exception as e2:
            # Best-effort merge: report the unreadable file and move on rather
            # than falling through with an unbound or stale `data`.
            print(e2)
            continue
        # Drop the last column (presumably an empty column produced by a
        # trailing delimiter in the export -- confirm against the raw files).
        frames.append(data.iloc[:, :-1])

    # Concatenate once; with no readable input an empty frame is still written
    # so the output file always exists.
    merged_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    merged_data.to_csv(outFileName, encoding='gbk', index=False, quoting=csv.QUOTE_ALL, quotechar='"', escapechar='\\')
    print(f'文件名称：{outFileName}')
    print('合并完成！！')
    return outFileName


def fileclearn(csvFile,outxlsFile,recordName,iEType_name):
    """Clean a merged GBK CSV and save the result as an .xlsx workbook.

    Steps, in order:
      * read ``csvFile`` as GBK with every column as a string;
      * left-pad '商品编码' with zeros to 8 characters;
      * strip carriage returns from '商品名称';
      * prepend the columns '报告期' (``recordName``) and '进出口标识'
        (``iEType_name``);
      * discard every column to the right of '美元';
      * convert '美元' to float after removing thousands separators;
      * convert '第一数量' / '第二数量' to numbers, treating a lone '-' and
        anything else unparsable as 0.

    Args:
        csvFile: Path of the merged CSV to clean.
        outxlsFile: Path of the workbook to write (openpyxl engine).
        recordName: Value stored in the '报告期' column of every row.
        iEType_name: Value stored in the '进出口标识' column of every row.

    Returns:
        ``outxlsFile``, unchanged.

    Raises:
        KeyError: If an expected column is missing from the CSV.
        ValueError: If '美元' holds a value that is not a number once the
            commas are removed.
    """
    df = pd.read_csv(csvFile, encoding='gbk', dtype=str)

    # dtype=str already yields strings. A further astype(str) would turn a
    # missing code (NaN) into the text 'nan', which zfill pads to '00000nan';
    # using the .str accessor directly leaves missing codes missing.
    df['商品编码'] = df['商品编码'].str.zfill(8)
    df['商品名称'] = df['商品名称'].str.replace('\r', '')

    # insert(0, ...) twice: the final column order is 报告期, 进出口标识, ...
    df.insert(0, '进出口标识', iEType_name)
    df.insert(0, '报告期', recordName)

    # Keep columns up to and including '美元'. Copy so the assignments below
    # operate on an independent frame rather than on a slice of the original.
    usd_column_index = df.columns.get_loc('美元')
    df = df.iloc[:, :usd_column_index+1].copy()

    df['美元'] = df['美元'].str.replace(',', '').astype(float)

    for column in ('第一数量', '第二数量'):
        # .str.replace strips thousands separators inside each cell, while
        # Series.replace blanks cells that are exactly '-' (it must not touch
        # the sign of a negative number).
        cleaned = df[column].str.replace(',', '').replace('-', '')
        df[column] = pd.to_numeric(cleaned, errors='coerce').fillna(0)

    df.to_excel(outxlsFile, index=False, engine='openpyxl')
    print('数据处理完成！')
    return outxlsFile

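# File merging.
#
# Example file locations:
#   D:\hg\2023\07\单月\收发货地址\*--进口.csv   (单月: single month)
#   D:\hg\2023\07\累计\收发货地址\              (累计: cumulative)
#
# Parameters: year, endMonth, field type, import/export type.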
def getFileName(year, startMonth, endMonth, field_name, iEType_name):
    """Build the glob pattern that selects the raw CSV parts for one period.

    When ``startMonth < endMonth`` the period is written ``<year>01-<MM>`` and
    the files are looked up under the '累计' directory; otherwise the period
    is ``<year><MM>`` and the '单月' directory is used. ``<MM>`` is
    ``endMonth`` zero-padded to two digits. The start of a ranged period is
    always written as '01', whatever ``startMonth`` is.

    The directory part is hard-coded with backslashes under ``D:\\hg``; the
    file-name pattern is attached to it with ``os.path.join``.

    Returns:
        ``<dir>`` joined with ``<period>--<field_name>--<iEType_name>-*.csv``.
    """
    cumulative = startMonth < endMonth
    year_tag = str(year)
    month_tag = f"{endMonth:02d}"

    if cumulative:
        period, span_dir = year_tag + '01-' + month_tag, '累计'
    else:
        period, span_dir = year_tag + month_tag, '单月'

    pattern = '--'.join((period, field_name, iEType_name + '-*.csv'))
    # The trailing '' makes the joined directory end with a backslash.
    directory = '\\'.join(('D:\\hg', year_tag, month_tag, span_dir, field_name, ''))
    return os.path.join(directory, pattern)

def getRecordFileName(year, startMonth, endMonth, field_name, iEType_name):
    """Return the reporting-period label for one run.

    The label is ``<year>01-<MM>`` when ``startMonth < endMonth`` and
    ``<year><MM>`` otherwise, where ``<MM>`` is ``endMonth`` zero-padded to
    two digits. ``field_name`` and ``iEType_name`` are accepted only so the
    signature matches the other name builders; they are not used.
    """
    spans_range = startMonth < endMonth
    month_tag = f"{endMonth:02d}"
    lead = str(year) + '01-' if spans_range else str(year)
    return lead + month_tag

def getOutFileName(year, startMonth, endMonth, field_name, iEType_name):
    """Build the path of the merged CSV for one period.

    The file is named ``<period>--<field_name>商品--<iEType_name>.csv`` and
    placed in ``D:\\hg\\<year>\\<MM>\\``, where ``<MM>`` is ``endMonth``
    zero-padded to two digits and ``<period>`` is ``<year>01-<MM>`` when
    ``startMonth < endMonth``, else ``<year><MM>``.
    """
    cumulative = startMonth < endMonth
    year_tag = str(year)
    month_tag = f"{endMonth:02d}"

    period = year_tag + ('01-' if cumulative else '') + month_tag
    leaf = '--'.join((period, field_name + '商品', iEType_name + '.csv'))

    # The output directory does not depend on the period kind.
    month_dir = '\\'.join(('D:\\hg', year_tag, month_tag, ''))
    return os.path.join(month_dir, leaf)

def getOutFileNameXls(year, startMonth, endMonth, field_name, iEType_name):
    """Build the path of the cleaned .xlsx workbook for one period.

    Same layout as the merged-CSV path but with an ``.xlsx`` suffix:
    ``D:\\hg\\<year>\\<MM>\\<period>--<field_name>商品--<iEType_name>.xlsx``,
    where ``<MM>`` is ``endMonth`` zero-padded to two digits and ``<period>``
    is ``<year>01-<MM>`` when ``startMonth < endMonth``, else ``<year><MM>``.
    """
    is_range = startMonth < endMonth
    yyyy = str(year)
    mm = f"{endMonth:02d}"

    if is_range:
        period = yyyy + '01' + '-' + mm
    else:
        period = yyyy + mm

    workbook = period + '--' + field_name + '商品--' + iEType_name + '.xlsx'
    # Identical target directory for ranged and single-month periods.
    folder = 'D:\\hg\\' + yyyy + '\\' + mm + '\\'
    return os.path.join(folder, workbook)

def readConfig():
    """Read ``config.ini`` and run merge + clean for every configured period.

    The ``[param]`` section supplies ``year`` and ``endMonth`` as
    comma-separated integers. For each year and end month the pipeline runs
    once with start month 1 and, when the end month is greater than 1, once
    more with the start month equal to the end month. Each run covers every
    field name and every import/export type listed below: the raw parts are
    merged into one CSV, which is then cleaned into an .xlsx workbook.
    """
    parser = configparser.ConfigParser()
    parser.read('config.ini')
    year_list = parser.get('param', 'year')
    end_month_list = parser.get('param', 'endMonth')

    # Other field names noted for this pipeline: '收发货地址', '贸易方式'.
    field_names = ['贸易伙伴']
    iEType_names = ['进口', '进出口', '出口']

    # map() converts lazily, so a malformed entry only fails when reached.
    for year in map(int, year_list.split(',')):
        for endMonth in map(int, end_month_list.split(',')):
            start_months = [1, endMonth] if endMonth > 1 else [1]
            for startMonth in start_months:
                for field_name in field_names:
                    for iEType_name in iEType_names:
                        period_args = (year, startMonth, endMonth, field_name, iEType_name)
                        source_pattern = getFileName(*period_args)
                        merged_csv_path = getOutFileName(*period_args)
                        workbook_path = getOutFileNameXls(*period_args)
                        print(source_pattern)
                        print(merged_csv_path)
                        # Merge the raw parts into a single CSV.
                        merged_csv = combinFiles(source_pattern, merged_csv_path)
                        # Clean the merged CSV and save it as a workbook.
                        report_period = getRecordFileName(*period_args)
                        print(fileclearn(merged_csv, workbook_path, report_period, iEType_name))

# Run the configured merge-and-clean pipeline only when this file is executed
# as a script, not when it is imported.
if __name__ == '__main__':
    readConfig()


