
import configparser
import csv
import glob
import os
import shutil
import time

import pandas as pd
import redis
import requests
from datetime import datetime
'''
海关下载数据类型和参数分类组合
CODE_TS #商品编码  ORIGIN_COUNTRY  #贸易伙伴 TRADE_MODE #贸易方式 TRADE_CO_PORT #收发货地址
1.设置进出口类型 （默认进口，出口，进出口都进行下载）采用遍历的方式
2.设置查询起止时间 默认最新一个月的单月数据，和累计的数据下载 
3.设置币种 默认是usd  
4.查询字段分组 1.商品详情 四个都设置
5.单个统计数据下载 下载单个分组的数据
6.排序方式，使用默认的编码排序

7.下载文件路径设置和命名规则
d:/hg/2023/7/
数据默认存储位置 D://hg 
其它路径从参数中读取 
一级 年份 
二级月份 
三级月份类型单月，累计 
四级 币种 
五级 字段分组
六级 文件名

3、采集单个字段的统计数据

4.临时文件
1）将请求下载的文件放到临时目录中，
2）对临时的目录文件进行数据的过滤修改重命名保存到对应目录下
3）将临时文件删除
4）根据文件名和列表记录做对比，来下载缺失的文件

5.数据下载分类 
1）按照类型分组获取对应的每月的最新编码信息
2）根据字段编码和商品进行对应统计信息的下载
3）根据商品编码下载数据
6.添加文件内容格式校验
1）获取统计文件信息
2）获取下载的文件数据
3）计算对应的值，如果异常就删除
'''

class HgDownFile(object):

    def __init__(self):
        """Load ``config.ini`` and open the Redis connection used for cookies.

        ``config.ini`` is read relative to the current working directory. The
        ``host``, ``port`` and ``pass`` options of its ``[redis]`` section
        configure the connection; database 0 is used.
        """
        # Endpoint that returns a query result as a CSV download.
        self.downUrl="http://stats.customs.gov.cn/queryData/downloadQueryData"
        parser=configparser.ConfigParser()
        parser.read('config.ini')
        self.config=parser
        redis_options={
            'host':parser.get('redis', 'host'),
            'port':parser.get('redis', 'port'),
            'password':parser.get('redis', 'pass'),
            'db':0,
        }
        self.r=redis.Redis(**redis_options)

    def getcookie(self):
        """Return one cookie string taken from the Redis set ``hgcookie``.

        The first attempt pops a member from the set. While no member is
        available the method sleeps 10 seconds and then samples a random
        member, repeating until one is found.

        Returns:
            The cookie decoded as UTF-8 with surrounding double quotes removed.
        """
        raw=self.r.spop('hgcookie')
        # NOTE(review): the retry path samples with srandmember (the member
        # stays in the set) while the first attempt removes it with spop -
        # confirm this asymmetry is intended.
        while raw is None:
            time.sleep(10)
            raw=self.r.srandmember('hgcookie')
        return raw.decode('utf-8').strip('"')
    #请求下载文件
    def reqDownFile(self,data):
        """Download one query result as CSV into the temporary directory.

        POSTs ``data`` to ``self.downUrl`` with a cookie obtained from
        ``getcookie`` on every attempt. Attempts repeat, without delay and
        without an upper bound, until the server answers with status 200 and
        the body has been written to disk.

        Args:
            data: Mapping of form fields for the download request (see
                ``setparam`` / ``setcodesAndProductparam``).

        Returns:
            Path of the temporary CSV file that was written.
        """
        tmp_dir='D:\\hg\\tmp'
        # Resolve the target once, outside the retry loop, so a failed write
        # cannot change the path used by the next attempt.
        tmp_file=os.path.join(tmp_dir,'数据文件.csv')
        while True:
            try:
                # Content-Length is not set by hand: requests derives it from
                # the encoded form body.
                header={
                    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
                    'Accept-Encoding':'gzip, deflate',
                    'Accept-Language':'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                    'Cache-Control':'max-age=0',
                    'Content-Type':'application/x-www-form-urlencoded',
                    'Host':'stats.customs.gov.cn',
                    'Origin':'http://stats.customs.gov.cn',
                    'Proxy-Connection':'keep-alive',
                    'Upgrade-Insecure-Requests':'1',
                    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.64',
                    'Cookie': self.getcookie()
                }
                response=requests.post(url=self.downUrl,data=data,headers=header,verify=False,timeout=20)
            except Exception as e:
                # Cookie lookup or network failure: report the request and retry.
                print(data)
                print(e)
                continue

            response.encoding = 'GB2312'
            statuscode=response.status_code
            if statuscode == 200:
                try:
                    csv_content = response.text
                    print(csv_content.count("\n"))
                    csv_content=csv_content.replace('\0', '')
                    os.makedirs(tmp_dir, exist_ok=True)
                    # Write GBK explicitly (verifyFile parses these files as
                    # GBK) instead of relying on the locale default.
                    # newline='' keeps the server's line endings untouched.
                    # errors='replace' stops a character GBK cannot encode
                    # from failing the write and forcing an endless re-download.
                    with open(tmp_file, 'w', encoding='gbk', errors='replace', newline='') as file:
                        file.write(csv_content)
                    print('CSV文件下载保存成功！')
                    return tmp_file
                except Exception as e:
                    print(e)
                    # Mark the attempt as failed so it is reported below and retried.
                    statuscode=411
            else:
                print('CSV文件下载保存失败！')
            print(f'statuscode:{statuscode}')
    #统计数据的文件路径设置单个字段
    def filepath(self,iEType,currencyType,year,startMonth,endMonth,outerField1,base_dir='D:\\hg\\'):
        """Return the output path of a single-field statistics file.

        The directory ``<base_dir>/<year>/<MM>/<累计 or 单月>`` is created if it
        does not exist. A range with ``startMonth < endMonth`` is stored as a
        cumulative file (``累计``) whose name always starts at month 01; any
        other range is stored as a single-month file (``单月``).

        Args:
            iEType: Trade direction code, translated by ``getiETypeName``.
            currencyType: Accepted for signature parity; not part of the path.
            year: Year of the data.
            startMonth: First month of the range (int).
            endMonth: Last month of the range (int).
            outerField1: Query field identifier, translated by ``getfieldName``.
            base_dir: Root directory of the archive; defaults to the ``hg``
                folder on drive ``D:``.

        Returns:
            Full path of the CSV file inside the created directory.
        """
        field_name=self.getfieldName(outerField1)
        iEType_name=self.getiETypeName(iEType)
        end_str="{:02d}".format(endMonth)
        if startMonth<endMonth:
            period=f'{year}01-{end_str}'
            span_dir='累计'
        else:
            period=f'{year}{end_str}'
            span_dir='单月'

        # os.path.join instead of hand-written backslashes so the directory is
        # resolved correctly on every platform.
        save_dir=os.path.join(base_dir,str(year),end_str,span_dir)
        os.makedirs(save_dir, exist_ok=True)
        return os.path.join(save_dir,f'{period}--{field_name}--{iEType_name}.csv')

    # Build the output path of a per-code joint-query file (one field code broken down by commodity)
    def codeFilepath(self,iEType,currencyType,year,startMonth,endMonth,outerField1,code,base_dir='D:\\hg\\'):
        """Return the output path of the joint-query file for one field code.

        The directory ``<base_dir>/<year>/<MM>/<累计 or 单月>/<field label>`` is
        created if it does not exist. A range with ``startMonth < endMonth``
        is stored as a cumulative file (``累计``) whose name always starts at
        month 01; any other range is stored as a single-month file (``单月``).

        Args:
            iEType: Trade direction code, translated by ``getiETypeName``.
            currencyType: Accepted for signature parity; not part of the path.
            year: Year of the data.
            startMonth: First month of the range (int).
            endMonth: Last month of the range (int).
            outerField1: Query field identifier, translated by ``getfieldName``.
            code: Field code the file belongs to; appended to the file name.
            base_dir: Root directory of the archive; defaults to the ``hg``
                folder on drive ``D:``.

        Returns:
            Full path of the CSV file inside the created directory.
        """
        field_name=self.getfieldName(outerField1)
        iEType_name=self.getiETypeName(iEType)
        end_str="{:02d}".format(endMonth)
        if startMonth<endMonth:
            period=f'{year}01-{end_str}'
            span_dir='累计'
        else:
            period=f'{year}{end_str}'
            span_dir='单月'

        # os.path.join instead of hand-written backslashes so the directory is
        # resolved correctly on every platform.
        save_dir=os.path.join(base_dir,str(year),end_str,span_dir,field_name)
        os.makedirs(save_dir, exist_ok=True)
        return os.path.join(save_dir,f'{period}--{field_name}--{iEType_name}-{code}.csv')

    def getfieldName(self,outerField1):
        """Map a query field identifier to the label used in file names.

        ``outerField1`` is tested with ``in``, so it may be a string containing
        the identifier or a list holding it. The first identifier that matches,
        in the order listed below, decides the label.

        Returns:
            The label, or an empty string when no identifier matches.
        """
        labels=(
            ('CODE_TS','商品'),              # commodity
            ('ORIGIN_COUNTRY','贸易伙伴'),    # trading partner (country)
            ('TRADE_MODE','贸易方式'),        # trade mode
            ('TRADE_CO_PORT','收发货地址'),   # domestic province
        )
        for identifier,label in labels:
            if identifier in outerField1:
                return label
        return ''

    def getiETypeName(self,iEType):
        """Map a trade direction code to the label used in file names.

        Returns:
            ``出口`` (export) for 0, ``进口`` (import) for 1, ``进出口``
            (import and export) for 10, and an empty string for anything else.
        """
        directions=((0,'出口'),(1,'进口'),(10,'进出口'))
        return next((label for code,label in directions if code==iEType),'')

    #单个字段的参数设置
    def setparam(self,iEType,currencyType,year,startMonth,endMonth,outerField1):
        """Build the form fields for a single-field statistics download.

        ``selectTableState`` is derived from the requested period:

        * 2 by default and for every year before 2022;
        * for 2022: 3 for a cumulative range (``startMonth < endMonth``),
          except 2 when the range ends in February; 2 when the range ends in
          January; 1 for any other single month.

        Returns:
            Dict of form fields in the order the request sends them.
        """
        # NOTE(review): for years after 2022 the state stays at the default 2,
        # whereas setcodesAndProductparam defaults to 1 - confirm which value
        # the endpoint expects for single-field queries.
        table_state=2
        if not year<2022:
            first=int(startMonth)
            last=int(endMonth)
            if year==2022:
                if first<last:
                    table_state=2 if last==2 else 3
                elif last==1:
                    table_state=2
                elif first==last:
                    table_state=1
        return dict(
            pageSize=10,
            iEType=iEType,
            currencyType=currencyType,
            year=year,
            startMonth=startMonth,
            endMonth=endMonth,
            monthFlag='',
            unitFlag=True,
            unitFlag1=True,
            codeLength=8,
            outerField1=outerField1,
            outerField2='',
            outerField3='',
            outerField4='',
            outerValue1='',
            outerValue2='',
            outerValue3='',
            outerValue4='',
            orderType='CODE ASC DEFAULT',
            selectTableState=table_state,
            currentStartTime=202203,
        )

    #联合查询字段的参数设置
    def setcodesAndProductparam(self,iEType,currencyType,year,startMonth,endMonth,outerField1,filedCode):
        """Build the form fields for a joint query of one field code by commodity.

        The query groups by ``outerField1`` and ``CODE_TS`` and restricts the
        first field to ``filedCode``. ``selectTableState`` is derived from the
        requested period:

        * 1 by default; 2 for every year before 2022;
        * for 2022: 3 for a cumulative range (``startMonth < endMonth``),
          except 2 when the range ends in February; 2 when the range ends in
          January; 1 for any other single month.

        Returns:
            Dict of form fields in the order the request sends them.
        """
        if year<2022:
            table_state=2
        else:
            table_state=1
            first=int(startMonth)
            last=int(endMonth)
            if year==2022:
                if first<last:
                    table_state=2 if last==2 else 3
                elif last==1:
                    table_state=2
                elif first==last:
                    table_state=1
        return dict(
            pageSize=10,
            iEType=iEType,
            currencyType=currencyType,
            year=year,
            startMonth=startMonth,
            endMonth=endMonth,
            monthFlag='',
            unitFlag=True,
            unitFlag1=True,
            codeLength='8',
            outerField1=outerField1,
            outerField2='CODE_TS',
            outerField3='',
            outerField4='',
            outerValue1=filedCode,
            outerValue2='',
            outerValue3='',
            outerValue4='',
            orderType='CODE ASC DEFAULT',
            selectTableState=table_state,
            currentStartTime=202203,
        )

    #将临时文件放复制到目录中
    def tmpToFile(self,tmpfilename,filePathName,max_rows=9995):
        """Move a downloaded temporary CSV to its final location.

        The file is rejected when it holds more than ``max_rows`` CSV rows
        (header included); the caller is then expected to split the query.
        In both cases the temporary file is deleted.

        Args:
            tmpfilename: Path of the downloaded temporary CSV.
            filePathName: Destination path.
            max_rows: Largest accepted row count.

        Returns:
            ``filePathName`` when the file was copied, ``''`` when it was
            rejected for being too large.
        """
        # Only the number of rows matters here, so decode leniently: a locale
        # default encoding or a stray byte must not abort the count.
        # newline='' lets the csv module do its own line splitting.
        with open(tmpfilename, 'r', encoding='gbk', errors='replace', newline='') as file:
            line_count = sum(1 for _ in csv.reader(file))
        if line_count > max_rows:
            print('csv文件行数过大需要对编码进行拆分')
            os.remove(tmpfilename)
            return ''
        shutil.copy(tmpfilename, filePathName)
        os.remove(tmpfilename)
        return filePathName

    def readcsv(self,filePath,encoding='gbk'):
        """Return the first column of a CSV file, skipping the header row.

        Args:
            filePath: Path of the CSV file.
            encoding: Text encoding of the file. Defaults to GBK, the
                encoding verifyFile uses when it parses these files.

        Returns:
            List with the first cell of every non-empty data row; an empty
            list when the file has no data rows.
        """
        with open(filePath, newline='', encoding=encoding) as csvfile:
            reader = csv.reader(csvfile)
            # Skip the header; the default keeps an empty file from raising.
            next(reader, None)
            # Blank lines are parsed as empty rows and carry no code.
            return [row[0] for row in reader if row]
    #下载获取字段的编码信息
    def field1Down(self,year,endMonth):
        """Download the single-field statistics files for one period.

        For every combination of range start (month 1 and, unless
        ``endMonth`` is 1, ``endMonth`` itself), trade direction (0, 1, 10)
        and query field, the target path is computed and the file is
        downloaded when it does not exist yet.

        Args:
            year: Year of the data; converted with ``int``.
            endMonth: Last month of the period (int).

        Returns:
            The target paths of all combinations, whether or not the
            corresponding file was downloaded successfully.
        """
        year=int(year)
        currencyType='usd'
        startMonths=[1] if endMonth==1 else [1,endMonth]
        combos=[
            (startMonth,iEType,outerField1)
            for startMonth in startMonths
            for iEType in (0,1,10)
            for outerField1 in ('CODE_TS','ORIGIN_COUNTRY','TRADE_MODE','TRADE_CO_PORT')
        ]

        fieldFileList=[]
        for startMonth,iEType,outerField1 in combos:
            param=self.setparam(iEType,currencyType,year,startMonth,endMonth,outerField1)
            filePathName=self.filepath(iEType,currencyType,year,startMonth,endMonth,outerField1)
            fieldFileList.append(filePathName)
            if not os.path.exists(filePathName):
                tmpfilename=self.reqDownFile(param)
                print(self.tmpToFile(tmpfilename,filePathName))
        return fieldFileList
    #下载贸易方式商品，贸易伙伴商品，注册地商品 的统计信息
    #1.从单个统计文件中获取对应的贸易编码，
    #2.对每个贸易编码进行文件下载
    #3.对下载的文件进行合并清洗重命名
    def fieldCodeDown(self,iEType,currencyType,year,startMonth,endMonth,outerField1,codes):
        """Download the commodity breakdown for every code of one field.

        For each code the joint-query file is downloaded to the temporary
        location, checked with ``verifyFile`` and, when the check passes,
        moved to its final path. Codes whose final file already exists are
        not downloaded again.

        Returns:
            One entry per code that ended with a file: the existing path, or
            the value returned by ``tmpToFile`` for a fresh download. Codes
            that failed verification contribute no entry.
        """
        common=(iEType,currencyType,year,startMonth,endMonth,outerField1)
        saved=[]
        for code in codes:
            param=self.setcodesAndProductparam(*common,code)
            target=self.codeFilepath(*common,code)
            if os.path.exists(target):
                print(f'文件已存在{target}')
                saved.append(target)
                continue
            # Download into the temporary file.
            tmpfilename=self.reqDownFile(param)
            # Compare its total with the matching row of the statistics file.
            verified=self.verifyFile(tmpfilename,year,startMonth,endMonth,outerField1,iEType,currencyType)
            if not verified:
                os.remove(tmpfilename)
                continue
            stored=self.tmpToFile(tmpfilename,target)
            print(stored)
            saved.append(stored)
        return saved

    def verifyFile(self,tmpfilename,year,startMonth,endMonths,outerField1,iEType,currencyType):
        """Check a downloaded breakdown against the single-field statistics file.

        The ``美元`` column of the temporary file is summed and compared with
        the ``美元`` value of the last row of the statistics file whose code
        column equals the first cell of the temporary file's first data row.
        Both files are read as GBK. Any error while reading or comparing is
        printed and treated as a failed check.

        Returns:
            ``True`` when the two amounts are equal, otherwise ``False``
            (also when ``startMonth`` is greater than ``endMonths``).
        """
        last_month=int(endMonths)
        if not startMonth<=last_month:
            return False
        stats_path=self.filepath(iEType,currencyType,year,startMonth,last_month,outerField1)
        try:
            stats=pd.read_csv(stats_path, encoding='gbk',dtype=str)
            stats['美元']=stats['美元'].str.replace(',', '').astype(float)
            detail=pd.read_csv(tmpfilename, encoding='gbk',dtype=str)
            detail_total=detail['美元'].str.replace(',', '').astype(float).sum()
            code_id=detail.iloc[0, 0]

            # The last matching identifier decides the label.
            label=''
            for identifier,name in (
                ('ORIGIN_COUNTRY','贸易伙伴'),
                ('TRADE_MODE','贸易方式'),
                ('TRADE_CO_PORT','收发货地址'),
            ):
                if identifier in outerField1:
                    label=name
            # This label's code column does not follow the "<label>编码" pattern.
            code_column='注册地编码' if label=='收发货地址' else label+'编码'

            matches=stats.loc[stats[code_column]==code_id]
            # NOTE(review): exact float equality - fine for whole-number
            # amounts; confirm the data never carries decimals.
            return bool(matches.at[matches.index[-1], '美元']==detail_total)
        except Exception as e:
            print(e)
            return False

    def codeFieldDown(self,fieldFileList,year,endMonth):
        """Download the per-code commodity breakdowns for the given statistics files.

        Query field, period type and trade direction are recovered from
        markers in each file path. The codes in the file's first column are
        then passed to ``fieldCodeDown``, which is repeated until it returns
        one entry per code. Commodity files (path contains ``商品``) are
        skipped. Errors for one file are printed and do not stop the others.

        Args:
            fieldFileList: Paths of single-field statistics files.
            year: Year of the data; converted with ``int``.
            endMonth: Last month of the period.
        """
        year = int(year)
        currencyType='usd'
        field_markers=(
            ('贸易伙伴','ORIGIN_COUNTRY'),
            ('贸易方式','TRADE_MODE'),
            ('收发货地址','TRADE_CO_PORT'),
        )
        span_markers=(('单月',endMonth),('累计',1))
        direction_markers=(('--进口',1),('--出口',0),('--进出口',10))
        for fieldFile in fieldFileList:
            try:
                if '商品' in fieldFile:
                    continue
                # Reset per file so nothing carries over from the previous
                # iteration when a marker is missing.
                outerField1=startMonth=iEType=None
                for marker,identifier in field_markers:
                    if marker in fieldFile:
                        outerField1=[identifier]
                for marker,month in span_markers:
                    if marker in fieldFile:
                        startMonth=month
                for marker,code in direction_markers:
                    if marker in fieldFile:
                        iEType=code
                if outerField1 is None or startMonth is None or iEType is None:
                    print(f'skip unrecognised statistics file: {fieldFile}')
                    continue

                # Call through self rather than a module-level instance, so the
                # method also works when the class is used outside this script.
                codes=self.readcsv(fieldFile)
                codeFileList=self.fieldCodeDown(iEType,currencyType,year,startMonth,endMonth,outerField1,codes)
                print(f'codes:{len(codeFileList)}')
                print(len(codeFileList))
                # Repeat until every code has produced an entry.
                while len(codes)!= len(codeFileList):
                    print('+++++++++++++')
                    codeFileList=self.fieldCodeDown(iEType,currencyType,year,startMonth,endMonth,outerField1,codes)

            except Exception as e:
                print(e)


if __name__ == '__main__':
    # Keep the instance bound to the module-level name ``hgDownFile``: code
    # elsewhere in this file may look that name up as a global.
    hgDownFile=HgDownFile()

    # Comma-separated lists from the [param] section of the configuration.
    ss=hgDownFile.config.get('param', 'endMonth')
    yss=hgDownFile.config.get('param', 'year')
    for year in map(int, yss.split(',')):
        for endMonth in map(int, ss.split(',')):
            # 3 trade directions x 4 fields per range start: January has one
            # range start (12 files), every other month has two (24 files).
            expected=12 if endMonth==1 else 24
            fieldFileList=hgDownFile.field1Down(year,endMonth)
            while len(fieldFileList)<expected:
                fieldFileList=hgDownFile.field1Down(year,endMonth)
            # Two passes over the per-code downloads.
            for _ in range(2):
                print('_______________')
                hgDownFile.codeFieldDown(fieldFileList,year,endMonth)
