Commit 88707bfa  Author: 薛凌堃

9/18

Parent d27ed8e5
......@@ -55,9 +55,9 @@ def closeSql(cnx,cursor):
def NewsEnterprise():
cnx,cursor = connectSql()
# # Fetch domestic enterprises
gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
cursor.execute(gn_query)
gn_result = cursor.fetchall()
# gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
# cursor.execute(gn_query)
# gn_result = cursor.fetchall()
# Fetch overseas enterprises
gw_query = "select SocialCode from EnterpriseInfo where Place = '2'"
cursor.execute(gw_query)
......@@ -66,11 +66,11 @@ def NewsEnterprise():
gw_social_list = [item[0] for item in gw_result]
#todo: print the list length
# print(len(gw_social_list))
gn_social_list = [item[0] for item in gn_result]
# gn_social_list = [item[0] for item in gn_result]
print('=======')
# Push the codes into redis
for item in gn_social_list:
r.rpush('NewsEnterprise:gnqy_socialCode', item)
# for item in gn_social_list:
# r.rpush('NewsEnterprise:gnqy_socialCode', item)
for item in gw_social_list:
r.rpush('NewsEnterprise:gwqy_socialCode', item)
......@@ -275,7 +275,7 @@ def AnnualEnterpriseXueQ_task():
def AnnualEnterpriseUS():
cnx,cursor = connectSql()
# Fetch US-listed enterprises
us_query = "select SocialCode from EnterpriseInfo where Place = '2' and SecuritiesType = '美股' and SecuritiesCode is not null"
us_query = "select SocialCode from EnterpriseInfo where Place = '2' and SecuritiesType = '美股' and SecuritiesCode is not null and CreateTime='2023-08-15 14:00:00'"
# us_query = "select SocialCode from EnterpriseInfo where Place = '2' and SecuritiesType = '美股' and SecuritiesCode = 'BP' "
#ZZSN22080900000025
cursor.execute(us_query)
......@@ -387,6 +387,44 @@ def yahooCode_task():
print('定时采集异常', e)
pass
# NEEQ (新三板) enterprises
def NQEnterprise():
cnx, cursor = connectSql()
nq_query = "select gpdm from NQEnterprise where QCCID is not null "
cursor.execute(nq_query)
nq_result = cursor.fetchall()
nq_social_list = [item[0] for item in nq_result]
for item in nq_social_list:
# r.rpush('NQEnterprise:nq_Ipo', item)
r.rpush('NQEnterprise:nq_finance',item)
# r.rpush('NQEnterprise:nq_notice',item)
closeSql(cnx, cursor)
def omeng():
cnx, cursor = connectSql()
# om_query = " SELECT A.SocialCode FROM EnterpriseInfo A , EnterpriseType B WHERE B.TYPE=6 AND A.SocialCode=B.SocialCode AND A.Place=1 order by A.id desc "
om_query = " SELECT A.SocialCode FROM EnterpriseInfo A , EnterpriseType B WHERE B.TYPE=6 AND A.SocialCode=B.SocialCode AND A.Place=2 "
cursor.execute(om_query)
om_result = cursor.fetchall()
om_social_list = [item[0] for item in om_result]
for item in om_social_list:
# Basic company information
r.rpush('gwOMEnterprise_socialcode:BaseInfo', item)
# Company executives
# r.rpush('gnOMEnterprise_socialcode:TYCid', item)
# r.rpush('gnOMEnterprise_socialcode:CenterPerson', item)
# Company news
r.rpush('gwOMEnterprise_socialcode:News', item)
# Annual reports
# r.rpush('gnOMEnterprise_socialcode:Report', item)
# Announcements
# r.rpush('gnOMEnterprise_socialcode:Notice', item)
closeSql(cnx, cursor)
if __name__ == "__main__":
start = time.time()
......@@ -399,7 +437,9 @@ if __name__ == "__main__":
# BaseInfoEnterprise()
# FBS()
# MengZhi()
AnnualEnterpriseUS()
NQEnterprise()
# omeng()
# AnnualEnterpriseUS()
# NoticeEnterprise_task()
# AnnualEnterprise_task()
# NoticeEnterprise()
......
......@@ -16,7 +16,7 @@ from fdfs_client.storage_client import *
from fdfs_client.exceptions import *
def get_tracker_conf(conf_path='client.conf'):
def get_tracker_conf(conf_path='../../client.conf'):
cf = Fdfs_ConfigParser()
tracker = {}
try:
......
# -*- coding: utf-8 -*-
import redis
import time
from urllib.parse import quote
import pymongo
import requests
from bson.objectid import ObjectId
import json
from pyquery import PyQuery as pq
import urllib3
import hashlib
from kafka import KafkaProducer
import pandas as pd
import zhconv
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
# Get the Qichacha (企查查) company id by enterprise name or credit code
def find_id_by_name(name):
urllib3.disable_warnings()
qcc_key = name
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="
for lll in range(1, 6):
try:
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
break
except:
print('重试')
time.sleep(5)
continue
time.sleep(2)
if resp_dict['result']['Result']:
result_dict = resp_dict['result']['Result'][0]
KeyNo = result_dict['KeyNo']
Name = result_dict['Name'].replace('<em>', '').replace('</em>', '').strip()
if Name == '':
KeyNo = ''
else:
KeyNo = ''
print("{},企业代码为:{}".format(qcc_key, KeyNo))
return KeyNo
# Check whether a string contains any digit
def str_have_num(str_num):
panduan = False
for str_1 in str_num:
ppp = str_1.isdigit()
if ppp:
panduan = ppp
return panduan
# Get the company's official website by Qichacha id
def info_by_id(com_id,com_name):
aa_dict_list = []
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = "https://xcx.qcc.com/mp-weixin/forwardApp/v1/ent/detail?token={}&t={}&unique={}".format(token, t, com_id)
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
time.sleep(2)
try:
result_dict = resp_dict['result']['Company']
except:
print(com_name + ":获取失败")
try:
WebSite = result_dict['companyExtendInfo']['WebSite']
except:
WebSite = None
if WebSite is None:
try:
WebSite = result_dict['ContactInfo']['WebSite'][0]['Url']
except:
WebSite = ''
print(com_name + ":爬取完成")
return WebSite
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
'Qcc-Platform': 'mp-weixin',
'Qcc-Timestamp': '',
'Qcc-Version': '1.0.0',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
'content-type': 'application/json',
'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br,'
}
# TODO: the token has to be re-captured and updated roughly every two hours
token = '1dcc61d85177733298e5827653706f1a'  # re-capture and update roughly every two hours
start = time.time()
list_weicha = []
# File of enterprises to be collected
filename = 'data/内蒙古市属国有企业_官网.xlsx'
df_all = pd.read_excel('data/内蒙古市属国有企业.xls',dtype=str)
list_all_info = []
for num_df in range(162,len(df_all)):
# Unified social credit code
id_code = str(df_all['本企业代码'][num_df])
# Enterprise name
com_name = str(df_all['企业名称'][num_df])
# Row number
line = str(df_all['行次'][num_df])
dic_com = {
'line': line,
'social_code': id_code,
'com_name': com_name,
'website':''
}
company_id = find_id_by_name(id_code)
if company_id == "":
print(com_name + ":企业ID获取失败")
list_weicha.append(com_name + ":企业ID获取失败")
continue
WebSite = info_by_id(company_id,com_name)
dic_com['website'] = WebSite
log.info(f'---{num_df}-------{com_name}----------耗时{baseCore.getTimeCost(start,time.time())}')
list_all_info.append(dic_com)
baseCore.writerToExcel(list_all_info,filename)
# -*- coding: utf-8 -*-
import redis
import time
from urllib.parse import quote
import pymongo
import requests
from bson.objectid import ObjectId
import json
from pyquery import PyQuery as pq
import urllib3
import hashlib
from kafka import KafkaProducer
import pandas as pd
import zhconv
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
# Get the Qichacha (企查查) company id by enterprise name or credit code
def find_id_by_name(name):
urllib3.disable_warnings()
qcc_key = name
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="
for lll in range(1, 6):
try:
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
break
except:
print('重试')
time.sleep(5)
continue
time.sleep(2)
if resp_dict['result']['Result']:
result_dict = resp_dict['result']['Result'][0]
KeyNo = result_dict['KeyNo']
Name = result_dict['Name'].replace('<em>', '').replace('</em>', '').strip()
if Name == '':
KeyNo = ''
else:
KeyNo = ''
print("{},企业代码为:{}".format(qcc_key, KeyNo))
return KeyNo
# Check whether a string contains any digit
def str_have_num(str_num):
panduan = False
for str_1 in str_num:
ppp = str_1.isdigit()
if ppp:
panduan = ppp
return panduan
# Get the company's official website by Qichacha id
def info_by_id(com_id,com_name):
aa_dict_list = []
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = "https://xcx.qcc.com/mp-weixin/forwardApp/v1/ent/detail?token={}&t={}&unique={}".format(token, t, com_id)
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
time.sleep(2)
try:
result_dict = resp_dict['result']['Company']
except:
print(com_name + ":获取失败")
try:
WebSite = result_dict['companyExtendInfo']['WebSite']
except:
WebSite = None
if WebSite is None:
try:
WebSite = result_dict['ContactInfo']['WebSite'][0]['Url']
except:
WebSite = ''
print(com_name + ":爬取完成")
return WebSite
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
'Qcc-Platform': 'mp-weixin',
'Qcc-Timestamp': '',
'Qcc-Version': '1.0.0',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
'content-type': 'application/json',
'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br,'
}
# TODO: the token has to be re-captured and updated roughly every two hours
token = '06de5b73cb9d6f9fcae27edaf94749cf'  # re-capture and update roughly every two hours
start = time.time()
list_weicha = []
# File of enterprises to be collected
filename = 'data/钢铁企业_官网.xlsx'
df_all = pd.read_excel('data/钢铁企业.xlsx',dtype=str)
list_all_info = []
for num_df in range(len(df_all)):
# Unified social credit code
id_code = str(df_all['信用代码'][num_df])
# Enterprise name
com_name = str(df_all['企业名称'][num_df])
# Row number
# line = str(df_all['行次'][num_df])
dic_com = {
# 'line': line,
'social_code': id_code,
'com_name': com_name,
'website':''
}
company_id = find_id_by_name(id_code)
if company_id == "":
print(com_name + ":企业ID获取失败")
list_weicha.append(com_name + ":企业ID获取失败")
continue
WebSite = info_by_id(company_id,com_name)
dic_com['website'] = WebSite
log.info(f'---{num_df}-------{com_name}----------耗时{baseCore.getTimeCost(start,time.time())}')
list_all_info.append(dic_com)
baseCore.writerToExcel(list_all_info,filename)
......@@ -9,7 +9,10 @@ import json
from kafka import KafkaProducer
from base.BaseCore import BaseCore
from getQccId import find_id_by_name
from base.BaseCore import BaseCore
baseCore = BaseCore()
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
baseCore = BaseCore()
cnx = baseCore.cnx
......@@ -310,7 +313,7 @@ def info_by_id(com_id,com_name):
return aa_dict_list
if __name__ == '__main__':
taskType = '基本信息/企查查'
taskType = '基本信息/企查查/福布斯'
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
......@@ -325,75 +328,64 @@ if __name__ == '__main__':
# Pull tasks from redis
while True:
start = time.time()
# TODO: the token has to be re-captured roughly every two hours; it is read from the database
token = baseCore.GetToken()
list_weicha = []
list_all_info = []
name_list = []
start_time = time.time()
# Fetch enterprise info
query = "SELECT * FROM Tfbs where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and state1=1 limit 1 "
# 兴业银行 (Industrial Bank)
# query = "SELECT * FROM Tfbs where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and col5='兴业银行'"
cursor.execute(query)
row = cursor.fetchone()
if row:
pass
else:
print('没有数据了,结束脚本')
break
com_name = row[6]
social_code = row[4]
code = row[7]
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
updateBeginSql = f"update Tfbs set state1=0,date2='{time_now}' where col3='{social_code}' "
# print(updateBeginSql)
cursor.execute(updateBeginSql)
cnx.commit()
company_id = find_id_by_name(start,token,social_code)
if company_id == False:
# the token has expired
time.sleep(10)
updateBeginSql = f"update Tfbs set state1=1,date2='{time_now}' where col3='{social_code}' "
# print(updateBeginSql)
cursor.execute(updateBeginSql)
cnx.commit()
social_code = baseCore.redicPullData('BaseInfoEnterpriseFbs:gnqy_social_code')
# social_code = '91110000710924945A'
if social_code is None:
time.sleep(20)
continue
if company_id == "":
log.info(com_name + ":企业ID获取失败")
log.info(f'----当前企业{social_code}-----')
dic_info = baseCore.getInfomation(social_code)
#
count = dic_info[13]
com_name = dic_info[1]
social_code = dic_info[2]
# Qichacha id
company_id = dic_info[12]
# If there is no credit code, search by company name; otherwise search by the credit code
if company_id == None:
if social_code:
company_id = find_id_by_name(start_time, token, social_code)
else:
company_id = find_id_by_name(start_time, token, com_name)
# todo: write back to the database
updateSql = f"update EnterpriseInfo set QCCID = '{company_id}' where SocialCode = '{social_code}'"
cursor_.execute(updateSql)
cnx_.commit()
post_data_list = info_by_id(company_id, com_name)
if company_id == "":
print(com_name + ":企业ID获取失败")
list_weicha.append(com_name + ":企业ID获取失败")
# state 400 means the enterprise update failed
updateBeginSql = f"update Tfbs set state1=400,date2='{time_now}' where col3='{social_code}' "
# print(updateBeginSql)
cursor.execute(updateBeginSql)
cnx.commit()
continue
else:
post_data_list = info_by_id(company_id,social_code)
log.info(f'====={social_code}===={company_id}=====获取企业id成功=====')
try:
post_data_list = info_by_id(company_id, com_name)
except:
log.info(f'====={social_code}=====获取基本信息失败,重新放入redis=====')
baseCore.rePutIntoR('BaseInfoEnterpriseFbs:gnqy_social_code', social_code)
continue
for post_data in post_data_list:
list_all_info.append(post_data)
if post_data is None:
log.info(com_name + ":企业信息获取失败")
print(com_name + ":企业信息获取失败")
list_weicha.append(com_name + ":企业信息获取失败")
# state 400 means the enterprise update failed
updateBeginSql = f"update Tfbs set state1=400,date2='{time_now}' where col3='{social_code}' "
# print(updateBeginSql)
cursor.execute(updateBeginSql)
cnx.commit()
continue
get_name = post_data['name']
get_socialcode = post_data['socialCreditCode']
name_compile = {
'yuan_name':com_name,
'get_name':get_name
'yuan_name': com_name,
'get_name': get_name
}
name_list.append(name_compile)
log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}')
log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time, time.time())}')
try:
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 0, 2))
kafka_result = producer.send("regionInfo", json.dumps(post_data, ensure_ascii=False).encode('utf8'))
......@@ -404,16 +396,17 @@ if __name__ == '__main__':
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(get_socialcode, taskType, state, takeTime, '', exception)
log.info(f"{get_name}--{get_socialcode}--kafka传输失败")
# state 200 means success
updateBeginSql = f"update Tfbs set state1=200,date2='{time_now}' where col3='{social_code}' "
# print(updateBeginSql)
cursor.execute(updateBeginSql)
cnx.commit()
nowtime = baseCore.getNowTime(1).replace('-','_')[:10]
# break
# After collection finishes, update this enterprise's collection count
runType = 'BaseInfoRunCount'
count += 1
baseCore.updateRun(social_code, runType, count)
nowtime = baseCore.getNowTime(1).replace('-', '_')[:10]
companyName = pd.DataFrame(name_list)
companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx',index=False)
companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx', index=False)
false_com = pd.DataFrame(list_weicha)
false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx',index=False)
false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx', index=False)
baseCore.close()
......
import redis
# Connect to Redis
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
# List name
list_name = 'BaseInfoEnterpriseMz:gnqy_socialCode'
# Fetch all elements of the list
elements = r.lrange(list_name, 0, -1)
# Walk over the distinct elements
for element in set(elements):
# Count how often the element appears in the snapshot
count = elements.count(element)
# If it appears more than once, remove the extra copies and keep one
if count > 1:
r.lrem(list_name, count - 1, element)
# Print the de-duplicated list
print(r.lrange(list_name, 0, -1))
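# An alternative, order-preserving sketch, kept commented out; it assumes it is
# acceptable to rewrite the whole list in one pipeline instead of lrem per element:
# seen = set()
# unique = [e for e in elements if not (e in seen or seen.add(e))]
# pipe = r.pipeline()
# pipe.delete(list_name)
# if unique:
#     pipe.rpush(list_name, *unique)
# pipe.execute()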
......@@ -300,7 +300,7 @@ if __name__ == '__main__':
start_time = time.time()
# 获取企业信息
# social_code = baseCore.redicPullData('AnnualEnterprise:usqy_socialCode')
social_code = 'ZZSN22080900000025'
social_code = 'ZZSN230912210643024'
if not social_code:
time.sleep(20)
continue
......
# import redis
#
#
# r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
#
# # 获取所有键
# keys = r.keys('*')
# # print(keys)
# for key in keys:
# f_key = key.decode()
# print(f_key)
# print("----------")
# res = r.exists(f_key)
# value = list(r.smembers(f_key))
# # 对列表进行排序
# value.sort()
# # 遍历排序后的列表
# list_data = []
# for member in value:
# member = member.decode()
# members = member.strip('[').strip(']').replace('\'','').strip().split(',')
# #获取每一个报告期
# for date in members:
# data = date.strip()
# # print(date.strip())
# list_data.append(data)
# # 放入redis
# for item in list_data:
# r.sadd(key, item)
#
# # 获取Set中的所有元素
# items = r.smembers(key)
# # print(items)
# print("======================================")
import datetime
timestamp = 1688054400  # example timestamp
date = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
print(date)
import redis
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
# 获取所有键
# keys = r.keys('*')
# # print(keys)
# for key in keys:
# f_key = key.decode()
# print(f_key)
# print("----------")
# r.srem(f_key,'[]')
# r.srem(f_key,'')
def check_date(com_code,info_date):
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
res = r.sismember('com_caiwushuju_code::'+com_code, info_date)  # note: the dates are stored as a redis set
if res:
return True
else:
return False
def add_date():
date_list = ['2023-06-30']
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
# Iterate over date_list and push each date into redis
for date in date_list:
res = r.sadd('com_caiwushuju_code::'+'123456',date)
# check_date('sh601398','2023-03-31')
add_date()
#com_caiwushuju_code::bj430418
# ['2018-12-31', '2018-09-30', '2018-06-30', '2018-03-31']
import redis
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
f_key = 'com_caiwushuju_code::bj851834'
r.set(f_key,3)
res = r.exists(f_key)
print(res)
# list = ['2023-03-31', '2022-12-31', '2022-09-30', '2022-06-30', '2022-03-31', '2021-12-31', '2021-09-30', '2021-06-30', '2021-03-31', '2020-12-31', '2020-09-30', '2020-06-30', '2020-03-31', '2019-12-31', '2019-09-30', '2019-06-30', '2019-03-31']
#
# for a in list:
# r.sadd(f_key,a)
# from base.BaseCore import BaseCore
# baseCore = BaseCore()
# from datetime import datetime,timedelta
# timeNow = baseCore.getNowTime(1)[:10]
# # list_date = []
# #
# # list_date.append(timeNow)
# # print(timeNow)
# # print(list_date)
#
# current_date = datetime.now()
# # 计算昨天的日期
# yesterday = current_date - timedelta(days=1)
# # 格式化昨天的日期
# report_date = yesterday.strftime('%Y-%m-%d')
# # list_date.append(report_date)
# year = int(current_date.strftime('%Y'))
# print(year)
# print(type(year))
\ No newline at end of file
......@@ -327,13 +327,14 @@ if __name__ == '__main__':
#从redis里拿数据
while True:
# TODO: the token has to be re-captured roughly every two hours; it is read from the database
token = '027ea02da6d901a724ecca47930379b4'
token = 'b4eb43143abdcf395f1335f322ca29e5'
list_weicha = []
list_all_info = []
name_list = []
start_time = time.time()
# Fetch enterprise info
com_code = baseCore.redicPullData('EnterpriseIpo:nq_gpdm')
# com_code = baseCore.redicPullData('EnterpriseIpo:nq_gpdm')
com_code = '873349'
if '.NQ' in com_code:
com_code1 = com_code
else:
......@@ -358,7 +359,7 @@ if __name__ == '__main__':
post_data_list = info_by_id(company_id, '',com_code1)
except:
log.info(f'====={com_code}=====获取基本信息失败,重新放入redis=====')
baseCore.rePutIntoR('BaseInfoEnterprise:gnqy_social_code', com_code)
baseCore.rePutIntoR('EnterpriseIpo:nq_gpdm', com_code)
continue
if post_data_list:
pass
......
......@@ -7,7 +7,7 @@ import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import urllib3
from base.BaseCore import BaseCore
from BaseCore import BaseCore
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# from gpdm import Gpdm
baseCore = BaseCore()
......@@ -20,15 +20,17 @@ log = baseCore.getLogger()
error_list = []
list_all_info = []
# Requires the stock code and the enterprise credit code
flag = 0
while True:
# Read enterprises from the table
flag = 0
com_code1 = baseCore.redicPullData('EnterpriseIpo:nq_gpdm')
if com_code1 is None:
time.sleep(20)
if flag==1:
com_code = baseCore.redicPullData('NQEnterprise:nq_Ipo')
# com_code = baseCore.redicPullData('NQEnterprise:nq_Ipo_test')
if com_code is None:
if flag==0:
time.sleep(20)
log.info('已没有数据----------等待')
continue
elif flag==0:
elif flag==1:
# Save the data to the database through the API
for num in range(0, len(list_all_info), 100):
......@@ -42,15 +44,18 @@ while True:
print(e)
print("{}:到:{}".format(num, num + 100))
print(response.text)
log.info('-----------数据发送接口完毕----------')
flag = 0
continue
break
com_code = '838616'
short_name = ''
social_code = ''
# Look up the remaining fields from the database
log.info(f'========正在采集{com_code}===========')
data = baseCore.getInfomation(com_code)
social_code = data[1]
short_name = data[3]
start = time.time()
log.info(f'======开始采集{com_code}======')
url = f'https://xinsanban.eastmoney.com/F10/CompanyInfo/Introduction/833658.html'
url = f'https://xinsanban.eastmoney.com/F10/CompanyInfo/Introduction/{com_code}.html'
headers = {
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
......@@ -95,12 +100,9 @@ while True:
"eastIndustry": industry,
"csrcIndustry": ''
}
print(dic_cwsj)
# print(dic_cwsj)
list_all_info.append(dic_cwsj)
log.info(f'======{com_code}====采集成功=====')
flag = 1
break
......@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup
import datetime
from selenium import webdriver
from base.BaseCore import BaseCore
from BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
......@@ -84,7 +84,7 @@ def getdetail(reportInfodata,name_map,listinfo,url_name):
listinfo.append(dic_info)
return listinfo
def getinfo(com_code):
def getinfo(com_code,social_code):
dic_info = {}
for nnn in range(0, 3):
try:
......@@ -127,10 +127,11 @@ def getinfo(com_code):
break
except:
time.sleep(1)
log.info(f'======正在采集:{report_date}=======')
log.info(f'======正在采集:{com_code}---{report_date}=======')
# Income statement
list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name)
print(list_Lrb)
log.info(f'利润表数据:{len(list_Lrb)}个')
# print(list_Lrb)
# Balance sheet
reportZcfzbdata = b_infoData[i]
list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
......@@ -148,27 +149,30 @@ def getinfo(com_code):
"cash": list_Xjllb,
"ynFirst": ynFirst,
}
print(dic_info)
# print(dic_info)
# End of one reporting period
for nnn in range(0, 3):
try:
add_date(com_code, report_date)
break
except:
time.sleep(1)
log.info(f'----{com_code}--{report_date}----结束')
# if dic_info:
# # 调凯歌接口存储数据
# data = json.dumps(dic_info)
# # print(data)
# url_baocun = 'http://114.115.236.206:8088/sync/finance/df'
# for nnn in range(0, 3):
# try:
# res_baocun = requests.post(url_baocun, data=data)
# break
# except:
# time.sleep(1)
# print(res_baocun.text)
if dic_info:
# Call the 凯歌 API to store the data
data = json.dumps(dic_info)
# print(data)
url_baocun = 'http://114.115.236.206:8088/sync/finance/df'
for nnn in range(0, 3):
try:
res_baocun = requests.post(url_baocun, data=data)
break
except:
time.sleep(1)
print(res_baocun.text)
log.info('------------数据发送接口完毕------------')
for nnn in range(0, 3):
try:
add_date(com_code, report_date)
break
except:
time.sleep(1)
else:
log.error(f'---{com_code}--{report_date}--')
return dic_info
if __name__ == '__main__':
......@@ -178,9 +182,27 @@ if __name__ == '__main__':
browser = webdriver.Chrome(chromedriver)
except Exception as e:
print(e)
social_code = ''
headers = {
'authority': 'stock.xueqiu.com',
'method': 'GET',
'path': '/v5/stock/finance/cn/income.json?symbol=NQ873286&type=all&is_detail=true&count=5&timestamp=1694414063178',
'scheme': 'https',
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Cookie': 'device_id=84ced64554d8060750b1528dc22a3696; s=bt110kz0n8; cookiesu=661693188384462; u=661693188384462; Hm_lvt_1db88642e346389874251b5a1eded6e3=1693188388; xq_a_token=29bdb37dee2432c294425cc9e8f45710a62643a5; xqat=29bdb37dee2432c294425cc9e8f45710a62643a5; xq_r_token=3a35db27fcf5471898becda7aa5dab6afeafe471; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOi0xLCJpc3MiOiJ1YyIsImV4cCI6MTY5NjgxMTc5NCwiY3RtIjoxNjk0NDEzNTQ2ODU4LCJjaWQiOiJkOWQwbjRBWnVwIn0.Xxu329nQq4bMtwKFJWlScnUUSWrky4T5SWkMum46c2G8la2z4g0d4nyvsO08WP-7moMffId6P3bGWuELULkbv6EHvIZgqge9-fAD4-hmLOjeRh96NsoGfyTAQK7tbnt9LhKz1fDg6SUi8loMqYgM7l-4g-ZM4B6zrZ5hKWdQJFLy0-V8Wzx7HTFYZSX3FNSsbgGqHlW4vykIpsRaNeOOX1M6LYdt6BhbAi1Iv4TflB08LIdu6F1n4dTRbmPq1KCndb2LsLR2HrJZmqmHJB9WMzwlVcIGdz778_CutNrwuWgJbWtb-s3dSESzO0WWw1uIIGZUvRl1D0KSl0P_GQLw9w; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1694414056',
'Origin': 'https://xueqiu.com',
'Pragma': 'no-cache',
'Referer': 'https://xueqiu.com/snowman/S/NQ873286/detail',
'Sec-Ch-Ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"Windows"',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
}
# Chinese-to-English field name mapping
lrb_name_map = {
'营业总收入':'total_revenue',
......@@ -347,32 +369,39 @@ if __name__ == '__main__':
'加:期初现金及现金等价物余额':'final_balance_of_cce',
'期末现金及现金等价物余额':'final_balance_of_cce'
}
table_type = ['income','balance']
flag = 0
while True:
# com_code = baseCore.redicPullData('NQEnterprise:nq_finance')
com_code = baseCore.redicPullData('NQEnterprise:nq_finance_test')
if com_code is None:
if flag==0:
log.info('已没有数据----------等待')
time.sleep(20)
continue
elif flag==1:
log.info('=============调用对比指标接口======')
time.sleep(5400)
url_ = 'http://114.115.236.206:8088/sync/calculateIndex?type=1'
for nnn in range(0, 3):
try:
res_ = requests.get(url_)
break
except:
time.sleep(1)
print(res_.text)
log.info('-----------数据触发对比指标接口完毕----------')
flag = 0
continue
log.info(f'========正在采集{com_code}===========')
data = baseCore.getInfomation(com_code)
social_code = data[1]
short_name = data[3]
start = time.time()
com_code = 'NQ' + com_code
dic_info = getinfo(com_code,social_code)
flag =1
com_code = 'NQ' + '873286'
headers = {
'authority': 'stock.xueqiu.com',
'method': 'GET',
'path': '/v5/stock/finance/cn/income.json?symbol=NQ873286&type=all&is_detail=true&count=5&timestamp=1694414063178',
'scheme': 'https',
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Cookie': 'device_id=84ced64554d8060750b1528dc22a3696; s=bt110kz0n8; cookiesu=661693188384462; u=661693188384462; Hm_lvt_1db88642e346389874251b5a1eded6e3=1693188388; xq_a_token=29bdb37dee2432c294425cc9e8f45710a62643a5; xqat=29bdb37dee2432c294425cc9e8f45710a62643a5; xq_r_token=3a35db27fcf5471898becda7aa5dab6afeafe471; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOi0xLCJpc3MiOiJ1YyIsImV4cCI6MTY5NjgxMTc5NCwiY3RtIjoxNjk0NDEzNTQ2ODU4LCJjaWQiOiJkOWQwbjRBWnVwIn0.Xxu329nQq4bMtwKFJWlScnUUSWrky4T5SWkMum46c2G8la2z4g0d4nyvsO08WP-7moMffId6P3bGWuELULkbv6EHvIZgqge9-fAD4-hmLOjeRh96NsoGfyTAQK7tbnt9LhKz1fDg6SUi8loMqYgM7l-4g-ZM4B6zrZ5hKWdQJFLy0-V8Wzx7HTFYZSX3FNSsbgGqHlW4vykIpsRaNeOOX1M6LYdt6BhbAi1Iv4TflB08LIdu6F1n4dTRbmPq1KCndb2LsLR2HrJZmqmHJB9WMzwlVcIGdz778_CutNrwuWgJbWtb-s3dSESzO0WWw1uIIGZUvRl1D0KSl0P_GQLw9w; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1694414056',
'Origin': 'https://xueqiu.com',
'Pragma': 'no-cache',
'Referer': 'https://xueqiu.com/snowman/S/NQ873286/detail',
'Sec-Ch-Ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"Windows"',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
}
dic_info = getinfo(com_code)
......
import json
import time
import requests
from bs4 import BeautifulSoup
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
from urllib import parse
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Cookie': 'qgqp_b_id=92f470109c2462c6c6aa5115d15f7b35; emshistory=%5B%22sz007sz%22%5D; qRecords=%5B%7B%22name%22%3A%22%u5929%u98CE%u8BC1%u5238%22%2C%22code%22%3A%22SH601162%22%7D%5D; HAList=ty-1-601766-%u4E2D%u56FD%u4E2D%u8F66%2Cty-116-03690-%u7F8E%u56E2-W%2Cty-0-002828-%u8D1D%u80AF%u80FD%u6E90%2Cty-1-601162-%u5929%u98CE%u8BC1%u5238%2Cty-0-000001-%u5E73%u5B89%u94F6%u884C%2Cty-0-002070-%u4F17%u548C%u9000%2Cty-1-600723-%u9996%u5546%u80A1%u4EFD%2Cty-0-300106-%u897F%u90E8%u7267%u4E1A%2Cty-116-00992-%u8054%u60F3%u96C6%u56E2%2Cty-0-300362-%u5929%u7FD4%u9000; st_si=06778540617554; st_pvi=44810095342512; st_sp=2023-07-18%2013%3A55%3A09; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=1; st_psi=20230901152305423-113300304201-4533354410; st_asi=delete',
'Host': 'data.eastmoney.com',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.62',
'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Microsoft Edge";v="116"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
file_name = '研报--产业链.xlsx'
for page in range(100,101):
log.info(f'----开始采集第{page}页-------')
param = {"uid":"",
"keyword":"产业链",
"type":["researchReport"],
"client":"web",
"clientVersion":"curr",
"clientType":"web",
"param":{"researchReport":{"client":"web","pageSize":10,"pageIndex":page}}
}
param_url = parse.quote(str(param).replace(" ", ""))
t = int(time.time() * 1000)
url = f'https://search-api-web.eastmoney.com/search/jsonp?cb=&param={param_url}&_={t}'
res = requests.get(url).text[1:-1]
res_json = json.loads(res)
list_all = res_json['result']['researchReport']
# print(list_all)
if list_all:
pass
else:
continue
num = 1
for one_news in list_all:
log.info(f'---------开始采集第{num}条---------')
dataList = []
com_name = one_news['stockName']
title = str(one_news['title']).replace('<em>','').replace('</em>','')
date = one_news['date'][:10]
code = one_news['code']
href = f'https://data.eastmoney.com/report/zw_stock.jshtml?infocode={code}'
# print(date,href)
#newsContent
req = requests.get(href,headers=headers,verify=False,timeout=30)
soup = BeautifulSoup(req.content,'html.parser')
content = soup.find('div',class_='newsContent')
try:
pdf_url = soup.find('div',class_='report-infos').find_all('span')[4].find('a')['href']
except:
log.info(f'-----{href}-----')
continue
#.find_all('span')[-1].find('a')['href']
log.info(pdf_url)
dic_info = {
'公司名称':com_name,
'标题':title,
'正文':content.text.strip(),
'附件链接':pdf_url,
'发布时间':date,
'contentwithTag':content.prettify()
}
# print(dic_info)
dataList.append(dic_info)
baseCore.writerToExcel(dataList,file_name)
num+=1
# break
# connect timeout in seconds
# default value is 30s
connect_timeout=300
# network timeout in seconds
# default value is 30s
network_timeout=600
# the base path to store log files
#base_path=/home/tarena/django-project/cc_shop1/cc_shop1/logs
# tracker_server can ocur more than once, and tracker_server format is
# "host:port", host can be hostname or ip address
tracker_server=114.115.215.96:22122
#standard log level as syslog, case insensitive, value list:
### emerg for emergency
### alert
### crit for critical
### error
### warn for warning
### notice
### info
### debug
log_level=info
# if use connection pool
# default value is false
# since V4.05
use_connection_pool = false
# connections whose the idle time exceeds this time will be closed
# unit: second
# default value is 3600
# since V4.05
connection_pool_max_idle_time = 3600
# if load FastDFS parameters from tracker server
# since V4.05
# default value is false
load_fdfs_parameters_from_tracker=false
# if use storage ID instead of IP address
# same as tracker.conf
# valid only when load_fdfs_parameters_from_tracker is false
# default value is false
# since V4.05
use_storage_id = false
# specify storage ids filename, can use relative or absolute path
# same as tracker.conf
# valid only when load_fdfs_parameters_from_tracker is false
# since V4.05
storage_ids_filename = storage_ids.conf
#HTTP settings
http.tracker_server_port=80
#use "#include" directive to include HTTP other settings
##include http.conf
\ No newline at end of file
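# A minimal sketch of reading this section-less client.conf with the bundled
# Fdfs_ConfigParser shown later in this commit; the import path
# fdfs_client.utils is an assumption based on the "filename: utils.py" header.
from fdfs_client.utils import Fdfs_ConfigParser

cf = Fdfs_ConfigParser()
cf.read('client.conf')
# Values land in the implicit '__config__' section the parser prepends.
print(cf.get('__config__', 'tracker_server'))       # 114.115.215.96:22122
print(cf.getint('__config__', 'connect_timeout'))   # 300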
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# filename: exceptions.py
'''Core exceptions raised by fdfs client'''
class FDFSError(Exception):
pass
class ConnectionError(FDFSError):
pass
class ResponseError(FDFSError):
pass
class InvaildResponse(FDFSError):
pass
class DataError(FDFSError):
pass
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# filename: fdfs_protol.py
import struct
import socket
from fdfs_client.exceptions import (
FDFSError,
ConnectionError,
ResponseError,
InvaildResponse,
DataError
)
# define FDFS protol constans
TRACKER_PROTO_CMD_STORAGE_JOIN = 81
FDFS_PROTO_CMD_QUIT = 82
TRACKER_PROTO_CMD_STORAGE_BEAT = 83 # storage heart beat
TRACKER_PROTO_CMD_STORAGE_REPORT_DISK_USAGE = 84 # report disk usage
TRACKER_PROTO_CMD_STORAGE_REPLICA_CHG = 85 # repl new storage servers
TRACKER_PROTO_CMD_STORAGE_SYNC_SRC_REQ = 86 # src storage require sync
TRACKER_PROTO_CMD_STORAGE_SYNC_DEST_REQ = 87 # dest storage require sync
TRACKER_PROTO_CMD_STORAGE_SYNC_NOTIFY = 88 # sync done notify
TRACKER_PROTO_CMD_STORAGE_SYNC_REPORT = 89 # report src last synced time as dest server
TRACKER_PROTO_CMD_STORAGE_SYNC_DEST_QUERY = 79 # dest storage query sync src storage server
TRACKER_PROTO_CMD_STORAGE_REPORT_IP_CHANGED = 78 # storage server report it's ip changed
TRACKER_PROTO_CMD_STORAGE_CHANGELOG_REQ = 77 # storage server request storage server's changelog
TRACKER_PROTO_CMD_STORAGE_REPORT_STATUS = 76 # report specified storage server status
TRACKER_PROTO_CMD_STORAGE_PARAMETER_REQ = 75 # storage server request parameters
TRACKER_PROTO_CMD_STORAGE_REPORT_TRUNK_FREE = 74 # storage report trunk free space
TRACKER_PROTO_CMD_STORAGE_REPORT_TRUNK_FID = 73 # storage report current trunk file id
TRACKER_PROTO_CMD_STORAGE_FETCH_TRUNK_FID = 72 # storage get current trunk file id
TRACKER_PROTO_CMD_TRACKER_GET_SYS_FILES_START = 61 # start of tracker get system data files
TRACKER_PROTO_CMD_TRACKER_GET_SYS_FILES_END = 62 # end of tracker get system data files
TRACKER_PROTO_CMD_TRACKER_GET_ONE_SYS_FILE = 63 # tracker get a system data file
TRACKER_PROTO_CMD_TRACKER_GET_STATUS = 64 # tracker get status of other tracker
TRACKER_PROTO_CMD_TRACKER_PING_LEADER = 65 # tracker ping leader
TRACKER_PROTO_CMD_TRACKER_NOTIFY_NEXT_LEADER = 66 # notify next leader to other trackers
TRACKER_PROTO_CMD_TRACKER_COMMIT_NEXT_LEADER = 67 # commit next leader to other trackers
TRACKER_PROTO_CMD_SERVER_LIST_ONE_GROUP = 90
TRACKER_PROTO_CMD_SERVER_LIST_ALL_GROUPS = 91
TRACKER_PROTO_CMD_SERVER_LIST_STORAGE = 92
TRACKER_PROTO_CMD_SERVER_DELETE_STORAGE = 93
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ONE = 101
TRACKER_PROTO_CMD_SERVICE_QUERY_FETCH_ONE = 102
TRACKER_PROTO_CMD_SERVICE_QUERY_UPDATE = 103
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITH_GROUP_ONE = 104
TRACKER_PROTO_CMD_SERVICE_QUERY_FETCH_ALL = 105
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ALL = 106
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITH_GROUP_ALL = 107
TRACKER_PROTO_CMD_RESP = 100
FDFS_PROTO_CMD_ACTIVE_TEST = 111 # active test, tracker and storage both support since V1.28
STORAGE_PROTO_CMD_REPORT_CLIENT_IP = 9 # ip as tracker client
STORAGE_PROTO_CMD_UPLOAD_FILE = 11
STORAGE_PROTO_CMD_DELETE_FILE = 12
STORAGE_PROTO_CMD_SET_METADATA = 13
STORAGE_PROTO_CMD_DOWNLOAD_FILE = 14
STORAGE_PROTO_CMD_GET_METADATA = 15
STORAGE_PROTO_CMD_SYNC_CREATE_FILE = 16
STORAGE_PROTO_CMD_SYNC_DELETE_FILE = 17
STORAGE_PROTO_CMD_SYNC_UPDATE_FILE = 18
STORAGE_PROTO_CMD_SYNC_CREATE_LINK = 19
STORAGE_PROTO_CMD_CREATE_LINK = 20
STORAGE_PROTO_CMD_UPLOAD_SLAVE_FILE = 21
STORAGE_PROTO_CMD_QUERY_FILE_INFO = 22
STORAGE_PROTO_CMD_UPLOAD_APPENDER_FILE = 23 # create appender file
STORAGE_PROTO_CMD_APPEND_FILE = 24 # append file
STORAGE_PROTO_CMD_SYNC_APPEND_FILE = 25
STORAGE_PROTO_CMD_FETCH_ONE_PATH_BINLOG = 26 # fetch binlog of one store path
STORAGE_PROTO_CMD_RESP = TRACKER_PROTO_CMD_RESP
STORAGE_PROTO_CMD_UPLOAD_MASTER_FILE = STORAGE_PROTO_CMD_UPLOAD_FILE
STORAGE_PROTO_CMD_TRUNK_ALLOC_SPACE = 27 # since V3.00
STORAGE_PROTO_CMD_TRUNK_ALLOC_CONFIRM = 28 # since V3.00
STORAGE_PROTO_CMD_TRUNK_FREE_SPACE = 29 # since V3.00
STORAGE_PROTO_CMD_TRUNK_SYNC_BINLOG = 30 # since V3.00
STORAGE_PROTO_CMD_TRUNK_GET_BINLOG_SIZE = 31 # since V3.07
STORAGE_PROTO_CMD_TRUNK_DELETE_BINLOG_MARKS = 32 # since V3.07
STORAGE_PROTO_CMD_TRUNK_TRUNCATE_BINLOG_FILE = 33 # since V3.07
STORAGE_PROTO_CMD_MODIFY_FILE = 34 # since V3.08
STORAGE_PROTO_CMD_SYNC_MODIFY_FILE = 35 # since V3.08
STORAGE_PROTO_CMD_TRUNCATE_FILE = 36 # since V3.08
STORAGE_PROTO_CMD_SYNC_TRUNCATE_FILE = 37 # since V3.08
# for overwrite all old metadata
STORAGE_SET_METADATA_FLAG_OVERWRITE = 'O'
STORAGE_SET_METADATA_FLAG_OVERWRITE_STR = "O"
# for replace, insert when the meta item not exist, otherwise update it
STORAGE_SET_METADATA_FLAG_MERGE = 'M'
STORAGE_SET_METADATA_FLAG_MERGE_STR = "M"
FDFS_RECORD_SEPERATOR = '\x01'
FDFS_FIELD_SEPERATOR = '\x02'
# common constants
FDFS_GROUP_NAME_MAX_LEN = 16
IP_ADDRESS_SIZE = 16
FDFS_PROTO_PKG_LEN_SIZE = 8
FDFS_PROTO_CMD_SIZE = 1
FDFS_PROTO_STATUS_SIZE = 1
FDFS_PROTO_IP_PORT_SIZE = (IP_ADDRESS_SIZE + 6)
FDFS_MAX_SERVERS_EACH_GROUP = 32
FDFS_MAX_GROUPS = 512
FDFS_MAX_TRACKERS = 16
FDFS_DOMAIN_NAME_MAX_LEN = 128
FDFS_MAX_META_NAME_LEN = 64
FDFS_MAX_META_VALUE_LEN = 256
FDFS_FILE_PREFIX_MAX_LEN = 16
FDFS_LOGIC_FILE_PATH_LEN = 10
FDFS_TRUE_FILE_PATH_LEN = 6
FDFS_FILENAME_BASE64_LENGTH = 27
FDFS_TRUNK_FILE_INFO_LEN = 16
FDFS_FILE_EXT_NAME_MAX_LEN = 6
FDFS_SPACE_SIZE_BASE_INDEX = 2 # storage space size based (MB)
FDFS_UPLOAD_BY_BUFFER = 1
FDFS_UPLOAD_BY_FILENAME = 2
FDFS_UPLOAD_BY_FILE = 3
FDFS_DOWNLOAD_TO_BUFFER = 1
FDFS_DOWNLOAD_TO_FILE = 2
FDFS_NORMAL_LOGIC_FILENAME_LENGTH = (
FDFS_LOGIC_FILE_PATH_LEN + FDFS_FILENAME_BASE64_LENGTH + FDFS_FILE_EXT_NAME_MAX_LEN + 1)
FDFS_TRUNK_FILENAME_LENGTH = (
FDFS_TRUE_FILE_PATH_LEN + FDFS_FILENAME_BASE64_LENGTH + FDFS_TRUNK_FILE_INFO_LEN + 1 + FDFS_FILE_EXT_NAME_MAX_LEN)
FDFS_TRUNK_LOGIC_FILENAME_LENGTH = (FDFS_TRUNK_FILENAME_LENGTH + (FDFS_LOGIC_FILE_PATH_LEN - FDFS_TRUE_FILE_PATH_LEN))
FDFS_VERSION_SIZE = 6
TRACKER_QUERY_STORAGE_FETCH_BODY_LEN = (FDFS_GROUP_NAME_MAX_LEN + IP_ADDRESS_SIZE - 1 + FDFS_PROTO_PKG_LEN_SIZE)
TRACKER_QUERY_STORAGE_STORE_BODY_LEN = (FDFS_GROUP_NAME_MAX_LEN + IP_ADDRESS_SIZE - 1 + FDFS_PROTO_PKG_LEN_SIZE + 1)
# status code, order is important!
FDFS_STORAGE_STATUS_INIT = 0
FDFS_STORAGE_STATUS_WAIT_SYNC = 1
FDFS_STORAGE_STATUS_SYNCING = 2
FDFS_STORAGE_STATUS_IP_CHANGED = 3
FDFS_STORAGE_STATUS_DELETED = 4
FDFS_STORAGE_STATUS_OFFLINE = 5
FDFS_STORAGE_STATUS_ONLINE = 6
FDFS_STORAGE_STATUS_ACTIVE = 7
FDFS_STORAGE_STATUS_RECOVERY = 9
FDFS_STORAGE_STATUS_NONE = 99
class Storage_server(object):
'''Class storage server for upload.'''
def __init__(self):
self.ip_addr = None
self.port = None
self.group_name = ''
self.store_path_index = 0
# Class tracker_header
class Tracker_header(object):
'''
Class for Pack or Unpack tracker header
struct tracker_header{
char pkg_len[FDFS_PROTO_PKG_LEN_SIZE],
char cmd,
char status,
}
'''
def __init__(self):
self.fmt = '!QBB' # pkg_len[FDFS_PROTO_PKG_LEN_SIZE] + cmd + status
self.st = struct.Struct(self.fmt)
self.pkg_len = 0
self.cmd = 0
self.status = 0
def _pack(self, pkg_len=0, cmd=0, status=0):
return self.st.pack(pkg_len, cmd, status)
def _unpack(self, bytes_stream):
self.pkg_len, self.cmd, self.status = self.st.unpack(bytes_stream)
return True
def header_len(self):
return self.st.size
def send_header(self, conn):
'''Send Tracker header to server.'''
header = self._pack(self.pkg_len, self.cmd, self.status)
try:
conn._sock.sendall(header)
except (socket.error, socket.timeout) as e:
raise ConnectionError('[-] Error: while writting to socket: %s' % (e.args,))
def recv_header(self, conn):
'''Receive response from server.
If successful, the members (pkg_len, cmd, status) hold the response.
'''
try:
header = conn._sock.recv(self.header_len())
except (socket.error, socket.timeout) as e:
raise ConnectionError('[-] Error: while reading from socket: %s' % (e.args,))
self._unpack(header)
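# Illustration of the header layout documented in the class docstring above
# (a sketch, not part of the library): 8-byte package length + 1-byte cmd +
# 1-byte status, packed big-endian with '!QBB'.
#   >>> import struct
#   >>> buf = struct.pack('!QBB', 1024, STORAGE_PROTO_CMD_UPLOAD_FILE, 0)
#   >>> len(buf)
#   10
#   >>> struct.unpack('!QBB', buf)
#   (1024, 11, 0)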
def fdfs_pack_metadata(meta_dict):
ret = ''
for key in meta_dict:
ret += '%s%c%s%c' % (key, FDFS_FIELD_SEPERATOR, meta_dict[key], FDFS_RECORD_SEPERATOR)
return ret[0:-1]
def fdfs_unpack_metadata(bytes_stream):
li = bytes_stream.split(FDFS_RECORD_SEPERATOR)
return dict([item.split(FDFS_FIELD_SEPERATOR) for item in li])
#!/usr/bin/env python
# -*- coding = utf-8 -*-
# filename: utils.py
import io
import os
import sys
import stat
import platform
import configparser
SUFFIX = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
__os_sep__ = "/" if platform.system() == 'Windows' else os.sep
def appromix(size, base=0):
'''Convert a byte-stream size to human-readable format.
Keyword arguments:
size: int, bytes stream size
base: int, suffix index
Return: string
'''
multiples = 1024
if size < 0:
raise ValueError('[-] Error: number must be non-negative.')
if size < multiples:
return '{0:d}{1}'.format(size, SUFFIX[base])
for suffix in SUFFIX[base:]:
if size < multiples:
return '{0:.2f}{1}'.format(size, suffix)
size = size / float(multiples)
raise ValueError('[-] Error: number too big.')
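# Usage sketch for appromix (values follow from the loop above):
#   >>> appromix(512)
#   '512B'
#   >>> appromix(1536)
#   '1.50KB'
#   >>> appromix(3 * 1024 ** 3)
#   '3.00GB'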
def get_file_ext_name(filename, double_ext=True):
li = filename.split(os.extsep)
if len(li) <= 1:
return ''
else:
if li[-1].find(__os_sep__) != -1:
return ''
if double_ext:
if len(li) > 2:
if li[-2].find(__os_sep__) == -1:
return '%s.%s' % (li[-2], li[-1])
return li[-1]
class Fdfs_ConfigParser(configparser.RawConfigParser):
"""
Extends ConfigParser to allow files without sections.
This is done by wrapping read files and prepending them with a placeholder
section, which defaults to '__config__'
"""
def __init__(self, default_section=None, *args, **kwargs):
configparser.RawConfigParser.__init__(self, *args, **kwargs)
self._default_section = None
self.set_default_section(default_section or '__config__')
def get_default_section(self):
return self._default_section
def set_default_section(self, section):
self.add_section(section)
# move all values from the previous default section to the new one
try:
default_section_items = self.items(self._default_section)
self.remove_section(self._default_section)
except configparser.NoSectionError:
pass
else:
for (key, value) in default_section_items:
self.set(section, key, value)
self._default_section = section
def read(self, filenames):
if isinstance(filenames, str):
filenames = [filenames]
read_ok = []
for filename in filenames:
try:
with open(filename) as fp:
self.readfp(fp)
except IOError:
continue
else:
read_ok.append(filename)
return read_ok
def readfp(self, fp, *args, **kwargs):
stream = io.StringIO()
try:
stream.name = fp.name
except AttributeError:
pass
stream.write('[' + self._default_section + ']\n')
stream.write(fp.read())
stream.seek(0, 0)
return self._read(stream, stream.name)
def write(self, fp):
# Write the items from the default section manually and then remove them
# from the data. They'll be re-added later.
try:
default_section_items = self.items(self._default_section)
self.remove_section(self._default_section)
for (key, value) in default_section_items:
fp.write("{0} = {1}\n".format(key, value))
fp.write("\n")
except configparser.NoSectionError:
pass
configparser.RawConfigParser.write(self, fp)
self.add_section(self._default_section)
for (key, value) in default_section_items:
self.set(self._default_section, key, value)
def _read(self, fp, fpname):
"""Parse a sectioned setup file.
The sections in setup file contains a title line at the top,
indicated by a name in square brackets (`[]'), plus key/value
options lines, indicated by `name: value' format lines.
Continuations are represented by an embedded newline then
leading whitespace. Blank lines, lines beginning with a '#',
and just about everything else are ignored.
"""
cursect = None # None, or a dictionary
optname = None
lineno = 0
e = None # None, or an exception
while True:
line = fp.readline()
if not line:
break
lineno = lineno + 1
# comment or blank line?
if line.strip() == '' or line[0] in '#;':
continue
if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR":
# no leading whitespace
continue
# continuation line?
if line[0].isspace() and cursect is not None and optname:
value = line.strip()
if value:
cursect[optname] = "%s\n%s" % (cursect[optname], value)
# a section header or option header?
else:
# is it a section header?
mo = self.SECTCRE.match(line)
if mo:
sectname = mo.group('header')
if sectname in self._sections:
cursect = self._sections[sectname]
elif sectname == DEFAULTSECT:
cursect = self._defaults
else:
cursect = self._dict()
cursect['__name__'] = sectname
self._sections[sectname] = cursect
# So sections can't start with a continuation line
optname = None
# no section header in the file?
elif cursect is None:
raise MissingSectionHeaderError(fpname, lineno, line)
# an option line?
else:
mo = self.OPTCRE.match(line)
if mo:
optname, vi, optval = mo.group('option', 'vi', 'value')
if vi in ('=', ':') and ';' in optval:
# ';' is a comment delimiter only if it follows
# a spacing character
pos = optval.find(';')
if pos != -1 and optval[pos - 1].isspace():
optval = optval[:pos]
optval = optval.strip()
# allow empty values
if optval == '""':
optval = ''
optname = self.optionxform(optname.rstrip())
if optname in cursect:
if not isinstance(cursect[optname], list):
cursect[optname] = [cursect[optname]]
cursect[optname].append(optval)
else:
cursect[optname] = optval
else:
# a non-fatal parsing error occurred. set up the
# exception but keep going. the exception will be
# raised at the end of the file and will contain a
# list of all bogus lines
if not e:
e = ParsingError(fpname)
e.append(lineno, repr(line))
# if any parsing errors occurred, raise an exception
if e:
raise e
def split_remote_fileid(remote_file_id):
'''
Split remote_file_id into (group_name, remote_file_name)
arguments:
@remote_file_id: string
@return tuple, (group_name, remote_file_name)
'''
index = remote_file_id.find(b'/')
if -1 == index:
return None
return (remote_file_id[0:index], remote_file_id[(index + 1):])
def fdfs_check_file(filename):
ret = True
errmsg = ''
if not os.path.isfile(filename):
ret = False
errmsg = '[-] Error: %s is not a file.' % filename
elif not stat.S_ISREG(os.stat(filename).st_mode):
ret = False
errmsg = '[-] Error: %s is not a regular file.' % filename
return (ret, errmsg)
if __name__ == '__main__':
print(get_file_ext_name('/bc.tar.gz'))
# link = "../path/to/file/../folder/../"
#
# count = link.count("../")
#
# print(count) # 输出:4
#
# my_list = [1, 2, 3]
#
# result = ','.join(map(str, my_list))
#
# print(type(result)) # 输出:1,2,3
import io
import os
import requests
import win32com.client as win32
import win32com
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Cookie': 'Secure HttpOnly; __jsluid_h=622c9569b7d6db50280612dfbe457ec4; __jsluid_s=b76555cb5e11504bde71924ab7ff780a; _va_ref=%5B%22%22%2C%22%22%2C1693534257%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3Dm-wPghSYa5m0rKj3FjcXWqp8gfXcvYwljt9y1RdmIrHXd1lAl4QhbIbXDjZR9hZY%26wd%3D%26eqid%3Da0f738ed0005f760000000036459cd31%22%5D; _va_id=afdeea114ed19176.1683278397.11.1693534257.1693534257.; _va_ses=*',
'Host': 'gzw.beijing.gov.cn',
'If-Modified-Since': 'Mon, 31 Jul 2023 13:33:44 GMT',
'If-None-Match': 'W/"64c7b838-9676"',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.62',
'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Microsoft Edge";v="116"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'}
# def doc_page(doc_bytes):
# doc = Document(doc_bytes)
# num_pages = len(doc.sections)
# print("文档页数:", num_pages)
# return num_pages
#
# file_href = 'https://gzw.beijing.gov.cn/xxfb/zcfg/202204/P020230106624599701195.doc'
# resp_content = requests.get(file_href,headers=headers,verify=False, timeout=20).content
# doc_bytes = BytesIO(resp_content)
# category = '.doc'
# if category == '.doc' or category == '.docx':
# page_size = doc_page(doc_bytes)
# print(page_size)
# from docx import Document
# import requests
# from io import BytesIO
# import docx2txt
# import docx
# # 获取远程Word文档内容
# url = "https://gzw.beijing.gov.cn/xxfb/zcfg/202204/P020230106624599701195.doc"
# response = requests.get(url,headers=headers,verify=False, timeout=20)
# doc_bytes = BytesIO(response.content)
#
# # 解析文档页数
# # doc = Document(doc_bytes)
# # num_pages = len(doc.sections)
# # print("文档页数:", num_pages)
#
# doc_file = docx.Document('E:\kkwork\P020230106624599701195.docx')
# # text = doc_file[0].getText()
# # print("文档内容:", text)
#
# # 解析文档页数
# num_pages = len(doc_file.sections)
# print("文档页数:", num_pages)
# from docx import Document
# import requests
#
# # # 下载远程Word文档
url = "https://gzw.beijing.gov.cn/xxfb/zcfg/202204/P020230106624599701195.doc"
# response = requests.get(url,headers=headers,verify=False, timeout=20)
# # with open("remote_document.doc", "wb") as file:
# # file.write(response.content)
#
# # 将doc转为docx格式
# import os
# os.system("textutil -convert docx remote_document.doc")
#
# # 解析文档页数
# doc = Document("remote_document.docx")
# num_pages = len(doc.sections)
# print("文档页数:", num_pages)
# def get_file_stream(file_url):
# # 发送GET请求获取文件内容
# response = requests.get(url,headers=headers,verify=False,timeout=10)
# # 将文件内容保存为文件流
# file_stream = io.BytesIO(response.content)
# return file_stream
# def read_file_stream(file_stream):
# # 创建一个Word应用程序对象
# word_app = win32com.client.Dispatch("Word.Application")
# # 将文件流保存为临时文件
# temp_file = "E:/temp.doc"
# with open(temp_file, "wb") as f:
# f.write(file_stream.read())
# # 打开临时文件
# doc = word_app.Documents.Open(temp_file)
# # 读取文件内容
# # content = doc.Content.Text
# # print(content)
# # 计算文档的统计信息
# doc.ComputeStatistics(2) # 参数2表示统计页数
# # 获取文档的页数
# page_count = doc.BuiltInDocumentProperties("Number of Pages").Value
# # 输出页数
# print(f"页数:{page_count}")
# # 关闭文件和应用程序
# doc.Close()
# word_app.Quit()
# # 删除临时文件
# os.remove(temp_file)
# return page_count
# file_stream = get_file_stream(url)
# page_count = read_file_stream(file_stream)
# print(f"页数22:{page_count}")
import io
import inspect
import requests
import os
import win32com.client as win32
import win32com
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
header={
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9',
'Cache-Control':'no-cache',
'Connection':'keep-alive',
'Cookie':'Secure HttpOnly; __jsluid_s=8e1b254e40166ac092b3e137cacf4f09; _va_ses=*; _va_id=0c66e4a68ee344ba.1693453315.1.1693453317.1693453315.',
'Host':'gzw.beijing.gov.cn',
'Pragma':'no-cache',
'Sec-Fetch-Dest':'document',
'Sec-Fetch-Mode':'navigate',
'Sec-Fetch-Site':'none',
'Sec-Fetch-User':'?1',
'Upgrade-Insecure-Requests':'1',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'sec-ch-ua':'"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
'sec-ch-ua-mobile':'?0',
'sec-ch-ua-platform':'"Windows"'
}
url='https://gzw.beijing.gov.cn/xxfb/zcfg/202204/P020230106624599701195.doc'
def get_file_stream(file_url):
# Send a GET request to fetch the file content
response = requests.get(file_url,headers=header,verify=False,timeout=10)
# Wrap the content in an in-memory file stream
file_stream = io.BytesIO(response.content)
return file_stream
def read_file_stream(file_stream):
# Create a Word application object
word_app = win32com.client.Dispatch("Word.Application")
# Save the stream to a temporary file
temp_file = "D:/temp.doc"
with open(temp_file, "wb") as f:
f.write(file_stream.read())
# Open the temporary file
doc = word_app.Documents.Open(temp_file)
# Read the document content
# content = doc.Content.Text
# print(content)
# Compute the document statistics
doc.ComputeStatistics(2) # argument 2 means count pages
# Get the page count
page_count = doc.BuiltInDocumentProperties("Number of Pages").Value
# Print the page count
print(f"页数:{page_count}")
# Close the document and quit Word
doc.Close()
word_app.Quit()
# Delete the temporary file
os.remove(temp_file)
return page_count
file_stream = get_file_stream(url)
page_count = read_file_stream(file_stream)
print(f"页数22:{page_count}")
print('程序结束!')
import json
import re
import time
import fitz
import pymongo
import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer
from pyquery import PyQuery as pq
from requests.packages import urllib3
from urllib.parse import urljoin
from BaseCore import BaseCore
baseCore = BaseCore()
urllib3.disable_warnings()
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from lxml import etree
from random import choice
def paserUrl(html,listurl):
# soup = BeautifulSoup(html, 'html.parser')
# Collect all <a> and <img> tags
links = html.find_all(['a', 'img'])
# Walk the tags and convert relative URLs to absolute ones
for link in links:
if 'href' in link.attrs:
link['href'] = urljoin(listurl, link['href'])
elif 'src' in link.attrs:
link['src'] = urljoin(listurl, link['src'])
return html
real_href = 'http://gzw.fujian.gov.cn/ztzl/gzjgfzjs/gfxwj_7426/201809/t20180911_4492105.htm'
href_text = requests.get(url=real_href, verify=False)
href_text.encoding = href_text.apparent_encoding
i_html = href_text.text
i_soup = BeautifulSoup(i_html, 'html.parser')
# Convert relative paths to absolute paths
i_soup = paserUrl(i_soup,real_href)
# print(i_soup)
source_ = str(i_soup.find('div', attrs={'class': 'xl_tit2_l'}).text)
pub_source = source_.split('来源:')[1].split('发布时间:')[0].strip().lstrip()
pub_time = source_.split('发布时间:')[1].split('浏览量:')[0].strip().lstrip()
content = i_soup.find('div', attrs={'class': 'xl_con1'})
pub_hao = ''
print(real_href)
id_list = []
#todo: collect the attachment URLs
fu_jian_list = content.find('ul',class_='clearflx myzj_xl_list').find_all('a')
for fu_jian in fu_jian_list:
fj_href = fu_jian['href']
if '.doc' in fj_href or '.pdf' in fj_href or '.xls' in fj_href or '.zip' in fj_href \
or '.rar' in fj_href or '.ppt' in fj_href or '.PDF' in fj_href or '.DOC' in fj_href \
or '.XLS' in fj_href or '.ZIP' in fj_href or '.RAR' in fj_href:
# After finding an attachment, upload it to the file server
retData = baseCore.uploadToserver(fj_href,'1673')
if retData['state']:
pass
else:
continue
att_id, full_path = baseCore.tableUpdate(retData, '福建省国资委', 'aaa', 1)
id_list.append(att_id)
fu_jian['href'] = 'http://114.115.215.96/' + full_path
print(f'-----{content}')
\ No newline at end of file
[redis]
host=114.115.236.206
port=6379
pass=clbzzsn
[mysql]
host=114.115.159.144
username=caiji
password=zzsn9988
database=caiji
url=jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers=114.115.159.144:9092
topic=keyWordsInfo
groupId=python_baidu
[selenium]
chrome_driver=C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe
binary_location=D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe
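# A minimal sketch of reading the settings above with the standard-library
# configparser (an assumption: this is roughly what base.BaseCore does; the
# file name 'config.ini' is assumed here).
import configparser

cfg = configparser.ConfigParser(interpolation=None)
cfg.read('config.ini', encoding='utf-8')
redis_host = cfg.get('redis', 'host')                   # 114.115.236.206
kafka_servers = cfg.get('kafka', 'bootstrap_servers')   # 114.115.159.144:9092
chrome_driver = cfg.get('selenium', 'chrome_driver')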
import redis
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn')
# Get all keys
keys = r.keys('*')
for key in keys:
f_key = key.decode()
if 'wx_url' in f_key:
list_info = []
print(f_key)
print("----------")
# Extract the sid
sid = f_key.split('url_')[1]
# Use the official-account key to fetch its set of values
value = list(r.smembers(f_key))
list_data = []
for member in value:
member = member.decode()
# print(member)
data = (sid,member)
# print(data)
# print(list_info)
select_sql = f"select sid from wx_link where link = '{member}'"
cursor.execute(select_sql)
select = cursor.fetchone()
if select:
continue
else:
list_info.append(data)
if len(list_info)!=0:
pass
else:
continue
# Bulk insert
sql = "INSERT INTO wx_link (sid, link,state) VALUES (%s, %s,'99')"
cursor.executemany(sql, list_info)
cnx.commit()
log.info(f'{sid}---插入成功--{len(list_info)}----')
baseCore.close()
import time
import pandas as pd
......@@ -30,11 +31,52 @@ import pandas as pd
# workbook.save(filename)
# writeaa()
gpdm = '01109.HK'
if 'HK' in str(gpdm):
tmp_g = str(gpdm).split('.')[0]
if len(tmp_g) == 5:
gpdm = str(gpdm)[1:]
print(gpdm)
# gpdm = '01109.HK'
# if 'HK' in str(gpdm):
# tmp_g = str(gpdm).split('.')[0]
# if len(tmp_g) == 5:
# gpdm = str(gpdm)[1:]
# print(gpdm)
# else:
# pass
import redis
from base.BaseCore import BaseCore
baseCore = BaseCore()
r = baseCore.r
key = 'counter'
expiration_time = 10 # 设置过期时间 60秒
# # 设置自增
# r.incr(key)
# # #自增并设置过期时间
# while True:
# # 设置自增
# r.incr(key)
# value = int(r.get(key).decode())
#
# if value > 10:
# print(value)
# # 设置过期时间
# r.expire(key, expiration_time)
# time.sleep(20)
# print('------------------')
# continue
# # print(value)
# time.sleep(5)
# print(value)
# print("==========")
def check_url():
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=6)
res = r.exists('WeiXinGZH:infoSourceCode','IN-20220418-0001')
print(res)
if res == 1:
print('True')
    else:
        print('False')
check_url()
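# A minimal runnable sketch of the increment-then-expire counter that the
# commented-out blocks above are experimenting with; key name, threshold,
# TTL and round count are illustrative.
import time


def throttled_count(r, key='counter', limit=10, ttl=10, rounds=30):
    for _ in range(rounds):
        value = r.incr(key)      # atomic increment; creates the key at 1 if missing
        if value > limit:
            r.expire(key, ttl)   # let the key expire so the count restarts from 0
            time.sleep(ttl)
            continue
        time.sleep(1)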
import time
import pandas as pd
# def writeaa():
# detailList=[]
# aa={
# 'id':3,
# 'name':'qqqwe'
# }
# detailList.append(aa)
# writerToExcel(detailList)
# 将数据追加到excel
# def writerToExcel(detailList):
# # filename='baidu搜索.xlsx'
# # 读取已存在的xlsx文件
# existing_data = pd.read_excel(filename,engine='openpyxl')
# # 创建新的数据
# new_data = pd.DataFrame(data=detailList)
# # 将新数据添加到现有数据的末尾
# combined_data = existing_data.append(new_data, ignore_index=True)
# # 将结果写入到xlsx文件
# combined_data.to_excel(filename, index=False)
#
# from openpyxl import Workbook
#
# if __name__ == '__main__':
# filename='test1.xlsx'
# # # 创建一个工作簿
# workbook = Workbook(filename)
# workbook.save(filename)
# writeaa()
# gpdm = '01109.HK'
# if 'HK' in str(gpdm):
# tmp_g = str(gpdm).split('.')[0]
# if len(tmp_g) == 5:
# gpdm = str(gpdm)[1:]
# print(gpdm)
# else:
# pass
from base.BaseCore import BaseCore
baseCore = BaseCore()
r = baseCore.r
# #自增并设置过期时间
# while True:
# key = 'mykey'
# expiration_time = 60 # 设置过期时间 60秒
# #设置自增
# r.incr(key)
#
#
#
# value = int(r.get(key).decode())
#
# if value > 10:
# print(value)
# # 设置过期时间
# r.expire(key, expiration_time)
#
# time.sleep(70)
# print('------------------')
# continue
# # print(value)
#
# print("==========")
# expiration_time = 60
# # 创建PubSub对象
# p = r.pubsub()
#
# # 订阅过期事件
# p.psubscribe('__keyevent@6__:expired')
# aa = p.listen()
# # 监听过期事件
# for message in p.listen():
# if message['type'] == 'pmessage':
# expired_key = message['data'].decode()
# print('过期的key:', expired_key)
# if expired_key == 'counter':
# # 执行重置操作
# r.set('counter', 0)
# print('计数器已重置为0')
# # 设置自增
# r.incr('counter')
# # 设置过期时间
# r.expire('counter', expiration_time)
for i in range(0, 24, 5):
print(i)
\ No newline at end of file
import pandas as pd
import numpy as np
import pymysql
import time
import requests
import certifi
from bs4 import BeautifulSoup
from base import BaseCore
baseCore = BaseCore.BaseCore()
log= baseCore.getLogger()
# cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
# cursor = cnx.cursor()
cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji', charset='utf8mb4')
curosr = cnx.cursor()
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
'cache-control': 'max-age=0',
#'cookie': 'maex=%7B%22v2%22%3A%7B%7D%7D; GUC=AQEBBwFjY49jkEIa8gQo&s=AQAAABw20C7P&g=Y2JIFQ; A1=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A3=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A1S=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc&j=WORLD; PRF=t%3D6954.T%252BTEL%252BSOLB.BR%252BSTM%252BEMR%252BGT%252BAMD%252BSYM.DE%252BPEMEX%252BSGO.PA%252BLRLCF%252BSYNH%252B001040.KS; cmp=t=1669714927&j=0&u=1---',
'sec-ch-ua': '"Chromium";v="106", "Google Chrome";v="106", "Not;A=Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': "Windows",
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}
def getInfo(gpdm,xydm):
print('开始')
gpdm_ = gpdm
if 'HK' in gpdm_:
gpdm_ = gpdm_[1:]
start = time.time()
retData={}
retData['base_info'] = {
'公司名称': '',
'英文名':'',
'信用代码': xydm,
'股票代码': gpdm_,
'地址': '',
'电话': '',
'公司网站': '',
'部门': '',
'行业': '',
'员工人数': '',
'公司简介': ''
}
retData['people_info']=[]
# https://finance.yahoo.com/quote/VOW3.DE/profile?p=VOW3.DE
url = f'https://finance.yahoo.com/quote/{gpdm}/profile?p={gpdm}'
time.sleep(3)
    response = None
    for i in range(0, 3):
        try:
            response = requests.get(url, headers=headers, verify=False)
            time.sleep(1)
            if response.status_code == 200:
                break
            else:
                log.error(f"{gpdm}---第{i}次---获取基本信息接口返回失败:{response.status_code}")
        except:
            continue
    if response is None or response.status_code != 200:
        log.error(f"{gpdm}------获取基本信息接口重试后依然失败")
        return retData
soup = BeautifulSoup(response.content, 'html.parser')
page = soup.find('div', {'id': 'Col1-0-Profile-Proxy'})
    try:
        name = page.find('h3', {'class': 'Fz(m) Mb(10px)'}).text
    except:
        name = ''
try:
com_info = page.find('div', {'class': 'Mb(25px)'})
except:
com_info = ''
try:
com_phone = com_info.find_all('p')[0].find('a').text
except:
com_phone = ''
try:
com_url = com_info.find_all('p')[0].find('a', {'target': '_blank'}).text
except:
com_url = ''
try:
com_address = com_info.find_all('p')[0].text.replace(com_phone, '').replace(com_url, '')
except:
com_address = ''
try:
com_bumen = com_info.find_all('p')[1].find_all('span', {'class': 'Fw(600)'})[0].text
except:
com_bumen = ''
try:
com_hangye = com_info.find_all('p')[1].find_all('span', {'class': 'Fw(600)'})[1].text
except:
com_hangye = ''
try:
com_people = com_info.find_all('p')[1].find_all('span', {'class': 'Fw(600)'})[2].text
except:
com_people = ''
try:
com_jianjie = page.find('p', {'class': 'Mt(15px) Lh(1.6)'}).text
except:
com_jianjie = ''
dic_com_info = {
'公司名称':'',
'英文名':name,
'信用代码': xydm,
'股票代码': gpdm_,
'地址': com_address,
'电话': com_phone,
'公司网站': com_url,
'部门': com_bumen,
'行业': com_hangye,
'员工人数': com_people,
'公司简介': com_jianjie
}
retData['base_info']=dic_com_info
#高管信息
retPeople = []
try:
list_people = page.find('table', {'class': 'W(100%)'}).find_all('tr')[1:]
except:
list_people = []
for one_people in list_people:
try:
p_name = one_people.find_all('td')[0].text
except:
p_name = ''
continue
try:
p_zhiwu = one_people.find_all('td')[1].text
except:
p_zhiwu = ''
try:
p_money = one_people.find_all('td')[2].text
except:
p_money = ''
try:
p_xingshi = one_people.find_all('td')[3].text
except:
p_xingshi = ''
try:
p_year = one_people.find_all('td')[4].text
except:
p_year = ''
if(p_zhiwu=="N/A"):
p_zhiwu=""
if (p_money == "N/A"):
p_money = ""
if (p_xingshi == "N/A"):
p_xingshi = ""
if (p_year == "N/A"):
p_year = ""
dic_main_people = {
'公司名称': name,
'股票代码': gpdm_,
'信用代码': xydm,
'姓名': p_name,
'职务': p_zhiwu,
'薪资': p_money,
'行使': p_xingshi,
'出生年份': p_year
}
retPeople.append(dic_main_people)
retData['people_info'] = retPeople
# df_a = pd.DataFrame(retData['base_info'])
log.info(f"获取基本信息--{gpdm},耗时{baseCore.getTimeCost(start, time.time())}")
return retData
# # 数据库中获取企业gpdm、xydm
sql_select = "SELECT * FROM Tfbs where col3 is not null and length(col3)>3 and col6 is not null and state1=1 and col3 like 'ZZSN%' order by date1 ,id LIMIT 1"
curosr.execute(sql_select)
data = curosr.fetchone()
id = data[0]
# 更新以获取企业的采集状态
# sql_update = f"UPDATE Tfbs set state1 = 2 WHERE id = {id}"
# curosr.execute(sql_update)
# cnx.commit()
xydm = data[4]
gpdm = data[7]
# 获取企业的基本信息和高管信息
retData = getInfo(gpdm,xydm)
print(retData)
curosr.close()
cnx.close()
url = 'https://finance.yahoo.com/quote/VOW3.DE/profile?p=VOW3.DE'
req = requests.get(url=url,headers=headers,verify=False)
print(req.status_code)
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# filename: connection.py
import socket
import os
import sys
import time
import random
from itertools import chain
from fdfs_client.exceptions import (
FDFSError,
ConnectionError,
ResponseError,
InvaildResponse,
DataError
)
# start class Connection
class Connection(object):
    '''Manage TCP communication to and from the FastDFS server.'''
def __init__(self, **conn_kwargs):
self.pid = os.getpid()
self.host_tuple = conn_kwargs['host_tuple']
self.remote_port = conn_kwargs['port']
self.remote_addr = None
self.timeout = conn_kwargs['timeout']
self._sock = None
def __del__(self):
try:
self.disconnect()
except:
pass
def connect(self):
'''Connect to fdfs server.'''
if self._sock:
return
try:
sock = self._connect()
except socket.error as e:
raise ConnectionError(self._errormessage(e))
self._sock = sock
# print '[+] Create a connection success.'
# print '\tLocal address is %s:%s.' % self._sock.getsockname()
# print '\tRemote address is %s:%s' % (self.remote_addr, self.remote_port)
def _connect(self):
'''Create TCP socket. The host is random one of host_tuple.'''
self.remote_addr = random.choice(self.host_tuple)
# print '[+] Connecting... remote: %s:%s' % (self.remote_addr, self.remote_port)
# sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# sock.settimeout(self.timeout)
sock = socket.create_connection((self.remote_addr, self.remote_port), self.timeout)
return sock
def disconnect(self):
'''Disconnect from fdfs server.'''
if self._sock is None:
return
try:
self._sock.close()
except socket.error as e:
raise ConnectionError(self._errormessage(e))
self._sock = None
def get_sock(self):
return self._sock
def _errormessage(self, exception):
# args for socket.error can either be (errno, "message")
# or just "message" '''
if len(exception.args) == 1:
return "[-] Error: connect to %s:%s. %s." % (self.remote_addr, self.remote_port, exception.args[0])
else:
return "[-] Error: %s connect to %s:%s. %s." % \
(exception.args[0], self.remote_addr, self.remote_port, exception.args[1])
# end class Connection
# start ConnectionPool
class ConnectionPool(object):
'''Generic Connection Pool'''
def __init__(self, name='', conn_class=Connection,
max_conn=None, **conn_kwargs):
self.pool_name = name
self.pid = os.getpid()
self.conn_class = conn_class
self.max_conn = max_conn or 2 ** 31
self.conn_kwargs = conn_kwargs
self._conns_created = 0
self._conns_available = []
self._conns_inuse = set()
# print '[+] Create a connection pool success, name: %s.' % self.pool_name
def _check_pid(self):
if self.pid != os.getpid():
self.destroy()
            self.__init__(self.pool_name, self.conn_class, self.max_conn, **self.conn_kwargs)
def make_conn(self):
'''Create a new connection.'''
if self._conns_created >= self.max_conn:
raise ConnectionError('[-] Error: Too many connections.')
num_try = 10
while True:
try:
if num_try <= 0:
sys.exit()
conn_instance = self.conn_class(**self.conn_kwargs)
conn_instance.connect()
self._conns_created += 1
break
except ConnectionError as e:
print(e)
num_try -= 1
conn_instance = None
return conn_instance
def get_connection(self):
'''Get a connection from pool.'''
self._check_pid()
try:
conn = self._conns_available.pop()
# print '[+] Get a connection from pool %s.' % self.pool_name
# print '\tLocal address is %s:%s.' % conn._sock.getsockname()
# print '\tRemote address is %s:%s' % (conn.remote_addr, conn.remote_port)
except IndexError:
conn = self.make_conn()
self._conns_inuse.add(conn)
return conn
def remove(self, conn):
'''Remove connection from pool.'''
if conn in self._conns_inuse:
self._conns_inuse.remove(conn)
self._conns_created -= 1
if conn in self._conns_available:
self._conns_available.remove(conn)
self._conns_created -= 1
def destroy(self):
'''Disconnect all connections in the pool.'''
all_conns = chain(self._conns_inuse, self._conns_available)
for conn in all_conns:
conn.disconnect()
# print '[-] Destroy connection pool %s.' % self.pool_name
def release(self, conn):
'''Release the connection back to the pool.'''
self._check_pid()
if conn.pid == self.pid:
self._conns_inuse.remove(conn)
self._conns_available.append(conn)
# print '[-] Release connection back to pool %s.' % self.pool_name
# end ConnectionPool class
def tcp_recv_response(conn, bytes_size, buffer_size=4096):
'''Receive response from server.
    It does not include the tracker header.
arguments:
@conn: connection
@bytes_size: int, will be received byte_stream size
@buffer_size: int, receive buffer size
@Return: tuple,(response, received_size)
'''
recv_buff = []
total_size = 0
try:
while bytes_size > 0:
resp = conn._sock.recv(buffer_size)
recv_buff.append(resp)
total_size += len(resp)
bytes_size -= len(resp)
except (socket.error, socket.timeout) as e:
raise ConnectionError('[-] Error: while reading from socket: (%s)' % e.args)
return (b''.join(recv_buff), total_size)
def tcp_send_data(conn, bytes_stream):
    '''Send buffer to server.
    It does not include the tracker header.
    arguments:
    @conn: connection
    @bytes_stream: transmit buffer
    @Return bool
    '''
    try:
        conn._sock.sendall(bytes_stream)
    except (socket.error, socket.timeout) as e:
        raise ConnectionError('[-] Error: while writing to socket: (%s)' % e.args)
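# A minimal sketch of how the pool above is typically driven; the tracker
# address, port and timeout are placeholders.
if __name__ == '__main__':
    pool = ConnectionPool(name='tracker_pool',
                          host_tuple=('192.168.0.1',),
                          port=22122,
                          timeout=30)
    conn = pool.get_connection()      # pops an idle connection or dials a new one
    try:
        sock = conn.get_sock()        # raw socket used by tcp_send_data / tcp_recv_response
    finally:
        pool.release(conn)            # hand the connection back to the idle list
    pool.destroy()                    # close every connection on shutdown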
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# filename: exceptions.py
'''Core exceptions raised by fdfs client'''
class FDFSError(Exception):
pass
class ConnectionError(FDFSError):
pass
class ResponseError(FDFSError):
pass
class InvaildResponse(FDFSError):
pass
class DataError(FDFSError):
pass
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# filename: fdfs_protol.py
import struct
import socket
from fdfs_client.exceptions import (
FDFSError,
ConnectionError,
ResponseError,
InvaildResponse,
DataError
)
# define FDFS protol constans
TRACKER_PROTO_CMD_STORAGE_JOIN = 81
FDFS_PROTO_CMD_QUIT = 82
TRACKER_PROTO_CMD_STORAGE_BEAT = 83 # storage heart beat
TRACKER_PROTO_CMD_STORAGE_REPORT_DISK_USAGE = 84 # report disk usage
TRACKER_PROTO_CMD_STORAGE_REPLICA_CHG = 85 # repl new storage servers
TRACKER_PROTO_CMD_STORAGE_SYNC_SRC_REQ = 86 # src storage require sync
TRACKER_PROTO_CMD_STORAGE_SYNC_DEST_REQ = 87 # dest storage require sync
TRACKER_PROTO_CMD_STORAGE_SYNC_NOTIFY = 88 # sync done notify
TRACKER_PROTO_CMD_STORAGE_SYNC_REPORT = 89 # report src last synced time as dest server
TRACKER_PROTO_CMD_STORAGE_SYNC_DEST_QUERY = 79 # dest storage query sync src storage server
TRACKER_PROTO_CMD_STORAGE_REPORT_IP_CHANGED = 78 # storage server report it's ip changed
TRACKER_PROTO_CMD_STORAGE_CHANGELOG_REQ = 77 # storage server request storage server's changelog
TRACKER_PROTO_CMD_STORAGE_REPORT_STATUS = 76 # report specified storage server status
TRACKER_PROTO_CMD_STORAGE_PARAMETER_REQ = 75 # storage server request parameters
TRACKER_PROTO_CMD_STORAGE_REPORT_TRUNK_FREE = 74 # storage report trunk free space
TRACKER_PROTO_CMD_STORAGE_REPORT_TRUNK_FID = 73 # storage report current trunk file id
TRACKER_PROTO_CMD_STORAGE_FETCH_TRUNK_FID = 72 # storage get current trunk file id
TRACKER_PROTO_CMD_TRACKER_GET_SYS_FILES_START = 61 # start of tracker get system data files
TRACKER_PROTO_CMD_TRACKER_GET_SYS_FILES_END = 62 # end of tracker get system data files
TRACKER_PROTO_CMD_TRACKER_GET_ONE_SYS_FILE = 63 # tracker get a system data file
TRACKER_PROTO_CMD_TRACKER_GET_STATUS = 64 # tracker get status of other tracker
TRACKER_PROTO_CMD_TRACKER_PING_LEADER = 65 # tracker ping leader
TRACKER_PROTO_CMD_TRACKER_NOTIFY_NEXT_LEADER = 66 # notify next leader to other trackers
TRACKER_PROTO_CMD_TRACKER_COMMIT_NEXT_LEADER = 67 # commit next leader to other trackers
TRACKER_PROTO_CMD_SERVER_LIST_ONE_GROUP = 90
TRACKER_PROTO_CMD_SERVER_LIST_ALL_GROUPS = 91
TRACKER_PROTO_CMD_SERVER_LIST_STORAGE = 92
TRACKER_PROTO_CMD_SERVER_DELETE_STORAGE = 93
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ONE = 101
TRACKER_PROTO_CMD_SERVICE_QUERY_FETCH_ONE = 102
TRACKER_PROTO_CMD_SERVICE_QUERY_UPDATE = 103
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITH_GROUP_ONE = 104
TRACKER_PROTO_CMD_SERVICE_QUERY_FETCH_ALL = 105
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ALL = 106
TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITH_GROUP_ALL = 107
TRACKER_PROTO_CMD_RESP = 100
FDFS_PROTO_CMD_ACTIVE_TEST = 111 # active test, tracker and storage both support since V1.28
STORAGE_PROTO_CMD_REPORT_CLIENT_IP = 9 # ip as tracker client
STORAGE_PROTO_CMD_UPLOAD_FILE = 11
STORAGE_PROTO_CMD_DELETE_FILE = 12
STORAGE_PROTO_CMD_SET_METADATA = 13
STORAGE_PROTO_CMD_DOWNLOAD_FILE = 14
STORAGE_PROTO_CMD_GET_METADATA = 15
STORAGE_PROTO_CMD_SYNC_CREATE_FILE = 16
STORAGE_PROTO_CMD_SYNC_DELETE_FILE = 17
STORAGE_PROTO_CMD_SYNC_UPDATE_FILE = 18
STORAGE_PROTO_CMD_SYNC_CREATE_LINK = 19
STORAGE_PROTO_CMD_CREATE_LINK = 20
STORAGE_PROTO_CMD_UPLOAD_SLAVE_FILE = 21
STORAGE_PROTO_CMD_QUERY_FILE_INFO = 22
STORAGE_PROTO_CMD_UPLOAD_APPENDER_FILE = 23 # create appender file
STORAGE_PROTO_CMD_APPEND_FILE = 24 # append file
STORAGE_PROTO_CMD_SYNC_APPEND_FILE = 25
STORAGE_PROTO_CMD_FETCH_ONE_PATH_BINLOG = 26 # fetch binlog of one store path
STORAGE_PROTO_CMD_RESP = TRACKER_PROTO_CMD_RESP
STORAGE_PROTO_CMD_UPLOAD_MASTER_FILE = STORAGE_PROTO_CMD_UPLOAD_FILE
STORAGE_PROTO_CMD_TRUNK_ALLOC_SPACE = 27 # since V3.00
STORAGE_PROTO_CMD_TRUNK_ALLOC_CONFIRM = 28 # since V3.00
STORAGE_PROTO_CMD_TRUNK_FREE_SPACE = 29 # since V3.00
STORAGE_PROTO_CMD_TRUNK_SYNC_BINLOG = 30 # since V3.00
STORAGE_PROTO_CMD_TRUNK_GET_BINLOG_SIZE = 31 # since V3.07
STORAGE_PROTO_CMD_TRUNK_DELETE_BINLOG_MARKS = 32 # since V3.07
STORAGE_PROTO_CMD_TRUNK_TRUNCATE_BINLOG_FILE = 33 # since V3.07
STORAGE_PROTO_CMD_MODIFY_FILE = 34 # since V3.08
STORAGE_PROTO_CMD_SYNC_MODIFY_FILE = 35 # since V3.08
STORAGE_PROTO_CMD_TRUNCATE_FILE = 36 # since V3.08
STORAGE_PROTO_CMD_SYNC_TRUNCATE_FILE = 37 # since V3.08
# for overwrite all old metadata
STORAGE_SET_METADATA_FLAG_OVERWRITE = 'O'
STORAGE_SET_METADATA_FLAG_OVERWRITE_STR = "O"
# for replace, insert when the meta item not exist, otherwise update it
STORAGE_SET_METADATA_FLAG_MERGE = 'M'
STORAGE_SET_METADATA_FLAG_MERGE_STR = "M"
FDFS_RECORD_SEPERATOR = '\x01'
FDFS_FIELD_SEPERATOR = '\x02'
# common constants
FDFS_GROUP_NAME_MAX_LEN = 16
IP_ADDRESS_SIZE = 16
FDFS_PROTO_PKG_LEN_SIZE = 8
FDFS_PROTO_CMD_SIZE = 1
FDFS_PROTO_STATUS_SIZE = 1
FDFS_PROTO_IP_PORT_SIZE = (IP_ADDRESS_SIZE + 6)
FDFS_MAX_SERVERS_EACH_GROUP = 32
FDFS_MAX_GROUPS = 512
FDFS_MAX_TRACKERS = 16
FDFS_DOMAIN_NAME_MAX_LEN = 128
FDFS_MAX_META_NAME_LEN = 64
FDFS_MAX_META_VALUE_LEN = 256
FDFS_FILE_PREFIX_MAX_LEN = 16
FDFS_LOGIC_FILE_PATH_LEN = 10
FDFS_TRUE_FILE_PATH_LEN = 6
FDFS_FILENAME_BASE64_LENGTH = 27
FDFS_TRUNK_FILE_INFO_LEN = 16
FDFS_FILE_EXT_NAME_MAX_LEN = 6
FDFS_SPACE_SIZE_BASE_INDEX = 2 # storage space size based (MB)
FDFS_UPLOAD_BY_BUFFER = 1
FDFS_UPLOAD_BY_FILENAME = 2
FDFS_UPLOAD_BY_FILE = 3
FDFS_DOWNLOAD_TO_BUFFER = 1
FDFS_DOWNLOAD_TO_FILE = 2
FDFS_NORMAL_LOGIC_FILENAME_LENGTH = (
FDFS_LOGIC_FILE_PATH_LEN + FDFS_FILENAME_BASE64_LENGTH + FDFS_FILE_EXT_NAME_MAX_LEN + 1)
FDFS_TRUNK_FILENAME_LENGTH = (
FDFS_TRUE_FILE_PATH_LEN + FDFS_FILENAME_BASE64_LENGTH + FDFS_TRUNK_FILE_INFO_LEN + 1 + FDFS_FILE_EXT_NAME_MAX_LEN)
FDFS_TRUNK_LOGIC_FILENAME_LENGTH = (FDFS_TRUNK_FILENAME_LENGTH + (FDFS_LOGIC_FILE_PATH_LEN - FDFS_TRUE_FILE_PATH_LEN))
FDFS_VERSION_SIZE = 6
TRACKER_QUERY_STORAGE_FETCH_BODY_LEN = (FDFS_GROUP_NAME_MAX_LEN + IP_ADDRESS_SIZE - 1 + FDFS_PROTO_PKG_LEN_SIZE)
TRACKER_QUERY_STORAGE_STORE_BODY_LEN = (FDFS_GROUP_NAME_MAX_LEN + IP_ADDRESS_SIZE - 1 + FDFS_PROTO_PKG_LEN_SIZE + 1)
# status code, order is important!
FDFS_STORAGE_STATUS_INIT = 0
FDFS_STORAGE_STATUS_WAIT_SYNC = 1
FDFS_STORAGE_STATUS_SYNCING = 2
FDFS_STORAGE_STATUS_IP_CHANGED = 3
FDFS_STORAGE_STATUS_DELETED = 4
FDFS_STORAGE_STATUS_OFFLINE = 5
FDFS_STORAGE_STATUS_ONLINE = 6
FDFS_STORAGE_STATUS_ACTIVE = 7
FDFS_STORAGE_STATUS_RECOVERY = 9
FDFS_STORAGE_STATUS_NONE = 99
class Storage_server(object):
'''Class storage server for upload.'''
def __init__(self):
self.ip_addr = None
self.port = None
self.group_name = ''
self.store_path_index = 0
# Class tracker_header
class Tracker_header(object):
'''
Class for Pack or Unpack tracker header
struct tracker_header{
char pkg_len[FDFS_PROTO_PKG_LEN_SIZE],
char cmd,
char status,
}
'''
def __init__(self):
self.fmt = '!QBB' # pkg_len[FDFS_PROTO_PKG_LEN_SIZE] + cmd + status
self.st = struct.Struct(self.fmt)
self.pkg_len = 0
self.cmd = 0
self.status = 0
def _pack(self, pkg_len=0, cmd=0, status=0):
return self.st.pack(pkg_len, cmd, status)
def _unpack(self, bytes_stream):
self.pkg_len, self.cmd, self.status = self.st.unpack(bytes_stream)
return True
def header_len(self):
return self.st.size
def send_header(self, conn):
'''Send Tracker header to server.'''
header = self._pack(self.pkg_len, self.cmd, self.status)
try:
conn._sock.sendall(header)
except (socket.error, socket.timeout) as e:
            raise ConnectionError('[-] Error: while writing to socket: %s' % (e.args,))
def recv_header(self, conn):
'''Receive response from server.
        If successful, the class members (pkg_len, cmd, status) hold the response.
'''
try:
header = conn._sock.recv(self.header_len())
except (socket.error, socket.timeout) as e:
raise ConnectionError('[-] Error: while reading from socket: %s' % (e.args,))
self._unpack(header)
def fdfs_pack_metadata(meta_dict):
ret = ''
for key in meta_dict:
ret += '%s%c%s%c' % (key, FDFS_FIELD_SEPERATOR, meta_dict[key], FDFS_RECORD_SEPERATOR)
return ret[0:-1]
def fdfs_unpack_metadata(bytes_stream):
li = bytes_stream.split(FDFS_RECORD_SEPERATOR)
return dict([item.split(FDFS_FIELD_SEPERATOR) for item in li])
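# A small illustrative round-trip of the '!QBB' header format and the metadata
# helpers above; the command value used here is arbitrary.
if __name__ == '__main__':
    th = Tracker_header()
    th.pkg_len = 16
    th.cmd = TRACKER_PROTO_CMD_RESP
    raw = th._pack(th.pkg_len, th.cmd, th.status)   # 8-byte length + cmd + status = 10 bytes
    th2 = Tracker_header()
    th2._unpack(raw)
    print(th2.pkg_len, th2.cmd, th2.status)         # -> 16 100 0

    packed = fdfs_pack_metadata({'width': '1024', 'height': '768'})
    print(fdfs_unpack_metadata(packed))             # -> {'width': '1024', 'height': '768'}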
#!/usr/bin/env python
# -*- coding = utf-8 -*-
# filename: utils.py
import io
import os
import sys
import stat
import platform
import configparser
SUFFIX = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
__os_sep__ = "/" if platform.system() == 'Windows' else os.sep
def appromix(size, base=0):
    '''Convert a byte-stream size to human-readable format.
Keyword arguments:
size: int, bytes stream size
base: int, suffix index
Return: string
'''
multiples = 1024
if size < 0:
raise ValueError('[-] Error: number must be non-negative.')
if size < multiples:
return '{0:d}{1}'.format(size, SUFFIX[base])
for suffix in SUFFIX[base:]:
if size < multiples:
return '{0:.2f}{1}'.format(size, suffix)
size = size / float(multiples)
raise ValueError('[-] Error: number too big.')
def get_file_ext_name(filename, double_ext=True):
li = filename.split(os.extsep)
if len(li) <= 1:
return ''
else:
if li[-1].find(__os_sep__) != -1:
return ''
if double_ext:
if len(li) > 2:
if li[-2].find(__os_sep__) == -1:
return '%s.%s' % (li[-2], li[-1])
return li[-1]
class Fdfs_ConfigParser(configparser.RawConfigParser):
"""
Extends ConfigParser to allow files without sections.
This is done by wrapping read files and prepending them with a placeholder
section, which defaults to '__config__'
"""
def __init__(self, default_section=None, *args, **kwargs):
configparser.RawConfigParser.__init__(self, *args, **kwargs)
self._default_section = None
self.set_default_section(default_section or '__config__')
def get_default_section(self):
return self._default_section
def set_default_section(self, section):
self.add_section(section)
# move all values from the previous default section to the new one
try:
default_section_items = self.items(self._default_section)
self.remove_section(self._default_section)
except configparser.NoSectionError:
pass
else:
for (key, value) in default_section_items:
self.set(section, key, value)
self._default_section = section
def read(self, filenames):
if isinstance(filenames, str):
filenames = [filenames]
read_ok = []
for filename in filenames:
try:
with open(filename) as fp:
self.readfp(fp)
except IOError:
continue
else:
read_ok.append(filename)
return read_ok
def readfp(self, fp, *args, **kwargs):
stream = io.StringIO()
try:
stream.name = fp.name
except AttributeError:
pass
stream.write('[' + self._default_section + ']\n')
stream.write(fp.read())
stream.seek(0, 0)
return self._read(stream, stream.name)
def write(self, fp):
# Write the items from the default section manually and then remove them
# from the data. They'll be re-added later.
try:
default_section_items = self.items(self._default_section)
self.remove_section(self._default_section)
for (key, value) in default_section_items:
fp.write("{0} = {1}\n".format(key, value))
fp.write("\n")
except configparser.NoSectionError:
pass
configparser.RawConfigParser.write(self, fp)
self.add_section(self._default_section)
for (key, value) in default_section_items:
self.set(self._default_section, key, value)
def _read(self, fp, fpname):
"""Parse a sectioned setup file.
The sections in setup file contains a title line at the top,
indicated by a name in square brackets (`[]'), plus key/value
options lines, indicated by `name: value' format lines.
Continuations are represented by an embedded newline then
leading whitespace. Blank lines, lines beginning with a '#',
and just about everything else are ignored.
"""
cursect = None # None, or a dictionary
optname = None
lineno = 0
e = None # None, or an exception
while True:
line = fp.readline()
if not line:
break
lineno = lineno + 1
# comment or blank line?
if line.strip() == '' or line[0] in '#;':
continue
if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR":
# no leading whitespace
continue
# continuation line?
if line[0].isspace() and cursect is not None and optname:
value = line.strip()
if value:
cursect[optname] = "%s\n%s" % (cursect[optname], value)
# a section header or option header?
else:
# is it a section header?
mo = self.SECTCRE.match(line)
if mo:
sectname = mo.group('header')
if sectname in self._sections:
cursect = self._sections[sectname]
                    elif sectname == configparser.DEFAULTSECT:
cursect = self._defaults
else:
cursect = self._dict()
cursect['__name__'] = sectname
self._sections[sectname] = cursect
# So sections can't start with a continuation line
optname = None
# no section header in the file?
elif cursect is None:
                    raise configparser.MissingSectionHeaderError(fpname, lineno, line)
# an option line?
else:
mo = self.OPTCRE.match(line)
if mo:
optname, vi, optval = mo.group('option', 'vi', 'value')
if vi in ('=', ':') and ';' in optval:
# ';' is a comment delimiter only if it follows
# a spacing character
pos = optval.find(';')
if pos != -1 and optval[pos - 1].isspace():
optval = optval[:pos]
optval = optval.strip()
# allow empty values
if optval == '""':
optval = ''
optname = self.optionxform(optname.rstrip())
if optname in cursect:
if not isinstance(cursect[optname], list):
cursect[optname] = [cursect[optname]]
cursect[optname].append(optval)
else:
cursect[optname] = optval
else:
# a non-fatal parsing error occurred. set up the
# exception but keep going. the exception will be
# raised at the end of the file and will contain a
# list of all bogus lines
if not e:
                        e = configparser.ParsingError(fpname)
e.append(lineno, repr(line))
# if any parsing errors occurred, raise an exception
if e:
raise e
def split_remote_fileid(remote_file_id):
'''
    Split remote_file_id into (group_name, remote_file_name)
arguments:
@remote_file_id: string
@return tuple, (group_name, remote_file_name)
'''
index = remote_file_id.find(b'/')
if -1 == index:
return None
return (remote_file_id[0:index], remote_file_id[(index + 1):])
def fdfs_check_file(filename):
ret = True
errmsg = ''
if not os.path.isfile(filename):
ret = False
errmsg = '[-] Error: %s is not a file.' % filename
elif not stat.S_ISREG(os.stat(filename).st_mode):
ret = False
errmsg = '[-] Error: %s is not a regular file.' % filename
return (ret, errmsg)
if __name__ == '__main__':
print(get_file_ext_name('/bc.tar.gz'))
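    # A few more illustrative calls against the helpers above; expected outputs in comments.
    print(appromix(1024))                              # -> 1.00KB
    print(appromix(10 * 1024 ** 2))                    # -> 10.00MB
    print(get_file_ext_name('report.tar.gz'))          # -> tar.gz (double extension kept)
    print(get_file_ext_name('report.tar.gz', False))   # -> gz
    print(split_remote_fileid(b'group1/M00/00/00/abc.pdf'))  # -> (b'group1', b'M00/00/00/abc.pdf')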
# -*- coding: utf-8 -*-
import requests
import datetime
from pyquery import PyQuery as pq
# import cx_Oracle
import urllib3
urllib3.disable_warnings()
# conn = cx_Oracle.connect('cis', 'cis_zzsn9988', '114.116.91.1:1521/ORCL')
# cursor = conn.cursor()
result_list1 = [
[
'人民币',
'CNY'],
[
'美元',
'USD'],
[
'欧元',
'EUR'],
[
'瑞士法郎',
'CHF'],
[
'加元',
'CAD'],
[
'波兰兹罗提',
'PLN'],
[
'英镑',
'GBP'],
[
'澳元',
'AUD'],
[
'泰铢',
'THB'],
[
'沙特里亚尔',
'SAR'],
[
'巴西里亚伊',
'BRL'],
[
'新土耳其新里拉',
'TRY'],
[
'新台币',
'TWD'],
[
'印度卢比',
'INR'],
[
'墨西哥比索',
'MXN'],
[
'日元',
'JPY'],
[
'瑞典克朗',
'SEK'],
[
'韩元',
'KRW'],
[
'俄罗斯卢布',
'RUB'],
[
'新加坡元',
'SGD'],
[
'港币',
'HKD']]
result_list2 = [
'USD',
'CNY']
headers = {
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.9',
'Cache-Control':'no-cache',
'Connection':'keep-alive',
'Cookie':'ASPSESSIONIDQQRTBSSA=CJNLDGJALHNJGKBKPEBCDCOE; Hm_lvt_ecdd6f3afaa488ece3938bcdbb89e8da=1692951904; Hm_lpvt_ecdd6f3afaa488ece3938bcdbb89e8da=1692951904',
'Host':'qq.ip138.com',
'Pragma':'no-cache',
'Sec-Fetch-Dest':'document',
'Sec-Fetch-Mode':'navigate',
'Sec-Fetch-Site':'none',
'Sec-Fetch-User':'?1',
'Upgrade-Insecure-Requests':'1',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
'sec-ch-ua':'"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
'sec-ch-ua-mobile':'?0',
'sec-ch-ua-platform':'"Windows"'
}
for result1 in result_list1:
currency_name = result1[0]
currency = result1[1]
to_USD = ''
to_CNY = ''
for i in range(len(result_list2)):
result2 = result_list2[i]
        # https://qq.ip138.com/hl.asp?from=CNY&to=USD&q=1
        url = f'https://qq.ip138.com/hl.asp?from={currency}&to={result2}&q=1'
        res = requests.get(url, headers=headers, verify=False)
resp_text=res.content
doc_resp = pq(resp_text)
money = doc_resp('table tr:nth-child(3) td:nth-child(3)').text()
if money == '1':
money_result = money
else:
try:
money_result = round(float(money), 4)
except:
continue
if i == 0:
to_USD = money_result
else:
to_CNY = money_result
now = datetime.datetime.now()
now_time = now.strftime('%Y-%m-%d')
if to_USD == '' or to_CNY == '':
continue
result_dict = {
'币种': currency_name,
'币简称': currency,
'对美元': to_USD,
'对人民币': to_CNY,
'更新时间': now_time }
print(result_dict)
# sql = "insert into CIS_QCC_CURRENCY_RATE(ID, CURRENCY_NAME, CURRENCY_CODE, RATE_TO_USD, RATE_TO_CNY,CREATE_DATE) values (SEQ_CIS_QCC_CURRENCY_RATE.nextval,'{0}','{1}','{2}','{3}','{4}')".format(currency_name, currency, to_USD, to_CNY, now_time)
# cursor.execute(sql)
# conn.commit()
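    # A hedged sketch of the same insert with cx_Oracle bind variables instead of
    # str.format; conn/cursor stay commented out above, so this is illustrative only.
    # insert_sql = ("insert into CIS_QCC_CURRENCY_RATE"
    #               "(ID, CURRENCY_NAME, CURRENCY_CODE, RATE_TO_USD, RATE_TO_CNY, CREATE_DATE) "
    #               "values (SEQ_CIS_QCC_CURRENCY_RATE.nextval, :name, :code, :usd, :cny, :dt)")
    # cursor.execute(insert_sql, name=currency_name, code=currency,
    #                usd=to_USD, cny=to_CNY, dt=now_time)
    # conn.commit()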
......@@ -7,7 +7,7 @@ import requests
import urllib3
from pymysql.converters import escape_string
from base.BaseCore import BaseCore
from BaseCore import BaseCore
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
......@@ -50,8 +50,9 @@ def get_file_name(headers):
def downFile(url,path):
try:
baseCore.mkPath(path)
proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
response = requests.get(url, proxies=proxy, headers=headers, verify=False,timeout=10)
# proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
response = requests.get(url, headers=headers, verify=False, timeout=10)
# response = requests.get(url, proxies=proxy, headers=headers, verify=False,timeout=10)
fileName = get_file_name(response.headers)
with open(os.path.join(path, fileName), "wb") as pyFile:
for chunk in response.iter_content(chunk_size=1024):
......
import os
import pandas as pd
import pymysql
import requests
from bs4 import BeautifulSoup
from pymysql.converters import escape_string
import downPdf
from BaseCore import BaseCore
from datetime import datetime
baseCore = BaseCore()
log =baseCore.getLogger()
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
'cache-control': 'max-age=0',
# 'cookie': 'maex=%7B%22v2%22%3A%7B%7D%7D; GUC=AQEBBwFjY49jkEIa8gQo&s=AQAAABw20C7P&g=Y2JIFQ; A1=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A3=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A1S=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc&j=WORLD; PRF=t%3D6954.T%252BTEL%252BSOLB.BR%252BSTM%252BEMR%252BGT%252BAMD%252BSYM.DE%252BPEMEX%252BSGO.PA%252BLRLCF%252BSYNH%252B001040.KS; cmp=t=1669714927&j=0&u=1---',
'sec-ch-ua': '"Chromium";v="106", "Google Chrome";v="106", "Not;A=Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': "Windows",
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}
cnx = baseCore.cnx
cursor = baseCore.cursor
def job_2():
log.info('----开始采集---俄罗斯国家杂志----')
# path = 'D:chrome/chromedriver.exe'
# driverContent = baseCore.buildDriver(path, headless=False)
for i in range(1,139):
if i == 1:
url = 'https://www.whitehouse.gov/?s=Russia'
else:
url = f'https://www.whitehouse.gov/?s=Russia&paged={i}'
        req = requests.get(url, headers=headers)
        soup = BeautifulSoup(req.content, 'html.parser')
        container = soup.find('div', class_='col-md-10 col-lg-6')
web_list = container.find_all('article')
for web in web_list:
title = web.find('h2',class_='entry-title')
newsTitle = title.find('a').text
newsUrl = title.find('a')['href']
date_string = web.find('div',class_='entry-meta shared-meta').find('time').text
#时间格式转化
date_object = datetime.strptime(date_string, "%B %d, %Y")
pub_time = date_object.strftime("%Y-%m-%d")
print(pub_time)
            pdf_name = web.find('div', class_='infoindocumentlist').find_all('div')[0].find('span', class_='info-data').text
            # 下载pdf;pdfUrl 原文未定义,此处假定为文章链接 newsUrl
            pdfUrl = newsUrl
            path = r'D:\美国VS俄罗斯制裁'
            path = os.path.join(path, downPdf.getPath(newsTitle))
            downFile(pdfUrl, path)
selectCountSql = f"select * from usvsrussia where url = '{pdfUrl}' "
cursor.execute(selectCountSql)
url = cursor.fetchone()
if url:
log.info("已采集,跳过")
continue
else:
pass
insertSql = f"insert into usvsrussia (website,url,title,pub_time,state,pdf_name,pdf_path,create_time) values ('总统令文件','{pdfUrl}','{escape_string(pdftitle)}','{pub_time}',0,'{pdf_name}','{path}',now() )"
# log.info(insertSql)
cursor.execute(insertSql)
cnx.commit()
break
\ No newline at end of file
import pandas as pd
import requests
import urllib3
from pyquery import PyQuery as pq
urllib3.disable_warnings()
def getHtml(url):
try:
# proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
response = requests.get(url,verify=False,timeout=10)
html=response.text
except Exception as e:
html=''
return html
def getList():
for i in range(17,139):
print(f'============开始采集第{i}页=========')
url=f'https://www.whitehouse.gov/?s=Russia&paged={i}'
html=getHtml(url)
if html:
pass
else:
for nnn in range(0, 3):
html = getHtml(url)
if html:
break
else:
continue
try:
doc=pq(html)
except:
return
ac=doc('div[class="col-md-10 col-lg-6"]>article')
for ii in range(0,len(ac)):
doc=pq(ac[ii])
title=doc('h2[class="entry-title"]>a').text()
url=doc('h2[class="entry-title"]>a').attr('href')
summary=doc('div[class="post-content"]').text()
try:
time=doc('time').attr('datetime').split('T')[0]
except:
time = ''
type=doc('span[class="tax-links cat-links"]>a').text()
detail={
'title':title,
'url':url,
'time':time,
'type':type,
'summary':summary,
}
getDetail(detail)
print(f'=============第{i}页===第{ii}条采集完成==========')
def getDetail(detail):
detailList=[]
url=detail['url']
html=getHtml(url)
if html:
pass
else:
for nnn in range(0,3):
html = getHtml(url)
if html:
break
else:
continue
try:
doc=pq(html)
except:
return
content=doc('section[class="body-content"]').text()
detail['content'] = content
detailList.append(detail)
writerToExcel(detailList)
return content
# 将数据追加到excel
def writerToExcel(detailList):
# filename='baidu搜索.xlsx'
# 读取已存在的xlsx文件
existing_data = pd.read_excel(filename)
# 创建新的数据
new_data = pd.DataFrame(data=detailList)
# 将新数据添加到现有数据的末尾
    combined_data = pd.concat([existing_data, new_data], ignore_index=True)
# 将结果写入到xlsx文件
combined_data.to_excel(filename, index=False)
from openpyxl import Workbook
if __name__ == '__main__':
# # 创建一个工作簿
filename='./cis.xlsx'
workbook = Workbook()
workbook.save(filename)
getList()
\ No newline at end of file