Commit 9a76d2aa  Author: 丁双波

Merge remote-tracking branch 'origin/master'

@@ -421,6 +421,7 @@ def NQEnterprise():
    nq_social_list = [item[0] for item in nq_result]
    for item in nq_social_list:
        # NEEQ company financials, listing info and key personnel are already collected; company news and announcements are not. The announcement script is ready; news items must be pushed into redis daily.
        # r.rpush('NQEnterprise:nq_Ipo', item)
        r.rpush('NQEnterprise:nq_finance', item)
        # r.rpush('NQEnterprise:nq_notice', item)
@@ -451,11 +452,26 @@ def omeng():

# 单项冠军 (manufacturing single-champion enterprises)
def danxiangguanjun():
    cnx, cursor = connectSql()
    query = "SELECT CompanyName FROM champion"
    cursor.execute(query)
    result = cursor.fetchall()
    cnx.commit()
    com_namelist = [item[0] for item in result]
    for item in com_namelist:
        r.rpush('champion:baseinfo', item)

# 科改示范 (science-reform demonstration enterprises)
def kegaishifan():
    cnx, cursor = connectSql()
    query = "SELECT CompanyName FROM technological"
    cursor.execute(query)
    result = cursor.fetchall()
    cnx.commit()
    com_namelist = [item[0] for item in result]
    for item in com_namelist:
        r.rpush('technological:baseinfo', item)

# 双百企业 (Double Hundred enterprises)
def shuangbaiqiye():
@@ -467,6 +483,8 @@ def zhuangjingtexind():
if __name__ == "__main__":
    start = time.time()
    # danxiangguanjun()
    kegaishifan()
    # NoticeEnterprise()
    # AnnualEnterpriseIPO()
    # AnnualEnterprise()
@@ -477,7 +495,7 @@ if __name__ == "__main__":
    # FBS()
    # MengZhi()
    # NQEnterprise()
    # SEC_CIK()
    # omeng()
    # AnnualEnterpriseUS()
    # NoticeEnterprise_task()
...
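A minimal consumer sketch for the queues the functions above fill (assumes the same redis connection `r` and key names; not part of the commit):

    while True:
        item = r.lpop('champion:baseinfo')
        if item is None:
            break  # queue drained
        com_name = item.decode('utf-8')
        # ... collect the base info for com_name here ...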
@@ -85,7 +85,22 @@ if __name__=='__main__':
        ein = jsonData['ein']  # federal tax ID (EIN)
        address = jsonData['addresses']
        city = address['business']['city']
        try:
            if city:
                business_address = address['business']['street1'] + ',' + city + ' ' + address['business'][
                    'stateOrCountryDescription']
            else:
                business_address = address['business']['stateOrCountryDescription']
        except:
            try:
                business_address = address['business']['street1'] + ',' + city
            except:
                try:
                    business_address = city + ' ' + address['business']['stateOrCountryDescription']
                except:
                    business_address = ''
        # city = address['business']['city']
        # business_address = address['business']['street1'] + ',' + city + ' ' + address['business']['stateOrCountryDescription']
        phone = jsonData['phone']  # phone number
        try:
            formerNames = jsonData['formerNames'][0]['name']  # former name
...
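The nested try/except above guards against missing address fields; a flatter equivalent (hypothetical helper, not in the commit) joins whatever parts are present:

    def build_business_address(business: dict) -> str:
        # join street1, city and state/country, skipping missing fields
        parts = [business.get('street1'), business.get('city'),
                 business.get('stateOrCountryDescription')]
        return ' '.join(p for p in parts if p)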
"""
解析json数据 两个链接:
https://data.sec.gov/api/xbrl/companyfacts/CIK0000320193.json 数据值和gaap字段
https://www.sec.gov/Archives/edgar/data/320193/000032019322000108/MetaLinks.json html字段和gaap字段映射
step1:拼接链接
step2:
"""
import json
import time
import requests
from kafka import KafkaProducer
from operator import itemgetter
from itertools import groupby
from base.BaseCore import BaseCore
# import urllib3
# urllib3.disable_warnings()
baseCore = BaseCore()
log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
def fromcikgetinfo(cik):
    query = f"select * from mgzqyjwyh_list where cik='{cik}' "
    cursor.execute(query)
    data = cursor.fetchone()
    return data
def getRequest(url):
    headers = {
        'Host': 'data.sec.gov',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': '_ga=GA1.2.784424676.1695174651; _4c_=%7B%22_4c_s_%22%3A%22lZFLT4QwFIX%2FyqRrILS0pbAzmBgXajQ%2BlhNpLwOZcUoKDo4T%2Fru3gMbHym5ov55zcjk9kaGGPcmpzARNuVRcxElAtnDsSH4irjH%2BcyA50awsDTUq1ElShZwZCMuKmbASSQUUKsYoIwF5w6w0ZpmIpeBKqTEgul0yTkRbA5hFs4iqKA6rDh39OxKuYty2zppX3a%2F7Y%2BtlA5SrzmzxwsCh0bAeGtPX3s8m%2BUJraDZ1jzhlE22dl0QC90OzN3b47Vvol0%2BkFGnp7NCB9xa1sy%2BwolQitlgEeZocfloHFTg3yfDUNb0ftAMdbexhAVjezMKZPTaemtV9cYf8%2Bhu5LW6uFtT6jv0YO6ufdz4UnyUgF2frh8tz%2F2%2BKc8ZlKqPPpxKUjHPfCJiksRAZldhnvyO5kjz2a5yTp%2FrpTzVXWfZXPbcQ%2Bulh%2Fx%2FrOH4A%22%7D; _ga_300V1CHKH1=GS1.1.1695174651.1.1.1695174684.0.0.0; ak_bmsc=91C6D28D093861656DB8C1FC1972DAB6~000000000000000000000000000000~YAAQlQ8kF2U6orCKAQAAgyl9uxX8kNk3C77pkMi6N6RxnsUqDbYEmIcNjtLSa8W6kfGL9cQMRHBUaYcbEA1+oXsvUwUF80G8hmH/F4S0ZOEnVCrlcBLx219N24l2qmoSKtVDH+VKe7c1bji9MHc7tO2R56R7juZJv9gceAdtKEuArkPfD8ijx/TyEgIrM+XruGtzCRmLnfq86UoJYP+j+tXcaWkc/qm1zHDReDNf/cHd6h2aRMs4lsES8+uh6YTjE7bfCp8h2DNJ2e07pm0ojcI/kdycUPHmuTqWPdTBEjUybad31E1hRNBAE8PbGjy2lvlPY/piuN3HX3Q5ifsmTqCNJzynN2kjGm6i4SHhmEAijUeIzNQXB11GrVmALJVV6pEjd/uu; bm_sv=FD8981426EA388050697DFB615BAFFE3~YAAQ1wcsF5K72ZSKAQAAsvl/uxUw0do3nknGCkllXH27UZBpM7kQUXm4crBNTAkhek5YSDKIrrm2uFWidfpBfyxbRSr+w7FH7Y0w4cXMAa7BELzcc/B9Uf8T6e2I2W29wjurKkBFtSseslHSqYD3BWx9/GidJMW+dFNrlzNUMd1dONUR9J1TDnYifPhE6A/zSLPHVrCTJl7xzg7VlW/05Ay0i+Bo7TynZdWgotfjET3vg2/ZVixVSGaWeQo4~1'
    }
    response = None
    for m in range(0, 3):
        try:
            response = requests.get(url=url, headers=headers, verify=False)
            break
        except Exception as e:
            log.error(f"request请求异常-------{e}")
            continue
    # check the status code; response stays None if every retry failed
    if response is not None and response.status_code == 200:
        jsonData = response.json()
        return jsonData
    else:
        return False
if __name__=='__main__':
    taskType = '财务数据/SEC'
    zcfzb_mapping = {
        'AccountsAndOtherReceivablesNetCurrent': '指标1'
    }
    lrb_mapping = {
    }
    xjllb_mapping = {
    }
    while True:
        start_time = time.time()
        # todo: pull the company cik from redis
        # cik = baseCore.redicPullData('sec_cik_US:uscik')
        cik = '320193'
        # look up the company info in the database by cik
        data = fromcikgetinfo(cik)
        com_name = data[2]
        com_code = data[3]
        exchange = data[4]
        # the cik in the url must be zero-padded to 10 digits (equivalent: url_cik = cik.zfill(10))
        url_cik = cik
        while True:
            if len(url_cik) < 10:
                url_cik = '0' + url_cik
            else:
                break
        url = f'https://data.sec.gov/api/xbrl/companyfacts/CIK{url_cik}.json'
        jsonData = getRequest(url)
        if not jsonData:
            continue
        print(jsonData)
        try:
            us_gaap = jsonData['facts']['us-gaap']
        except:
            continue
        # iterate over the mapping keys
        Listzcfzb = []
        for key in zcfzb_mapping.keys():
            # all years and amounts for one financial indicator
            usd_list = us_gaap[key]['units']['USD']
            # form: 10-K  fp: FY
            for j in usd_list:
                form = j['form']
                fp = j['fp']
                if form == '10-K' and fp == 'FY':
                    pass
                else:
                    continue
                date = j['end']
                if date.endswith('03-31') or date.endswith('06-30') or date.endswith('09-30') or date.endswith('12-31'):
                    pass
                else:
                    continue
                val = j['val']
                zcfzb_dic = {
                    'zbname': key,
                    'riqi': date,
                    'jine': val,
                    'fp': fp,
                    'form': form
                }
                # balance-sheet indicators across all years
                Listzcfzb.append(zcfzb_dic)
        Listzcfzb.sort(key=itemgetter('riqi'))
        groups = groupby(Listzcfzb, key=itemgetter('riqi'))
        # iterate each group and print the grouped result
        for riqi, group in groups:
            print(f"riqi: {riqi}")
            # materialize the group iterator
            listbydate = [item for item in group]
            print()
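itertools.groupby only merges adjacent items, which is why the list is sorted by 'riqi' first; a standalone illustration with the same dict shape:

    from itertools import groupby
    from operator import itemgetter

    rows = [{'riqi': '2022-12-31', 'jine': 1}, {'riqi': '2021-12-31', 'jine': 2},
            {'riqi': '2022-12-31', 'jine': 3}]
    rows.sort(key=itemgetter('riqi'))
    for riqi, group in groupby(rows, key=itemgetter('riqi')):
        print(riqi, [g['jine'] for g in group])  # 2021-12-31 [2] / 2022-12-31 [1, 3]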
"""从html页面中抽取表格"""
import requests
from bs4 import BeautifulSoup
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
def getRequest(url):
    headers = {
        'Referer': 'https://www.sec.gov/ix?doc=/Archives/edgar/data/356037/000035603723000038/cspi-20230630x10q.htm',
        'Sec-Ch-Ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31',
    }
    response = None
    for m in range(0, 3):
        try:
            response = requests.get(url=url, headers=headers, verify=False)
            break
        except Exception as e:
            log.error(f"request请求异常-------{e}")
            continue
    # check the status code; response stays None if every retry failed
    if response is not None and response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup
    else:
        return False
def getzcfztable(soup):
    table_list = soup.find_all('table')
    for table in table_list:
        aa = table.find_all(text='Current assets:')
        if aa:
            # print(table)
            trlist = table.find_all('tr')
            date1 = trlist[1].find_all('td')[1].text.replace('\n', '')
            date2 = trlist[1].find_all('td')[-1].text.replace('\n', '')
            print(date1, date2)
            # todo: drop td cells whose content is empty
            for tr in trlist[2:]:
                filtered_tags = tr(lambda tag: tag.name == 'td' and '$' in tag.text)
                for tag in filtered_tags:
                    tag.extract()
                # filtered_tags2 = tr(lambda tag:tag.name=='td' and tag.text==' ')
                filtered_tags2 = tr(lambda tag: tag.name == 'td' and tag.text == '')
                for tag in filtered_tags2:
                    tag.extract()
                try:
                    zbtag = tr.find_all('td')[0].text.replace('\n', '')
                except:
                    zbtag = ''
                try:
                    cash1 = tr.find_all('td')[1].text.replace('\n', '')
                except:
                    cash1 = ''
                try:
                    cash2 = tr.find_all('td')[2].text.replace('\n', '')
                except:
                    cash2 = ''
                if zbtag != '' and cash1 != '' and cash2 != '':
                    print(f'字段:{zbtag} 值1:{cash1} 值2:{cash2}')
if __name__=='__main__':
    url = 'https://www.sec.gov/Archives/edgar/data/320193/000032019321000105/aapl-20210925.htm'
    soup = getRequest(url)
    # parse the balance-sheet table from the html
    getzcfztable(soup)
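The tr(lambda ...) calls above rely on BeautifulSoup treating a called tag as find_all; a standalone illustration of that idiom (made-up HTML):

    from bs4 import BeautifulSoup

    html = '<table><tr><td>$</td><td>Cash</td><td></td><td>1,234</td></tr></table>'
    tr = BeautifulSoup(html, 'html.parser').tr
    # tag(...) is shorthand for tag.find_all(...)
    for td in tr(lambda tag: tag.name == 'td' and ('$' in tag.text or tag.text == '')):
        td.extract()  # drop '$' marker cells and empty cells, as getzcfztable does
    print([td.text for td in tr('td')])  # ['Cash', '1,234']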
import json
import random
import requests, time, pymysql
import jieba
import sys
@@ -45,24 +47,21 @@ def beinWork(tyc_code, social_code,start_time):
    retData = {'total': 0, 'up_okCount': 0, 'up_errorCount': 0, 'up_repetCount': 0}
    t = time.time()
    url = f'https://capi.tianyancha.com/cloud-yq-news/company/detail/publicmsg/news/webSimple?_={t}&id={tyc_code}&ps={pageSize}&pn=1&emotion=-100&event=-100'
    try:
        for m in range(0, 3):
            ip = baseCore.get_proxy()
            headers['User-Agent'] = baseCore.getRandomUserAgent()
            response = requests.get(url=url, headers=headers, proxies=ip, verify=False)
            time.sleep(random.randint(3, 5))
            break
        if (response.status_code == 200):
            pass
    except Exception as e:
        log.error(f"{tyc_code}-----获取总数接口失败")
        error = '获取总数接口失败'
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{error}----{e}')
        return retData
    try:
        json_1 = json.loads(response.content.decode('utf-8'))
@@ -177,7 +176,7 @@ def beinWork(tyc_code, social_code,start_time):
                pass
                continue
            try:
                insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,publish_time,create_time) values(%s,%s,%s,%s,%s,now())'''
                # news record fields
                up_okCount = up_okCount + 1
                list_info = [
@@ -185,6 +184,7 @@ def beinWork(tyc_code, social_code,start_time):
                    link,
                    '天眼查',
                    '2',
                    time_format
                ]
                cursor_.execute(insert_sql, tuple(list_info))
                cnx_.commit()
@@ -214,10 +214,10 @@ def beinWork(tyc_code, social_code,start_time):
            }
        except Exception as e:
            log.info(f'传输失败:{social_code}----{link}')
            error = '数据库传输失败'
            state = 0
            takeTime = baseCore.getTimeCost(start_time, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, link, f'{error}----{e}')
            continue
        # print(dic_news)
        # send the fields to kafka for storage
...
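beinWork's insert grew a fifth placeholder for publish_time; a small sketch (assuming the same cursor_ object) of keeping the column list and the %s placeholders aligned as the table evolves:

    columns = ['social_credit_code', 'source_address', 'origin', 'type', 'publish_time']
    insert_sql = (f"insert into brpa_source_article({','.join(columns)},create_time) "
                  f"values({','.join(['%s'] * len(columns))},now())")
    # cursor_.execute(insert_sql, (social_code, link, '天眼查', '2', time_format))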
import json
@@ -21,6 +21,7 @@ tracker_conf = get_tracker_conf('./client.conf')
client = Fdfs_client(tracker_conf)

taskType = '企业年报/证监会'
pathType = 'ZJHAnnualReport/'

def RequestUrl(url, payload, item_id, start_time):
    # ip = get_proxy()[random.randint(0, 3)]
@@ -43,26 +44,26 @@ def RequestUrl(url, payload, item_id, start_time):
    return soup

# def tableUpdate(year, name_pdf, type_id, item_id, group_name, path, full_path, category, file_size, order_by, status,
#                 create_by, create_time, page_size):
#
#     sel_sql = '''select item_id from clb_sys_attachment where item_id = %s and year = %s and type_id=1'''
#     cursor_.execute(sel_sql, (item_id, year))
#     selects = cursor_.fetchone()
#     if selects:
#         print(f'{name_pdf},{year}已存在')
#
#     else:
#         Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
#
#         values = (
#             year, name_pdf, type_id, item_id, group_name, path, full_path, category, file_size, order_by, status,
#             create_by,
#             create_time, page_size)
#
#         cursor_.execute(Upsql, values)  # insert
#         cnx.commit()  # commit
#         print("更新完成:{}".format(Upsql))

# collect the info
def SpiderByZJH(url, payload, dic_info, num, start_time):
@@ -121,19 +122,24 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
            cursor_.execute(sel_sql, (item_id, year))
            selects = cursor_.fetchone()
            if selects:
                log.info(f'com_name:{short_name}、{year}已存在')
                continue
            else:
                retData = baseCore.uptoOBS(pdf_url, name_pdf, 1, social_code, pathType, taskType, start_time)
                if retData['state']:
                    pass
                else:
                    log.info(f'====pdf解析失败====')
                    return False
                # insert into the database and get att_id
                num = num + 1
                att_id = baseCore.tableUpdate(retData, short_name, year, name_pdf, num)
                if att_id:
                    pass
                else:
                    return False
                content = retData['content']
                time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                dic_news = {
                    'attachmentIds': att_id,
@@ -169,7 +175,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
                    'message': '操作成功',
                    'code': '200',
                }
                log.info(dic_result)
                return True
            except Exception as e:
                dic_result = {
@@ -181,7 +187,7 @@ def SpiderByZJH(url, payload, dic_info, num, start_time):
                state = 0
                takeTime = baseCore.getTimeCost(start_time, time.time())
                baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, 'Kafka操作失败')
                log.info(dic_result)
                return False
        else:
            continue
@@ -311,7 +317,8 @@ if __name__ == '__main__':
            time.sleep(20)
            continue
        dic_info = baseCore.getInfomation(social_code)
        count = dic_info[16]
        log.info(f'====正在采集{social_code}=====')
        # SSE http://eid.csrc.gov.cn/101111/index.html  SZSE http://eid.csrc.gov.cn/101811/index.html  BSE http://eid.csrc.gov.cn/102611/index.html
        # url pattern for paging within a column: http://eid.csrc.gov.cn/101811/index_3_f.html
        url_parms = ['101111', '101811', '102611']
@@ -322,7 +329,7 @@ if __name__ == '__main__':
            dic_parms = getUrl(code, url_parms, Catagory2_parms)
            SpiderByZJH(dic_parms["url"], dic_parms["payload"], dic_info, num, start_time)
            end_time = time.time()
            log.info(f'{dic_info[4]} ---- 该企业耗时 ---- {end_time - start_time}')
            count += 1
            runType = 'AnnualReportCount'
            baseCore.updateRun(social_code, runType, count)
...
# -*- coding: utf-8 -*-
@@ -152,24 +152,23 @@ def spider_annual_report(dict_info,num):
        cursor.execute(sel_sql, (social_code, int(year)))
        selects = cursor.fetchone()
        if selects:
            log.info(f'com_name:{com_name}、{year}已存在')
            continue
        else:
            # upload the file to the obs server
            retData = baseCore.uptoOBS(pdf_url, name_pdf, 1, social_code, pathType, taskType, start_time)
            if retData['state']:
                pass
            else:
                log.info(f'====pdf解析失败====')
                return False
            num = num + 1
            try:
                att_id = baseCore.tableUpdate(retData, com_name, year, name_pdf, num)
                content = retData['content']
                state = 1
                takeTime = baseCore.getTimeCost(start_time, time.time())
                baseCore.recordLog(social_code, taskType, state, takeTime, year_url, '成功')
            except:
                exception = '数据库传输失败'
                state = 0
@@ -236,6 +235,7 @@ def spider_annual_report(dict_info,num):

if __name__ == '__main__':
    num = 0
    taskType = '企业年报/雪球网'
    pathType = 'XQWAnnualReport/'
    while True:
        start_time = time.time()
        # fetch the company info
...
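Both the ZJH and Xueqiu spiders now route PDFs through baseCore.uptoOBS and branch on the returned dict; the expected contract, reduced to a sketch (field names inferred from the calls above):

    retData = baseCore.uptoOBS(pdf_url, name_pdf, 1, social_code, pathType, taskType, start_time)
    if not retData['state']:          # upload or PDF parsing failed
        log.info('====pdf解析失败====')
    else:
        content = retData['content']  # extracted text, later sent to kafka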
@@ -14,6 +14,12 @@ def conn11():
    cursor = conn.cursor()
    return conn,cursor

def conn144():
    conn = pymysql.Connect(host='114.115.159.144', port=3306, user='caiji', passwd='zzsn9988', db='caiji',
                           charset='utf8')
    cursor = conn.cursor()
    return conn,cursor

# company announcements
def shizhiCodeFromSql():
    conn,cursor=conn11()
@@ -31,6 +37,7 @@ def shizhiCodeFromSql():
    finally:
        cursor.close()
        conn.close()

# company announcements
def yahooCodeFromSql():
    conn,cursor=conn11()
@@ -49,6 +56,25 @@ def yahooCodeFromSql():
        cursor.close()
        conn.close()

# stock codes of NYSE-listed companies for Sina
def sinausstockCodeFromSql():
    conn,cursor=conn144()
    try:
        gn_query = "select ticker from mgzqyjwyh_list where state=2 and exchange='NYSE'; "
        cursor.execute(gn_query)
        gn_result = cursor.fetchall()
        gn_social_list = [item[0] for item in gn_result]
        print('sinausstockCodeFromSql开始将股票代码放入redis=======')
        for item in gn_social_list:
            r.rpush('sina_usstock:securities_code', item)
        print('sinausstockCodeFromSql将股票代码放入redis结束')
    except Exception as e:
        log.info("数据查询异常")
    finally:
        cursor.close()
        conn.close()

def yahooCode_task():
    # instantiate a scheduler
    scheduler = BlockingScheduler()
@@ -58,9 +84,12 @@ def yahooCode_task():
    scheduler.add_job(yahooCodeFromSql, 'cron', day='*/3', hour=0, minute=0)
    # once a day
    scheduler.add_job(shizhiCodeFromSql, 'cron', hour=10, minute=0)
    # every three days, same cadence as yahooCodeFromSql
    scheduler.add_job(sinausstockCodeFromSql, 'cron', day='*/3', hour=0, minute=0)
    try:
        # yahooCodeFromSql()  # run once before the schedule starts
        # shizhiCodeFromSql()  # run once before the schedule starts
        sinausstockCodeFromSql()  # run once before the schedule starts
        scheduler.start()
    except Exception as e:
        print('定时采集异常', e)
...
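For reference, day='*/3' in an APScheduler cron trigger fires on days 1, 4, 7, ... of each month at the given time; a standalone sketch of the same schedule (assumes apscheduler is installed):

    from apscheduler.schedulers.blocking import BlockingScheduler

    scheduler = BlockingScheduler()
    # same cadence as sinausstockCodeFromSql above
    scheduler.add_job(lambda: print('tick'), 'cron', day='*/3', hour=0, minute=0)
    # scheduler.start()  # commented out so the sketch has no side effects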
# -*- coding: utf-8 -*-
@@ -373,6 +373,28 @@ class YahooCaiwu(object):
            currency=''
        return currency

    # trigger the comparative-indicator calculation
    def calculateIndexReq(self):
        get_url = 'http://114.115.236.206:8088/sync/calculateIndex'
        try:
            params={
                'type':2
            }
            resp = requests.get(get_url,params=params)
            print(resp.text)
            text=json.loads(resp.text)
            codee=text['code']
            # if the service answers -200, wait 10 minutes and retry once, then give up
            while codee==-200:
                time.sleep(600)
                resp = requests.get(get_url)
                print(resp.text)
                text=json.loads(resp.text)
                codee=text['code']
                if codee==-200:
                    break
            print('调用接口成功!!')
        except:
            print('调用失败!')

if __name__ == '__main__':
    # parse_excel()
    # get_content1()
@@ -383,8 +405,11 @@ if __name__ == '__main__':
        securitiescode=yahoo.getCodeFromRedis()
        yahoo.get_content2(securitiescode)
    except Exception as e:
        yahoo.calculateIndexReq()
        if securitiescode:
            yahoo.r.rpush('NoticeEnterprise:securities_code',securitiescode)
        else:
            time.sleep(300)
            print('没有数据暂停5分钟')
import configparser
@@ -20,6 +20,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from operator import itemgetter
from itertools import groupby
import datetime
from decimal import Decimal

class SinaUsstock(object):
@@ -54,13 +55,19 @@ class SinaUsstock(object):
            seriesValue=tddoc.find('td').text().split(' ')
            for i in range(0,len(pdate)):
                value=seriesValue[i]
                try:
                    if '亿' in value:
                        value = value.replace("亿", "").replace(",", "")
                        value = Decimal(value) * Decimal('100000000')
                        # value = eval(value)
                    elif '万' in value:
                        value = value.replace("万", "").replace(",", "")
                        value = Decimal(value) * Decimal('10000')
                        # value = eval(value)
                except Exception as e:
                    print(e)
                    print(value)
                vvla = str(value).replace(",", "")
                serisemsg={
                    'name':seriesName,
                    'value':vvla,
@@ -71,6 +78,31 @@ class SinaUsstock(object):
        return seriesList

    # check whether the stock code already exists
    def check_code(self,com_code):
        r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
        res = r.exists('com_sinacaiwushuju_code::'+com_code)
        # if the key exists, the company has been collected before (res = 1)
        if res:
            return False  # not the first collection
        else:
            return True  # the first collection

    def check_date(self,com_code,info_date):
        r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
        res = r.sismember('com_sinacaiwushuju_code::'+com_code, info_date)  # note: stored as a redis set
        if res:
            return True
        else:
            return False

    # save the collected reporting periods of a stock code into redis
    def add_date(self,com_code,date_list):
        r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
        # iterate date_list and add each period to redis
        for date in date_list:
            res = r.sadd('com_sinacaiwushuju_code::'+com_code,date)

    def getCodeFromRedis(self):
        securitiescode=self.r.lpop('sina_usstock:securities_code')
        securitiescode = securitiescode.decode('utf-8')
@@ -209,7 +241,7 @@ class SinaUsstock(object):

            # convert the data format and send it to the interface
            annualzb=zbl1+zbl3+zbl5
            annualzb=self.groupZbData(annualzb,stock,social_credit_code,'year')
            self.sendToFinance(annualzb)
            quarterzb=zbl2+zbl4+zbl6
            quarterzb=self.groupZbData(quarterzb,stock,social_credit_code,'quarter')
@@ -228,15 +260,26 @@ class SinaUsstock(object):

    def sendToFinance(self,zbmsg):
        for zbb in zbmsg:
            com_code=zbb['securitiesCode']
            com_date=zbb['date']
            # check whether this stock code has been collected before
            if self.check_code(com_code):
                zbb['ynFirst']=True
            if len(zbb) != 0:
                # call the Kaige interface to store the data
                data = json.dumps(zbb)
                # no interface yet
                url_baocun = 'http://114.115.236.206:8088/sync/finance/sina'
                # url_baocun = 'http://114.115.236.206:8088/sync/finance/df'
                for nnn in range(0, 3):
                    try:
                        res_baocun = requests.post(url_baocun, data=data)
                        # record the collected stock code and reporting date, marking it as collected
                        com_date_list=[]
                        com_date_list.append(com_date)
                        self.add_date(com_code,com_date_list)
                        self.logger.info(res_baocun.text)
                        break
                    except:
@@ -309,7 +352,7 @@ class SinaUsstock(object):

if __name__ == '__main__':
    sinaUsstock=SinaUsstock()
    # securitiescode= sinaUsstock.r.lpop('sina_usstock:securities_code')
    # securitiescode= sinaUsstock.getCodeFromRedis()
    securitiescode='AAPL'
    try:
        sinaUsstock.get_content2(securitiescode)
...
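The commit swaps eval-based scaling for Decimal arithmetic; a quick standalone comparison of the two approaches:

    from decimal import Decimal

    raw = '3.5亿'.replace('亿', '').replace(',', '')
    print(eval(raw + '*100000000'))             # 350000000.0 as a float (and eval executes arbitrary input)
    print(Decimal(raw) * Decimal('100000000'))  # 350000000.0 as an exact Decimal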
""" """
...@@ -176,7 +176,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -176,7 +176,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
dic_info_zcfzb = { dic_info_zcfzb = {
"name": info_name, "name": info_name,
'enName': info_name_en, 'enName': info_name_en,
"value": info_data "value": info_data,
"unit": "元"
} }
list_zcfzb.append(dic_info_zcfzb) list_zcfzb.append(dic_info_zcfzb)
...@@ -202,7 +203,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -202,7 +203,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
dic_info_lrb = { dic_info_lrb = {
"name": info_name, "name": info_name,
'enName': info_name_en, 'enName': info_name_en,
"value": info_data "value": info_data,
"unit": "元"
} }
list_lrb.append(dic_info_lrb) list_lrb.append(dic_info_lrb)
...@@ -228,7 +230,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -228,7 +230,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
dic_info_xjllb = { dic_info_xjllb = {
"name": info_name, "name": info_name,
'enName': info_name_en, 'enName': info_name_en,
"value": info_data "value": info_data,
"unit": "元"
} }
list_xjllb.append(dic_info_xjllb) list_xjllb.append(dic_info_xjllb)
...@@ -356,7 +359,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -356,7 +359,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
dic_info_zcfzb = { dic_info_zcfzb = {
"name": info_name, "name": info_name,
'enName': info_name_en, 'enName': info_name_en,
"value": info_data "value": info_data,
"unit": '元'
} }
list_zcfzb.append(dic_info_zcfzb) list_zcfzb.append(dic_info_zcfzb)
...@@ -382,7 +386,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -382,7 +386,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
dic_info_lrb = { dic_info_lrb = {
"name": info_name, "name": info_name,
'enName': info_name_en, 'enName': info_name_en,
"value": info_data "value": info_data,
'unit': '元'
} }
list_lrb.append(dic_info_lrb) list_lrb.append(dic_info_lrb)
...@@ -408,7 +413,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -408,7 +413,8 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
dic_info_xjllb = { dic_info_xjllb = {
"name": info_name, "name": info_name,
'enName': info_name_en, 'enName': info_name_en,
"value": info_data "value": info_data,
'unit':'元'
} }
list_xjllb.append(dic_info_xjllb) list_xjllb.append(dic_info_xjllb)
......
@@ -8,10 +8,8 @@ import pymysql
import redis
import requests
from bs4 import BeautifulSoup
from requests.packages import urllib3
from retry import retry
from base import BaseCore

urllib3.disable_warnings()
@@ -20,10 +18,7 @@ log = baseCore.getLogger()
cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
                      charset='utf8mb4')
cursor = cnx.cursor()
r = baseCore.r

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}
@@ -86,7 +81,7 @@ def getUnit(gpdm):
    req.encoding = req.apparent_encoding
    soup = BeautifulSoup(req.text, 'lxml')
    unit = soup.find('div', class_='financials__note').text.split(' ')[1].lstrip().strip()
    unit = f'{unit}(千)'
    req.close()
    return unit
@@ -104,9 +99,11 @@ def getlist(table, tableName):
            value = re.sub(r"[^\d+-]", "", value)
        else:
            value = '-'
        date_ = years[f'value{i}']
        if date_:
            date = date_.split('/')[2] + '-' + date_.split('/')[0] + '-' + \
                   date_.split('/')[1]
            list.append({f'{tableName}': name, 'value': value, 'date': date, })
    return list
@@ -136,13 +133,12 @@ def reviseData(lists, unit, tableName):

# fetch annual financial data
def getYear(start_time, social_code, gpdm):
    ynFirst = check_code(social_code)
    date_list = []
    url = f'https://api.nasdaq.com/api/company/{gpdm}/financials?frequency=1'
    try:
        req = requests.get(url, headers=headers, verify=False)
        data = req.json()['data']
        if data:
            unit = getUnit(gpdm)
@@ -162,6 +158,7 @@ def getYear(start_time, social_code, gpdm):
                # check whether this reporting period was already collected
                panduan = check_date(social_code, date + '-year')
                if panduan:
                    log.info(f'{social_code}=={gpdm}=={date}年度数据采集过')
                    continue
                xjll_list_f = reviseData([item for item in final_list if 'xjll' in item], unit, 'xjll')
                zcfz_list_f = reviseData([item for item in final_list if 'zcfz' in item], unit, 'zcfz')
@@ -177,13 +174,15 @@ def getYear(start_time, social_code, gpdm):
                    "ynFirst": ynFirst,
                }
                sendData(start_time, social_code, gpdm, dic_info)
                log.info(f'{social_code}=={gpdm}=={date}年度财务数据采集成功')
                date_list.append(date + '-year')
        else:
            log.error(f'找不到{social_code}=={gpdm}年度财务数据')
            state = 0
            takeTime = baseCore.getTimeCost(start_time, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===无年度财务数据')
    except Exception as e:
        r.rpush('FinanceFromNasdaq:nasdaqfinance_socialCode', social_code)
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===年度财务数据访问失败')
@@ -192,13 +191,12 @@ def getYear(start_time, social_code, gpdm):

# fetch quarterly financial data; when a quarter's date coincides with an annual date, the dateFlag field must be set to year
def getQuarter(start_time, social_code, gpdm):
    ynFirst = check_code(social_code)
    date_list = []
    url = f'https://api.nasdaq.com/api/company/{gpdm}/financials?frequency=2'
    try:
        req = requests.get(url, headers=headers, verify=False, timeout=60)
        data = req.json()['data']
        if data:
            unit = getUnit(gpdm)
@@ -217,6 +215,7 @@ def getQuarter(start_time, social_code, gpdm):
                # check whether this reporting period was already collected
                panduan = check_date(social_code, date + '-quarter')
                if panduan:
                    log.info(f'{social_code}=={gpdm}=={date}季度数据采集过')
                    continue
                xjll_list_f = reviseData([item for item in final_list if 'xjll' in item], unit, 'xjll')
                zcfz_list_f = reviseData([item for item in final_list if 'zcfz' in item], unit, 'zcfz')
@@ -236,13 +235,16 @@ def getQuarter(start_time, social_code, gpdm):
                if panduan_flag:
                    dic_info['dateFlag'] = 'year'
                sendData(start_time, social_code, gpdm, dic_info)
                log.info(f'{social_code}=={gpdm}=={date}季度财务数据采集成功')
                date_list.append(date + '-quarter')
        else:
            log.error(f'{social_code}=={gpdm}无季度财务数据')
            state = 0
            takeTime = baseCore.getTimeCost(start_time, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===无季度财务数据')
    except Exception as e:
        r.rpush('FinanceFromNasdaq:nasdaqfinance_socialCode', social_code)
        log.error(f'{social_code}=={gpdm}===季度财务数据访问失败')
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===季度财务数据访问失败')
@@ -250,36 +252,55 @@ def getQuarter(start_time, social_code, gpdm):
    return date_list
# push credit codes into redis
def FinanceFromNasdaq():
    sql = "select xydm from mgzqyjwyh_list where state=2 and exchange='Nasdaq';"
    cursor.execute(sql)
    finance = cursor.fetchall()
    finance_list = [item[0] for item in finance]
    for item in finance_list:
        r.rpush('FinanceFromNasdaq:nasdaqfinance_socialCode', item)
    print('redis放入成功')

def getInfomation(social_code):
    sql = f"select * from mgzqyjwyh_list where state=2 and xydm='{social_code}';"
    cursor.execute(sql)
    data = cursor.fetchone()
    return data

def doJob():
    while True:
        social_code = baseCore.redicPullData('FinanceFromNasdaq:nasdaqfinance_socialCode')
        if not social_code or social_code == None:
            log.info('============已没有数据============等待===============')
            time.sleep(600)
            continue
        data_enterprise = getInfomation(social_code)
        start_time = time.time()
        gpdm = data_enterprise[3]
        social_code = data_enterprise[6]
        # print(gpdm, social_code)
        # collect annual data
        date_list_year = getYear(start_time, social_code, gpdm)
        # save the annual reporting periods to redis
        add_date(social_code, date_list_year)
        # collect quarterly data
        date_list_quarter = getQuarter(start_time, social_code, gpdm)
        # save the quarterly reporting periods to redis
        add_date(social_code, date_list_quarter)
        timeCost = baseCore.getTimeCost(start_time, time.time())
        state = 1
        baseCore.recordLog(social_code, taskType, state, timeCost, '', '')
        log.info(f'{social_code}=={gpdm}==耗时{timeCost}')
        time.sleep(2)

if __name__ == '__main__':
    # financial data collection
    doJob()
    # push the company stock codes into redis
    # FinanceFromNasdaq()
    cursor.close()
    cnx.close()
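The reworked doJob drains a redis queue, and getYear/getQuarter push the code back on failure; the core of that requeue-on-failure pattern (process() is a hypothetical stand-in for the real work):

    social_code = baseCore.redicPullData('FinanceFromNasdaq:nasdaqfinance_socialCode')
    try:
        process(social_code)  # hypothetical work function
    except Exception:
        r.rpush('FinanceFromNasdaq:nasdaqfinance_socialCode', social_code)  # retry later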
import datetime
import json
import time
import requests
from kafka import KafkaProducer
from retry import retry
from bs4 import BeautifulSoup
from requests.packages import urllib3
from base import BaseCore
urllib3.disable_warnings()
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'Accept': 'application/json, text/plain, */*',
}
baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
r = baseCore.r
taskType = '纳斯达克/企业动态'
# fetch basic company information
def getInfomation(social_code):
sql = f"select * from mgzqyjwyh_list where state=2 and xydm='{social_code}';"
cursor.execute(sql)
data = cursor.fetchone()
return data
# convert publication time strings to YYYY-MM-DD
def conversionTime(time):
try:
date_obj = datetime.datetime.strptime(time, "%B %d, %Y")
except:
date_obj = datetime.datetime.strptime(time, "%b%d,%Y")
pub_time = date_obj.strftime("%Y-%m-%d")
return pub_time
# get the total number of article records
@retry(tries=3, delay=1)
def getTotal(gpdm):
url = f'https://api.nasdaq.com/api/news/topic/articlebysymbol?q={gpdm}|stocks&offset=0&limit=100&fallback=false'
req = requests.get(url, headers=headers, verify=False)
req.encoding = req.apparent_encoding
total = req.json()['data']['totalrecords']
req.close()
return total
# fetch the article list
@retry(tries=3, delay=1)
def getDataList(gpdm, offest, social_code):
data_list = []
url = f'https://api.nasdaq.com/api/news/topic/articlebysymbol?q={gpdm}|stocks&offset={offest}&limit=100&fallback=false'
# print(url)
req = requests.get(url, headers=headers, verify=False)
req.encoding = req.apparent_encoding
datas = req.json()['data']['rows']
if datas != []:
for data in datas:
title = data['title']
author = data['publisher']
url = data['url']
if 'http' not in url:
url = 'https://www.nasdaq.com' + url
data_list.append([url, title, author, social_code])
req.close()
return data_list
@retry(tries=3, delay=1)
def getsoup(url):
req = requests.get(url, headers=headers, verify=False)
# req = session.get(url)
req.encoding = req.apparent_encoding
soup = BeautifulSoup(req.text, 'lxml')
return soup
# parse page layout A
def getDicA(data, soup):
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
url = data[0]
pub_time = soup.find('p', class_='jupiter22-c-author-byline__timestamp').text.split('—')[0].lstrip().strip()
pub_time = conversionTime(pub_time)
contentWithTag = soup.find('div', class_='nsdq-l-grid__item syndicated-article-body')
try:
contentWithTag.find('div', class_='jupiter22-c-tags jupiter22-c-tags-default').decompose()
except:
pass
try:
contentWithTag.find('div', class_='taboola-placeholder').decompose()
except:
pass
try:
divs_del = contentWithTag.find_all('div', class_='ads__inline')
for div_del in divs_del:
div_del.decompose()
except:
pass
try:
divs_del = contentWithTag.find_all('script')
for div_del in divs_del:
div_del.decompose()
except:
pass
content = contentWithTag.text
dic_news = {
'attachmentIds': '',
'author': data[2],
'content': content,
'contentWithTag': str(contentWithTag),
'createDate': time_now,
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'en',
'origin': '纳斯达克',
'publishDate': pub_time,
'sid': '1684032033495392257',
'sourceAddress': url,  # link to the original article
'summary': '',
'title': data[1],
'type': 2,
'socialCreditCode': data[3],
'year': pub_time[:4]
}
return dic_news
# parse page layout B
def getDicB(data, soup):
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
url = data[0]
pub_time = soup.find('div', class_='timestamp').find('time').text
pub_time = pub_time.split(' ')[0] + pub_time.split(' ')[1] + pub_time.split(' ')[2]
pub_time = conversionTime(pub_time)
contentWithTag = soup.find('div', class_='body__content')
try:
divs_del = contentWithTag.find_all('div', class_='ads__inline')
for div_del in divs_del:
div_del.decompose()
except:
pass
try:
divs_del = contentWithTag.find_all('script')
for div_del in divs_del:
div_del.decompose()
except:
pass
content = contentWithTag.text
imgs = contentWithTag.find_all('img')
for img in imgs:
src = img.get('src')
src_ = 'https://www.nasdaq.com' + src
contentWithTag = str(contentWithTag).replace(src, src_)
dic_news = {
'attachmentIds': '',
'author': data[2],
'content': content,
'contentWithTag': str(contentWithTag),
'createDate': time_now,
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'en',
'origin': '纳斯达克',
'publishDate': pub_time,
'sid': '1684032033495392257',
'sourceAddress': url,  # link to the original article
'summary': '',
'title': data[1],
'type': 2,
'socialCreditCode': data[3],
'year': pub_time[:4]
}
return dic_news
# send the data to Kafka
@retry(tries=3, delay=1)
def sendKafka(dic_news, start_time):
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
kafka_result = producer.send("researchReportTopic",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
print(kafka_result.get(timeout=10))
dic_result = {
'success': 'true',
'message': '操作成功',
'code': '200',
}
log.info(dic_result)
# sent successfully; record it in the log
state = 1
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(dic_news['socialCreditCode'], taskType, state, takeTime, dic_news['sourceAddress'], '')
# save to the database for deduplication
@retry(tries=3, delay=1)
def insertMysql(social_code, link):
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
# article record fields
list_info = [
social_code,
link,
'纳斯达克',
'2',
]
cursor.execute(insert_sql, tuple(list_info))
cnx.commit()
# check whether an article was already collected
@retry(tries=3, delay=1)
def selectUrl(url, social_code):
sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s and type='2' '''
cursor.execute(sel_sql, (url, social_code))
selects = cursor.fetchone()
return selects
def doJob():
while True:
social_code = ''  # todo: pull the social code from redis
data_enterprise = getInfomation(social_code)
gpdm = data_enterprise[3]
social_code = data_enterprise[6]
# gpdm = 'GOOGL'
# social_code = 'ZZSN22080900000013'
start_time = time.time()
try:
total = getTotal(gpdm)
except:
log.error(f'{social_code}==={gpdm}===获取总数失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'{social_code}==={gpdm}===获取总数失败')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
continue
for offest in range(0, total + 1, 100):
try:
data_list = getDataList(gpdm, offest, social_code)
except:
log.error(f'{social_code}==={gpdm}===获取信息列表失败({offest}~{offest + 100}条)')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '',
f'{social_code}==={gpdm}===获取信息列表失败({offest}~{offest + 100}条)')
continue
# the API only returns the first 10,000 records
if data_list != []:
for data in data_list:
start_time = time.time()
url = data[0]
selects = selectUrl(url, social_code)
if selects:
log.info(f'{url}===已采集过')
# full crawl: skip the already-collected article and keep going
continue
# incremental crawl: stop at the first already-collected article instead
# break
try:
soup = getsoup(url)
try:
try:
dic_info = getDicA(data, soup)
except:
dic_info = getDicB(data, soup)
except:
log.error(f'{url}===正文解析失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'{url}===正文解析失败')
continue
try:
sendKafka(dic_info, start_time)
try:
insertMysql(social_code, url)
except:
log.error(f'{url}===数据入库失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'{url}===数据入库失败')
except Exception as e:
print(e)
log.error(f'{url}===发送kafka失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'{url}===发送kafka失败')
time.sleep(1)
except:
log.error(f'{url}===页面访问失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'{url}===页面访问失败')
break
else:
break
break
if __name__ == "__main__":
doJob()
baseCore.close()
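The article loop above pages through the API 100 records at a time; a reduced sketch of that offset pagination, with the 10,000 cap taken from the comment in doJob:

    total = getTotal('AAPL')  # illustrative ticker
    for offset in range(0, min(total, 10000) + 1, 100):
        data_list = getDataList('AAPL', offset, 'ZZSN22080900000013')
        # ... parse and send each article as doJob does ...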
# core utility package
import os
import random
import socket
import sys
import time
import fitz
import logbook
import logbook.more
import pandas as pd
import requests
import zhconv
import pymysql
import redis
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from openpyxl import Workbook
import langid
# create a connection pool
import pymysql
from pymysql import connections
from DBUtils.PooledDB import PooledDB
# import sys
# sys.path.append('D://zzsn_spider//base//fdfs_client')
from fdfs_client.client import get_tracker_conf, Fdfs_client
tracker_conf = get_tracker_conf('D:\\kkwork\\zzsn_spider\\base\\client.conf')
client = Fdfs_client(tracker_conf)
# note: call BaseCore.close() before the program exits to release resources
class BaseCore:
# sequence number
__seq = 0
# proxy-pool database connection
# __cnx_proxy =None
# __cursor_proxy = None
cnx = None
cursor = None
cnx_ = None
cursor_ = None
r = None
# user-agent pool
__USER_AGENT_LIST = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/531.4 (KHTML, like Gecko) Chrome/3.0.194.0 Safari/531.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.50 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Lunascape 5.0 alpha2)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.7 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon;',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.169.0 Safari/530.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040614 Firefox/0.9',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.500.0 Safari/534.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; TencentTraveler)',
'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.4 (KHTML, like Gecko) Chrome/6.0.481.0 Safari/534.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.370.0 Safari/533.4',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.31 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like Firefox/3.x) SeaMonkey/2.0.12',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE) Chrome/4.0.223.3 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.42 Safari/525.19',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.3 (KHTML, like Gecko) Chrome/4.0.227.0 Safari/532.3',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.8',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.463.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.9 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.9',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5',
'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.4 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.0 (KHTML, like Gecko) Chrome/2.0.182.0 Safari/531.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.9 (KHTML, like Gecko) Chrome/7.0.531.0 Safari/534.9',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.811.0 Safari/535.1',
        'Mozilla/5.0 (Windows; U; Windows NT 5.0; de-DE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.127 Safari/533.4',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; zh-cn) Opera 8.50',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/7.0.0 Safari/700.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20041107 Firefox/0.9.2 StumbleUpon/1.994',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.7.5) Gecko/20041110 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.5',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.168.0 Safari/530.1',
'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20040913 Firefox/0.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.8 (KHTML, like Gecko) Chrome/2.0.177.1 Safari/530.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.40 Safari/530.5',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.24 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.2 Safari/528.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; T312461)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.461.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; rv:1.7.3) Gecko/20041001 Firefox/0.10.1',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; de-DE) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.2 Safari/531.3',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.548.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10 ChromePlus/1.5.2.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.0 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.2 Safari/533.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.4 Safari/532.1',
'Mozilla/5.0 (Windows NT 6.0; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.0 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; sv-SE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.7.5) Gecko/20041122 Firefox/1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; uZardWeb/1.0; Server_JP)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; HCI0449; .NET CLR 1.0.3705)',
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt); Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1);',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.23 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.176.0 Safari/530.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; InfoPath.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.55 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0a1) Gecko/20110623 Firefox/7.0a1 Fennec/7.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.724.100 Safari/534.30',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; WOW64; SV1; uZardWeb/1.0; Server_HK)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3',
'Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.2 (KHTML, like Gecko) Chrome/3.0.191.3 Safari/531.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.39 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.38 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8b) Gecko/20050118 Firefox/1.0+',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040707 Firefox/0.9.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.4 (KHTML, like Gecko) Chrome/2.0.171.0 Safari/530.4',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; nl-NL; rv:1.7.5) Gecko/20041202 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.6 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/1.0.156.0 Safari/528.8',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.43 Safari/534.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13',
'Mozilla/5.0 (ipad Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.498.0 Safari/534.6',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-CA) AppleWebKit/534.13 (KHTML like Gecko) Chrome/9.0.597.98 Safari/534.13',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.3.154.6 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.599.0 Safari/534.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.8 (KHTML, like Gecko) Chrome/7.0.521.0 Safari/534.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1b2pre) Gecko/20081015 Fennec/1.0a1',
'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'
]
    # Android UA pool
__USER_PHONE_AGENT_LIST = ['Mozilla/5.0 (Linux; Android 7.1.1; OPPO R9sk) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36']
def __init__(self):
# self.__cnx_proxy = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='clb_project',
# charset='utf8mb4')
# self.__cursor_proxy = self.__cnx_proxy.cursor()
self.cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
charset='utf8mb4')
self.cursor = self.cnx.cursor()
        # clb_project database on 114.116.44.11
self.cnx_ = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='clb_project',
charset='utf8mb4')
self.cursor_ = self.cnx_.cursor()
        # Connect to Redis
self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
self.pool_caiji = PooledDB(
creator=pymysql,
maxconnections=5,
mincached=2,
maxcached=5,
blocking=True,
host='114.115.159.144',
port=3306,
user='caiji',
password='zzsn9988',
database='caiji',
charset='utf8mb4'
)
def close(self):
try:
self.cursor.close()
self.cnx.close()
except :
pass
    # Format the elapsed time between start and end
def getTimeCost(self,start, end):
seconds = int(end - start)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
if (h > 0):
return "%d小时%d分钟%d秒" % (h, m, s)
elif (m > 0):
return "%d分钟%d秒" % (m, s)
elif (seconds > 0):
return "%d秒" % (s)
else:
ms = int((end - start) * 1000)
return "%d毫秒" % (ms)
    # Format the current time
    # 1 : 2001-01-01 12:00:00   %Y-%m-%d %H:%M:%S
    # 2 : 010101120000          %y%m%d%H%M%S
    # 3 : millisecond timestamp, e.g. 1690179526555
def getNowTime(self, type):
now_time = ""
if type == 1:
now_time = time.strftime("%Y-%m-%d %H:%M:%S")
if type == 2:
now_time = time.strftime("%y%m%d%H%M%S")
if type == 3:
now_time = int(time.time() * 1000)
return now_time
    # Get the next sequence number (timestamp + zero-padded counter)
def getNextSeq(self):
self.__seq += 1
if self.__seq > 1000:
self.__seq = 0
return self.getNowTime(2) + str(self.__seq).zfill(3)
    # Generate a placeholder credit code (ZZSN + timestamp + counter)
def getNextXydm(self):
self.__seq += 1
if self.__seq > 1000:
self.__seq = 0
return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
    # Log line format
    def logFormate(self, record, handler):
        formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
            date=record.time,  # log time
            level=record.level_name,  # log level
            filename=os.path.split(record.filename)[-1],  # file name
            func_name=record.func_name,  # function name
            lineno=record.lineno,  # line number
            msg=record.message  # log message
        )
        return formate
    # Build a logbook logger (file and/or stderr handlers)
def getLogger(self,fileLogFlag=True, stdOutFlag=True):
dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
dirname = os.path.join(dirname, "logs")
filename = filename.replace(".py", "") + ".log"
if not os.path.exists(dirname):
os.mkdir(dirname)
logbook.set_datetime_format('local')
logger = logbook.Logger(filename)
logger.handlers = []
        if fileLogFlag:  # write logs to a rotating file
logFile = logbook.TimedRotatingFileHandler(os.path.join(dirname, filename), date_format='%Y-%m-%d',
bubble=True, encoding='utf-8')
logFile.formatter = self.logFormate
logger.handlers.append(logFile)
        if stdOutFlag:  # also print logs to the screen
logStd = logbook.more.ColorizedStderrHandler(bubble=True)
logStd.formatter = self.logFormate
logger.handlers.append(logStd)
return logger
    # Pick a random User-Agent
def getRandomUserAgent(self):
return random.choice(self.__USER_AGENT_LIST)
    # Fetch a random proxy from the clb_proxy table
def get_proxy(self):
sql = "select proxy from clb_proxy"
self.cursor.execute(sql)
proxy_lists = self.cursor.fetchall()
        ip_list = []
        for proxy_ in proxy_lists:
            # each row is a 1-tuple like ('host-port',)
            ip_list.append(proxy_[0])
        proxy_list = []
        for str_ip in ip_list:
            str_ip_list = str_ip.split('-')
            proxyMeta = "http://%(host)s:%(port)s" % {
                "host": str_ip_list[0],
                "port": str_ip_list[1],
            }
            proxy = {
                "HTTP": proxyMeta,
                "HTTPS": proxyMeta
            }
            proxy_list.append(proxy)
        # random.choice avoids an IndexError when fewer than four proxies exist
        return random.choice(proxy_list)
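    # get_proxy returns {'HTTP': 'http://host:port', 'HTTPS': 'http://host:port'}.
    # Note: requests matches proxy keys against the lowercase URL scheme, so code
    # passing this dict to requests may want lowercase 'http'/'https' keys instead.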
    # Extract the substring between beginStr and endStr (searches from the right, keeps the delimiters)
def getSubStr(self,str,beginStr,endStr):
if beginStr=='':
pass
else:
begin=str.rfind(beginStr)
if begin==-1:
begin=0
str=str[begin:]
if endStr=='':
pass
else:
end=str.rfind(endStr)
if end==-1:
pass
else:
str = str[0:end+1]
return str
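    # Worked example (rfind searches from the right and both delimiters are kept):
    #   getSubStr('aXbXcYd', 'X', 'Y') -> 'XcY'   # starts at the LAST 'X'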
    # Convert Traditional Chinese to Simplified Chinese
    def hant_2_hans(self, hant_str: str):
        '''
        Function: convert hant_str from Traditional to Simplified Chinese
        '''
        return zhconv.convert(hant_str, 'zh-hans')
    # Check whether the string contains any digit
    def str_have_num(self, str_num):
        for str_1 in str_num:
            if str_1.isdigit():
                return True
        return False
    # # Old version: pop one element from a Redis list, keyed by region
    # def redicPullData(self,type,key):
    #     # 1 = domestic, 2 = overseas
    #     if type == 1:
    #         gn_item = self.r.lpop(key)
    #         return gn_item.decode() if gn_item else None
    #     if type == 2:
    #         gw_item = self.r.lpop(key)
    #         return gw_item.decode() if gw_item else None
    # Pop and remove one element from a Redis list (returns None when empty)
def redicPullData(self,key):
item = self.r.lpop(key)
return item.decode() if item else None
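    # Typical producer/consumer pairing around this helper (a sketch; the key name
    # is one used elsewhere in this repo):
    #   r.rpush('champion:baseinfo', com_name)                   # producer seeds the queue
    #   com_name = baseCore.redicPullData('champion:baseinfo')   # consumer pops one
    #   if com_name is None: the queue is empty -- back off and retry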
    # Get the PID of this script's process
def getPID(self):
PID = os.getpid()
return PID
    # Get the local machine's IP address
def getIP(self):
IP = socket.gethostbyname(socket.gethostname())
return IP
    def mkPath(self, path):
        # Create the directory (and any missing parents) if it does not exist
        if not os.path.exists(path):
            os.makedirs(path)
    # Build a Chrome driver; path must point to the chromedriver binary
    # headless decides whether to run without a visible window (default True)
    # A visible browser helps when first working out page parsing, or on sites
    # that refuse headless clients; headless avoids windows popping up during collection
def buildDriver(self, path, headless=True):
service = Service(path)
chrome_options = webdriver.ChromeOptions()
if headless:
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_experimental_option(
"excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('lang=zh-CN,zh,zh-TW,en-US,en')
chrome_options.add_argument('user-agent=' + self.getRandomUserAgent())
# 'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
driver = webdriver.Chrome(options=chrome_options, service=service)
# with open(r'F:\zzsn\zzsn_spider\base\stealth.min.js') as f:
# js = f.read()
#
# driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
# "source": js
# })
return driver
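    # Minimal usage sketch (the chromedriver path is hypothetical):
    #   driver = baseCore.buildDriver(r'D:\tools\chromedriver.exe', headless=True)
    #   driver.get('https://example.com')
    #   html = driver.page_source
    #   driver.quit()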
    # Look up enterprise info from the champion table by company name
def getInfomation(self, com_name):
data = []
try:
sql = f"SELECT * FROM champion WHERE CompanyName = '{com_name}'"
# self.cursor.execute(sql)
# data = self.cursor.fetchone()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql)
data = cursor.fetchone()
conn.commit()
data = list(data)
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('=========数据库操作失败========')
return data
    # Update an enterprise's collection-run counter
def updateRun(self, social_code, runType, count):
try:
sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'"
# self.cursor.execute(sql_update)
# self.cnx.commit()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql_update)
conn.commit()
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('======更新数据库失败======')
    # Persist a log record to the LogTable
def recordLog(self, xydm, taskType, state, takeTime, url, e):
try:
createTime = self.getNowTime(1)
ip = self.getIP()
pid = self.getPID()
sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
# try:
# self.cursor.execute(sql, values)
# except Exception as e:
# print(e)
# self.cnx.commit()
cnn = self.pool_caiji.connection()
cursor = cnn.cursor()
cursor.execute(sql,values)
cnn.commit()
cursor.close()
cnn.close()
except:
log = self.getLogger()
log.info('======保存日志失败=====')
    # Fetch a random QCC (企查查) token from the database
    def GetToken(self):
        query = "select token from QCC_token "
        # token = '67ec7402166df1da84ae83c4b95cefc0'  # tokens go stale roughly every two hours and must be re-captured
self.cursor.execute(query)
token_list = self.cursor.fetchall()
self.cnx.commit()
token = token_list[random.randint(0, len(token_list)-1)][0]
return token
    # Delete an invalidated token
def delete_token(self,token):
deletesql = f"delete from QCC_token where token='{token}' "
self.cursor.execute(deletesql)
self.cnx.commit()
    # Fetch a TYC (天眼查) token
def GetTYCToken(self):
query = 'select token from TYC_token'
self.cursor.execute(query)
token = self.cursor.fetchone()[0]
self.cnx.commit()
return token
    # Detect the language of a piece of text
    def detect_language(self, text):
        # langid.classify returns a (lang, score) tuple
        result = langid.classify(text)
        # fall back to 'cn' when nothing usable is detected
        if not result or not result[0]:
            return 'cn'
        return result[0]
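    # Example usage (langid labels are returned on success, 'cn' is only the fallback):
    #   baseCore.detect_language('hello world') -> 'en'
    #   baseCore.detect_language('你好,世界')    -> 'zh' (typically)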
    # Append rows to an existing Excel file
    def writerToExcel(self, detailList, filename):
        # filename='baidu搜索.xlsx'
        # Read the existing xlsx file
        existing_data = pd.read_excel(filename, engine='openpyxl', dtype=str)
        # Build a DataFrame from the new rows
        new_data = pd.DataFrame(data=detailList)
        # Append the new rows after the existing ones
        # (DataFrame.append was removed in pandas 2.0; pd.concat is the supported spelling)
        combined_data = pd.concat([existing_data, new_data], ignore_index=True)
        # Write the result back to the xlsx file
        combined_data.to_excel(filename, index=False)
        # return combined_data
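    # Usage note: the target xlsx must already exist; seed it once before appending, e.g.
    #   pd.DataFrame().to_excel('baidu搜索.xlsx', index=False)
    #   baseCore.writerToExcel(detailList, 'baidu搜索.xlsx')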
    # Re-queue enterprises that failed or were interrupted back into Redis
def rePutIntoR(self,key,item):
self.r.rpush(key, item)
    # Increment a Redis counter and return the new value
    def incrSet(self, key):
new_value = self.r.incr(key)
print("增加后的值:", new_value)
return new_value
    # Get a key's remaining TTL and reset the counter when it has lapsed
    def getttl(self, key):
        # redis TTL is -2 for a missing key and -1 for a key with no expiry
        ttl = self.r.ttl(key)
        print("剩余过期时间:", ttl)
        # treat any negative TTL as expired
        if ttl < 0:
            # reset the counter to 0 with a fresh one-hour expiry
            self.r.set(key, 0)
            self.r.expire(key, 3600)
time.sleep(2)
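    # Together, incrSet and getttl form a simple hourly rate counter (a sketch;
    # the key name is hypothetical):
    #   count = baseCore.incrSet('qcc:req_count')   # bump the counter
    #   baseCore.getttl('qcc:req_count')            # reset to 0 with a 1h expiry if it lapsed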
    # Upload a PDF to the file server and extract its text content and page count
def upLoadToServe(self,pdf_url,type_id,social_code):
headers = {}
retData = {'state':False,'type_id':type_id,'item_id':social_code,'group_name':'group1','path':'','full_path':'',
'category':'pdf','file_size':'','status':1,'create_by':'XueLingKun',
'create_time':'','page_size':'','content':''}
headers['User-Agent'] = self.getRandomUserAgent()
for i in range(0, 3):
try:
resp_content = requests.get(pdf_url, headers=headers, verify=False, timeout=20).content
break
except:
time.sleep(3)
continue
page_size = 0
for i in range(0, 3):
try:
result = client.upload_by_buffer(resp_content, file_ext_name='pdf')
with fitz.open(stream=resp_content, filetype='pdf') as doc:
page_size = doc.page_count
for page in doc.pages():
retData['content'] += page.get_text()
break
except:
time.sleep(3)
continue
        if page_size < 1:
            # PDF download or parsing failed; retData['state'] stays False
            print('======pdf解析失败=====')
return retData
else:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
retData['state'] = True
retData['path'] = bytes.decode(result['Remote file_id']).replace('group1', '')
retData['full_path'] = bytes.decode(result['Remote file_id'])
retData['file_size'] = result['Uploaded size']
retData['create_time'] = time_now
retData['page_size'] = page_size
return retData
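    # Callers are expected to check retData['state'] before persisting (a sketch;
    # type_id and pdf_name are caller-supplied):
    #   retData = baseCore.upLoadToServe(pdf_url, type_id, social_code)
    #   if retData['state']:
    #       att_id = baseCore.tableUpdate(retData, com_name, year, pdf_name, 1)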
def secrchATT(self,item_id,year,type_id):
sel_sql = '''select id from clb_sys_attachment where item_id = %s and year = %s and type_id=%s '''
self.cursor_.execute(sel_sql, (item_id, year, type_id))
selects = self.cursor_.fetchone()
return selects
    # Insert into the attachment table and return the attachment id
def tableUpdate(self,retData,com_name,year,pdf_name,num):
item_id = retData['item_id']
type_id = retData['type_id']
group_name = retData['group_name']
path = retData['path']
full_path = retData['full_path']
category = retData['category']
file_size = retData['file_size']
status = retData['status']
create_by = retData['create_by']
page_size = retData['page_size']
create_time = retData['create_time']
order_by = num
selects = self.secrchATT(item_id,year,type_id)
# sel_sql = '''select id,item_id from clb_sys_attachment where item_id = %s and year = %s and type_id=%s '''
# self.cursor.execute(sel_sql, (item_id, year,type_id))
# selects = self.cursor.fetchone()
if selects:
self.getLogger().info(f'com_name:{com_name}已存在')
id = selects[0]
return id
else:
Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
values = (
year, pdf_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
status, create_by,
create_time, page_size)
self.cursor_.execute(Upsql, values) # 插入
self.cnx_.commit() # 提交
self.getLogger().info("更新完成:{}".format(Upsql))
selects = self.secrchATT(item_id,year,type_id)
id = selects[0]
return id
    # Update an enterprise's CIK
def updateCIK(self,social_code,cik):
try:
sql = f"UPDATE EnterpriseInfo SET CIK = '{cik}' WHERE SocialCode = '{social_code}'"
cnn = self.pool_caiji.connection()
cursor = cnn.cursor()
cursor.execute(sql)
cnn.commit()
cursor.close()
cnn.close()
except:
log = self.getLogger()
log.info('======保存企业CIK失败=====')
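
# Example lifecycle of BaseCore (a minimal sketch; the Redis key is one used in this repo):
#   baseCore = BaseCore()
#   log = baseCore.getLogger()
#   com_name = baseCore.redicPullData('champion:baseinfo')
#   ...collect data for com_name...
#   baseCore.close()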
# -*- coding: utf-8 -*-
import pandas as pd
import time
import requests
import json
from kafka import KafkaProducer
from BaseCore import BaseCore
from getQccId import find_id_by_name
baseCore = BaseCore()
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
log = baseCore.getLogger()
# Fetch basic enterprise info from QCC by company id
def info_by_id(com_id,com_name):
aa_dict_list = []
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = "https://xcx.qcc.com/mp-weixin/forwardApp/v1/ent/detail?token={}&t={}&unique={}".format(token, t, com_id)
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
time.sleep(2)
com_jc_name = ''
try:
result_dict = resp_dict['result']['Company']
except:
log.info(com_name + ":获取失败===========重新放入redis")
baseCore.rePutIntoR('champion:baseinfo',com_name)
return aa_dict_list
company_name = result_dict['Name']
CreditCode = result_dict['CreditCode']
if CreditCode is None:
CreditCode = ''
try:
OperName = result_dict['Oper']['Name']
except:
OperName = ''
if OperName is None:
OperName = ''
if baseCore.str_have_num(OperName):
OperName = ''
try:
Status = result_dict['ShortStatus']
except:
Status = ''
if Status is None:
Status = ''
try:
StartDate = result_dict['StartDate']
except:
StartDate = ''
if StartDate is None:
StartDate = ''
try:
RegistCapi = result_dict['RegistCapi']
except:
RegistCapi = ''
if RegistCapi is None:
RegistCapi = ''
    RecCap = ''  # result_dict['RecCap']  # paid-in capital, no longer returned by the API
if RecCap is None:
RecCap = ''
try:
        OrgNo = result_dict['CreditCode'][8:-2] + '-' + result_dict['CreditCode'][-2]  # organization code, derived from the credit code since the API no longer returns it
except:
OrgNo = ''
if OrgNo is None:
OrgNo = ''
try:
TaxNo = result_dict['TaxNo']
except:
TaxNo = ''
if TaxNo is None:
TaxNo = ''
try:
EconKind = result_dict['EconKind']
except:
EconKind = ''
if EconKind is None:
EconKind = ''
    TermStart = ''  # result_dict['TermStart']  # business term start, no longer returned
if TermStart is None:
TermStart = ''
    TeamEnd = ''  # result_dict['TeamEnd']  # business term end, no longer returned
if TeamEnd is None:
TeamEnd = ''
try:
SubIndustry = result_dict['Industry']['SubIndustry']
except:
SubIndustry = ''
if SubIndustry is None:
SubIndustry = ''
try:
Province = result_dict['Area']['Province']
except:
Province = ''
try:
City = result_dict['Area']['City']
except:
City = ''
try:
County = result_dict['Area']['County']
except:
County = ''
try:
region = Province + City + County
except:
region = ''
    BelongOrg = ''  # result_dict['BelongOrg']  # registration authority, no longer returned
    can_bao = ''
    CommonList = []  # result_dict['CommonList']  # insured-headcount entries, no longer returned
for Common_dict in CommonList:
try:
KeyDesc = Common_dict['KeyDesc']
except:
continue
if KeyDesc == '参保人数':
can_bao = Common_dict['Value']
if can_bao == '0':
can_bao = ''
OriginalName = ''
try:
OriginalName_lists = result_dict['OriginalName']
for OriginalName_dict in OriginalName_lists:
OriginalName += OriginalName_dict['Name'] + ' '
except:
OriginalName = ''
    try:
        OriginalName = OriginalName.strip()  # strip() returns a new string, so the result must be assigned
    except:
        OriginalName = ''
    EnglishName = ''  # result_dict['EnglishName']  # English name, no longer returned
if EnglishName is None:
EnglishName = ''
    IxCode = ''  # result_dict['IxCode']  # import/export enterprise code, no longer returned
if IxCode is None:
IxCode = ''
Address = result_dict['Address']
if Address is None:
Address = ''
    Scope = ''  # result_dict['Scope']  # business scope, no longer returned here (fetched from the detail API below)
if Scope is None:
Scope = ''
try:
PhoneNumber = result_dict['companyExtendInfo']['Tel']
except:
PhoneNumber = ''
if PhoneNumber is None:
PhoneNumber = ''
try:
WebSite = result_dict['companyExtendInfo']['WebSite']
except:
WebSite = None
if WebSite is None:
try:
WebSite = result_dict['ContactInfo']['WebSite'][0]['Url']
except:
WebSite = ''
try:
Email = result_dict['companyExtendInfo']['Email']
except:
Email = ''
if Email is None:
Email = ''
try:
Desc = result_dict['companyExtendInfo']['Desc']
except:
Desc = ''
if Desc is None:
Desc = ''
try:
Info = result_dict['companyExtendInfo']['Info']
except:
Info = ''
if Info is None:
Info = ''
company_name = baseCore.hant_2_hans(company_name)
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = "https://xcx.qcc.com/mp-weixin/forwardApp/v6/base/getEntDetail?token={}&t={}&unique={}".format(token, t,
com_id)
resp_dict2 = requests.get(url=url, headers=headers, verify=False).json()
time.sleep(1)
try:
com2 = resp_dict2['result']['Company']
except:
com2 = ''
try:
Scope = com2['Scope']
except:
Scope = ''
try:
CheckDate = com2['CheckDate']
except:
CheckDate = ''
if CheckDate is None:
CheckDate = ''
try:
        TaxpayerType = com2['TaxpayerType']  # taxpayer qualification
except:
TaxpayerType = ''
if TaxpayerType is None:
TaxpayerType = ''
try:
No = com2['No']
except:
No = ''
if No is None:
No = ''
try:
IxCode = com2['IxCode']
except:
IxCode = ''
try:
OrgNo = com2['OrgNo']
except:
OrgNo = ''
try:
for Common_t in com2['CommonList']:
try:
if Common_t['KeyDesc'] == '参保人数':
can_bao = Common_t['Value']
except:
pass
except:
can_bao = ''
try:
TermStart = com2['TermStart']
except:
TermStart = ''
try:
TeamEnd = com2['TeamEnd']
except:
TeamEnd = ''
try:
RecCap = com2['RecCap']
except:
RecCap = ''
try:
No = com2['No']
except:
No = ''
try:
SubIndustry = com2['IndustryArray'][-1]
except:
SubIndustry = ''
try:
BelongOrg = com2['BelongOrg']
except:
BelongOrg = ''
try:
EnglishName = com2['EnglishName']
except:
EnglishName = ''
    aa_dict = {
        'qccId': com_id,  # QCC company id
        'name': company_name,  # company name
        'shortName': com_jc_name,  # company short name
        'socialCreditCode': CreditCode,  # unified social credit code
        'legalPerson': OperName,  # legal representative
        'officialPhone': PhoneNumber,  # phone
        'officialUrl': WebSite,  # official website
        'officialEmail': Email,  # email
        'briefInfo': Desc,  # brief introduction
        'registerStatus': Status,  # registration status
        'incorporationDate': StartDate,  # date of incorporation
        'capital': RegistCapi,  # registered capital
        'paidCapital': RecCap,  # paid-in capital
        'approvalDate': CheckDate,  # approval date
        'organizationCode': OrgNo,  # organization code
        'registerNo': No,  # business registration number
        'taxpayerNo': CreditCode,  # taxpayer identification number
        'type': EconKind,  # enterprise type
        'businessStartDate': TermStart,  # business term start
        'businessEndDate': TeamEnd,  # business term end
        'taxpayerQualification': TaxpayerType,  # taxpayer qualification
        'industry': SubIndustry,  # industry
        'region': region,
        'province': Province,  # province
        'city': City,  # city
        'county': County,  # county
        'registerDepartment': BelongOrg,  # registration authority
        'scale': Info,  # staff size
        'insured': can_bao,  # number of insured employees
        'beforeName': OriginalName,  # former name
        'englishName': EnglishName,  # English name
        'importExportEnterpriseCode': IxCode,  # import/export enterprise code
        'address': Address,  # address
        'businessRange': Scope,  # business scope
        'status': 0,  # status
    }
aa_dict_list.append(aa_dict)
log.info(company_name + ":爬取完成")
return aa_dict_list
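# info_by_id returns a one-element list on success and an empty list on failure
# (the failed company is re-queued to Redis), so callers iterate over the result.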
if __name__ == '__main__':
taskType = '基本信息/企查查/单项冠军'
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
'Qcc-Platform': 'mp-weixin',
'Qcc-Timestamp': '',
'Qcc-Version': '1.0.0',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
'content-type': 'application/json',
'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br,'
}
list_weicha = []
name_list = []
    # Pull company names from Redis
while True:
        # TODO: tokens go stale roughly every two hours and must be re-captured; they are read from the database
token = baseCore.GetToken()
if token:
pass
else:
log.info('==========已无token==========')
time.sleep(30)
continue
# list_all_info = []
start_time = time.time()
        # Fetch the next company name from the queue
        com_name = baseCore.redicPullData('champion:baseinfo')
        # com_name = '卓新市万达铸业有限公司'
        # redicPullData returns None (not '') when the queue is empty
        if not com_name:
            time.sleep(20)
            continue
dic_info = baseCore.getInfomation(com_name)
log.info(f'----当前企业{com_name}--开始处理---')
social_code = dic_info[5]
        # QCC company id
company_id = dic_info[6]
        # Search by credit code when available, otherwise by company name
        if company_id is None:
if social_code:
company_id = find_id_by_name(start_time,token,social_code)
else:
company_id = find_id_by_name(start_time,token,com_name)
if company_id == 'null':
log.info('=====搜索不到该企业====')
            # todo: companies that cannot be found have no credit code and cannot be forwarded; generate one
baseCore.rePutIntoR('champion:baseinfo', com_name + ':搜索不到')
continue
if not company_id:
log.info(com_name + ":企业ID获取失败===重新放入redis")
list_weicha.append(com_name + ":企业ID获取失败")
baseCore.rePutIntoR('champion:baseinfo',com_name)
baseCore.delete_token(token)
log.info('=====已重新放入redis,失效token已删除======')
time.sleep(20)
continue
else:
log.info(f'====={com_name}===={company_id}=====获取企业id成功=====')
            # write the QCC id back to the champion table
updateqccid = f"update champion set qccid = '{company_id}' where CompanyName = '{com_name}'"
cursor_.execute(updateqccid)
cnx_.commit()
try:
post_data_list = info_by_id(company_id, com_name)
except:
log.info(f'====={social_code}=====获取基本信息失败,重新放入redis=====')
            baseCore.rePutIntoR('champion:baseinfo', com_name)  # keep the key lowercase to match the queue name used elsewhere
baseCore.delete_token(token)
log.info('=====已重新放入redis,失效token已删除======')
continue
if post_data_list:
pass
else:
# log.info(f'======{social_code}====企查查token失效====')
time.sleep(20)
continue
for post_data in post_data_list:
# list_all_info.append(post_data)
if post_data is None:
print(com_name + ":企业信息获取失败")
list_weicha.append(com_name + ":企业信息获取失败")
continue
get_name = post_data['name']
get_socialcode = post_data['socialCreditCode']
            # update the returned credit code back into the champion table
updatesocialcode = f"update champion set SocialCode = '{get_socialcode}' where CompanyName = '{com_name}'"
cursor_.execute(updatesocialcode)
cnx_.commit()
name_compile = {
'yuan_name':com_name,
'get_name':get_name
}
name_list.append(name_compile)
log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}')
try:
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 0, 2))
kafka_result = producer.send("regionInfo", json.dumps(post_data, ensure_ascii=False).encode('utf8'))
print(kafka_result.get(timeout=10))
except:
exception = 'kafka传输失败'
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(get_socialcode, taskType, state, takeTime, '', exception)
log.info(f"{get_name}--{get_socialcode}--kafka传输失败")
# break
nowtime = baseCore.getNowTime(1).replace('-','_')[:10]
companyName = pd.DataFrame(name_list)
companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx',index=False)
false_com = pd.DataFrame(list_weicha)
false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx',index=False)
# -*- coding: utf-8 -*-
import time
from urllib.parse import quote
import requests
import urllib3
from BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
# headers = {
# 'Host': 'xcx.qcc.com',
# 'Connection': 'keep-alive',
# 'Qcc-Platform': 'mp-weixin',
# 'Qcc-Timestamp': '',
# 'Qcc-Version': '1.0.0',
# 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
# 'content-type': 'application/json',
# 'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
# 'Accept-Encoding': 'gzip, deflate, br,'
# }
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
'x-request-device-type': 'Android',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8391',
'Content-Type': 'application/json',
'Qcc-Version': '1.0.0',
'authMini': 'Bearer f51dae1a2fcb109fa9ec58bd4a85e5c5',
'xweb_xhr': '1',
'xcx-version': '2023.09.27',
'Qcc-Platform': 'mp-weixin',
'Qcc-CurrentPage': '/company-subpackages/business/index',
'Qcc-Timestamp': '1696661787803',
'Qcc-RefPage': '/company-subpackages/detail/index',
'Accept': '*/*',
'Sec-Fetch-Site': 'cross-site',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://servicewechat.com/wx395200814fcd7599/307/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh'
}
# Resolve a QCC company id from a company name or credit code
def find_id_by_name(start,token,name):
urllib3.disable_warnings()
qcc_key = name
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="
for lll in range(1, 6):
try:
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
break
except Exception as e:
print(f'{e}-------------重试')
time.sleep(5)
continue
time.sleep(2)
    # Known error payloads: {'status': 40101, 'message': '无效的sessionToken!'} and {'status': 401, 'message': '您的账号访问超频,请升级小程序版本'}
if resp_dict['status']==40101:
KeyNo = False
log.info(f'====token失效====时间{baseCore.getTimeCost(start, time.time())}')
return KeyNo
if resp_dict['status']==401:
KeyNo = False
log.info(f'=======您的账号访问超频,请升级小程序版本=====时间{baseCore.getTimeCost(start, time.time())}')
return KeyNo
try:
if resp_dict['result']['Result']:
result_dict = resp_dict['result']['Result'][0]
KeyNo = result_dict['KeyNo']
Name = result_dict['Name'].replace('<em>', '').replace('</em>', '').strip()
if Name == '':
KeyNo = 'null'
else:
KeyNo = 'null'
except:
KeyNo = False
log.info(f'====token失效====时间{baseCore.getTimeCost(start,time.time())}')
return KeyNo
log.info("{},企业代码为:{}".format(qcc_key, KeyNo))
return KeyNo
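# Usage sketch: returns the KeyNo string on success, 'null' when the search has no
# usable match, and False when the token is invalid or the account is rate-limited:
#   KeyNo = find_id_by_name(time.time(), token, '示例企业名称')   # placeholder name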
# Core utility package
import os
import random
import socket
import sys
import time
import fitz
import logbook
import logbook.more
import pandas as pd
import requests
import zhconv
import pymysql
import redis
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from openpyxl import Workbook
import langid
# Create a connection pool (pymysql is already imported above)
from pymysql import connections
from DBUtils.PooledDB import PooledDB
# import sys
# sys.path.append('D://zzsn_spider//base//fdfs_client')
from fdfs_client.client import get_tracker_conf, Fdfs_client
tracker_conf = get_tracker_conf('D:\\kkwork\\zzsn_spider\\base\\client.conf')
client = Fdfs_client(tracker_conf)
# Note: call BaseCore.close() before the program exits to release related resources
class BaseCore:
    # sequence counter
__seq = 0
    # proxy-pool database connection (no longer used)
    # __cnx_proxy =None
    # __cursor_proxy = None
cnx = None
cursor = None
cnx_ = None
cursor_ = None
r = None
    # User-Agent pool
__USER_AGENT_LIST = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/531.4 (KHTML, like Gecko) Chrome/3.0.194.0 Safari/531.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.50 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Lunascape 5.0 alpha2)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.7 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon;',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.169.0 Safari/530.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040614 Firefox/0.9',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.500.0 Safari/534.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; TencentTraveler)',
'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.4 (KHTML, like Gecko) Chrome/6.0.481.0 Safari/534.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.370.0 Safari/533.4',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.31 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like Firefox/3.x) SeaMonkey/2.0.12',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE) Chrome/4.0.223.3 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.42 Safari/525.19',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.3 (KHTML, like Gecko) Chrome/4.0.227.0 Safari/532.3',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.8',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.463.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.9 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.9',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5',
'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.4 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.0 (KHTML, like Gecko) Chrome/2.0.182.0 Safari/531.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.9 (KHTML, like Gecko) Chrome/7.0.531.0 Safari/534.9',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.811.0 Safari/535.1',
        'Mozilla/5.0 (Windows; U; Windows NT 5.0; de-DE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.127 Safari/533.4',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; zh-cn) Opera 8.50',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/7.0.0 Safari/700.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20041107 Firefox/0.9.2 StumbleUpon/1.994',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.7.5) Gecko/20041110 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.5',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.168.0 Safari/530.1',
'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20040913 Firefox/0.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.8 (KHTML, like Gecko) Chrome/2.0.177.1 Safari/530.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.40 Safari/530.5',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.24 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.2 Safari/528.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; T312461)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.461.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; rv:1.7.3) Gecko/20041001 Firefox/0.10.1',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; de-DE) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.2 Safari/531.3',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.548.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10 ChromePlus/1.5.2.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.0 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.2 Safari/533.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.4 Safari/532.1',
'Mozilla/5.0 (Windows NT 6.0; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.0 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; sv-SE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.7.5) Gecko/20041122 Firefox/1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; uZardWeb/1.0; Server_JP)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; HCI0449; .NET CLR 1.0.3705)',
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt); Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1);',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.23 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.176.0 Safari/530.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; InfoPath.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.55 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0a1) Gecko/20110623 Firefox/7.0a1 Fennec/7.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.724.100 Safari/534.30',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; WOW64; SV1; uZardWeb/1.0; Server_HK)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3',
'Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.2 (KHTML, like Gecko) Chrome/3.0.191.3 Safari/531.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.39 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.38 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8b) Gecko/20050118 Firefox/1.0+',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040707 Firefox/0.9.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.4 (KHTML, like Gecko) Chrome/2.0.171.0 Safari/530.4',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; nl-NL; rv:1.7.5) Gecko/20041202 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.6 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/1.0.156.0 Safari/528.8',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.43 Safari/534.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13',
'Mozilla/5.0 (ipad Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.498.0 Safari/534.6',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-CA) AppleWebKit/534.13 (KHTML like Gecko) Chrome/9.0.597.98 Safari/534.13',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.3.154.6 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.599.0 Safari/534.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.8 (KHTML, like Gecko) Chrome/7.0.521.0 Safari/534.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1b2pre) Gecko/20081015 Fennec/1.0a1',
'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'
]
#Android agent池
__USER_PHONE_AGENT_LIST = ['Mozilla/5.0 (Linux; Android 7.1.1; OPPO R9sk) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36']
    def __init__(self):
        # serial-number counter consumed by getNextSeq()/getNextXydm() below;
        # initialize it here so the first call doesn't raise AttributeError
        self.__seq = 0
# self.__cnx_proxy = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='clb_project',
# charset='utf8mb4')
# self.__cursor_proxy = self.__cnx_proxy.cursor()
self.cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
charset='utf8mb4')
self.cursor = self.cnx.cursor()
#11数据库
self.cnx_ = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='clb_project',
charset='utf8mb4')
self.cursor_ = self.cnx_.cursor()
# 连接到Redis
self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
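        # The PooledDB below is a pymysql connection pool: maxconnections caps the
        # total number of connections, mincached/maxcached bound the idle connections
        # kept warm, and blocking=True makes callers wait for a free connection
        # instead of raising an error when the pool is exhausted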
self.pool_caiji = PooledDB(
creator=pymysql,
maxconnections=5,
mincached=2,
maxcached=5,
blocking=True,
host='114.115.159.144',
port=3306,
user='caiji',
password='zzsn9988',
database='caiji',
charset='utf8mb4'
)
def close(self):
try:
self.cursor.close()
self.cnx.close()
except :
pass
# 计算耗时
def getTimeCost(self,start, end):
seconds = int(end - start)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
if (h > 0):
return "%d小时%d分钟%d秒" % (h, m, s)
elif (m > 0):
return "%d分钟%d秒" % (m, s)
elif (seconds > 0):
return "%d秒" % (s)
else:
ms = int((end - start) * 1000)
return "%d毫秒" % (ms)
    # format the current time
    # 1 : 2001-01-01 12:00:00  (%Y-%m-%d %H:%M:%S)
    # 2 : 010101120000         (%y%m%d%H%M%S)
    # 3 : epoch timestamp such as 1690179526555 -- millisecond precision
def getNowTime(self, type):
now_time = ""
if type == 1:
now_time = time.strftime("%Y-%m-%d %H:%M:%S")
if type == 2:
now_time = time.strftime("%y%m%d%H%M%S")
if type == 3:
now_time = int(time.time() * 1000)
return now_time
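    # Usage sketch (hypothetical values, not from the original code):
    #   self.getNowTime(1) -> '2023-10-08 12:00:00'
    #   self.getNowTime(2) -> '231008120000'
    #   self.getNowTime(3) -> 1696737600000   (epoch milliseconds)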
# 获取流水号
def getNextSeq(self):
self.__seq += 1
if self.__seq > 1000:
self.__seq = 0
return self.getNowTime(2) + str(self.__seq).zfill(3)
# 获取信用代码
def getNextXydm(self):
self.__seq += 1
if self.__seq > 1000:
self.__seq = 0
return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
# 日志格式
def logFormate(self,record, handler):
formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
date=record.time, # 日志时间
level=record.level_name, # 日志等级
filename=os.path.split(record.filename)[-1], # 文件名
func_name=record.func_name, # 函数名
lineno=record.lineno, # 行号
msg=record.message # 日志内容
)
return formate
# 获取logger
def getLogger(self,fileLogFlag=True, stdOutFlag=True):
dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
dirname = os.path.join(dirname, "logs")
filename = filename.replace(".py", "") + ".log"
if not os.path.exists(dirname):
os.mkdir(dirname)
logbook.set_datetime_format('local')
logger = logbook.Logger(filename)
logger.handlers = []
if fileLogFlag: # 日志输出到文件
logFile = logbook.TimedRotatingFileHandler(os.path.join(dirname, filename), date_format='%Y-%m-%d',
bubble=True, encoding='utf-8')
logFile.formatter = self.logFormate
logger.handlers.append(logFile)
if stdOutFlag: # 日志打印到屏幕
logStd = logbook.more.ColorizedStderrHandler(bubble=True)
logStd.formatter = self.logFormate
logger.handlers.append(logStd)
return logger
# 获取随机的userAgent
def getRandomUserAgent(self):
return random.choice(self.__USER_AGENT_LIST)
# 获取代理
def get_proxy(self):
sql = "select proxy from clb_proxy"
self.cursor.execute(sql)
proxy_lists = self.cursor.fetchall()
ip_list = []
for proxy_ in proxy_lists:
ip_list.append(str(proxy_).replace("('", '').replace("',)", ''))
proxy_list = []
for str_ip in ip_list:
str_ip_list = str_ip.split('-')
proxyMeta = "http://%(host)s:%(port)s" % {
"host": str_ip_list[0],
"port": str_ip_list[1],
}
proxy = {
"HTTP": proxyMeta,
"HTTPS": proxyMeta
}
proxy_list.append(proxy)
        # random.randint(0, 3) crashes whenever fewer than 4 proxies are configured;
        # choose uniformly among however many proxies exist
        return random.choice(proxy_list)
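    # Assumption inferred from the split('-') above: each clb_proxy.proxy row holds
    # dash-separated fields beginning with host and port, e.g. '1.2.3.4-8888'
    # (the dao/Conn class later in this dump reads further fields from the same format)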
    # extract a substring between two markers
    def getSubStr(self, text, beginStr, endStr):
        if beginStr != '':
            begin = text.rfind(beginStr)
            if begin == -1:
                begin = 0
            text = text[begin:]
        if endStr != '':
            end = text.rfind(endStr)
            if end != -1:
                text = text[0:end + 1]
        return text
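    # Usage sketch (hypothetical input): getSubStr('a[b]c', '[', ']') -> '[b]';
    # rfind means both markers anchor on their *last* occurrence in the string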
# 繁体字转简体字
def hant_2_hans(self,hant_str: str):
'''
Function: 将 hant_str 由繁体转化为简体
'''
return zhconv.convert(hant_str, 'zh-hans')
    # return True if the string contains at least one digit
    def str_have_num(self, str_num):
        return any(ch.isdigit() for ch in str_num)
# # 从Redis的List中获取并移除一个元素
# def redicPullData(self,type,key):
# #1 表示国内 2 表示国外
# if type == 1:
# gn_item = self.r.lpop(key)
# return gn_item.decode() if gn_item else None
# if type == 2:
# gw_item = self.r.lpop(key)
# return gw_item.decode() if gw_item else None
# 从Redis的List中获取并移除一个元素
def redicPullData(self,key):
item = self.r.lpop(key)
return item.decode() if item else None
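    # Usage sketch: item = bc.redicPullData('SomeKey:list'); callers must handle
    # the None returned when the Redis list is empty (see the main loops below)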
# 获得脚本进程PID
def getPID(self):
PID = os.getpid()
return PID
# 获取本机IP
def getIP(self):
IP = socket.gethostbyname(socket.gethostname())
return IP
def mkPath(self,path):
folder = os.path.exists(path)
if not folder: # 判断是否存在文件夹如果不存在则创建为文件夹
os.makedirs(path) # makedirs 创建文件时如果路径不存在会创建这个路径
else:
pass
# 生成google模拟浏览器 必须传入值为googledriver位置信息
# headless用于决定是否为无头浏览器,初始默认为无头浏览器
# 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集
# 无头浏览器用于后续对信息采集时不会有浏览器一直弹出,
def buildDriver(self, path, headless=True):
service = Service(path)
chrome_options = webdriver.ChromeOptions()
if headless:
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_experimental_option(
"excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('lang=zh-CN,zh,zh-TW,en-US,en')
chrome_options.add_argument('user-agent=' + self.getRandomUserAgent())
# 'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
driver = webdriver.Chrome(options=chrome_options, service=service)
# with open(r'F:\zzsn\zzsn_spider\base\stealth.min.js') as f:
# js = f.read()
#
# driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
# "source": js
# })
return driver
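    # Usage sketch (hypothetical driver path, not from the original code):
    #   driver = bc.buildDriver(r'D:\chromedriver.exe', headless=True)
    #   driver.get('https://example.com')
    #   html = driver.page_source
    #   driver.quit()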
# 根据社会信用代码获取企业信息
def getInfomation(self, com_name):
data = []
try:
sql = f"SELECT * FROM technological WHERE CompanyName = '{com_name}'"
# self.cursor.execute(sql)
# data = self.cursor.fetchone()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql)
data = cursor.fetchone()
conn.commit()
data = list(data)
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('=========数据库操作失败========')
return data
# 更新企业采集次数
def updateRun(self, social_code, runType, count):
try:
sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'"
# self.cursor.execute(sql_update)
# self.cnx.commit()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql_update)
conn.commit()
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('======更新数据库失败======')
# 保存日志入库
def recordLog(self, xydm, taskType, state, takeTime, url, e):
try:
createTime = self.getNowTime(1)
ip = self.getIP()
pid = self.getPID()
sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
# try:
# self.cursor.execute(sql, values)
# except Exception as e:
# print(e)
# self.cnx.commit()
cnn = self.pool_caiji.connection()
cursor = cnn.cursor()
cursor.execute(sql,values)
cnn.commit()
cursor.close()
cnn.close()
except:
log = self.getLogger()
log.info('======保存日志失败=====')
#获取企查查token
def GetToken(self):
#获取企查查token
query = "select token from QCC_token "
# token = '67ec7402166df1da84ae83c4b95cefc0' # 需要隔两个小时左右抓包修改
self.cursor.execute(query)
token_list = self.cursor.fetchall()
self.cnx.commit()
token = token_list[random.randint(0, len(token_list)-1)][0]
return token
# 删除失效的token
def delete_token(self,token):
deletesql = f"delete from QCC_token where token='{token}' "
self.cursor.execute(deletesql)
self.cnx.commit()
#获取天眼查token
def GetTYCToken(self):
query = 'select token from TYC_token'
self.cursor.execute(query)
token = self.cursor.fetchone()[0]
self.cnx.commit()
return token
    # detect the language of a text snippet
    def detect_language(self, text):
        # langid.classify returns a (lang, score) tuple, so test the tuple's
        # first element rather than comparing the tuple itself against ''
        result = langid.classify(text)
        if not result or not result[0]:
            return 'cn'
        return result[0]
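    # Usage sketch: detect_language('hello world') -> 'en'; note langid reports
    # Chinese as 'zh', so the 'cn' value here only covers empty classifier output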
    # append rows to an existing excel file
    def writerToExcel(self, detailList, filename):
        # read the existing xlsx file (it must already exist with a header row)
        existing_data = pd.read_excel(filename, engine='openpyxl', dtype=str)
        # build a DataFrame from the new rows
        new_data = pd.DataFrame(data=detailList)
        # pandas 2.x removed DataFrame.append; pd.concat is the supported way
        # to stack the new rows onto the existing ones
        combined_data = pd.concat([existing_data, new_data], ignore_index=True)
        # write the combined result back to the xlsx file
        combined_data.to_excel(filename, index=False)
# return combined_data
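    # Usage sketch (hypothetical rows; assumes the xlsx already exists with a header):
    #   bc.writerToExcel([{'title': 't1', 'url': 'http://...'}], 'baidu搜索.xlsx')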
#对失败或者断掉的企业 重新放入redis
def rePutIntoR(self,key,item):
self.r.rpush(key, item)
#增加计数器的值并返回增加后的值
def incrSet(self,key):
# 增加计数器的值并返回增加后的值
new_value = self.r.incr(key)
print("增加后的值:", new_value)
return new_value
#获取key剩余的过期时间
def getttl(self,key):
# 获取key的剩余过期时间
ttl = self.r.ttl(key)
print("剩余过期时间:", ttl)
# 判断key是否已过期
if ttl < 0:
# key已过期,将key的值重置为0
self.r.set(key, 0)
self.r.expire(key, 3600)
time.sleep(2)
#上传至文件服务器,并解析pdf的内容和页数
def upLoadToServe(self,pdf_url,type_id,social_code):
headers = {}
retData = {'state':False,'type_id':type_id,'item_id':social_code,'group_name':'group1','path':'','full_path':'',
'category':'pdf','file_size':'','status':1,'create_by':'XueLingKun',
'create_time':'','page_size':'','content':''}
headers['User-Agent'] = self.getRandomUserAgent()
        resp_content = None
        for i in range(0, 3):
            try:
                resp_content = requests.get(pdf_url, headers=headers, verify=False, timeout=20).content
                break
            except:
                time.sleep(3)
                continue
        if resp_content is None:
            # all three download attempts failed; give up and return the default retData
            return retData
page_size = 0
for i in range(0, 3):
try:
result = client.upload_by_buffer(resp_content, file_ext_name='pdf')
with fitz.open(stream=resp_content, filetype='pdf') as doc:
page_size = doc.page_count
for page in doc.pages():
retData['content'] += page.get_text()
break
except:
time.sleep(3)
continue
if page_size < 1:
# pdf解析失败
print(f'======pdf解析失败=====')
return retData
else:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
retData['state'] = True
retData['path'] = bytes.decode(result['Remote file_id']).replace('group1', '')
retData['full_path'] = bytes.decode(result['Remote file_id'])
retData['file_size'] = result['Uploaded size']
retData['create_time'] = time_now
retData['page_size'] = page_size
return retData
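    # retData contract (as implemented above): state is True only when both the
    # upload (apparently a FastDFS client, given the 'Remote file_id' key) and the
    # PDF parse succeeded; 'content' holds the concatenated text of every page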
def secrchATT(self,item_id,year,type_id):
sel_sql = '''select id from clb_sys_attachment where item_id = %s and year = %s and type_id=%s '''
self.cursor_.execute(sel_sql, (item_id, year, type_id))
selects = self.cursor_.fetchone()
return selects
#插入到att表 返回附件id
def tableUpdate(self,retData,com_name,year,pdf_name,num):
item_id = retData['item_id']
type_id = retData['type_id']
group_name = retData['group_name']
path = retData['path']
full_path = retData['full_path']
category = retData['category']
file_size = retData['file_size']
status = retData['status']
create_by = retData['create_by']
page_size = retData['page_size']
create_time = retData['create_time']
order_by = num
selects = self.secrchATT(item_id,year,type_id)
# sel_sql = '''select id,item_id from clb_sys_attachment where item_id = %s and year = %s and type_id=%s '''
# self.cursor.execute(sel_sql, (item_id, year,type_id))
# selects = self.cursor.fetchone()
if selects:
self.getLogger().info(f'com_name:{com_name}已存在')
id = selects[0]
return id
else:
Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
values = (
year, pdf_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
status, create_by,
create_time, page_size)
self.cursor_.execute(Upsql, values) # 插入
self.cnx_.commit() # 提交
self.getLogger().info("更新完成:{}".format(Upsql))
selects = self.secrchATT(item_id,year,type_id)
id = selects[0]
return id
# 更新企业的CIK
def updateCIK(self,social_code,cik):
try:
sql = f"UPDATE EnterpriseInfo SET CIK = '{cik}' WHERE SocialCode = '{social_code}'"
cnn = self.pool_caiji.connection()
cursor = cnn.cursor()
cursor.execute(sql)
cnn.commit()
cursor.close()
cnn.close()
except:
log = self.getLogger()
log.info('======保存企业CIK失败=====')
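# Usage sketch for this BaseCore module (hypothetical, not part of the original repo):
#   bc = BaseCore()
#   log = bc.getLogger()
#   social_code = bc.redicPullData('NQEnterprise:nq_finance')  # pop one pending company
#   if social_code:
#       log.info(f'processing {social_code}')
#       bc.updateRun(social_code, 'FinanceCount', 1)
#   bc.close()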
# -*- coding: utf-8 -*-
import pandas as pd
import time
import requests
import json
from kafka import KafkaProducer
from BaseCore import BaseCore
from getQccId import find_id_by_name
baseCore = BaseCore()
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
log = baseCore.getLogger()
# 通过企查查id获取企业基本信息
def info_by_id(com_id,com_name):
aa_dict_list = []
    t = str(int(time.time() * 1000))  # millisecond timestamp for the Qcc-Timestamp header
headers['Qcc-Timestamp'] = t
url = "https://xcx.qcc.com/mp-weixin/forwardApp/v1/ent/detail?token={}&t={}&unique={}".format(token, t, com_id)
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
time.sleep(2)
com_jc_name = ''
try:
result_dict = resp_dict['result']['Company']
except:
log.info(com_name + ":获取失败===========重新放入redis")
baseCore.rePutIntoR('technological:baseinfo',com_name)
return aa_dict_list
company_name = result_dict['Name']
CreditCode = result_dict['CreditCode']
if CreditCode is None:
CreditCode = ''
try:
OperName = result_dict['Oper']['Name']
except:
OperName = ''
if OperName is None:
OperName = ''
if baseCore.str_have_num(OperName):
OperName = ''
try:
Status = result_dict['ShortStatus']
except:
Status = ''
if Status is None:
Status = ''
try:
StartDate = result_dict['StartDate']
except:
StartDate = ''
if StartDate is None:
StartDate = ''
try:
RegistCapi = result_dict['RegistCapi']
except:
RegistCapi = ''
if RegistCapi is None:
RegistCapi = ''
RecCap = '' # result_dict['RecCap'] #实际缴纳金额,现已没有显示
if RecCap is None:
RecCap = ''
try:
OrgNo = result_dict['CreditCode'][8:-2] + '-' + result_dict['CreditCode'][-2] # 组织机构代码,现已没有显示
except:
OrgNo = ''
if OrgNo is None:
OrgNo = ''
try:
TaxNo = result_dict['TaxNo']
except:
TaxNo = ''
if TaxNo is None:
TaxNo = ''
try:
EconKind = result_dict['EconKind']
except:
EconKind = ''
if EconKind is None:
EconKind = ''
TermStart = '' # result_dict['TermStart'] 营业期限自,现已没有显示
if TermStart is None:
TermStart = ''
TeamEnd = '' # result_dict['TeamEnd']营业期限至,现已没有显示
if TeamEnd is None:
TeamEnd = ''
try:
SubIndustry = result_dict['Industry']['SubIndustry']
except:
SubIndustry = ''
if SubIndustry is None:
SubIndustry = ''
try:
Province = result_dict['Area']['Province']
except:
Province = ''
try:
City = result_dict['Area']['City']
except:
City = ''
try:
County = result_dict['Area']['County']
except:
County = ''
try:
region = Province + City + County
except:
region = ''
BelongOrg = '' # result_dict['BelongOrg']登记机关,现已没有显示
can_bao = ''
CommonList = [] # result_dict['CommonList']参保人数,现已没有显示
for Common_dict in CommonList:
try:
KeyDesc = Common_dict['KeyDesc']
except:
continue
if KeyDesc == '参保人数':
can_bao = Common_dict['Value']
if can_bao == '0':
can_bao = ''
OriginalName = ''
try:
OriginalName_lists = result_dict['OriginalName']
for OriginalName_dict in OriginalName_lists:
OriginalName += OriginalName_dict['Name'] + ' '
except:
OriginalName = ''
    try:
        # strip() returns a new string, so the result must be re-assigned
        OriginalName = OriginalName.strip()
    except:
        OriginalName = ''
EnglishName = '' # result_dict['EnglishName']企业英文名,现已没有显示
if EnglishName is None:
EnglishName = ''
IxCode = '' # result_dict['IxCode']进出口企业代码,现已没有显示
if IxCode is None:
IxCode = ''
Address = result_dict['Address']
if Address is None:
Address = ''
Scope = '' # result_dict['Scope']经营范围,现已没有显示
if Scope is None:
Scope = ''
try:
PhoneNumber = result_dict['companyExtendInfo']['Tel']
except:
PhoneNumber = ''
if PhoneNumber is None:
PhoneNumber = ''
try:
WebSite = result_dict['companyExtendInfo']['WebSite']
except:
WebSite = None
if WebSite is None:
try:
WebSite = result_dict['ContactInfo']['WebSite'][0]['Url']
except:
WebSite = ''
try:
Email = result_dict['companyExtendInfo']['Email']
except:
Email = ''
if Email is None:
Email = ''
try:
Desc = result_dict['companyExtendInfo']['Desc']
except:
Desc = ''
if Desc is None:
Desc = ''
try:
Info = result_dict['companyExtendInfo']['Info']
except:
Info = ''
if Info is None:
Info = ''
company_name = baseCore.hant_2_hans(company_name)
    t = str(int(time.time() * 1000))  # millisecond timestamp, as above
headers['Qcc-Timestamp'] = t
url = "https://xcx.qcc.com/mp-weixin/forwardApp/v6/base/getEntDetail?token={}&t={}&unique={}".format(token, t,
com_id)
resp_dict2 = requests.get(url=url, headers=headers, verify=False).json()
time.sleep(1)
try:
com2 = resp_dict2['result']['Company']
except:
com2 = ''
try:
Scope = com2['Scope']
except:
Scope = ''
try:
CheckDate = com2['CheckDate']
except:
CheckDate = ''
if CheckDate is None:
CheckDate = ''
try:
TaxpayerType = com2['TaxpayerType'] #纳税人资质
except:
TaxpayerType = ''
if TaxpayerType is None:
TaxpayerType = ''
try:
No = com2['No']
except:
No = ''
if No is None:
No = ''
try:
IxCode = com2['IxCode']
except:
IxCode = ''
try:
OrgNo = com2['OrgNo']
except:
OrgNo = ''
try:
for Common_t in com2['CommonList']:
try:
if Common_t['KeyDesc'] == '参保人数':
can_bao = Common_t['Value']
except:
pass
except:
can_bao = ''
try:
TermStart = com2['TermStart']
except:
TermStart = ''
try:
TeamEnd = com2['TeamEnd']
except:
TeamEnd = ''
try:
RecCap = com2['RecCap']
except:
RecCap = ''
try:
No = com2['No']
except:
No = ''
try:
SubIndustry = com2['IndustryArray'][-1]
except:
SubIndustry = ''
try:
BelongOrg = com2['BelongOrg']
except:
BelongOrg = ''
try:
EnglishName = com2['EnglishName']
except:
EnglishName = ''
aa_dict = {
'qccId': com_id, # 企查查企业id
'name': company_name, # 企业名称
'shortName': com_jc_name, # 企业简称
'socialCreditCode': CreditCode, # 统一社会信用代码
'legalPerson': OperName, # 法定代表人
'officialPhone': PhoneNumber, # 电话
'officialUrl': WebSite, # 官网
'officialEmail': Email, # 邮箱
'briefInfo': Desc, # 简介
'registerStatus': Status, # 登记状态
'incorporationDate': StartDate, # 成立日期
'capital': RegistCapi, # 注册资本
'paidCapital': RecCap, # 实缴资本
'approvalDate': CheckDate, # 核准日期
'organizationCode': OrgNo, # 组织机构代码
'registerNo': No, # 工商注册号
'taxpayerNo': CreditCode, # 纳税人识别号
'type': EconKind, # 企业类型
'businessStartDate': TermStart, # 营业期限自
'businessEndDate': TeamEnd, # 营业期限至
'taxpayerQualification': TaxpayerType, # 纳税人资质
'industry': SubIndustry, # 所属行业
'region': region,
'province': Province, # 所属省
'city': City, # 所属市
'county': County, # 所属县
'registerDepartment': BelongOrg, # 登记机关
'scale': Info, # 人员规模
'insured': can_bao, # 参保人数
'beforeName': OriginalName, # 曾用名
'englishName': EnglishName, # 英文名
'importExportEnterpriseCode': IxCode, # 进出口企业代码
'address': Address, # 地址
'businessRange': Scope, # 经营范围
'status': 0, # 状态
}
aa_dict_list.append(aa_dict)
log.info(company_name + ":爬取完成")
return aa_dict_list
if __name__ == '__main__':
taskType = '基本信息/企查查/科改示范企业'
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
'Qcc-Platform': 'mp-weixin',
'Qcc-Timestamp': '',
'Qcc-Version': '1.0.0',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
'content-type': 'application/json',
'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br,'
}
list_weicha = []
name_list = []
#从redis里拿数据
while True:
# TODO:需要隔两个小时左右抓包修改,token从数据库中获得
token = baseCore.GetToken()
if token:
pass
else:
log.info('==========已无token==========')
time.sleep(30)
continue
# list_all_info = []
start_time = time.time()
# 获取企业信息
# com_name = baseCore.redicPullData('technological:baseinfo')
com_name = '深圳市城市公共安全技术研究院有限公司'
        # redicPullData returns None (not '') when the queue is empty
        if not com_name:
time.sleep(20)
continue
dic_info = baseCore.getInfomation(com_name)
log.info(f'----当前企业{com_name}--开始处理---')
social_code = dic_info[5]
#企查查id
company_id = dic_info[6]
#如果没有信用代码 就通过名字搜索 如果有信用代码 就通过信用代码
        if company_id is None:
if social_code:
company_id = find_id_by_name(start_time,token,social_code)
else:
company_id = find_id_by_name(start_time,token,com_name)
if company_id == 'null':
log.info('=====搜索不到该企业====')
#todo:搜不到的企业没有信用代码 传输不过去 生成一个信用代码
baseCore.rePutIntoR('technological:baseinfo', com_name + ':搜索不到')
continue
if not company_id:
log.info(com_name + ":企业ID获取失败===重新放入redis")
list_weicha.append(com_name + ":企业ID获取失败")
baseCore.rePutIntoR('technological:baseinfo',com_name)
baseCore.delete_token(token)
log.info('=====已重新放入redis,失效token已删除======')
time.sleep(20)
continue
else:
log.info(f'====={com_name}===={company_id}=====获取企业id成功=====')
# todo:写入数据库
updateqccid = f"update technological set qccid = '{company_id}' where CompanyName = '{com_name}'"
cursor_.execute(updateqccid)
cnx_.commit()
try:
post_data_list = info_by_id(company_id, com_name)
except:
log.info(f'====={social_code}=====获取基本信息失败,重新放入redis=====')
            # use the same queue key as everywhere else in this script
            # ('technological:baseinfo', all lowercase)
            baseCore.rePutIntoR('technological:baseinfo', com_name)
baseCore.delete_token(token)
log.info('=====已重新放入redis,失效token已删除======')
continue
if post_data_list:
pass
else:
# log.info(f'======{social_code}====企查查token失效====')
time.sleep(20)
continue
for post_data in post_data_list:
# list_all_info.append(post_data)
if post_data is None:
print(com_name + ":企业信息获取失败")
list_weicha.append(com_name + ":企业信息获取失败")
continue
get_name = post_data['name']
get_socialcode = post_data['socialCreditCode']
#todo:将信用代码更新到表中
updatesocialcode = f"update technological set SocialCode = '{get_socialcode}' where CompanyName = '{com_name}'"
cursor_.execute(updatesocialcode)
cnx_.commit()
name_compile = {
'yuan_name':com_name,
'get_name':get_name
}
name_list.append(name_compile)
log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time,time.time())}')
try:
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 0, 2))
kafka_result = producer.send("regionInfo", json.dumps(post_data, ensure_ascii=False).encode('utf8'))
print(kafka_result.get(timeout=10))
except:
exception = 'kafka传输失败'
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(get_socialcode, taskType, state, takeTime, '', exception)
log.info(f"{get_name}--{get_socialcode}--kafka传输失败")
break
nowtime = baseCore.getNowTime(1).replace('-','_')[:10]
companyName = pd.DataFrame(name_list)
companyName.to_excel(f'./data/企业名称对比_{nowtime}.xlsx',index=False)
false_com = pd.DataFrame(list_weicha)
false_com.to_excel(f'./data/采集失败企业名单_{nowtime}.xlsx',index=False)
# -*- coding: utf-8 -*-
import time
from urllib.parse import quote
import requests
import urllib3
from BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
# headers = {
# 'Host': 'xcx.qcc.com',
# 'Connection': 'keep-alive',
# 'Qcc-Platform': 'mp-weixin',
# 'Qcc-Timestamp': '',
# 'Qcc-Version': '1.0.0',
# 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
# 'content-type': 'application/json',
# 'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
# 'Accept-Encoding': 'gzip, deflate, br,'
# }
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
'x-request-device-type': 'Android',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8391',
'Content-Type': 'application/json',
'Qcc-Version': '1.0.0',
'authMini': 'Bearer f51dae1a2fcb109fa9ec58bd4a85e5c5',
'xweb_xhr': '1',
'xcx-version': '2023.09.27',
'Qcc-Platform': 'mp-weixin',
'Qcc-CurrentPage': '/company-subpackages/business/index',
'Qcc-Timestamp': '1696661787803',
'Qcc-RefPage': '/company-subpackages/detail/index',
'Accept': '*/*',
'Sec-Fetch-Site': 'cross-site',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://servicewechat.com/wx395200814fcd7599/307/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh'
}
# 通过企业名称或信用代码获取企查查id
def find_id_by_name(start,token,name):
urllib3.disable_warnings()
qcc_key = name
    t = str(int(time.time() * 1000))  # millisecond timestamp
headers['Qcc-Timestamp'] = t
url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="
    resp_dict = None
    for lll in range(1, 6):
        try:
            resp_dict = requests.get(url=url, headers=headers, verify=False).json()
            break
        except Exception as e:
            print(f'{e}-------------retrying')
            time.sleep(5)
            continue
    if resp_dict is None:
        # all five attempts failed; report failure the same way an invalid token is reported
        log.info(f'====请求失败====时间{baseCore.getTimeCost(start, time.time())}')
        return False
time.sleep(2)
#{'status': 40101, 'message': '无效的sessionToken!'} {'status': 401, 'message': '您的账号访问超频,请升级小程序版本'}
if resp_dict['status']==40101:
KeyNo = False
log.info(f'====token失效====时间{baseCore.getTimeCost(start, time.time())}')
return KeyNo
if resp_dict['status']==401:
KeyNo = False
log.info(f'=======您的账号访问超频,请升级小程序版本=====时间{baseCore.getTimeCost(start, time.time())}')
return KeyNo
try:
if resp_dict['result']['Result']:
result_dict = resp_dict['result']['Result'][0]
KeyNo = result_dict['KeyNo']
Name = result_dict['Name'].replace('<em>', '').replace('</em>', '').strip()
if Name == '':
KeyNo = 'null'
else:
KeyNo = 'null'
except:
KeyNo = False
log.info(f'====token失效====时间{baseCore.getTimeCost(start,time.time())}')
return KeyNo
log.info("{},企业代码为:{}".format(qcc_key, KeyNo))
return KeyNo
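# Usage sketch (hypothetical token/name): find_id_by_name returns the qcc KeyNo
# string on success, 'null' when the search has no usable hit, and False when the
# token is invalid or the account is rate-limited:
#   key_no = find_id_by_name(time.time(), token, '深圳市某某科技有限公司')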
@@ -5,7 +5,9 @@
 import json
 import requests
 from bs4 import BeautifulSoup
 from kafka import KafkaProducer
 from base import BaseCore
+from obs import ObsClient
+import fitz
+from urllib.parse import unquote
 baseCore = BaseCore.BaseCore()
 log = baseCore.getLogger()
@@ -17,14 +19,79 @@
 cursor_ = baseCore.cursor_
 taskType = '企业公告/证监会'
+obsClient = ObsClient(
+    access_key_id='VEHN7D0TJ9316H8AHCAV',  # 你的华为云的ak码
+    secret_access_key='heR353lvSWVPNU8pe2QxDtd8GDsO5L6PGH5eUoQY',  # 你的华为云的sk
+    server='https://obs.cn-north-1.myhuaweicloud.com'  # 你的桶的地址
+)
+#获取文件大小
+def convert_size(size_bytes):
+    # 定义不同单位的转换值
+    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
+    i = 0
+    while size_bytes >= 1024 and i < len(units)-1:
+        size_bytes /= 1024
+        i += 1
+    return f"{size_bytes:.2f} {units[i]}"
+def uptoOBS(pdf_url,pdf_name,type_id,social_code):
+    headers = {}
+    retData = {'state': False, 'type_id': type_id, 'item_id': social_code, 'group_name': 'group1', 'path': '',
+               'full_path': '',
+               'category': 'pdf', 'file_size': '', 'status': 1, 'create_by': 'XueLingKun',
+               'create_time': '', 'page_size': '', 'content': ''}
+    headers['User-Agent'] = baseCore.getRandomUserAgent()
+    for i in range(0, 3):
+        try:
+            response = requests.get(pdf_url, headers=headers, verify=False, timeout=20)
+            file_size = int(response.headers.get('Content-Length'))
+            break
+        except:
+            time.sleep(3)
+            continue
+    page_size = 0
+    for i in range(0, 3):
+        try:
+            name = pdf_name + '.pdf'
+            now_time = time.strftime("%Y-%m")
+            result = obsClient.putContent('zzsn', f'ZJH/{now_time}/'+name, content=response.content)
+            with fitz.open(stream=response.content, filetype='pdf') as doc:
+                page_size = doc.page_count
+                for page in doc.pages():
+                    retData['content'] += page.get_text()
+            break
+        except:
+            time.sleep(3)
+            continue
+    if page_size < 1:
+        # pdf解析失败
+        # print(f'======pdf解析失败=====')
+        return retData
+    else:
+        try:
+            time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+            retData['state'] = True
+            retData['path'] = result['body']['objectUrl'].split('.com')[1]
+            retData['full_path'] = unquote(result['body']['objectUrl'])
+            retData['file_size'] = convert_size(file_size)
+            retData['create_time'] = time_now
+            retData['page_size'] = page_size
+        except Exception as e:
+            state = 0
+            takeTime = baseCore.getTimeCost(start_time, time.time())
+            baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, f'{e}')
+            return retData
+    return retData
-def secrchATT(item_id, name, type_id):
-    sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s '''
-    cursor_.execute(sel_sql, (item_id, name, type_id))
+def secrchATT(item_id, name, type_id,order_by):
+    sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s and order_by=%s '''
+    cursor_.execute(sel_sql, (item_id, name, type_id,order_by))
     selects = cursor_.fetchone()
     return selects
 # 插入到att表 返回附件id
 def tableUpdate(retData, com_name, year, pdf_name, num):
     item_id = retData['item_id']
@@ -39,26 +106,26 @@ def tableUpdate(retData, com_name, year, pdf_name, num):
     page_size = retData['page_size']
     create_time = retData['create_time']
     order_by = num
-    selects = secrchATT(item_id, pdf_name, type_id)
-
-    if selects:
-        log.info(f'com_name:{com_name}已存在')
-        id = selects[0]
-        return id
-    else:
+    # selects = secrchATT(item_id, pdf_name, type_id)
+    #
+    # if selects:
+    #     log.info(f'pdf_name:{pdf_name}已存在')
+    #     id = ''
+    #     return id
+    # else:
     Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
     values = (
         year, pdf_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
        status, create_by,
        create_time, page_size)
     cursor_.execute(Upsql, values)  # 插入
     cnx_.commit()  # 提交
     log.info("更新完成:{}".format(Upsql))
-    selects = secrchATT(item_id, pdf_name, type_id)
+    selects = secrchATT(item_id, pdf_name, type_id,order_by)
     id = selects[0]
     return id
@@ -73,13 +140,20 @@ def RequestUrl(url, payload, social_code,start_time):
         pass
     # 检查响应状态码
-    if response.status_code == 200:
-        # 请求成功,处理响应数据
-        # print(response.text)
-        soup = BeautifulSoup(response.text, 'html.parser')
-        pass
-    else:
-        # 请求失败,输出错误信息
+    try:
+        if response.status_code == 200:
+            # 请求成功,处理响应数据
+            # print(response.text)
+            soup = BeautifulSoup(response.text, 'html.parser')
+            pass
+        else:
+            # 请求失败,输出错误信息
+            log.error('请求失败:', url)
+            state = 0
+            takeTime = baseCore.getTimeCost(start_time, time.time())
+            baseCore.recordLog(social_code, taskType, state, takeTime, url, '请求失败')
+            soup = ''
+    except:
         log.error('请求失败:', url)
         state = 0
         takeTime = baseCore.getTimeCost(start_time, time.time())
@@ -163,26 +237,32 @@ def getUrl(code, url_parms, Catagory2_parms):
     }
     return dic_parms
+def ifInstert(short_name, social_code, pdf_url):
+    ifexist = True
-def InsterInto(short_name, social_code, pdf_url):
-    inster = False
     sel_sql = '''select social_credit_code,source_address from brpa_source_article where social_credit_code = %s and source_address = %s and origin='证监会' and type='1' '''
     cursor.execute(sel_sql, (social_code, pdf_url))
     selects = cursor.fetchone()
+    #如果数据库中存在 则跳过
     if selects:
-        print(f'com_name:{short_name}、{pdf_url}已存在')
-        return inster
+        ifexist = False
+        log.info(f'com_name:{short_name}、{pdf_url}已存在')
+        return ifexist
+    else:
+        return ifexist
+def InsterInto(social_code, pdf_url,pub_time):
+    insert = False
     # 信息插入数据库
     try:
-        insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
+        insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,publish_time,create_time) values(%s,%s,%s,%s,%s,now())'''
         list_info = [
            social_code,
            pdf_url,
            '证监会',
            '1',
+            pub_time,
        ]
        #144数据库
        cursor.execute(insert_sql, tuple(list_info))
@@ -195,10 +275,20 @@ def InsterInto(short_name, social_code, pdf_url):
        baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, '数据库传输失败')
     return insert
 def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_name,num):
-    #上传至文件服务器
-    retData = baseCore.upLoadToServe(pdf_url,8,social_code)
+    #判断文件是否已经存在obs服务器中
+    # file_path = 'XQWAnnualReport/2023-10/浙江国祥股份有限公司首次公开发行股票并在主板上市暂缓发行公告.doc'
+    now_time = time.strftime("%Y-%m")
+    file_path = 'ZJH/'+now_time+'/'+pdf_name+'.pdf'
+    response = obsClient.getObjectMetadata('zzsn', file_path)
+    if response.status >= 300:
+        log.info('=====文件不存在obs=====')
+        pass
+    else:
+        log.info(f'=====文件存在obs========{file_path}')
+        return False
+    #上传至华为云服务器
+    retData = uptoOBS(pdf_url,pdf_name,8,social_code)
     #附件插入att数据库
     if retData['state']:
        pass
@@ -207,12 +297,11 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
        return False
     num = num + 1
     att_id = tableUpdate(retData,com_name,year,pdf_name,num)
-    content = retData['content']
-    if retData['state']:
+    if att_id:
        pass
     else:
-        log.info(f'====pdf解析失败====')
        return False
+    content = retData['content']
     time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     dic_news = {
@@ -248,7 +337,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
            'message': '操作成功',
            'code': '200',
        }
-        print(dic_result)
+        log.info(dic_result)
        return True
     except Exception as e:
        dic_result = {
@@ -260,14 +349,11 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, 'Kafka操作失败')
-        print(dic_result)
+        log.info(dic_result)
        return False
 # 采集信息
 def SpiderByZJH(url, payload, dic_info, start_time,num):  # dic_info 数据库中获取到的基本信息
-    okCount = 0
-    errorCount = 0
     social_code = dic_info[2]
     short_name = dic_info[4]
     com_name = dic_info[1]
@@ -279,26 +365,26 @@ def SpiderByZJH(url, payload, dic_info, start_time,num):
     try:
        is_exist = soup.find('div',class_='con').text
        if is_exist == '没有查询到数据':
-            state = 1
+            state = 0
            takeTime = baseCore.getTimeCost(start_time, time.time())
-            baseCore.recordLog(social_code, taskType, state, takeTime, url, '')
+            baseCore.recordLog(social_code, taskType, state, takeTime, url, '没有查询到数据')
            return
     except:
        pass
-    # 先获取页数
-    page = soup.find('div', class_='pages').find('ul', class_='g-ul').text
-    total = re.findall(r'\d+', page)[0]
-    r_page = int(total) % 15
-    if r_page == 0:
-        Maxpage = int(total) // 15
-    else:
-        Maxpage = int(total) // 15 + 1
-    log.info(f'{short_name}====={code}===========一共{total}条,{Maxpage}页')
-    # 首页和其他页不同,遍历 如果是首页 修改一下链接
-    for i in range(1, Maxpage + 1):
+    # # 先获取页数
+    # page = soup.find('div', class_='pages').find('ul', class_='g-ul').text
+    #
+    # total = re.findall(r'\d+', page)[0]
+    #
+    # r_page = int(total) % 15
+    # if r_page == 0:
+    #     Maxpage = int(total) // 15
+    # else:
+    #     Maxpage = int(total) // 15 + 1
+    # log.info(f'{short_name}====={code}===========一共{total}条,{Maxpage}页')
+    # # 首页和其他页不同,遍历 如果是首页 修改一下链接
+    for i in range(1,51):
        log.info(f'==========正在采集第{i}页=========')
        if i == 1:
            href = url
@@ -310,9 +396,9 @@ def SpiderByZJH(url, payload, dic_info, start_time,num):
        if soup == '':
            continue
        tr_list = soup.find('div', id='txt').find_all('tr')
-        pageIndex = 0
+        # pageIndex = 0
        for tr in tr_list[1:]:
-            pageIndex += 1
+            # pageIndex += 1
            td_list = tr.find_all('td')
            pdf_url_info = td_list[2]
            # print(pdf_url)
@@ -320,37 +406,35 @@ def SpiderByZJH(url, payload, dic_info, start_time,num):
            name_pdf = pdf_url_info['onclick'].strip('downloadPdf1(').split('\',')[1].strip('\'')
            pub_time = pdf_url_info['onclick'].strip('downloadPdf1(').split('\',')[2].strip('\'')
+            #todo:判断发布日期是否是日期格式
+            pattern = r"^\d{4}-\d{2}-\d{2}$"  # 正则表达式匹配YYYY-MM-DD格式的日期
+            if re.match(pattern, pub_time):
+                pass
+            else:
+                continue
            year = pub_time[:4]
            report_type = td_list[4].text.strip()
-            # 信息插入数据库
-            insert = InsterInto(short_name, social_code, name_pdf)
-            if insert:
-                log.info(f'======={short_name}========{code}===插入公告库成功')
+            # 判断数据库中是否有该条资讯
+            ifexist = ifInstert(short_name, social_code, pdf_url)
+            #如果不存在 ifexist = True
+            if ifexist:
                # 解析PDF内容,先获取PDF链接 下载 解析成功,解析失败 ,传输成功,传输失败
                result = GetContent(pdf_url, name_pdf, social_code, year, pub_time, start_time,com_name,num)
                if result:
                    # 公告信息列表
-                    okCount = okCount + 1
                    log.info(f'{short_name}==============解析传输操作成功')
                    state = 1
                    takeTime = baseCore.getTimeCost(start_time, time.time())
-                    baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, '')
+                    baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, '成功')
+                    #发送kafka成功之后 再插入数据库
+                    insert = InsterInto(social_code,pdf_url,pub_time)
+                    if insert:
+                        log.info(f'===={social_code}========{name_pdf}=====插入库成功')
                    pass
                else:
-                    errorCount += 1
-                    log.error(f'{short_name}=============解析或传输操作失败')
-                    # try:
-                    #     insert_err_sql = f"insert into dt_err(xydm,`from`,url,title,pub_date,zhaiyao,create_date,state,pageNo,pageIndex,type) values('{social_code}','证监会','{pdf_url}','{name_pdf}','{pub_time}',' ',now(),1,{i},{pageIndex},'1')"
-                    #     cursor_.execute(insert_err_sql)
-                    #     cnx_.commit()
-                    # except:
-                    #     pass
                    continue
            else:
                log.info(f'======={short_name}========{code}===已存在')
@@ -393,14 +477,15 @@ if __name__ == '__main__':
     while True:
        start_time = time.time()
        # 获取企业信息
-        social_code = baseCore.redicPullData('NoticeEnterprise:gnqy_socialCode')
-        # social_code = '9110000071092841XX'
+        # social_code = baseCore.redicPullData('NoticeEnterprise:gnqy_socialCode')
+        social_code = '91440500617540496Q'
        # 判断 如果Redis中已经没有数据,则等待
        if social_code == None:
            time.sleep(20)
            continue
        dic_info = baseCore.getInfomation(social_code)
-        count = dic_info[16]
+        count = dic_info[17]
        # 沪市http://eid.csrc.gov.cn/101111/index.html 深市http://eid.csrc.gov.cn/101811/index.html 北交所http://eid.csrc.gov.cn/102611/index.html
        # url 变量 翻页 栏目 http://eid.csrc.gov.cn/101811/index_3_f.html
@@ -418,11 +503,14 @@ if __name__ == '__main__':
        com_name = dic_info[1]
        dic_parms = getUrl(code, url_parms, Catagory2_parms)
        dic_parms_ls = getUrl(code, url_parms_ls, Catagory2_parms_ls)
        if dic_parms:
            start_time_cj = time.time()
+            log.info(f'======开始处理{com_name}=====发行公告=======')
            SpiderByZJH(dic_parms["url"], dic_parms["payload"], dic_info, start_time,num)
            log.info(f'{code}==========={short_name},{com_name},发行公告,耗时{baseCore.getTimeCost(start_time_cj, time.time())}')
            start_time_ls = time.time()
+            log.info(f'======开始处理{com_name}=====临时报告=======')
            SpiderByZJH(dic_parms_ls['url'], dic_parms_ls['payload'], dic_info, start_time,num)
            log.info(f'{code}==========={short_name},{com_name},临时报告,耗时{baseCore.getTimeCost(start_time_ls, time.time())}')
            # UpdateInfoSql(retData,retData_ls,social_code)
@@ -431,11 +519,7 @@ if __name__ == '__main__':
        log.info(f'{short_name} ---- 该企业耗时 ---- {baseCore.getTimeCost(start_time, end_time)}-----------')
        count += 1
        runType = 'NoticeReportCount'
-        baseCore.updateRun(code, runType, count)
+        baseCore.updateRun(social_code, runType, count)
     cursor.close()
     cnx.close()
-    # cursor_.close()
-    # cnx_.close()
-    # 释放资源
     baseCore.close()
"""
新浪财经美股企业动态
"""
import json
import time
import jieba
import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer
from retry import retry
from base.smart import smart_extractor
from base.BaseCore import BaseCore
# 初始化,设置中文分词
jieba.cut("必须加载jieba")
smart = smart_extractor.SmartExtractor('cn')
baseCore = BaseCore()
log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'Cache-Control': 'no-cache',
'Pragma': 'no-cache'
}
taskType = '新浪财经/天眼查'
# 获取企业信息
def getinfomation(social_code):
    selectSql = f"select * from mgzqjywyh_list where state = '2' and xydm='{social_code}' "
    cursor.execute(selectSql)
    data = cursor.fetchone()
    cnx.commit()
    data = list(data)
    # keep the shared module-level cursor/cnx open: insertMysql() and
    # selectUrl() below reuse them on every record
    return data
# 获取响应页面
@retry(tries=3, delay=1)
def getrequests(url):
req = requests.get(url, headers=headers)
req.encoding = req.apparent_encoding
soup = BeautifulSoup(req.text, 'html.parser')
return soup
# 解析内容
def getDic(social_code, li):
start_time = time.time()
title = li.find('a').text
href = li.find('a').get('href')
tag_at = li.find('span', class_='xb_list_r').text
    author = tag_at.split('|')[0].strip()
    pub_time = tag_at.split('|')[1].strip()
pub_time = pub_time.split(' ')[0].replace('年', '-').replace('月', '-').replace('日', '')
if 'http' not in href:
href = 'https://finance.sina.com.cn' + href
href_ = href.replace('https', 'http')
try:
# 带标签正文
contentText = smart.extract_by_url(href_).text
# 不带标签正文
content = smart.extract_by_url(href_).cleaned_text
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
except:
log.error(f'{href}===页面解析失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, href, f'{href}===页面解析失败')
return
dic_news = {
'attachmentIds': '',
'author': author,
'content': content,
'contentWithTag': contentText,
'createDate': time_now,
'deleteFlag': '0',
'id': '',
'keyWords': '',
'lang': 'zh',
'origin': '新浪财经',
'publishDate': pub_time,
'sid': '1684032033495392257',
'sourceAddress': href, # 原文链接
'summary': '',
'title': title,
'type': 2,
'socialCreditCode': social_code,
'year': pub_time[:4]
}
# print(dic_news)
try:
sendKafka(dic_news, start_time)
log.info(f'Kafka发送成功')
try:
insertMysql(social_code, href)
log.info(f'数据库保存成功')
except:
log.error(f'{href}===数据入库失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, href, f'{href}===数据入库失败')
except:
log.error(f'{href}===发送Kafka失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, href, f'{href}===发送Kafka失败')
# 数据发送至Kafka
@retry(tries=3, delay=1)
def sendKafka(dic_news, start_time):
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
kafka_result = producer.send("researchReportTopic",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
print(kafka_result.get(timeout=10))
dic_result = {
        'success': 'true',
'message': '操作成功',
'code': '200',
}
log.info(dic_result)
# 传输成功,写入日志中
state = 1
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(dic_news['socialCreditCode'], taskType, state, takeTime, dic_news['sourceAddress'], '')
# 数据保存入库,用于判重
@retry(tries=3, delay=1)
def insertMysql(social_code, link):
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
# 动态信息列表
list_info = [
social_code,
link,
'新浪财经',
'2',
]
cursor.execute(insert_sql, tuple(list_info))
cnx.commit()
# 判断动态是否采集过
@retry(tries=3, delay=1)
def selectUrl(url, social_code):
sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s and type='2' '''
cursor.execute(sel_sql, (url, social_code))
selects = cursor.fetchone()
return selects
def doJob():
# while True:
# social_code = ''
# # 从redis中获取企业信用代码
# try:
# data = getinfomation(social_code)
# com_code = data[6]
com_code = 'AAPL'
social_code = 'ZZSN22080900000004'
log.info(f'{social_code}==={com_code}===开始采集')
start_time = time.time()
pageIndex = 1
while True:
# 拼接链接
# url = 'http://biz.finance.sina.com.cn/usstock/usstock_news.php?pageIndex=1&symbol=AAPL&type=1'
url = f'http://biz.finance.sina.com.cn/usstock/usstock_news.php?pageIndex={pageIndex}&symbol={com_code}&type=1'
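        # the pageIndex/symbol/type query params drive Sina's US-stock news list;
        # type=1 appears to select company news (assumption based on the hardcoded
        # sample URL in the comment above)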
soup_home = getrequests(url)
li_list = soup_home.select('body > div > div.xb_news > ul > li')
        # the first parse sometimes yields an empty li list; re-request the page
        # before re-selecting (re-running select() on the same soup cannot change the result)
        for i in range(5):
            if len(li_list) == 0:
                soup_home = getrequests(url)
                li_list = soup_home.select('body > div > div.xb_news > ul > li')
            else:
                break
for li in li_list:
title = li.find('a').text
if title == '':
continue
href = li.find('a').get('href')
selects = selectUrl(href, social_code)
if selects:
log.info(f'{url}==已采集过')
else:
getDic(social_code, li)
break
break
# # 如果采集到已采集过动态,证明最新发布动态已经全部采集过
# 增量使用
# if selects:
# break
        next = soup_home.select('body > div > div.xb_news > div.xb_pages > a')
        # same idea as above: refetch the page when the pager links are missing
        for i in range(5):
            if len(next) == 0:
                soup_home = getrequests(url)
                next = soup_home.select('body > div > div.xb_news > div.xb_pages > a')
            else:
                break
if len(next) == 2:
break
pageIndex += 1
time.sleep(2)
log.info(f'{social_code}==={com_code}===企业整体耗时{baseCore.getTimeCost(start_time,time.time())}')
# except:
# log.info(f'==={social_code}=====获取企业信息失败====')
# #重新塞入redis
# baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode',social_code)
# state = 0
# takeTime = baseCore.getTimeCost(start, time.time())
# baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
# time.sleep(5)
if __name__ == "__main__":
doJob()
@@ -33,7 +33,7 @@ def updatewxLink(link,info_source_code,state):
 def getjsonInfo():
     #从数据库中获取信息 一条
-    select_sql = "select * from wx_link where state=100 order by id asc limit 1"
+    select_sql = "select * from wx_link where state=0 order by id asc limit 1"
     cursor_.execute(select_sql)
     row = cursor_.fetchone()
     cnx_.commit()
# created by virtualenv automatically
*
import gc
from flask import Flask, render_template, request, current_app
import configparser
from controller.Main import Main # 导入全部蓝图变量
from apscheduler.schedulers.blocking import BlockingScheduler
# import only the datetime class; a bare `import datetime` alongside this
# from-import would just be shadowed
from datetime import datetime
from dao.Conn import ConnMySql
import sys
import io
# 清除登录状态
def clearLoginStateIn24H():
conn = ConnMySql()
conn.userClearLoginStateIn24H()
print("清除登录状态-" + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
app = Flask(__name__) # 初始化Flask对象
app.register_blueprint(Main) # 将所有蓝图对象注册到app这个flask对象内
# 上传文件最大16M字节
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
# App配置信息,键=段名+键名,如:db.port=3306
cfg = configparser.ConfigParser()
cfg.optionxform = str # 保持配置文件中键的大小写
cfg.read("static/conf/sys.ini", encoding='utf-8')
sections = cfg.sections()
for section in sections:
items = cfg.items(section)
for key, val in items:
app.config[section + '.' + key] = val
# 个别取值进行特殊处理
app.config['db.port'] = int(app.config['db.port'])
if app.config['sys.useProxy'] == "0":
app.config['sys.useProxy'] = False
else:
app.config['sys.useProxy'] = True
app.config['sys.proxyid'] = 0 #当前使用的代理id
app.config['sys.userid'] = 0 #当前使用的账号id
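# Sketch of the expected static/conf/sys.ini layout, inferred from the loading
# loop above (section.key becomes the app.config key; values are placeholders):
#   [db]
#   port = 3306
#   [sys]
#   useProxy = 0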
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
if __name__ == '__main__':
# webbrowser.open("0.0.0.0:5000")
app.run(host='0.0.0.0', port=5201, debug=True) # 启动入口
# 启动定时任务,定时清除异常登录状态,每半小时一次
# sched = BlockingScheduler()
# sched.add_job(clearLoginStateIn24H, 'interval', seconds=1800, id='task-clearLoginStateIn24H')
# sched.start()
import gc
from flask import Blueprint, request, current_app, make_response, send_file # 导入蓝图
import datetime
import re
import os
import logging
import sys
import io
import tempfile
import openpyxl
import string
import json
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium import webdriver
from selenium.webdriver.common.by import By
from util import UtilDate
from service.Service02 import Service02
Main = Blueprint('Main', __name__) # 初始化一个蓝图,而不是Flask对象
# Accept the request, read the JSON parameters from the request body, and crawl accordingly
# {"from":"1900-01-01","last":<last x days>, "orgs":["org 1 full name","org 2 full name",...]}
@Main.route('/Main/getData', methods=["POST"])
def getData():
print("POST /Main/getData")
paras = request.get_json(force=True)
dateFrom = paras['from']
lastDays = paras['last']
orgs = paras['orgs']
if dateFrom == "":
if lastDays == "":
lastDays = 0
else:
# "last N days" includes today, so shift back N-1 days
lastDays = -(int(lastDays) - 1)
dateFrom = UtilDate.dateAdd("", "d", lastDays)
service02 = Service02()
return service02.getData(dateFrom, orgs) # "https://wenshu.court.gov.cn/website/wenshu/181029CR4M5A62CH/index.html"
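# A minimal sketch of how a client might call this endpoint; the host and
# payload values below are illustrative assumptions, not part of the service:
# import requests
# payload = {"from": "", "last": "7", "orgs": ["某单位全称"]}
# resp = requests.post("http://127.0.0.1:5201/Main/getData", json=payload)
# print(resp.json())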
import json
import configparser
# import mysql.connector
from flask import current_app
import pymysql
from pymssql import Cursor # used only as a type hint; actual cursors come from pymysql
from vo.ProxyInfo import ProxyInfo
from vo.LoginInfo import LoginInfo
class Conn(object):
def __init__(self, conn):
self._conn: pymysql.Connect = conn
# the doSelect*/doInsert*/doDelete*/doUpdate* helpers below rely on a
# shared cursor, which the original never created; open it here
self._cursor = conn.cursor() if conn is not None else None
def close(self) -> None:
'''
Close the cursor and connection objects
:param: None
:return: None
'''
if self._cursor is not None:
self._cursor.close()
if self._conn is not None:
self._conn.close()
def genDict(self, oCursor: Cursor) -> dict:
'''
Map each result-set column name to its positional index
:param oCursor: cursor that has executed a query
:return: {column name: column index}
'''
ret = {}
try:
i = -1
for field in oCursor.description:
i = i + 1
ret[field[0]] = i
except Exception as err:
print('error:', err)
return ret
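# transToJson is called by doSelectByColumns/doSelectWhere below but was never
# defined in the original file; a minimal sketch, assuming each row should
# become a {column name: value} dict:
def transToJson(self, oCursor) -> list:
fields = [field[0] for field in oCursor.description]
return [dict(zip(fields, row)) for row in oCursor.fetchall()]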
# fetch all proxies
def proxyGetAll(self) -> Cursor:
'''
Query every proxy record
:return: cursor holding the result set, or None on error
'''
cursor: Cursor = None
try:
sql = "SELECT id, proxy FROM caiji.clb_proxy"
cursor = self._conn.cursor()
cursor.execute(sql)
except Exception as err:
print('error:', err)
return cursor
# fetch the next proxy after the given id
def proxyGetNext(self, id: int) -> ProxyInfo:
'''
Read the first proxy whose id is greater than the given id
:param id: id of the last proxy used
:return: the next ProxyInfo, or None if there is none
'''
ret: ProxyInfo = None
sql = "SELECT id, proxy FROM caiji.clb_proxy where id>" + str(id) + " order by id asc limit 1"
try:
cursor = self._conn.cursor()
cursor.execute(sql)
results = cursor.fetchall()
if cursor.rowcount > 0:
ret = ProxyInfo()
fields = self.genDict(cursor)
for row in results:
ret.id = row[fields["id"]]
sProxy = row[fields["proxy"]]
# proxy column format: ip-port-username-password
proxyInfos = sProxy.split('-')
ret.ip = proxyInfos[0]
ret.port = proxyInfos[1]
ret.user_name = proxyInfos[2]
ret.user_passwd = proxyInfos[3]
cursor.close()
except Exception as err:
print('error:', err)
return ret
# fetch an idle account
def userGetFree(self, userGroup: str, id: int) -> LoginInfo:
"""
Read the first idle account in the group whose id is greater than the given id
:param userGroup: account group name
:param id: id of the last account used
:return: the next idle LoginInfo, or None if there is none
"""
ret: LoginInfo = None
sql = f"SELECT * FROM caiji.login_info where user_group='{userGroup}' and id > {id} and login_time is null order by id asc limit 1"
try:
cursor = self._conn.cursor()
cursor.execute(sql)
results = cursor.fetchall()
if cursor.rowcount > 0:
ret = LoginInfo()
fields = self.genDict(cursor)
for row in results:
ret.id = row[fields["id"]]
ret.user_group = row[fields["user_group"]]
ret.user_name = row[fields["user_name"]]
ret.user_passwd = row[fields["user_passwd"]]
cursor.close()
except Exception as err:
print('error:', err)
return ret
# clear the login state of accounts that did not log out within 24 hours (abnormal exits)
def userClearLoginStateIn24H(self):
'''
Reset login_time for sessions older than 24 hours
:return: None
'''
sql = "update caiji.login_info set login_time=null where TIME_TO_SEC(TIMEDIFF(now(), login_time))>86400"
try:
cursor = self._conn.cursor()
cursor.execute(sql)
self._conn.commit()
except Exception as err:
print('error:', err)
# mark the account as logged in (login_time = now())
def userSetLoginStateByID(self, id: int):
'''
Set login_time to now for the given account
:param id: account id
:return: None
'''
sql = "update caiji.login_info set login_time=now() where id=" + str(id)
try:
cursor = self._conn.cursor()
cursor.execute(sql)
self._conn.commit()
except Exception as err:
print('error:', err)
# actively log out; the account can be used again next time, possibly subject to conditions
def userClearLoginStateByID(self, id: int):
'''
Clear login_time for the given account
:param id: account id
:return: None
'''
sql = "update caiji.login_info set login_time=null where id=" + str(id)
try:
cursor = self._conn.cursor()
cursor.execute(sql)
self._conn.commit()
except Exception as err:
print('error:', err)
def doSelectByColumns(self, tbname: str, *columns: str) -> list:
'''
Select the given columns from a table
:param tbname: table name
:param columns: column names to query; all columns when empty
:return: query result as a list of dicts
'''
col = str(columns).replace("[", "").replace("]", "").replace("'", "").replace("(", "").replace(")", "")
sqlstring = f"select {col} from {tbname} "
if len(columns) == 0: sqlstring = f"select * from {tbname}"
self._cursor.execute(sqlstring)
strjson = self.transToJson(self._cursor)
return strjson
def doSelectWhere(self, tbname: str, where: str) -> list:
'''
Select with a where-clause expression
:param tbname: table name
:param where: where clause
:return: query result as a list of dicts
'''
sqlstring = f"select * from {tbname} where {where}"
self._cursor.execute(sqlstring)
strjson = self.transToJson(self._cursor)
return strjson
def doInsertRecord(self, tbname: str, *values) -> None:
'''
Insert a row by supplying every column value
:param tbname: table name
:param values: values for all columns
:return: None
'''
vls = str(values).replace("[", "").replace("]", "")
sqlstring = f"insert into {tbname} values {vls}"
print(sqlstring)
self._cursor.execute(sqlstring)
self._conn.commit()
def doInsertByKV(self, tbname: str, **keyvalues) -> None:
'''
Insert a row from column=value keyword pairs
:param tbname: table name
:param keyvalues: dict of column=value
:return: None
'''
keys = str(keyvalues.keys()).replace("dict_keys", "").replace("'", "").replace("[", "").replace("]", "")
# the original replaced "dict_keys" here, which never matches the repr of dict_values
values = str(keyvalues.values()).replace("dict_values", "").replace("[", "").replace("]", "")
sqlstring = f"insert into {tbname} {keys} values {values}"
self._cursor.execute(sqlstring)
self._conn.commit()
def doDeleteByKV(self, tbname: str, **keyvalues) -> None:
'''
Delete rows matched by column=value pairs
:param tbname: table name
:param keyvalues: column=value pairs
:return: None
'''
keys = list(keyvalues.keys())
values = list(keyvalues.values())
pairs = []
for i in range(len(keys)):
pairs.append(f"{keys[i]}={values[i]}")
pairs.append("and") # join conditions with "and"
del pairs[len(pairs) - 1] # drop the trailing "and"
pairs = str(pairs).replace("[", "").replace("]", "").replace("'", "").replace(",", "")
sqlstring = f"delete from {tbname} where {pairs}"
self._cursor.execute(sqlstring)
self._conn.commit()
def doDeleteWhere(self, tbname: str, where: str) -> None:
'''
Delete rows matched by a where expression
:param tbname: table name
:param where: where clause
:return: None
'''
sqlstring = f"delete from {tbname} where {where}"
self._cursor.execute(sqlstring)
self._conn.commit()
def doUpdateKV(self, tbname: str, expression: str, **keyvalues) -> None:
'''
Update column=value pairs on rows matched by the expression
:param tbname: table name
:param expression: where expression
:param keyvalues: column=value pairs to update
:return: None
'''
keys = list(keyvalues.keys())
values = list(keyvalues.values())
keypairs = []
for i in range(len(keys)):
temp = f"{keys[i]}=\"{(values[i])}\""
keypairs.append(temp)
keypairs = str(keypairs).replace("[", "").replace("]", "").replace("'", "")
sqlstring = f"update {tbname} set {keypairs} where {expression}"
self._cursor.execute(sqlstring)
self._conn.commit()
# run a query that returns a single value, e.g. a row count
# (execute_scalar is a pymssql Cursor method; a pymysql cursor has no such method)
def selectCount(self, sqlstring):
cnt = self._cursor.execute_scalar(sqlstring)
return cnt
# get the title dict: column name -> column type
def MSSQL_GetTitleDict(self, cursor):
titleDict = {}
for rows in cursor.get_header():
titleDict[rows[0]] = rows[1]
# remember to close the connection once you are done with it
return titleDict
def createtable(self, tbname: str, *args: list) -> None:
'''
Create a new table from lists,
e.g. createtable("TB_TestTbale", ["ID","nchar(10)"], ["Password","nchar(20)","NOT NULL"])
Each field is one list in the order [name, type, *constraints, *other]
:param tbname: table name
:param args: field definitions
:return: None
'''
data = []
for i in range(len(args)):
temp = str(args[i]).replace("[", "").replace("]", "").replace("'", "").replace(",", "")
data.append(temp)
data = str(data).replace("[", "(").replace("]", ")").replace("'", "")
sqlstring = f"create table {tbname} {data}"
self._cursor.execute(sqlstring)
self._conn.commit()
class ConnMySql(Conn):
def __init__(self):
oConn: pymysql.Connect = None
try:
oConn = pymysql.Connect(
host=current_app.config["db.host"],
user=current_app.config["db.user"],
passwd=current_app.config["db.passwd"],
db=current_app.config["db.db"],
port=int(current_app.config["db.port"]),
charset=current_app.config["db.charset"]
)
except Exception as err:
print('error:', err)
Conn.__init__(self, oConn)
class MySqlTemp(Conn):
def __init__(self):
oConn: pymysql.Connect = pymysql.Connect(
host="114.115.159.144",
user="caiji",
passwd="zzsn9988",
db="caiji",
port=3306,
charset="utf8"
)
Conn.__init__(self, oConn)
# test
# conn = MySqlTemp()
# o = conn.userGetFree("wenshu", 0) # userGetFree takes (group, last id)
# print(o.user_name)
# for row in results:
#     id = row[0]
#     proxy = row[1]
#     print(id, proxy)
#     proxyInfos = proxy.split('-')
#     for i in range(0, 4):
#         print("----", proxyInfos[i])
class ProxyDao():
def t(self):
pass
# basic case information
from util import UtilDate
from util import UtilNumber
class BaseInfo:
info_title: str # title
key_word: str # keywords
info_bianhao: str # case number
info_address: str # court of jurisdiction
info_time: str # publication date, yyyy-mm-dd
info_id: str # case ID
info_yuanyou: str # cause of action
info_content: str # full text
# whether this record's date is on or after the given date
def isAfter(self, sDate: str) -> bool:
if sDate == "":
return False
return self.info_time >= sDate
def toString(self):
return self.info_title + "\t" + self.key_word + "\t" + self.info_bianhao + "\t" + self.info_address + "\t" + self.info_time + "\t" + self.info_id
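# Sketch: the crawler treats isAfter as "keep collecting". Both sides are
# zero-padded yyyy-mm-dd strings, so plain string comparison orders correctly:
# b = BaseInfo()
# b.info_time = "2024-01-05"
# print(b.isAfter("2024-01-04")) # True - within the window, keep collecting
# print(b.isAfter("2024-01-06")) # False - older than the cutoff, stop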
home = C:\Program Files\Python
implementation = CPython
version_info = 3.8.0.final.0
virtualenv = 20.13.0
include-system-site-packages = true
base-prefix = C:\Program Files\Python
base-exec-prefix = C:\Program Files\Python
base-executable = C:\Program Files\Python\python.exe
# court judgment document (裁判文书) crawler
from datetime import datetime, timedelta
import json
import time
from flask import current_app as app
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.common.keys import Keys
from typing import List
import io
import sys
from dao.Conn import ConnMySql
from util import UtilBrowser
from util import UtilCaptcha
from entity.BaseInfo import BaseInfo
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import jsonpickle
from util.UtilCaptcha import getCaptchaMode1
from vo.LoginInfo import LoginInfo
class Service02:
browser: WebDriver
url = ""
dateFrom = ""
loginInfo: LoginInfo
# When a url is passed on the browser command line, Chrome keeps its default
# tab, so there are 2 tabs; when opened via driver.get there is only 1 tab.
tab1 = 1 # home/list page; 0 when opened via driver.get
tab2 = 2 # judgment document page; 1 when opened via driver.get
baseInfo = []
nRetry = 100 # retry count, currently unused
lstRet = []
# main procedure
def getData(self, sDateFrom: str, orgs: List[str]):
# keep crawling until everything back to the given date is collected; basic info is always collected
self.dateFrom = sDateFrom
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
print("getData...", flush=True)
for org in orgs:
ok1 = 0
print(org, flush=True)
# crawl one organization's data until done
while True:
# open the browser, find the organization and switch to its detail page;
# on failure, switch proxy IP and start over
while True:
if self.openBrowser(org):
break
else:
self.quitBrowser()
if ok1 == 0:
# collection unfinished, keep going
ok1 = self.getData1()
if ok1 == 1:
# collection finished, move on to the next organization
break
conn = ConnMySql()
if self.loginInfo is not None:
conn.userClearLoginStateByID(self.loginInfo.id)
conn.close()
# buffer the data
o = {"org": org, "baseInfo": self.baseInfo}
self.lstRet.append(o)
# all organizations done; return the jsonpickle-encoded list, shaped like
# [{"org": "...", "baseInfo": [...]}, ...]
retData = jsonpickle.encode(self.lstRet, unpicklable=False)
print(json.loads(retData))
return retData
# open the browser, find the organization and go to its info page
def openBrowser(self, org: str) -> bool:
ret = False
print("openBrowser...", flush=True)
conn = ConnMySql()
self.loginInfo = conn.userGetFree("wenshu", app.config['sys.userid'])
if self.loginInfo is None:
app.config['sys.userid'] = 0
self.loginInfo = conn.userGetFree("wenshu", app.config['sys.userid'])
app.config['sys.userid'] = self.loginInfo.id
# conn.userSetLoginStateByID(self.loginInfo.id)
conn.close()
self.browser = UtilBrowser.newChrome(app.config['sys.mainUrl'], False, app.config['sys.useProxy'])
# after opening the browser and the main page, close any extra tabs; currently unused
# if len(self.browser.window_handles) > 1:
#     self.browser.switch_to.window(self.browser.window_handles[0])
#     self.browser.close()
#     self.browser.switch_to.window(self.browser.window_handles[0])
loginMode = app.config['sys.loginMode']
# log in; to be moved into a dedicated procedure later
if loginMode == "0":
pass
elif loginMode == "1":
# account login is required; accounts live in the database and are rotated
objLogin = UtilBrowser.waitElement(self.browser, By.CSS_SELECTOR, app.config['sys.loginButton'], 30, 1)
if objLogin is not None:
objLogin.click()
# a standalone image-captcha page may appear at random,
# 4 characters of mixed-case letters and digits
nTry = 0
hasPass = False
objLoginCaptchaButton0 = UtilBrowser.waitElement(self.browser, By.CSS_SELECTOR, app.config['sys.loginCaptchaButton'], 10, 1)
if objLoginCaptchaButton0 is not None:
# try up to 10 times; continue once the captcha passes, otherwise give up and retry with another account
while nTry < 10:
nTry = nTry + 1
objLoginCaptchaButton = UtilBrowser.waitElement(self.browser, By.CSS_SELECTOR, app.config['sys.loginCaptchaButton'], 10, 1)
# get the captcha image url
# url = self.browser.find_element(By.CSS_SELECTOR, app.config['sys.loginCaptchaImage']).get_attribute('src')
strCaptcha = UtilCaptcha.getCaptchaMode1(self.browser, app.config['sys.loginCaptchaImage'])
self.browser.find_element(By.CSS_SELECTOR, app.config['sys.loginCaptchaInput']).send_keys(strCaptcha)
objLoginCaptchaButton.click()
time.sleep(1)
objLoginCaptchaButton = UtilBrowser.waitElement(self.browser, By.CSS_SELECTOR, app.config['sys.loginCaptchaButton'], 10, 1)
if objLoginCaptchaButton is None:
hasPass = True
break
if hasPass:
nTry = 0
else:
nTry = 4
iframe = None # login form iframe
while nTry < 3:
iframe = UtilBrowser.waitElement(self.browser, By.CSS_SELECTOR, "#contentIframe", 20, 1)
# WebDriverWait(driver=driver, timeout=20, poll_frequency=1, ignored_exceptions=None).until(expected_conditions.presence_of_element_located((By.ID,'contentIframe')))
if iframe is None:
self.browser.refresh()
else:
break
nTry = nTry + 1
if iframe is not None:
self.browser.switch_to.frame(iframe)
objUser = UtilBrowser.getElement(self.browser, By.CSS_SELECTOR, app.config['sys.loginUser'])
if objUser is not None:
objUser.send_keys(self.loginInfo.user_name)
objPass = UtilBrowser.getElement(self.browser, By.CSS_SELECTOR, app.config['sys.loginPasswd'])
objPass.send_keys(self.loginInfo.user_passwd)
objLogin = UtilBrowser.getElement(self.browser, By.CSS_SELECTOR, app.config['sys.loginOk'])
# an SMS code may be required at login; not handled yet
if app.config['sys.loginSMSCode'] != "":
objSMS = UtilBrowser.hasElement(self.browser, By.CSS_SELECTOR,
app.config['sys.loginSMSCode'])
if objSMS: # hasElement returns a bool, so test it directly
self.browser.find_element(By.CSS_SELECTOR, app.config['sys.loginSMSCode']).send_keys("")
objLogin.click()
time.sleep(5)
self.browser.refresh()
# ret = True # should only return True once a specific element appears
# if credentials are provided log in automatically, otherwise wait for manual login
# self.browser.find_element(By.CSS_SELECTOR, app.config['sys.loginButton']).click()
elif loginMode == "2":
# cookie login, not handled yet
pass
# self.browser.get(app.config['sys.mainUrl'])
# type the organization name into the search box and submit
objSearchInput = UtilBrowser.getElement(self.browser, By.CSS_SELECTOR, app.config['css.searchInput'])
# if login failed the page has no search box
if objSearchInput is None:
ret = False
else:
objSearchInput.send_keys(org)
time.sleep(2)
# click the search button
objSearchButton = UtilBrowser.getElement(self.browser, By.CSS_SELECTOR, app.config['css.searchButton'])
objSearchButton.click()
time.sleep(5)
self.browser.refresh()
# sort by date, newest first
objDateSort = UtilBrowser.getElement(self.browser, By.CSS_SELECTOR, app.config['css.listDateSort'])
if objDateSort is not None:
objDateSort.click()
time.sleep(5)
ret = True
return ret
# quit the browser
def quitBrowser(self):
try:
self.browser.quit()
self.browser = None
except:
pass
# Judgment documents. When re-scanning an organization, new documents may have
# appeared since the browser was last opened, so we always start from page 1
# rather than resuming from the page where we stopped.
def getData1(self) -> int:
ret = 0
print("getData1...", flush=True)
# on the list page
selector_title = app.config['css.listTitle'] # "#_view_1545184311000 > div:nth-child(?) > div.list_title.clearfix > h4 > a"
# on the detail page; documents open in a new tab
baseInfo1: BaseInfo
# number of documents
s = self.getAttr(By.CSS_SELECTOR, app.config['css.listCount'], "textContent")
n = self.toInt(s)
if n == 0:
# no data: quit and do not collect this category again
return 1
pageNo = 0
while True:
# collect page after page
pageNo = pageNo + 1
for i in range(1, 6): # 5 items per page
print(f"----文书数量:{n},每页文书个数:5,当前页号:{pageNo},当前序号:{i}", flush=True)
# on a partial page, stop at the first missing row
# the document list starts at nth-child(3)
if not UtilBrowser.hasElement(self.browser, By.CSS_SELECTOR,
selector_title.replace("?", str(i + 2), 1)):
break
baseInfo1 = BaseInfo()
baseInfo1.info_title = self.getAttr(By.CSS_SELECTOR, selector_title.replace("?", str(i + 2), 1), "textContent")
baseInfo1.info_bianhao = self.getAttr(By.CSS_SELECTOR,
app.config['css.listBianhao'].replace("?", str(i + 2), 1),
"textContent")
baseInfo1.info_address = self.getAttr(By.CSS_SELECTOR,
app.config['css.listAddress'].replace("?", str(i + 2), 1),
"textContent")
baseInfo1.info_time = self.getAttr(By.CSS_SELECTOR,
app.config['css.listTime'].replace("?", str(i + 2), 1),
"textContent")
baseInfo1.info_yuanyou = self.getAttr(By.CSS_SELECTOR,
app.config['css.listYuanyou'].replace("?", str(i + 2), 1),
"textContent")
# https://wenshu.court.gov.cn/website/wenshu/181107ANFZ0BXSK4/index.html?docId=OUD3Tm7EvEQVkiexnBa5S3nnG9zDkQyxiWoR8jr7QJJtFc9Y6vX89Z/dgBYosE2gstL9HQn+C934OzwMvqVgk+DtAz+qRVZWr9dI7ybeiFnaPaFBceYmelTK0+qydxfd
link = self.getAttr(By.CSS_SELECTOR, selector_title.replace("?", str(i + 2), 1),
"href")
pos = link.index("=") + 1
baseInfo1.info_id = link[pos:]
if baseInfo1.isAfter(self.dateFrom):
# the entry is on/after the cutoff date: append it to the buffer unless it is already there
exist = False
for e in self.baseInfo:
# the original document ID is available, so dedupe by ID directly
if e.info_id == baseInfo1.info_id: # e.toString == baseInfo1.toString()
exist = True
break
if exist == False:
# fetch the document body, but only for entries not fetched before
# click the title link
self.browser.find_element(By.CSS_SELECTOR, selector_title.replace("?", str(i + 2), 1)).click()
# the body opens automatically in a new tab, which becomes the active tab
t1 = datetime.now()
while True:
if len(self.browser.window_handles) > self.tab2:
break
t2 = datetime.now()
if (t2 - t1).seconds > 60:
break
time.sleep(1)
if len(self.browser.window_handles) > self.tab2:
self.browser.switch_to.window(self.browser.window_handles[self.tab2])
baseInfo1.info_content = self.getAttr(By.CSS_SELECTOR, app.config['css.contContent'],
"textContent")
time.sleep(5)
# close the body tab and return to the list tab
self.browser.close()
self.browser.switch_to.window(self.browser.window_handles[self.tab1])
print("--------当前文书长度:", len(baseInfo1.info_content), flush=True)
self.baseInfo.append(baseInfo1)
else:
# the entry is dated before the cutoff, so everything newer has been collected
ret = 1
break
# if there is a next page, continue; otherwise collection is complete
# ("disabled" on the button means no next page; the original used str.find(),
# whose -1 "not found" result is truthy and inverted the test)
if "disabled" in self.getAttr(By.CSS_SELECTOR, app.config['css.listNextPage'], "class"):
ret = 1
else:
self.browser.find_element(By.CSS_SELECTOR, app.config['css.listNextPage']).click()
time.sleep(5)
# on a captcha or IP-block page, bail out and switch IP before retrying
if self.hasCaptcha() or self.hasBlock():
break
if ret == 1:
break
return ret
def toInt(self, s) -> int:
ret = 0
try:
ret = int(s)
except:
pass
return ret
# return the value of the given attribute (e.g. class) of a page element
def getAttr(self, by: str, selector: str, attr: str) -> str:
ret = ""
try:
if attr == "text":
ret = self.browser.find_element(by, selector).text
else:
ret = self.browser.find_element(by, selector).get_attribute(attr)
except:
pass
return ret
# whether a captcha dialog has appeared
def hasCaptcha(self) -> bool:
ret = False
# we: WebElement
# wes = self.browser.find_elements(By.TAG_NAME, "div")
# for we in wes:
#     if we.get_attribute("class").find("geetest_box") != -1:
#         if we.get_attribute("style").find("display: block;") != -1:
#             ret = True
return ret
# whether the IP has been blocked
def hasBlock(self) -> bool:
ret = False
# p.prom reads: 您的地址(1.2.3.4)访问疑似夹带攻击行为,请稍后重试,或注册/登录
# if self.getAttr(By.CSS_SELECTOR, "body > div > p", "text").find("夹带攻击行为") != -1:
#     print("*******夹带攻击行为*******")
#     ret = True
return ret
# system configuration
[sys]
# OCR url used to recognize the wenshu.court.gov.cn captcha
ocrUrl=http://114.116.49.86:8013/wzsb_app?withCrLf=false
# login mode: 0 = no login, 1 = account login (the password/SMS/captcha selectors must be set as needed), 2 = cookie login
loginMode=1
# whether to use a proxy: 0 = no, 1 = yes; sites that require login generally should not use a proxy
useProxy=1
# captcha recognition: 0 = off, 1 = on; a fixed method for now, to be extended to other modes later
verifiCode=0
# login url ?open=login
loginUrl=https://wenshu.court.gov.cn/website/wenshu/181010CARHS5BS3C/index.html
# main url; after login the site may redirect here
mainUrl=https://wenshu.court.gov.cn
# login - username input
loginUser=#root > div > form > div > div:nth-child(1) > div > div > div > input
# login - password input
loginPasswd=#root > div > form > div > div:nth-child(2) > div > div > div > input
# the image captcha lives on a separate page and returns to the login page once solved
# login - captcha input; when non-empty the captcha must be recognized
loginCaptchaInput=body > div > div.card-body > div > form > div.captcha > input
# login - captcha image
loginCaptchaImage=#Image1
# login - captcha confirm button
loginCaptchaButton=body > div > div.card-body > div > form > div.warnbtn > input
# login - SMS code, may be required together with the image captcha; not handled yet
loginSMSCode=
# login button on the main page
loginButton=#loginLi > a
# confirm button on the login page
loginOk=#root > div > form > div > div.login-button-container > span
# database configuration
[db]
host=114.115.159.144
port=3306
user=caiji
passwd=zzsn9988
db=caiji
charset=utf8
# css selector configuration
[css]
# search - input box
searchInput=#_view_1540966814000 > div > div.search-wrapper.clearfix > div.search-middle > input
# search - button
searchButton=#_view_1540966814000 > div > div.search-wrapper.clearfix > div.search-rightBtn.search-click
# list - sort-by-date-descending button
listDateSort=#_view_1545184311000 > div.LM_tool.clearfix > div:nth-child(2) > a
# list - case count
listCount=#_view_1545184311000 > div.LM_con.clearfix > div.fr.con_right > span
# list - case title
listTitle=#_view_1545184311000 > div:nth-child(?) > div.list_title.clearfix > h4 > a
# list - case number
listBianhao=#_view_1545184311000 > div:nth-child(?) > div.list_subtitle > span.ah
# list - court
listAddress=#_view_1545184311000 > div:nth-child(?) > div.list_subtitle > span.slfyName
# list - closing date
listTime=#_view_1545184311000 > div:nth-child(?) > div.list_subtitle > span.cprq
# list - cause of action
listYuanyou=#_view_1545184311000 > div:nth-child(?) > div.list_reason > p
# next-page button
listNextPage=#_view_1545184311000 > div.left_7_3 > a:last-child
# body - link, usually the same as the title
contLink=#_view_1545184311000 > div:nth-child(?) > div.list_title.clearfix > h4 > a
# body - content
contContent=#_view_1541573883000 > div > div.PDF_box > div.PDF_pox
from flask import current_app
from datetime import datetime, timedelta
import time
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.chrome.options import Options
from msedge.selenium_tools import EdgeOptions
from msedge.selenium_tools import Edge
from selenium.webdriver.chrome.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.remote.webelement import WebElement
import seleniumwire.undetected_chromedriver.v2
from dao.Conn import ConnMySql
from vo.ProxyInfo import ProxyInfo
# wait for an element to appear; timeout = max wait, frequency = polling interval, both in seconds
def waitElement(browser: WebDriver, by: str, selecter: str, timeout: int = 20, frequency: int = 1) -> WebElement:
ret = None
t1 = datetime.now()
while (datetime.now() - t1).seconds < timeout:
if hasElement(browser, by, selecter):
ret = getElement(browser, by, selecter)
break
time.sleep(frequency)
return ret
# get an element, or None if it does not exist
def getElement(browser: WebDriver, by: str, selecter: str) -> WebElement:
ret = None
try:
ret = browser.find_element(by, selecter)
except:
pass
return ret
# check whether an element exists.
def hasElement(browser: WebDriver, by: str, selecter: str) -> bool:
ret = True
try:
browser.find_element(by, selecter)
except:
ret = False
return ret
# return the value of the given attribute (e.g. class) of a page element; empty string when the element is missing
def getAttr(brow: webdriver, by: str, selector: str, attr: str) -> str:
ret = ""
try:
# the original passed str instead of by and assigned the result to an unused variable
ret = brow.find_element(by, selector).get_attribute(attr)
except:
pass
return ret
# open an Edge browser (useProxy is currently unused)
def newEdge(useProxy):
edge_options = EdgeOptions()
edge_options.use_chromium = True
edge_options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Edge(options=edge_options)
# driver.get('https://bing.com')
# element = driver.find_element(By.ID, 'sb_form_q')
# element.send_keys('WebDriver')
# element.submit()
return driver
# open a Chrome browser
# url: url to open when the browser starts
# useProxy: whether to use a proxy, True = use
# cookie: cookie to set, e.g. login state
def newChrome(url: str = "", debugMode: bool = False, useProxy: bool = False, cookie: str = "") -> WebDriver:
# keep the browser from closing automatically
option = webdriver.ChromeOptions()
if debugMode == False:
option.add_experimental_option("detach", True)
option.add_experimental_option('excludeSwitches', ['enable-automation']) # hide automation, window.navigator.webdriver=undefined
option.add_experimental_option('useAutomationExtension', False) # drop the "Chrome is being controlled" prompt
option.add_argument("--disable-blink-features=AutomationControlled")
option.add_argument('disable-infobars') # hide the automated-software infobar
option.add_argument('--disable-gpu') # works around a bug mentioned in the Chrome docs
option.add_argument('--ignore-certificate-errors')
# option.add_argument("--user-data-dir=C:/Users/Administrator/AppData/Local/Google/Chrome/User Data/Default");
# option.add_argument("--test-type=allow-running-insecure-content");
# option.add_argument('--headless') # run Chrome in the background
# rotate the proxy IP, http or socks5; when useProxy=False any previous proxy must be cleared
isHttpProxy = False
seleniumwire_options = {}
proxy = None
proxy = None
if useProxy:
# read the next proxy; wrap around to 0 after the last one
conn = ConnMySql()
proxyInfo: ProxyInfo
proxyInfo = conn.proxyGetNext(current_app.config['sys.proxyid'])
if proxyInfo is None:
current_app.config['sys.proxyid'] = 0
proxyInfo = conn.proxyGetNext(current_app.config['sys.proxyid'])
# (the original printed proxyInfo.ip before the None check, which would crash after the last proxy)
print("proxy_id:" + str(current_app.config['sys.proxyid']) + "," + proxyInfo.ip, flush=True)
current_app.config['sys.proxyid'] = proxyInfo.id
desired_capabilities = webdriver.DesiredCapabilities.CHROME.copy()
sProxy = ""
if proxyInfo.user_name == "":
# option.add_argument(f'--proxy-server=http://{proxy_ip}:{proxy_port}')
sProxy = f'http://{proxyInfo.ip}:{proxyInfo.port}'
else:
# option.add_argument(f'--proxy-server=http://{proxy_username}:{proxy_password}@{proxy_ip}:{proxy_port}')
sProxy = f'http://{proxyInfo.user_name}:{proxyInfo.user_passwd}@{proxyInfo.ip}:{proxyInfo.port}'
webdriver.DesiredCapabilities.CHROME['proxy'] = {
"httpProxy": sProxy,
"sslProxy": sProxy,
"proxyType": "manual"
}
conn.close()
# pick a random UserAgent
userAgent = getUserAgent()
# option.add_argument('user-agent=%s' % userAgent)
if url != "":
option.add_argument('--app=' + url) # open the url in the default window, e.g. https://wenshu.court.gov.cn
if debugMode:
# option.debugger_address = "127.0.0.1:9222"
option.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
# create the Chrome instance with the options above
driver = webdriver.Chrome(
options=option) # service=ChromeService(ChromeDriverManager().install()), desired_capabilities=desired_capabilities, seleniumwire_options=seleniumwire_options
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
if debugMode == False:
driver.maximize_window() # maximize the window
driver.delete_all_cookies() # clear cookies
# cookie may later become a bool: when True, read a saved cookie from the database and log in with it,
# then save the cookie again since its expiry may have been refreshed. A background task could also
# re-login recently unused cookies periodically and store the fresh ones.
if cookie != "":
cookie_dict = eval(cookie)
driver.add_cookie(cookie_dict)
driver.refresh()
return driver
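# A minimal sketch of driving newChrome from the Flask app context; the config
# keys come from sys.ini and the call pattern mirrors Service02.openBrowser:
# from flask import current_app
# browser = newChrome(current_app.config['sys.mainUrl'], False, current_app.config['sys.useProxy'])
# browser.quit()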
# return proxy ip and port, e.g. 1.2.3.4:555
def getProxyIP():
return ""
# return a random browser UserAgent
def getUserAgent():
user_agents = [
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
"Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
"Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
"Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
"Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
"Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
"Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
"Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10"
]
# random.choice returns a random item from the list
user_agent = random.choice(user_agents)
return user_agent
# captcha recognition; only the wenshu.court.gov.cn captcha is handled for now
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.webdriver import WebDriver
import requests
from flask import current_app
from pathlib import Path
import tempfile
import uuid
import hashlib
import os
import json
# selecter: css selector of the captcha image
def getCaptchaMode1(browser: WebDriver, selecter: str):
ret = ""
out_path = "./Temp_file"
try:
Path(out_path).mkdir(parents=True, exist_ok=True)
# screenshot the captcha element into a local png
path_name = os.path.join(out_path, str(uuid.uuid4())) + ".png"
print(path_name)
img = browser.find_element(By.CSS_SELECTOR, selecter)
img.screenshot(path_name)
# # download by url instead
# r = requests.get(imgUrl)
# with open(path_name, 'wb') as f:
#     f.write(r.content)
ocrUrl = current_app.config['sys.ocrUrl']
# call the OCR service
file = open(path_name, "rb")
response = requests.post(ocrUrl, files={"multiRequest": file})
file.close()
os.remove(path_name)
# response: {"code":200,"logs":null,"message":"success","resultData":"2rVK"}
oRet = json.loads(response.text)
ret = oRet["resultData"]
# os.remove(path_name)
print(ret)
except Exception as err:
print('getCaptchaMode1 error:', err)
return ret
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
# convert a "yyyy年m月d日" style date to "yyyy-mm-dd"
def convertDate(sDate: str):
sDate = sDate.replace("年", "-")
sDate = sDate.replace("月", "-")
sDate = sDate.replace("日", "")
date_obj = datetime.strptime(sDate, '%Y-%m-%d')
sDate = date_obj.strftime('%Y-%m-%d')
return sDate
# add an offset to a date; ymd is the unit: y = years, m = months, d = days
def dateAdd(sDate: str, ymd: str = "d", diff: int = 1):
if sDate == "":
sDate = datetime.now().strftime('%Y-%m-%d')
date_obj = datetime.strptime(sDate, '%Y-%m-%d')
# relativedelta and timedelta both accept negative offsets directly,
# so no separate branches for positive and negative diff are needed
if ymd == "y":
date_obj = date_obj + relativedelta(years=diff)
elif ymd == "m":
date_obj = date_obj + relativedelta(months=diff)
elif ymd == "d":
date_obj = date_obj + timedelta(days=diff)
sDate = date_obj.strftime('%Y-%m-%d')
return sDate
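# A minimal usage sketch: the /Main/getData endpoint computes its cutoff as
# dateAdd("", "d", -(last - 1)); e.g. with last=7 and today assumed to be
# 2024-01-10, dateAdd("", "d", -6) returns "2024-01-04".
# print(dateAdd("2024-01-31", "m", 1)) # relativedelta clamps to "2024-02-29"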
# number-handling helpers
# Convert a money string that may contain units such as 万/亿 or the words
# 人民币/元 into a float. Note: the units are simply stripped, so "500万元"
# becomes 500.0; callers must rescale if they need the amount in plain yuan.
def convertMoney(sMoney: str):
sMoney = sMoney.replace("万", "")
sMoney = sMoney.replace("亿", "")
sMoney = sMoney.replace("人民币", "")
sMoney = sMoney.replace("元", "")
return float(sMoney)
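# If full-yuan values are ever needed, a scale-aware variant could look like
# this sketch (the unit list is an assumption about the inputs seen in practice):
# def convertMoneyYuan(sMoney: str) -> float:
#     scale = 10000.0 if "万" in sMoney else 100000000.0 if "亿" in sMoney else 1.0
#     for token in ("万", "亿", "人民币", "元", ","):
#         sMoney = sMoney.replace(token, "")
#     return float(sMoney) * scale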
from selenium.webdriver.chrome.webdriver import WebDriver
from seleniumwire import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
# proxy IP pool
class UtilProxy:
id: int
ip: str
port: str
name: str
password: str
# switch to a different proxy IP
def alterIP(self, browser: WebDriver):
pass
# account information
from util import UtilDate
from util import UtilNumber
class LoginInfo:
id: int # record id
user_group: str
user_name: str
user_passwd: str
# proxy IP information
from util import UtilDate
from util import UtilNumber
class ProxyInfo:
id: int # record id
ip: str
port: str
user_name: str
user_passwd: str
import pandas as pd
import glob
# find all matching .xlsx research-report files under the given directory
csv_files = glob.glob(r"D:\机械项目研报\机械项目研报*.xlsx", recursive=True)
# an empty DataFrame to hold the merged data
merged_data = pd.DataFrame()
# read each Excel file and merge it in
for file in csv_files:
data = pd.read_excel(file, dtype=str)
# drop the last column
# data = data.iloc[:, :-1]
dad = pd.DataFrame(data, dtype=str)
# DataFrame.append was removed in pandas 2.x; concat is the supported spelling
merged_data = pd.concat([merged_data, dad], ignore_index=True)
sorted_df = merged_data.sort_values('industry')
grouped = merged_data.groupby('industry')
# write each industry group to its own sheet in the output workbook
# merged_data.to_csv(r"D:\hg\tmp\11.csv", encoding='gbk', index=False, quoting=1, quotechar='"', escapechar='\\')
# merged_data.to_excel(r"D:\机械项目研报\机械项目研报汇总.xlsx", index=False, engine='openpyxl')
with pd.ExcelWriter(r'D:\机械项目研报\机械项目研报汇总2.xlsx') as writer:
for group_name, group_df in grouped:
group_df.to_excel(writer, sheet_name=group_name, index=False)