Commit 08e4725c  Author: 薛凌堃

12/21

Parent 8f2915d4
import reits
import policy_beijing, policy_chongqing, policy_fujian, policy_guangdong
import policy_chongqing, policy_fujian, policy_guangdong
import policy_guangxi, policy_gwy, policy_hainan, policy_heilongjiang, policy_hubei, policy_jiangsu
import policy_jiangxi, policy_jilin, policy_liaoning, policy_neimenggu, policy_shandong, policy_hubei
import policy_shanxi, policy_sichuan, policy_tianjin, policy_yunnan, policy_zhejiang
import RuleGuide_shanghai, RuleGuide_shenzhen
import LawRules_shenzhen, LawRules_2_shenzhen
from REITs_policyData.policy_beijing import beijing
if __name__ == "__main__":
policy_beijing.beijing()
beijing()
reits.sse()
reits.reform()
reits.hebei()
......
# -*- coding: utf-8 -*-
"""
The simulated-click approach does not work here; an account login is required.
"""
import json
import re
import time
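# Since simulated clicking is ruled out, this spider reuses stored login cookies
# (token.getToken() / json.loads(cookieinfo[1]) in __main__ further below). A minimal
# sketch of applying them to a requests session, assuming the stored JSON is a simple
# name -> value mapping (the session object itself is illustrative, not from this file):
#   cookieinfo = token.getToken()
#   cookie_ = json.loads(cookieinfo[1])
#   session = requests.Session()
#   session.cookies.update(cookie_)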
......@@ -296,7 +292,7 @@ def dic_handle(result_dic):
return aa_dict
# Preparation for collection
def redaytowork(com_name,social_code,securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
def redaytowork(com_name,social_code,securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
# if social_code:
# dic_info = baseCore.getInfomation(social_code)
......@@ -342,7 +338,7 @@ def redaytowork(com_name,social_code,securitiesCode, securitiesShortName, listin
else:
# 开始采集
try:
if spiderwork(soup, com_name, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
if spiderwork(soup, com_name, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
count += 1
log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time, time.time())}')
token.updateTokeen(id_cookie,3)
......@@ -377,7 +373,7 @@ def ifbeforename(company_url):
return ''
# Collect basic information and business-registration information
def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
qccid = company_url.split('firm/')[1].split('.html')[0]
# 将采集到的企查查id更新
updateSql = f"update EnterpriseInfo set QCCID = '{qccid}' where SocialCode = '{social_code}'"
......@@ -467,7 +463,7 @@ def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, ca
aa_dic['listingDate'] = listingDate
aa_dic['category'] = category
aa_dic['exchange'] = exchange
aa_dic['listingType'] = listType
# print(aa_dic)
sendkafka(aa_dic)
......@@ -486,11 +482,11 @@ def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, ca
aa_dic['listingDate'] = listingDate
aa_dic['category'] = category
aa_dic['exchange'] = exchange
aa_dic['listingType'] = listType
sendkafka(aa_dic)
# Check whether the company names match
def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
company_url = ''
try:
company_list = soup.find('table', class_='app-ltable ntable ntable-list ntable ntable-list')
......@@ -530,7 +526,7 @@ def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDat
company_url = info_t.find('a')['href']
beforename = ifbeforename(company_url)
if beforename == receptname:
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name)
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange,listType, ynDomestic, countryName, file_name)
else:
#没有搜到相同的企业名称
data = [com_name, social_code]
......@@ -544,7 +540,7 @@ if __name__ == '__main__':
while True:
nowtime = baseCore.getNowTime(1).replace('-', '')[:8]
file_name = f'./data/国内企业基本信息采集情况_{nowtime}.xlsx'
file_name = f'./data/国内企业基本信息采集情况.xlsx'
file.createFile(file_name)
cookieinfo = token.getToken()
......@@ -553,6 +549,7 @@ if __name__ == '__main__':
else:
log.info('==========已无cookies==========')
time.sleep(30)
continue
id_cookie = cookieinfo[0]
cookie_ = json.loads(cookieinfo[1])
......@@ -599,6 +596,11 @@ if __name__ == '__main__':
while flag:
log.info('--------已没有数据---------')
time.sleep(30)
if not baseCore.check_mysql_conn(cnx_):
# 144数据库
cnx_ = baseCore.cnx
cursor_ = cnx_.cursor()
log.info('===11数据库重新连接成功===')
company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
if company_field:
flag = False
......@@ -608,7 +610,7 @@ if __name__ == '__main__':
continue
social_code = company_field.split('|')[0]
com_name = company_field.split('|')[2].replace(' ', '')
com_name = company_field.split('|')[1].replace(' ', '')
ynDomestic = company_field.split('|')[15]
countryName = company_field.split('|')[16]
......@@ -617,6 +619,7 @@ if __name__ == '__main__':
listingDate = company_field.split('|')[21]
category = company_field.split('|')[19]
exchange = company_field.split('|')[20]
listType = company_field.split('|')[21]
# ynDomestic = ''
# countryName = ''
# securitiesCode = ''
......@@ -625,8 +628,8 @@ if __name__ == '__main__':
# category = ''
# exchange = ''
count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange,ynDomestic, countryName, file_name)
time.sleep(40)
count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange,listType, ynDomestic, countryName, file_name)
time.sleep(2)
# break
# baseCore.r.close()
# baseCore.sendEmail(file_name)
......
# CSRC Shanghai and Shenzhen corporate bond and enterprise bond collection
"""
CSRC company list
"""
import time
import random
import requests
from bs4 import BeautifulSoup
from retry import retry
from base import BaseCore
from obs import ObsClient
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
taskType = '企业名单/证监会'
def createDriver():
chrome_driver = r'D:\cmd100\chromedriver.exe'
path = Service(chrome_driver)
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = r'D:\Google\Chrome\Application\chrome.exe'
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument('user-agent='+'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
chrome_options.add_argument('--headless')
# 设置代理
# proxy = "127.0.0.1:8080" # 代理地址和端口
# chrome_options.add_argument('--proxy-server=http://' + proxy)
driver = webdriver.Chrome(service=path, chrome_options=chrome_options)
return driver
@retry(tries=3, delay=5)
def RequestUrl(url):
# ip = baseCore.get_proxy()
# proxy = {'https': 'http://127.0.0.1:8888', 'http': 'http://127.0.0.1:8888'}
response = requests.get(url=url, headers=headers)
response.encoding = response.apparent_encoding
if response.status_code == 200:
soup = BeautifulSoup(response.text, 'lxml')
return soup
else:
raise requests.HTTPError(f'request failed with status {response.status_code}: {url}')
def browserRequest(url):
browser = createDriver()
browser.get(url)
wait = WebDriverWait(browser, 30)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "m-table2")))
page_source = browser.page_source
soup = BeautifulSoup(page_source, 'html.parser')
browser.quit()
return soup
def getUrl(url_parm):
# Shenzhen market
# Shanghai market
url = f'http://eid.csrc.gov.cn/{url_parm}/index_f.html'
# Beijing Stock Exchange
return url
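# Illustration: the column code is interpolated straight into the listing URL, and
# SpiderByZJH below derives page N by swapping the index suffix:
#   getUrl('201411')                                     -> 'http://eid.csrc.gov.cn/201411/index_f.html'
#   getUrl('201411').split('index')[0] + 'index_3.html'  -> 'http://eid.csrc.gov.cn/201411/index_3.html'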
# Field-name mapping between page labels and database columns
def getmap(dic_info):
data_dic = {
'债券代码': 'zhaiquan_code',
'名称': 'zhaiquan_name',
'上市地': 'ipo_place',
'全称': 'full_name',
'发行人': 'issure',
'发行量(亿元)': 'volume',
'发行价格(元)': 'money',
'发行方式': 'method',
'期限(年)': 'tenure',
'到期日期': 'last_date',
'票面利率(%)': 'rate',
'利率类型': 'lilvtype',
'付息方式': 'payment',
'起息日期': 'start_date',
'上市日期': 'list_date',
}
dict3 = {value: dic_info.get(key, '') for key, value in data_dic.items()}
print(dict3)
return dict3
# for key1,value1 in data_dic:
# for key2 in dic_info.keys():
# if key2 == key1:
# dic_info[data_dic[key1]] = dic_info[key2]
# del dic_info[key2]
# break
# else:
# dic_info[data_dic[key1]] = ''
# continue
# print(data_dic)
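# Minimal usage sketch of getmap (sample values invented for illustration):
#   getmap({'债券代码': '112233', '名称': '某某债', '发行人': '某某公司'})
# returns
#   {'zhaiquan_code': '112233', 'zhaiquan_name': '某某债', 'ipo_place': '', 'full_name': '',
#    'issure': '某某公司', 'volume': '', 'money': '', 'method': '', 'tenure': '', 'last_date': '',
#    'rate': '', 'lilvtype': '', 'payment': '', 'start_date': '', 'list_date': ''}
# Keys absent from dic_info fall back to '' via dic_info.get(key, '').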
# Collect information
def SpiderByZJH(url, start_time):  # dic_info: basic info fetched from the database
try:
soup = RequestUrl(url)
except:
# Request failed; log the error
log.error(f'请求失败:{url}')
# put back into redis
time.sleep(random.randint(60, 120))
soup = ''
if soup == '':
return
# 判断查找内容是否存在
# try:
# is_exist = soup.find('div',class_='con').text
# if is_exist == '没有查询到数据':
# state = 0
# takeTime = baseCore.getTimeCost(start_time, time.time())
# baseCore.recordLog(social_code, taskType, state, takeTime, url, '没有查询到数据')
# return
# except:
# pass
# Get the total page count first
page = soup.find('div', class_='pages').find_all('li')[-1]
total = page.find('b').text
for i in range(1,int(total)+1):
log.info(f'==========正在采集第{i}页=========')
if i == 1:
href = url
else:
# http://eid.csrc.gov.cn/101811/index_3_f.html
href = url.split('index')[0] + f'index_{i}.html'
try:
soup = browserRequest(href)
except:
# Request failed; log the error and skip this page
log.error(f'请求失败:{url}')
# put back into redis
continue
tr_list1 = soup.find('table', class_='m-table2')
# print(tr_list1)
tr_list = tr_list1.find_all('tr')
# pageIndex = 0
for tr in tr_list[1:]:
dic_info = {}
# pageIndex += 1
td_list = tr.find_all('td')
zhaiquan_code = td_list[0].text.replace('\r', '').replace('\n', '').replace(' ','')
zhaiquan_name = td_list[1].text.replace('\r', '').replace('\n', '').replace(' ','')
ipo_place = td_list[2].text.replace('\r', '').replace('\n', '').replace(' ','')
list_date = td_list[3].text.replace('\r', '').replace('\n', '').replace(' ','')
last_date = td_list[4].text.replace('\r', '').replace('\n', '').replace(' ','')
# print(pdf_url)
selectSql = f"select count(1) from debt_secutity where zhaiquan_code='{zhaiquan_code}' and zhaiquan_name='{zhaiquan_name}'"
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{zhaiquan_code}-------{zhaiquan_name}---已经存在")
continue
else:
# dic_info = {
# '债券代码': zhaiquan_code,
# '名称': zhaiquan_name,
# '上市地': ipo_place,
# '上市日期': list_date,
# '到期日期': last_date,
# }
info_url = 'http://eid.csrc.gov.cn/' + td_list[0].find('a')['href']
soup_info = RequestUrl(info_url)
try:
info_list = soup_info.find('table',class_='m-table3').find_all('tr')
except Exception as e:
log.info(f'error---{e}---第{i}页--{info_url}')
info_list = []
dic_info = {
'债券代码': zhaiquan_code,
'名称': zhaiquan_name,
'上市地': ipo_place,
'上市日期': list_date,
'到期日期': last_date,
}
for tr_ in info_list:
td_list = tr_.find_all('td')
for td in td_list:
value = td.find('span').text.replace('\r', '').replace('\n', '').replace('\t', '').replace(' ', '')
span_tag = td.find('span')
span_tag.decompose()
name = td.text.replace(':', '').replace(':', '').replace('\r', '').replace('\n', '').replace(' ', '')
dic_info[name] = value
# Insert into the database
final_dic = getmap(dic_info)
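# Note: final_dic keeps data_dic's insertion order (dicts preserve order in Python 3.7+),
# so tuple(final_dic.values()) lines up positionally with the column list in insertSql below.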
values_tuple = tuple(final_dic.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into debt_secutity(zhaiquan_code,zhaiquan_name,ipo_place,full_name,issure,volume,money,method,tenure,last_date,rate,lilvtype,payment,start_date,list_date) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
cursor.execute(insertSql,values_tuple)
cnx.commit()
log.info(f"{zhaiquan_code}-------{zhaiquan_name}---新增")
log.info(f"【{i}/{total}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
if __name__ == '__main__':
num = 0
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
# 'Cookie': 'yfx_c_g_u_id_10008998=_ck23112014074614515077233960865; yfx_f_l_v_t_10008998=f_t_1700460466453__r_t_1700460466453__v_t_1700460466453__r_c_0; yfx_mr_10008998=%3A%3Amarket_type_free_search%3A%3A%3A%3Abaidu%3A%3A%3A%3A%3A%3A%3A%3Awww.baidu.com%3A%3A%3A%3Apmf_from_free_search; yfx_mr_f_10008998=%3A%3Amarket_type_free_search%3A%3A%3A%3Abaidu%3A%3A%3A%3A%3A%3A%3A%3Awww.baidu.com%3A%3A%3A%3Apmf_from_free_search; yfx_key_10008998=; _yfx_session_10008998=%7B%22_yfx_firsttime%22%3A%221701508120899%22%2C%22_yfx_lasttime%22%3A%221701508120899%22%2C%22_yfx_visittime%22%3A%221701508120899%22%2C%22_yfx_domidgroup%22%3A%221701508120899%22%2C%22_yfx_domallsize%22%3A%22100%22%2C%22_yfx_cookie%22%3A%2220231202170840906620987838830281%22%7D; acw_tc=01c604a717025467485993784e5c9f1847d885d2c82ee192efdfd627ba',
'Host': 'eid.csrc.gov.cn',
'If-Modified-Since': 'Thu, 14 Dec 2023 08:06:01 GMT',
'If-None-Match': '"657ab769-95b5"',
# 'Referer': 'http://eid.csrc.gov.cn/201010/index_3.html',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
dic_parms = {}
# Read the database to get stock codes, short names and unified social credit codes
while True:
start_time = time.time()
# Shanghai http://eid.csrc.gov.cn/101111/index.html  Shenzhen http://eid.csrc.gov.cn/101811/index.html  BSE http://eid.csrc.gov.cn/102611/index.html
# URL pattern for paging within a column, e.g. http://eid.csrc.gov.cn/101811/index_3_f.html
# SSE main board / SSE STAR Market
# url_parms = ['201010', '201014']
# url_parms = ['201011', '201013']
url_parms = ['201411', '201414', '202011', '202014']
# url_parms = ['202011', '202014']
for url_parm in url_parms:
url = getUrl(url_parm)
start_time_cj = time.time()
log.info(f'======开始处理======')
SpiderByZJH(url, start_time)
break
cursor.close()
cnx.close()
baseCore.close()
......@@ -94,7 +94,7 @@ def get_content2():
child_type = content_dict['childtype'] # 主题分类
except:
child_type = ''
# # 判断是否已经爬取过
# 判断是否已经爬取过
is_href = baseTool.db_storage.find_one({'网址': href})
if is_href:
num += 1
......@@ -102,6 +102,7 @@ def get_content2():
time.sleep(1)
continue
try:
# href = 'https://www.gov.cn/zhengce/zhengceku/202312/content_6921452.htm'
resp = requests.get(url=href, headers=baseTool.headers, verify=False)
resp.encoding = resp.apparent_encoding
resp_text = resp.text
......@@ -120,9 +121,7 @@ def get_content2():
except Exception as e:
log.info(f'---{href}--------{e}-------')
continue
if '.pdf' in file_href or '.docx' in file_href or '.doc' in file_href or 'xls' in file_href or '.zip' in file_href \
or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href \
or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href or '.odf' in file_href:
if '.ofd' in file_href or '.docx' in file_href or '.doc' in file_href or 'xls' in file_href or '.zip' in file_href or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href or '.pdf' in file_href:
file_name = file.text.strip()
category = os.path.splitext(file_href)[1]
if category not in file_name:
......
import requests,time,re,random
from base import BaseCore
import pandas as pd
from bs4 import BeautifulSoup as bs
from comData.Tyc.getTycId import getTycIdByXYDM
baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
taskType = '天眼查专利/国内上市'
def spider_zhuanli(com_name, social_code, tycid, page, list_all_info):
start_time = time.time()
log.info(f'===正在处理第{page}页===')
# list_all_info = []
t = int(time.time() * 1000)
header = {
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Host': 'capi.tianyancha.com',
'Origin': 'https://www.tianyancha.com',
'Referer': 'https://www.tianyancha.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzI3MzczNzEzMSIsImlhdCI6MTcwMzE1MjEzMSwiZXhwIjoxNzA1NzQ0MTMxfQ.3tF-UFhorC_mS4h2UIBOZamApfcaJEfjBbr8K11d2yHhELBM1pEvjd6yccxhLzVKRoyFdTn-1Cz6__ZpzgjnGg',
'X-TYCID': '6f6298905d3011ee96146793e725899d',
'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'version': 'TYC-Web'
}
url = f'https://capi.tianyancha.com/cloud-intellectual-property/patent/patentListV6?_={t}&id={tycid}&pageSize=100&pageNum={page}&type=-100&lprs=-100&applyYear=-100&pubYear=-100&fullSearchText=&sortField=&sortType=-100'
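# pageSize=100 with pageNum driving pagination; id is the Tianyancha company id (tycid)
# resolved in __main__ below; type/lprs/applyYear/pubYear stay at -100 (presumably "no filter").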
try:
ip = baseCore.get_proxy()
except:
time.sleep(2)
ip = baseCore.get_proxy()
try:
res_j = requests.get(url=url, headers=header, proxies=ip, verify=False).json()
except:
res_j = None
for i in range(3):
try:
res_j = requests.get(url=url, headers=header, verify=False).json()
break
except:
time.sleep(2)
if res_j is None:
# all retries failed; treat this page as having no data
return 0
# print(res_j)
list_all = res_j['data']['items']
# print(list_all)
if list_all:
for one_zhuanli in list_all:
title = one_zhuanli['title']
try:
shenqingri = one_zhuanli['applicationTime']
except:
shenqingri = ''
try:
shenqing_code = one_zhuanli['patentNum']
except:
shenqing_code = ''
try:
leixing = one_zhuanli['patentType']
except:
leixing = ''
try:
status = one_zhuanli['lprs']
except:
status = ''
try:
gongkairi = one_zhuanli['pubDate']
except:
gongkairi = ''
try:
gongkai_code = one_zhuanli['pubnumber']
except:
gongkai_code = ''
try:
famingren = one_zhuanli['inventor']
except:
famingren = ''
try:
shenqingren = one_zhuanli['applicantName']
except:
shenqingren = ''
try:
gongneng = one_zhuanli['cat']
except:
gongneng = ''
try:
uuid = one_zhuanli['uuid']
except:
uuid = ''
dic_info = {
'企业名称': com_name,
'统一信用代码': social_code,
'专利名称': title,
'申请日': shenqingri,
'申请号': shenqing_code,
'专利类型': leixing,
'专利状态': status,
'公开日': gongkairi,
'公开号': gongkai_code,
'发明人': famingren,
'申请人': shenqingren,
'功能': gongneng,
'天眼查详情id': uuid,
'年份': shenqingri[:4]
}
selectSql = f"select count(1) from zhuanli_sh_tyc where shenqing_code='{shenqing_code}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}-------{shenqing_code}---已经存在")
continue
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into zhuanli_sh_tyc(com_name,social_code,title,shenqingri,shenqing_code,leixing,status,gongkairi,gongkai_code,famingren,shenqingren,gongneng,uuid,year) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}-------{shenqing_code}---新增")
time.sleep(2)
# list_all_info.append(dic_info)
log.info(f"【{page}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
return page
else:
return 0
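# The per-field try/except blocks above could equivalently use dict.get with a default,
# e.g. (sketch, same response keys):
#   shenqingri = one_zhuanli.get('applicationTime', '')
#   shenqing_code = one_zhuanli.get('patentNum', '')
#   gongkairi = one_zhuanli.get('pubDate', '')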
if __name__ == "__main__":
while True:
list_all_info = []
# Using the social credit code pulled from Redis, fetch the corresponding basic info from the database
social_code = baseCore.redicPullData('ZhuanLi:gnshSocial_code')
# social_code = '9111010566840059XP'
# If there is no more data in Redis, wait
if social_code == None:
# time.sleep(20)
break
start = time.time()
try:
data = baseCore.getInfomation(social_code)
if len(data) != 0:
pass
else:
# 数据重新塞入redis
baseCore.rePutIntoR('ZhuanLi:gnshSocial_code', social_code)
continue
id = data[0]
com_name = data[1]
xydm = data[2]
tycid = data[11]
if tycid == None or tycid == '':
try:
retData = getTycIdByXYDM(xydm)
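# Assumption inferred from the branches below: retData['tycData'] carries the matched
# Tianyancha record (its 'id' becomes tycid), and retData['reput'] signals whether the
# company should be re-queued when no match is found.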
if retData['tycData'] and retData['reput']:
tycid = retData['tycData']['id']
# todo:写入数据库
updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
cursor.execute(updateSql)
cnx.commit()
elif not retData['tycData'] and retData['reput']:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
log.info(f'======={social_code}====重新放入redis====')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
continue
elif not retData['reput'] and not retData['tycData']:
continue
except:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
continue
count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理")
page = 1
while True:
page = spider_zhuanli(com_name, xydm, tycid, page, list_all_info)
if page != 0:
page += 1
else:
# print(len(list_all_info))
# df_all_info = pd.DataFrame(list_all_info)
# df_all_info.to_excel('中国上市企业专利.xlsx', index=False)
log.info(f"{id}---{xydm}----{tycid}----结束处理")
break
except Exception as e:
log.info(f'==={social_code}=====获取企业信息失败==={e}=')
# 重新塞入redis
baseCore.rePutIntoR('ZhuanLi:gnshSocial_code', social_code)
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
time.sleep(5)