丁双波 / zzsn_spider · Commits · 55610b8f

Commit 55610b8f
authored Jan 05, 2024 by 薛凌堃

24/01/05

Parent: 23d4dd76

Showing 6 changed files with 563 additions and 121 deletions (+563 / -121)
base/BaseCore.py                            +1    -0
comData/BaseInfo_qcc/getcode.py             +31   -0
comData/YanBao/resentYanbao.py              +153  -120
comData/policylaw/gwyparts.py               +1    -1
comData/shangbiao/tyc_shangbiao_zg500.py    +151  -0
comData/zhuanli/tyc_zhuanli_zg500.py        +226  -0
base/BaseCore.py

@@ -403,6 +403,7 @@ class BaseCore:
         sql = "select proxy from clb_proxy"
         self.cursor.execute(sql)
         proxy_lists = self.cursor.fetchall()
+        self.cnx.commit()
         ip_list = []
         for proxy_ in proxy_lists:
             ip_list.append(str(proxy_).replace("('", '').replace("',)", ''))
...
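The one added line commits the connection after the proxy query; the surrounding code still turns each fetched row into a string and strips the tuple punctuation. As a minimal sketch (not part of the commit), the same list can be built by indexing the tuples that fetchall() already returns:

# Sketch only: each row of "select proxy from clb_proxy" comes back as a 1-tuple,
# e.g. ("1.2.3.4:8080",), so indexing replaces the str()/replace("('", ...) round trip.
def rows_to_ip_list(proxy_lists):
    return [row[0] for row in proxy_lists]

print(rows_to_ip_list([("1.2.3.4:8080",), ("5.6.7.8:3128",)]))  # ['1.2.3.4:8080', '5.6.7.8:3128']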
comData/BaseInfo_qcc/getcode.py (new file, 0 → 100644)

import pandas as pd
# from pandas import DataFrame as df
import pymysql

cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')

import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

df_all = pd.read_excel('D:\\企业数据\\数据组提供\\第五批专精特新企业名单汇总_修订版_20240102.xlsx', dtype=str)
list_com = []
for num_df in range(len(df_all)):
    com_name = str(df_all['企业名称'][num_df])
    dic_com = {'social_code': '', 'com_name': com_name}
    with cnx.cursor() as cursor:
        sel_sql = '''select social_credit_code from sys_base_enterprise where name = %s '''
        cursor.execute(sel_sql, com_name)
        selects = cursor.fetchone()
        if selects:
            print(f'【{num_df}/{len(df_all)}】==={com_name}找到')
            social_code = selects[0]
        else:
            print(f'【{num_df}/{len(df_all)}】==={com_name}未找到')
            social_code = ''
    df_all['信用代码'][num_df] = str(social_code)
df_all.to_excel('D:\\企业数据\\数据组提供\\第五批专精特新企业名单汇总_修订版_20240102.xlsx', index=False)
\ No newline at end of file
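getcode.py writes the looked-up code back with df_all['信用代码'][num_df] = ..., which is chained indexing; pandas does not guarantee that this assignment reaches the original frame. A small sketch of the same loop using .at follows; the frame, company names and code are made-up stand-ins for the Excel sheet and the sys_base_enterprise query:

import pandas as pd

# Hypothetical stand-ins for the real Excel sheet and the database lookup
df = pd.DataFrame({'企业名称': ['甲公司', '乙公司'], '信用代码': ['', '']}, dtype=str)
lookup = {'甲公司': '91110000XXXXXXXXXX'}

for num_df in range(len(df)):
    com_name = str(df['企业名称'][num_df])
    social_code = lookup.get(com_name, '')
    df.at[num_df, '信用代码'] = str(social_code)  # .at/.loc assigns back to the frame itself

print(df)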
comData/YanBao/resentYanbao.py

@@ -228,7 +228,7 @@ def download(data, order_by):
         'sid': sid,
         'sourceAddress': sourceAddress,
         'summary': summary,
-        'title': name_pdf,
+        'title': name_pdf.split('.pdf')[0],
         'type': '0'
     }
     # 将相应字段通过kafka传输保存
...
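The changed line strips the .pdf suffix from the title by splitting on the literal '.pdf'. A tiny sketch comparing that with os.path.splitext, which gives the same result here and is extension-agnostic (the file name is invented):

import os

name_pdf = '行业研究报告2023.pdf'        # invented example
print(name_pdf.split('.pdf')[0])        # '行业研究报告2023' - what the new line produces
print(os.path.splitext(name_pdf)[0])    # same result for any suffix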
@@ -396,8 +396,8 @@ def Mob():
     # usecount = loginfo.split('|')[2]
     usecount = 0
     # 测试用
-    # account = '13636711746'
-    # password = 'Zhenghao123'
+    account = '13636711746'
+    password = 'Zhenghao123'
     # account = '18703752600'
     # password = 'Axlk010208!'
...
@@ -407,8 +407,8 @@ def Mob():
     # password = 'xlk123456!'
     # account = '17103126138'
     # password = '171BlackOne'
-    account = '17103128590'
-    password = '171BlackTwo'
+    # account = '17103128590'
+    # password = '171BlackTwo'
     browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
     f_url = 'https://www.mob.com/developer/login'
     browser.get(f_url)
...
@@ -469,12 +469,8 @@ def Mob():
            i_soup = BeautifulSoup(res_href, 'html.parser')
            summary_list = i_soup.find(class_='picture-content htmlContent').find_all('h3')
            news_url = f'https://api.os.mob.com/api/academy_report/download/{report_id}'
-            # headers['token'] = '92b42171-7a33-4f3b-a25b-9ca689699e10'
-            # headers['token'] = '495f9714-7ea8-4987-91c0-2b0ede38238b'
-            headers['token'] = '05bc441a-b09b-40cb-ab65-8d9e63e5c529'
-            # headers['token'] = '0dcbde4a-9aaa-4651-b886-856add4b8df9'
-            # headers['token'] = '2fcdd67b-da81-4f2f-9d6f-529fdbf6ae1f'
-            # headers['token'] = 'dd54bc77-50fa-4a25-aec7-95ec45bd17f8'
+            headers['token'] = '2fd143d3-a1ec-4d9d-9d9b-38a1d4cf8387'
            news_req = session.get(url=news_url, headers=headers)
            pdf_url = news_req.json()['data']
...
@@ -693,31 +689,75 @@ def juliangsuanshu():
    getnews(browser)
    browser.quit()


def ke36switch(browser, info_url):
    try:
        browser.get(info_url)  # 跳到指定页面
        page_source = browser.page_source  # 获取页面信息
        soup_info = BeautifulSoup(page_source, 'html.parser')
        info_date = soup_info.find('meta', {'property': 'article:published_time'}).get('content')[:10]
        return soup_info
    except:
        browser.quit()
        proxy = baseCore.get_proxy()
        # proxy = {
        #     'http': '222.90.4.73:40018',
        #     'httpS': '222.90.4.73:40018'
        # }
        opt.add_argument('--proxy-server=' + proxy['http'].split('://')[1])
        # opt.add_argument('--proxy-server=' + proxy['http'])
        browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
        browser.refresh()
        ke36switch(browser, info_url)


# 36氪
def ke36():
    # browser = webdriver.Chrome(chromedriver)
    proxy = baseCore.get_proxy()
    opt.add_argument('--proxy-server=' + proxy['http'].split('://')[1])
    # opt.add_argument('--proxy-server=' + proxy['http'])
    browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
    url = 'https://36kr.com/academe'
    browser.get(url)  # 跳到指定页面
    time.sleep(3)
    for i in range(10):
        try:
            wait = WebDriverWait(browser, 10)
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'show-more')))
            js = "var q=document.documentElement.scrollTop=3000"
            browser.execute_script(js)
            time.sleep(2)
            browser.find_element(By.CLASS_NAME, 'show-more').click()
        except:
            break
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.TAG_NAME, 'body')))
    page_source = browser.page_source  # 获取页面信息
    soup = BeautifulSoup(page_source, 'html.parser')
    list_all = soup.find('div', {'class': 'report-list-wrapper'}).find_all('div', {'class': 'report-card type-4'})
    for one_info in list_all[::-1]:
        info_title = one_info.find('div', {'class': 'title'}).text
        info_zhaiyao = one_info.find('div', {'class': 'desc'}).text
        info_url = one_info.a.get('href')
        # is_member = r.sismember('report_pdf_three_history', info_url)
        # if is_member:
        #     continue
        soup_info = ke36switch(browser, info_url)
        info_date = soup_info.find('meta', {'property': 'article:published_time'}).get('content')[:10]
        if info_date < '2023-05-10':
            pass
        else:
            continue
        try:
            info_content = soup_info.find('div', {'class': 'common-width margin-bottom-20'}).text
        except:
            proxy = baseCore.get_proxy()
            opt.add_argument('--proxy-server=' + proxy['http'])
            browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
            ke36switch(browser, info_url)
        dic_post = {
            'title': info_title,  # 报告名称
            'url_pdf': '',  # 报告链接
@@ -734,7 +774,7 @@ def ke36():
            'sid': '1662008421217378306',  # 信息源id
        }
        order_by = 1
-        download(dic_post, order_by)
+        # download(dic_post, order_by)
        order_by += 1
        # print(page,dic_post)
        # url = 'http://114.115.155.139:5002/report_download'
...
@@ -742,6 +782,7 @@ def ke36():
        # res = requests.post(url, data=json.dumps(dic_post))
        # print(res.json())
        time.sleep(2)
    browser.quit()
...
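The ke36switch() helper added above reloads a page behind a fresh proxy when the Selenium fetch fails. The same retry-behind-a-new-proxy idea can be sketched without a browser, assuming only that baseCore.get_proxy() returns a requests-style proxy mapping such as {'http': 'http://ip:port'}; the function name and attempt count below are illustrative, not part of the commit:

import requests

def fetch_with_fresh_proxy(url, get_proxy, attempts=3):
    # Try up to `attempts` proxies; return the first successful response, else None.
    for _ in range(attempts):
        try:
            return requests.get(url, proxies=get_proxy(), timeout=10)
        except requests.RequestException:
            continue
    return None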
@@ -922,6 +963,28 @@ def shijiejingjiluntan():
     time.sleep(2)
     browser.quit()
+
+
+def get_json(key_word, page, headers):
+    param = {"uid": "", "keyword": key_word, "type": ["researchReport"], "client": "web", "clientVersion": "curr",
+             "clientType": "web", "param": {"researchReport": {"client": "web", "pageSize": 10, "pageIndex": page}}}
+    param_url = parse.quote(str(param).replace(" ", ""))
+    # param_url = parse.quote(str(param))
+    # param_url = f'%7B"uid"%3A""%2C"keyword"%3A"{key_word}"%2C"type"%3A%5B"researchReport"%5D%2C"client"%3A"web"%2C"clientVersion"%3A"curr"%2C"clientType"%3A"web"%2C"param"%3A%7B"researchReport"%3A%7B"client"%3A"web"%2C"pageSize"%3A10%2C"pageIndex"%3A{page}%7D%7D%7D'
+    t = int(time.time() * 1000)
+    url = f'https://search-api-web.eastmoney.com/search/jsonp?cb=&param={param_url}&_={t}'
+    # url = 'https://search-api-web.eastmoney.com/search/jsonp?cb=jQuery35103326233792363984_1702455623969&param=%7B%22uid%22%3A%22%22%2C%22keyword%22%3A%22%E7%A7%91%E8%BE%BE%E8%87%AA%E6%8E%A7%22%2C%22type%22%3A%5B%22researchReport%22%5D%2C%22client%22%3A%22web%22%2C%22clientVersion%22%3A%22curr%22%2C%22clientType%22%3A%22web%22%2C%22param%22%3A%7B%22researchReport%22%3A%7B%22client%22%3A%22web%22%2C%22pageSize%22%3A10%2C%22pageIndex%22%3A1%7D%7D%7D&_=1702455623970'
+    res = requests.get(url=url, headers=headers).text[1:-1]
+    res_json = json.loads(res)
+    return res_json
+
+
 # 东方财富网
 def dongfangcaifu():
     headers = {
...
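get_json() calls the eastmoney search endpoint with an empty JSONP callback (cb=), so the body arrives wrapped in parentheses and the function strips them with .text[1:-1] before json.loads. A minimal sketch with a simulated payload; the keys mirror the ones the script reads, the numbers are invented:

import json

raw = '({"hitsTotal": 23, "result": {"researchReport": []}})'  # simulated JSONP body
res_json = json.loads(raw[1:-1])   # drop the surrounding parentheses, as .text[1:-1] does
print(res_json['hitsTotal'])       # 23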
@@ -965,101 +1028,70 @@ def dongfangcaifu():
        page = 1
        # for page in range(1,500):
        # log.info(page)
        res_json_ = get_json(key_word, page, headers)
        # 添加页数
        total = res_json_['hitsTotal']
        page = (total / 10) + 1
        for page_ in range(1, page + 1):
            res_json = get_json(key_word, page_, headers)
            list_all = res_json['result']['researchReport']
            if list_all:
                pass
            else:
                continue
            for one_news in list_all:
                news_title = one_news['title']
                news_title = news_title.replace('<em>', '').replace('</em>', '')
                news_date = one_news['date'][:10]
                comparison_date = "2023-12-08"
                # 比较发布日期是否小于2023-10-06
                if news_date < comparison_date:
                    continue
                else:
                    pass
                news_come = one_news['source']
                news_code = one_news['code']
                news_url = f'https://data.eastmoney.com/report/zw_stock.jshtml?infocode={news_code}'
                news_res = requests.get(news_url)
                news_soup = BeautifulSoup(news_res.content, 'html.parser')
                try:
                    if '抱歉,您访问的页面不存在或已删除!' in news_soup.title.text:
                        continue
                except:
                    continue
                try:
                    news_content = news_soup.find('div', {'class': 'newsContent'}).text.strip()
                except:
                    news_content = news_soup.find('div', {'class': 'ctx-content'}).text.strip()
                try:
                    news_pdf = news_soup.find('div', {'class': 'detail-header'}).find_all('a')[-1].get('href')
                except:
                    news_pdf = news_soup.find('span', {'class': 'to-link'}).a.get('href')
                dic_post = {
                    'title': news_title,  # 报告名称
                    'url_pdf': news_pdf,  # 报告链接
                    'year': news_date[:4],  # 报告年份
                    'type_id': '4',  # 报告种类,(年报:1,季报:2,月报:3,研报:4)
                    'item_id': social_code,  # 关联记录id,如:企业信用代码
                    'category': 'pdf',  # 文件后缀名,如:pdf
                    'create_by': 'TangYuHang',  # 创建人,使用驼峰命名,如:TangYuHang
                    'publishDate': news_date,  # 时间
                    'origin': '东方财富网-研报中心',  # 来源
                    'sourceAddress': news_url,  # 原文链接
                    'content': '',  # 内容
                    'summary': news_content,  # 摘要
                    'sid': '1662008733005160449',  # 信息源id
                    'come': news_come,
                }
                order_by = 1
                download(dic_post, order_by)
                order_by += 1


# 东方财富网2
def dongfangcaifu2():
...
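The rewritten dongfangcaifu() derives the page count as page = (total / 10) + 1, which produces a float under Python 3 (one that range(1, page + 1) cannot consume) and one extra page when total is a multiple of 10. A hedged sketch of the same calculation with integer arithmetic, keeping the pageSize of 10 used by get_json:

import math

def page_count(total_hits, page_size=10):
    # Integer page count; avoids the float from (total / 10) + 1 and the extra page
    # when total_hits is an exact multiple of page_size.
    return math.ceil(total_hits / page_size)

for total in (0, 9, 10, 23):
    print(total, page_count(total))   # 0 0, 9 1, 10 1, 23 3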
@@ -1590,11 +1622,11 @@ def dongfangcaifu7():
 if __name__ == '__main__':
-    # try:
-    #     log.info('mob')
-    #     Mob()
-    # except Exception as e:
-    #     pass
+    try:
+        log.info('mob')
+        Mob()
+    except Exception as e:
+        pass
     # try:
     #     log.info('yidong_guanxiangtai')
     #     yidong_guanxiangtai()
...
@@ -1605,11 +1637,12 @@ if __name__ == '__main__':
     #     juliangsuanshu()
     # except Exception as e:
     #     pass
-    # try:
-    #     log.info('ke36')
-    #     ke36()
-    # except:
-    #     pass
-    ke36()
+    try:
+        log.info('ke36')
+        ke36()
+    except Exception as e:
+        pass
     # try:
     #     log.info('qianyanzhishiku')
     #     qianyanzhishiku()
...
comData/policylaw/gwyparts.py

@@ -121,7 +121,7 @@ def get_content2():
         except Exception as e:
             log.info(f'---{href}--------{e}-------')
             continue
-        if '.ofd' in file_href or '.docx' in file_href or '.doc' in file_href or 'xls' in file_href or '.zip' in file_href or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href or '.pdf' in file_href:
+        if '.wps' in file_href or '.ofd' in file_href or '.docx' in file_href or '.doc' in file_href or 'xls' in file_href or '.zip' in file_href or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href or '.pdf' in file_href:
             file_name = file.text.strip()
             category = os.path.splitext(file_href)[1]
             if category not in file_name:
...
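The widened condition above is a long chain of substring checks, now also matching '.wps'. A compact sketch of the same filter driven by a suffix tuple; the extension list is illustrative and the check assumes plain file URLs without query strings:

import os

ALLOWED_EXTS = ('.wps', '.ofd', '.docx', '.doc', '.xls', '.xlsx', '.zip', '.rar', '.ppt', '.pdf')

def is_attachment(file_href):
    # One lowercased comparison also covers the '.PDF' / '.DOC' / '.XLS' spellings.
    return os.path.splitext(file_href)[1].lower() in ALLOWED_EXTS

print(is_attachment('http://example.gov.cn/files/通知附件.PDF'))   # True
print(is_attachment('http://example.gov.cn/files/index.html'))     # False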
comData/shangbiao/tyc_shangbiao_zg500.py (new file, 0 → 100644)

# 天眼查商标申请数量
# 接口 https://capi.tianyancha.com/cloud-intellectual-property/intellectualProperty/trademarkList?_=1703216298337
# 请求方式 POST
import requests, time, re, random
from base import BaseCore
import pandas as pd
from bs4 import BeautifulSoup as bs
from comData.Tyc.getTycId import getTycIdByXYDM

baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
taskType = '天眼查商标/中国500强'

header = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    'Host': 'capi.tianyancha.com',
    'Origin': 'https://www.tianyancha.com',
    'Referer': 'https://www.tianyancha.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTcwMjcxMjg4MywiZXhwIjoxNzA1MzA0ODgzfQ.mVTR6Wz7W_IBjf4rLYhKacG9CRxGTzIGKmlqrR9jN-_t0Z4vUYVYwOTMzo7vT9IClJELruhl4d31KBHX0bZ1NQ',
    'X-TYCID': '6f6298905d3011ee96146793e725899d',
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'version': 'TYC-Web'
}

if __name__ == "__main__":
    while True:
        start_time = time.time()
        # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
        social_code = baseCore.redicPullData('ShangBiao:zg500shSocial_code')
        # social_code = '91350700856994874M'
        # 判断 如果Redis中已经没有数据,则等待
        if social_code == None:
            # time.sleep(20)
            break
        start = time.time()
        try:
            data = baseCore.getInfomation(social_code)
            if len(data) != 0:
                pass
            else:
                # 数据重新塞入redis
                baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
                continue
            id = data[0]
            com_name = data[1]
            xydm = data[2]
            tycid = data[11]
            if tycid == None or tycid == '':
                try:
                    retData = getTycIdByXYDM(xydm)
                    if retData['tycData'] and retData['reput']:
                        tycid = retData['tycData']['id']
                        # todo:写入数据库
                        updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
                        cursor.execute(updateSql)
                        cnx.commit()
                    elif not retData['tycData'] and retData['reput']:
                        state = 0
                        takeTime = baseCore.getTimeCost(start, time.time())
                        baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                        log.info(f'======={social_code}====重新放入redis====')
                        baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
                        continue
                    elif not retData['reput'] and not retData['tycData']:
                        continue
                except:
                    state = 0
                    takeTime = baseCore.getTimeCost(start, time.time())
                    baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                    baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
                    continue
            # count = data[17]
            log.info(f"{id}---{xydm}----{tycid}----开始处理")
            t = int(time.time() * 1000)
            # url = f'https://capi.tianyancha.com/cloud-intellectual-property/intellectualProperty/trademarkList?_={t}'
            url = f'https://capi.tianyancha.com/cloud-intellectual-property/trademark/statistics?_={t}&cgid={tycid}'
            # tycid = '209252214'
            # payload = {"id": tycid, "ps": 10, "pn": 1, "int_cls": "-100", "status": "-100", "app_year": "-100",
            #            "regYear": "-100", "searchType": "-100", "category": "-100", "fullSearchText": "", "sortField": "",
            #            "sortType": "-100"}
            request = requests.get(url=url, headers=header, verify=False)
            # request = requests.post(url=url, headers=header, data=payload)
            # print(request.text)
            data_json = request.json()
            # print(data_json)
            try:
                all_data = data_json['data']['applyYearGraph']['statisticGraphData']
            except:
                dic_info = {
                    '企业名称': com_name,
                    '统一信用代码': social_code,
                }
                selectSql = f"select count(1) from shangbiao_sh_tyc where social_code='{xydm}' "
                cursor.execute(selectSql)
                count = cursor.fetchone()[0]
                if count > 0:
                    log.info(f"{com_name}----已经存在---无商标数据")
                    continue
                else:
                    values_tuple = tuple(dic_info.values())
                    # log.info(f"{gpdm}-------{companyname}---新增")
                    insertSql = f"insert into shangbiao_sh_tyc(com_name,social_code) values (%s,%s)"
                    cursor.execute(insertSql, values_tuple)
                    cnx.commit()
                    log.info(f"{com_name}-----新增---无商标数据")
                    continue
            for info in all_data:
                year = info['desc']
                num = info['num']  # 申请商标数量
                dic_info = {
                    '企业名称': com_name,
                    '统一信用代码': social_code,
                    '年份': year,
                    '数量': num
                }
                selectSql = f"select count(1) from shangbiao_sh_tyc where social_code='{xydm}' and year='{year}' "
                cursor.execute(selectSql)
                count = cursor.fetchone()[0]
                if count > 0:
                    log.info(f"{com_name}-------{year}---已经存在")
                    continue
                else:
                    values_tuple = tuple(dic_info.values())
                    # log.info(f"{gpdm}-------{companyname}---新增")
                    insertSql = f"insert into shangbiao_sh_tyc(com_name,social_code,year,num) values (%s,%s,%s,%s)"
                    cursor.execute(insertSql, values_tuple)
                    cnx.commit()
                    log.info(f"{com_name}-------{year}---新增")
                    time.sleep(2)
            # list_all_info.append(dic_info)
            log.info(f"【{xydm}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
        except Exception as e:
            log.info(f'==={social_code}=====获取企业信息失败==={e}=')
            # 重新塞入redis
            baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
            state = 0
            takeTime = baseCore.getTimeCost(start, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
            time.sleep(5)
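tyc_shangbiao_zg500.py interpolates social_code and year straight into its SELECT/INSERT strings. A sketch of the same check-then-insert step with parameterized SQL, assuming the same shangbiao_sh_tyc table and a PyMySQL-style cursor:

def upsert_year_count(cursor, cnx, com_name, social_code, year, num):
    # Same flow as the script: skip if the (company, year) row exists, otherwise insert.
    cursor.execute(
        "select count(1) from shangbiao_sh_tyc where social_code=%s and year=%s",
        (social_code, year),
    )
    if cursor.fetchone()[0] > 0:
        return False   # already recorded
    cursor.execute(
        "insert into shangbiao_sh_tyc(com_name,social_code,year,num) values (%s,%s,%s,%s)",
        (com_name, social_code, year, num),
    )
    cnx.commit()
    return True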
comData/zhuanli/tyc_zhuanli_zg500.py (new file, 0 → 100644)

import requests, time, re, random
from base import BaseCore
import pandas as pd
from bs4 import BeautifulSoup as bs
from comData.Tyc.getTycId import getTycIdByXYDM

baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
taskType = '天眼查专利/国内上市'


def spider_zhuanli(com_name, social_code, tycid, page, list_all_info):
    start_time = time.time()
    log.info(f'===正在处理第{page}页===')
    # list_all_info = []
    t = int(time.time() * 1000)
    header = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Host': 'capi.tianyancha.com',
        'Origin': 'https://www.tianyancha.com',
        'Referer': 'https://www.tianyancha.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzI3MzczNzEzMSIsImlhdCI6MTcwMzE1MjEzMSwiZXhwIjoxNzA1NzQ0MTMxfQ.3tF-UFhorC_mS4h2UIBOZamApfcaJEfjBbr8K11d2yHhELBM1pEvjd6yccxhLzVKRoyFdTn-1Cz6__ZpzgjnGg',
        'X-TYCID': '6f6298905d3011ee96146793e725899d',
        'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'version': 'TYC-Web'
    }
    url = f'https://capi.tianyancha.com/cloud-intellectual-property/patent/patentListV6?_={t}&id={tycid}&pageSize=100&pageNum={page}&type=-100&lprs=-100&applyYear=-100&pubYear=-100&fullSearchText=&sortField=&sortType=-100'
    try:
        ip = baseCore.get_proxy()
    except:
        time.sleep(2)
        ip = baseCore.get_proxy()
    try:
        res_j = requests.get(url=url, headers=header, proxies=ip, verify=False).json()
    except:
        for i in range(3):
            try:
                res_j = requests.get(url=url, headers=header, verify=False).json()
            except:
                time.sleep(2)
                continue
    # print(res_j)
    try:
        list_all = res_j['data']['items']
    except:
        dic_info = {
            '企业名称': com_name,
            '统一信用代码': social_code
        }
        selectSql = f"select count(1) from zhuanli_sh_tyc where social_code='{social_code}' "
        cursor.execute(selectSql)
        count = cursor.fetchone()[0]
        if count > 0:
            log.info(f"{com_name}---{social_code}---已经存在---无专利")
            return 0
        else:
            values_tuple = tuple(dic_info.values())
            # log.info(f"{gpdm}-------{companyname}---新增")
            insertSql = f"insert into zhuanli_sh_tyc(com_name,social_code) values (%s,%s)"
            cursor.execute(insertSql, values_tuple)
            cnx.commit()
            log.info(f"{com_name}---{social_code}---新增---无专利")
            return 0
    # print(list_all)
    if list_all:
        for one_zhuanli in list_all:
            title = one_zhuanli['title']
            try:
                shenqingri = one_zhuanli['applicationTime']
            except:
                shenqingri = ''
            try:
                shenqing_code = one_zhuanli['patentNum']
            except:
                shenqing_code = ''
            try:
                leixing = one_zhuanli['patentType']
            except:
                leixing = ''
            try:
                status = one_zhuanli['lprs']
            except:
                status = ''
            try:
                gongkairi = one_zhuanli['pubDate']
            except:
                gongkairi = ''
            try:
                gongkai_code = one_zhuanli['pubnumber']
            except:
                gongkai_code = ''
            try:
                famingren = one_zhuanli['inventor']
            except:
                famingren = ''
            try:
                shenqingren = one_zhuanli['applicantName']
            except:
                shenqingren = ''
            try:
                gongneng = one_zhuanli['cat']
            except:
                gongneng = ''
            try:
                uuid = one_zhuanli['uuid']
            except:
                uuid = ''
            dic_info = {
                '企业名称': com_name,
                '统一信用代码': social_code,
                '专利名称': title,
                '申请日': shenqingri,
                '申请号': shenqing_code,
                '专利类型': leixing,
                '专利状态': status,
                '公开日': gongkairi,
                '公开号': gongkai_code,
                '发明人': famingren,
                '申请人': shenqingren,
                '功能': gongneng,
                '天眼查详情id': uuid,
                '年份': shenqingri[:4]
            }
            selectSql = f"select count(1) from zhuanli_sh_tyc where shenqing_code='{shenqing_code}' "
            cursor.execute(selectSql)
            count = cursor.fetchone()[0]
            if count > 0:
                log.info(f"{com_name}-------{shenqing_code}---已经存在")
                continue
            else:
                values_tuple = tuple(dic_info.values())
                # log.info(f"{gpdm}-------{companyname}---新增")
                insertSql = f"insert into zhuanli_sh_tyc(com_name,social_code,title,shenqingri,shenqing_code,leixing,status,gongkairi,gongkai_code,famingren,shenqingren,gongneng,uuid,year) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
                cursor.execute(insertSql, values_tuple)
                cnx.commit()
                log.info(f"{com_name}-------{shenqing_code}---新增")
                time.sleep(2)
        # list_all_info.append(dic_info)
        log.info(f"【{page}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
        return page
    else:
        return 0


if __name__ == "__main__":
    while True:
        list_all_info = []
        # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
        social_code = baseCore.redicPullData('ZhuanLi:gnshSocial_code_zg500')
        # social_code = '91350700856994874M'
        # 判断 如果Redis中已经没有数据,则等待
        if social_code == None:
            # time.sleep(20)
            break
        start = time.time()
        try:
            data = baseCore.getInfomation(social_code)
            if len(data) != 0:
                pass
            else:
                # 数据重新塞入redis
                baseCore.rePutIntoR('ZhuanLi:gnshSocial_code_zg500', social_code)
                continue
            id = data[0]
            com_name = data[1]
            xydm = data[2]
            tycid = data[11]
            if tycid == None or tycid == '':
                try:
                    retData = getTycIdByXYDM(xydm)
                    if retData['tycData'] and retData['reput']:
                        tycid = retData['tycData']['id']
                        # todo:写入数据库
                        updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
                        cursor.execute(updateSql)
                        cnx.commit()
                    elif not retData['tycData'] and retData['reput']:
                        state = 0
                        takeTime = baseCore.getTimeCost(start, time.time())
                        baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                        log.info(f'======={social_code}====重新放入redis====')
                        baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
                        continue
                    elif not retData['reput'] and not retData['tycData']:
                        continue
                except:
                    state = 0
                    takeTime = baseCore.getTimeCost(start, time.time())
                    baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
                    baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
                    continue
            count = data[17]
            log.info(f"{id}---{xydm}----{tycid}----开始处理")
            page = 1
            while True:
                page = spider_zhuanli(com_name, xydm, tycid, page, list_all_info)
                if page != 0:
                    page += 1
                else:
                    # print(len(list_all_info))
                    # df_all_info = pd.DataFrame(list_all_info)
                    # df_all_info.to_excel('中国上市企业专利.xlsx', index=False)
                    log.info(f"{id}---{xydm}----{tycid}----结束处理")
                    break
        except Exception as e:
            log.info(f'==={social_code}=====获取企业信息失败==={e}=')
            # 重新塞入redis
            baseCore.rePutIntoR('ZhuanLi:gnshSocial_code_zg500', social_code)
            state = 0
            takeTime = baseCore.getTimeCost(start, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
            time.sleep(5)
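spider_zhuanli() guards every optional field of a patent item with its own try/except. A sketch of the same flattening done with dict.get defaults; the field names are the ones the script reads, and the sample item and company values are invented:

def parse_patent(one_zhuanli, com_name, social_code):
    g = one_zhuanli.get
    shenqingri = g('applicationTime', '')
    return {
        '企业名称': com_name,
        '统一信用代码': social_code,
        '专利名称': g('title', ''),
        '申请日': shenqingri,
        '申请号': g('patentNum', ''),
        '专利类型': g('patentType', ''),
        '专利状态': g('lprs', ''),
        '公开日': g('pubDate', ''),
        '公开号': g('pubnumber', ''),
        '发明人': g('inventor', ''),
        '申请人': g('applicantName', ''),
        '功能': g('cat', ''),
        '天眼查详情id': g('uuid', ''),
        '年份': shenqingri[:4],
    }

print(parse_patent({'title': '一种示例装置', 'applicationTime': '2023-06-01'}, '示例公司', '91350700XXXXXXXXXX'))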