Commit a38c9372 Author: LiuLiYuan

Merge remote-tracking branch 'origin/master'

......@@ -464,7 +464,8 @@ def zhengquanqihuo():
#上海交易所 http://www.sse.com.cn/home/search/index.shtml?webswd=REITs
def sse():
url = 'http://query.sse.com.cn/search/getESSearchDoc.do?page=0&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C8685%2C9348%2C12632%2C12768%2C12769%2C12770%2C12771%2C12772%2C12773%2C12774%2C12775%2C12776%2C12777%2C12778%2C12779%2C12780%2C12781%2C12782%2C12783%2C12784%2C12785%2C12786%2C12787%2C12788%2C12789%2C12790%2C12791%2C12792%2C12793%2C12794%2C12795%2C12796%2C12797%2C12798%2C12799%2C12800%2C12801%2C12802%2C12803%2C12804%2C12805%2C12806%2C12807%2C12808%2C12809%2C12810%2C12811%2C12812%2C13061%2C13282%2C13283%2C13284%2C13285%2C13286%2C13287%2C13288%2C13289%2C13294%2C13364%2C13365%2C13366%2C13367%2C14595%2C14596%2C14597%2C14598%2C14599%2C14600%2C14601%2C14602%2C14603%2C14604%2C14605%2C14606&trackId=50619067167713018335655119683810&_=1699508921761'
# url = 'http://query.sse.com.cn/search/getESSearchDoc.do?page=0&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C8685%2C9348%2C12632%2C12768%2C12769%2C12770%2C12771%2C12772%2C12773%2C12774%2C12775%2C12776%2C12777%2C12778%2C12779%2C12780%2C12781%2C12782%2C12783%2C12784%2C12785%2C12786%2C12787%2C12788%2C12789%2C12790%2C12791%2C12792%2C12793%2C12794%2C12795%2C12796%2C12797%2C12798%2C12799%2C12800%2C12801%2C12802%2C12803%2C12804%2C12805%2C12806%2C12807%2C12808%2C12809%2C12810%2C12811%2C12812%2C13061%2C13282%2C13283%2C13284%2C13285%2C13286%2C13287%2C13288%2C13289%2C13294%2C13364%2C13365%2C13366%2C13367%2C14595%2C14596%2C14597%2C14598%2C14599%2C14600%2C14601%2C14602%2C14603%2C14604%2C14605%2C14606&trackId=50619067167713018335655119683810&_=1699508921761'
url = 'http://query.sse.com.cn/search/getESSearchDoc.do?page=0&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C12632&trackId=00752019013296307464953343505659&_=1703469889542'
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate',
......@@ -485,9 +486,13 @@ def sse():
# os.makedirs(path)
for page in range(0, int(total_page)):
t = int(time.time())
url_page = f'http://query.sse.com.cn/search/getESSearchDoc.do?page={page}&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C12632&trackId=24278800487459370386559742313666&_={t}'
url_page = f'http://query.sse.com.cn/search/getESSearchDoc.do?page={page}&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C12632&trackId=00752019013296307464953343505659&_={t}'
data = policy.getrequest_json(headers, url_page)
newslist = data['data']['knowledgeList']
# if newslist:
# pass
# else:
# continue
# print(newslist)
for news in newslist:
num += 1
......@@ -521,8 +526,8 @@ def sse():
content = ''
response = requests.get(newsUrl, timeout=20)
with fitz.open(stream=response.content, filetype='pdf') as doc:
for page in doc.pages():
content += page.get_text()
for page_ in doc.pages():
content += page_.get_text()
file_href = newsUrl
file_name = title
......@@ -628,7 +633,7 @@ def sse():
for att_id in id_list:
baseCore.deliteATT(att_id)
except Exception as e:
log.info(f"error!!!{newsUrl}")
log.info(f"error!!!{newsUrl}===={title}")
log.info(e)
log.info(f'====第{page}页====处理结束,================')
......@@ -972,14 +977,14 @@ def guizhou():
if __name__=="__main__":
# file_path = f'data/REITs贵州省人民政府.xlsx'
# wb = policy.createfile(file_path)
reform()
# shenzhen()
zhengquanqihuo()
# reform()
# # shenzhen()
# zhengquanqihuo()
try:
sse()
except:
pass
hebei()
guizhou()
# hebei()
# guizhou()
# zhengquanqihuo()
\ No newline at end of file
......@@ -9,7 +9,7 @@ import LawRules_shenzhen, LawRules_2_shenzhen
from REITs_policyData.policy_beijing import beijing
if __name__ == "__mian__":
if __name__ == "__main__":
beijing()
reits.sse()
reits.reform()
......
......@@ -403,6 +403,7 @@ class BaseCore:
sql = "select proxy from clb_proxy"
self.cursor.execute(sql)
proxy_lists = self.cursor.fetchall()
self.cnx.commit()
ip_list = []
for proxy_ in proxy_lists:
ip_list.append(str(proxy_).replace("('", '').replace("',)", ''))
......@@ -472,6 +473,10 @@ class BaseCore:
# 从Redis的List中获取并移除一个元素
def redicPullData(self, key):
try:
self.r.ping()
except:
self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
item = self.r.lpop(key)
return item.decode() if item else None
......@@ -658,6 +663,8 @@ class BaseCore:
return 'cn'
if result[0] == '':
return 'cn'
if result[0] == 'ja':
return 'jp'
return result[0]
#创建excel文件
......@@ -685,6 +692,10 @@ class BaseCore:
# 对失败或者断掉的企业 重新放入redis
def rePutIntoR(self, key, item):
try:
self.r.ping()
except:
self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
self.r.rpush(key, item)
# 增加计数器的值并返回增加后的值
......
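The ping-and-reconnect logic added above is duplicated in redicPullData and rePutIntoR. A minimal sketch of factoring it into one shared helper (the method name _ensure_redis is an assumption; the connection parameters mirror the values already hard-coded above):

import redis

# methods inside the existing BaseCore class (sketch only)
def _ensure_redis(self):
    # Recreate the client only when the current connection no longer answers PING.
    try:
        self.r.ping()
    except Exception:
        self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)

def redicPullData(self, key):
    self._ensure_redis()
    item = self.r.lpop(key)
    return item.decode() if item else None

def rePutIntoR(self, key, item):
    self._ensure_redis()
    self.r.rpush(key, item)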
......@@ -674,7 +674,7 @@ if __name__ == "__main__":
# BaseInfoEnterprise()
# FBS()
# MengZhi()
# NQEnterprise()
NQEnterprise()
# SEC_CIK()
# dujioashou()
# omeng()
......@@ -683,6 +683,6 @@ if __name__ == "__main__":
# AnnualEnterprise_task()
# FinanceFromEast()
# ipo_code()
JingyingfenxiFromEase()
# JingyingfenxiFromEase()
log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===')
......@@ -292,7 +292,7 @@ def dic_handle(result_dic):
return aa_dict
# 采集准备
def redaytowork(com_name,social_code,securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
def redaytowork(com_name,social_code,securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
# if social_code:
# dic_info = baseCore.getInfomation(social_code)
......@@ -338,7 +338,7 @@ def redaytowork(com_name,social_code,securitiesCode, securitiesShortName, listin
else:
# 开始采集
try:
if spiderwork(soup, com_name, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
if spiderwork(soup, com_name, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
count += 1
log.info(f'采集{com_name}成功=======耗时{baseCore.getTimeCost(start_time, time.time())}')
token.updateTokeen(id_cookie,3)
......@@ -373,7 +373,7 @@ def ifbeforename(company_url):
return ''
# 采集基本信息和工商信息
def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
qccid = company_url.split('firm/')[1].split('.html')[0]
# 将采集到的企查查id更新
updateSql = f"update EnterpriseInfo set QCCID = '{qccid}' where SocialCode = '{social_code}'"
......@@ -463,7 +463,7 @@ def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, ca
aa_dic['listingDate'] = listingDate
aa_dic['category'] = category
aa_dic['exchange'] = exchange
aa_dic['listingType'] = listType
# print(aa_dic)
sendkafka(aa_dic)
......@@ -482,11 +482,11 @@ def spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, ca
aa_dic['listingDate'] = listingDate
aa_dic['category'] = category
aa_dic['exchange'] = exchange
aa_dic['listingType'] = listType
sendkafka(aa_dic)
# 判断名称是否统一
def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name):
def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name):
company_url = ''
try:
company_list = soup.find('table', class_='app-ltable ntable ntable-list ntable ntable-list')
......@@ -516,7 +516,7 @@ def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDat
# company_url = 'https://www.qcc.com/firm/80af5085726bb6b9c7770f1e4d0580f4.html'
# company_url = 'https://www.qcc.com/firm/50f75e8a8859e609ec37976f8abe827d.html'
# 采集基本信息和工商信息
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name)
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name)
else:
# 判断是否是曾用名
tr = tr_list[:1][0]
......@@ -526,7 +526,7 @@ def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDat
company_url = info_t.find('a')['href']
beforename = ifbeforename(company_url)
if beforename == receptname:
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name)
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange,listType, ynDomestic, countryName, file_name)
else:
#没有搜到相同的企业名称
data = [com_name, social_code]
......@@ -549,6 +549,7 @@ if __name__ == '__main__':
else:
log.info('==========已无cookies==========')
time.sleep(30)
continue
id_cookie = cookieinfo[0]
cookie_ = json.loads(cookieinfo[1])
......@@ -579,8 +580,8 @@ if __name__ == '__main__':
}
start_time = time.time()
# 获取企业信息
company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
# company_field = '91220101606092819L||'
# company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
company_field = '913300007125582210||'
if company_field == 'end':
# 本轮处理完毕,需要发送邮件,并且进入下一轮
baseCore.sendEmail(file_name)
......@@ -595,6 +596,11 @@ if __name__ == '__main__':
while flag:
log.info('--------已没有数据---------')
time.sleep(30)
if not baseCore.check_mysql_conn(cnx_):
# 144数据库
cnx_ = baseCore.cnx
cursor_ = cnx_.cursor()
log.info('===11数据库重新连接成功===')
company_field = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
if company_field:
flag = False
......@@ -604,26 +610,28 @@ if __name__ == '__main__':
continue
social_code = company_field.split('|')[0]
com_name = company_field.split('|')[2].replace(' ', '')
ynDomestic = company_field.split('|')[15]
countryName = company_field.split('|')[16]
securitiesCode = company_field.split('|')[17]
securitiesShortName = company_field.split('|')[18]
listingDate = company_field.split('|')[21]
category = company_field.split('|')[19]
exchange = company_field.split('|')[20]
# ynDomestic = ''
# countryName = ''
# securitiesCode = ''
# securitiesShortName = ''
# listingDate = ''
# category = ''
# exchange = ''
count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange,ynDomestic, countryName, file_name)
com_name = company_field.split('|')[1].replace(' ', '')
# ynDomestic = company_field.split('|')[15]
# countryName = company_field.split('|')[16]
# securitiesCode = company_field.split('|')[17]
# securitiesShortName = company_field.split('|')[18]
# listingDate = company_field.split('|')[21]
# category = company_field.split('|')[19]
# exchange = company_field.split('|')[20]
# listType = company_field.split('|')[21]
ynDomestic = '1'
countryName = '中国内地'
securitiesCode = ''
securitiesShortName = ''
listingDate = ''
category = ''
exchange = ''
listType = ''
count = redaytowork(com_name, social_code, securitiesCode, securitiesShortName, listingDate, category, exchange,listType, ynDomestic, countryName, file_name)
time.sleep(2)
# break
break
# baseCore.r.close()
# baseCore.sendEmail(file_name)
# 信息采集完成后将该企业的采集次数更新
......
......@@ -516,7 +516,7 @@ def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDat
# company_url = 'https://www.qcc.com/firm/80af5085726bb6b9c7770f1e4d0580f4.html'
# company_url = 'https://www.qcc.com/firm/50f75e8a8859e609ec37976f8abe827d.html'
# 采集基本信息和工商信息
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, ynDomestic, countryName, file_name)
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name)
else:
# 判断是否是曾用名
tr = tr_list[:1][0]
......@@ -526,7 +526,7 @@ def spiderwork(soup, receptname, securitiesCode, securitiesShortName, listingDat
company_url = info_t.find('a')['href']
beforename = ifbeforename(company_url)
if beforename == receptname:
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange,listType, ynDomestic, countryName, file_name)
spiderinfo(company_url, securitiesCode, securitiesShortName, listingDate, category, exchange, listType, ynDomestic, countryName, file_name)
else:
#没有搜到相同的企业名称
data = [com_name, social_code]
......
import pandas as pd
# from pandas import DataFrame as df
import pymysql
cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
df_all = pd.read_excel('D:\\企业数据\\数据组提供\\第五批专精特新企业名单汇总_修订版_20240102.xlsx', dtype=str)
list_com = []
for num_df in range(len(df_all)):
com_name = str(df_all['企业名称'][num_df])
dic_com = {
'social_code': '',
'com_name': com_name
}
with cnx.cursor() as cursor:
sel_sql = '''select social_credit_code from sys_base_enterprise where name = %s '''
cursor.execute(sel_sql, com_name)
selects = cursor.fetchone()
if selects:
print(f'【{num_df}/{len(df_all)}】==={com_name}找到')
social_code = selects[0]
else:
print(f'【{num_df}/{len(df_all)}】==={com_name}未找到')
social_code = ''
df_all.loc[num_df, '信用代码'] = str(social_code)  # use .loc so the assignment is written back to df_all
df_all.to_excel('D:\\企业数据\\数据组提供\\第五批专精特新企业名单汇总_修订版_20240102.xlsx', index=False)
\ No newline at end of file
......@@ -28,7 +28,7 @@ headers = {
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTY5OTkyNTk5NywiZXhwIjoxNzAyNTE3OTk3fQ.9iXmxFEiBdu2WYa7RwdU0xKKx7v_wBe9-QipH0TNKp9Dzk_2cZK1ESsmO1o8ICrddb5sx2cl5pjOBoaaf_9Qsg',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTcwMjcxMjg4MywiZXhwIjoxNzA1MzA0ODgzfQ.mVTR6Wz7W_IBjf4rLYhKacG9CRxGTzIGKmlqrR9jN-_t0Z4vUYVYwOTMzo7vT9IClJELruhl4d31KBHX0bZ1NQ',
'X-TYCID': '6f6298905d3011ee96146793e725899d',
'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
......
......@@ -38,7 +38,7 @@ headers = {
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTY5OTkyNTk5NywiZXhwIjoxNzAyNTE3OTk3fQ.9iXmxFEiBdu2WYa7RwdU0xKKx7v_wBe9-QipH0TNKp9Dzk_2cZK1ESsmO1o8ICrddb5sx2cl5pjOBoaaf_9Qsg',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTcwMjcxMjg4MywiZXhwIjoxNzA1MzA0ODgzfQ.mVTR6Wz7W_IBjf4rLYhKacG9CRxGTzIGKmlqrR9jN-_t0Z4vUYVYwOTMzo7vT9IClJELruhl4d31KBHX0bZ1NQ',
'X-TYCID': '6f6298905d3011ee96146793e725899d',
'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
......@@ -70,7 +70,7 @@ def beinWork(tyc_code, social_code,start_time):
pass
except Exception as e:
#todo:重新放入redis中
baseCore.rePutIntoR('NoticeEnterprise:gnqy_socialCode',social_code)
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode',social_code)
log.error(f"{tyc_code}-----获取总数接口失败")
error = '获取总数接口失败'
state = 0
......@@ -302,10 +302,11 @@ def doJob():
continue
id = data[0]
xydm = data[2]
com_name = data[1]
tycid = data[11]
if tycid == None or tycid == '':
try:
retData = getTycIdByXYDM(xydm)
retData = getTycIdByXYDM(com_name)
if retData['tycData'] and retData['reput']:
tycid = retData['tycData']['id']
# todo:写入数据库
......
......@@ -43,7 +43,7 @@ class EsMethod(object):
"must": [
{
"match": {
"type": "1"
"type": "0"
}
}
]
......@@ -115,7 +115,7 @@ def main(page, p, esMethod):
attid = mms['_source']['attachmentIds'][0]
log.info(f'{id}-{attid}--{title}--{sourceAddress}---')
selects = secrchATT('1', attid)
selects = secrchATT('4', attid)
if selects:
pass
else:
......
......@@ -53,12 +53,12 @@ class EsMethod(object):
# 'hits.hits._source.createDate',
# 'hits.hits._source.publishDate',
] # 字段2
result = self.es.search(index=index_name
resultb = self.es.search(index=index_name
, doc_type='_doc'
, filter_path=filter_path
, body=body)
# log.info(result)
return result
return resultb
def updateaunn(self, index_name, id, content, contentWithTag):
body = {
......@@ -67,24 +67,28 @@ class EsMethod(object):
'contentWithTag': contentWithTag
}
}
result = self.es.update(index=index_name
resulta = self.es.update(index=index_name
,id=id
,body=body)
log.info('更新结果:%s' % result)
log.info('更新结果:%s' % resulta)
def paserUrl(html,listurl):
# soup = BeautifulSoup(html, 'html.parser')
# 获取所有的<a>标签和<img>标签
links = html.find_all(['a', 'img'])
print(len(links))
# 遍历标签,将相对地址转换为绝对地址
for link in links:
print(link)
if 'href' in link.attrs:
link['href'] = urljoin(listurl, link['href'])
# link['href'] = urljoin(listurl, link['href'])
pass
elif 'src' in link.attrs:
link['src'] = urljoin(listurl, link['src'])
pass
# link['src'] = urljoin(listurl, link['src'])
return html
def get_news(news_url,ip_dic):
def get_news(news_url,sourceAddress,id):
header = {
'Host': 'www.sec.gov',
'Connection': 'keep-alive',
......@@ -102,30 +106,44 @@ def get_news(news_url,ip_dic):
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cookie': '_gid=GA1.2.385814648.1694135927; _ga_300V1CHKH1=GS1.1.1694135927.6.1.1694136598.0.0.0; _ga=GA1.1.733439486.1693211261; _4c_=%7B%22_4c_s_%22%3A%22dZJbj9owEIX%2FCvJDngj4EowTKaqqVKq20vbe7SMK9pBYC3HkGLwU8d9rQ%2Bh2V61fEn9z5vjInhPyLXSoIDzPCOMcYyHwFD3CcUDFCVmt4ueACqRqlinOcMprxtOsZos0ZwpSIYQUQi0WFDCaoqfgtcQ4F0vKCRX0PEWqu3lYUDDopnupE5xSHnS6d6MwpGEsx8Ez4%2BKmJYTzK4nam2WN%2Flm3%2FmZ1Kyxyxl9KIwnS3r4%2B9b9S2Y%2FSE5JGQTie5DMiZjjdDCGH%2BxVIJuI19NaovXQrd%2ByjzMN6MqjHUFBw0BJWXivXXvopfqYt6KZ1EeOLi4rZEAl%2FXnfK%2BNdtI%2F3TlrOoXVvjB4idVWvNDiaELAI24UXRz0tHDGthA9ZeZK1z%2FVDM59772QBy1pjDXDY6XetufjVLQTW1fSPNrq%2B7Y%2Fnh832yq51sy8HV1g2p165NNnoL3X5XJt9c7aBMKrPvnD2G%2FV1VJruj8R3YEp7kdq8gqaXTpisbcKNryDRoF29rzDCCMItXll7Zg45UTb5XXwP%2F%2BBf5Un26H9H7t6sfd%2B%2FCZslYxvJM8Fl8XkpIGEt0vr5umHlKaR5WFqbMuS0qBM9wXOfz%2BTc%3D%22%7D'
}
response = requests.get(url=news_url,headers=header,verify=False,timeout=30)
response = requests.get(url=news_url,headers=header,verify=False)
# aa = response.text
# print(response.text)
# response = requests.get(url=news_url, verify=False, proxies=ip_dic, timeout=30)
if response.status_code == 200:
# 请求成功,处理响应数据
# print(response.text)
result = BeautifulSoup(response.content,'html.parser')
# result_ = BeautifulSoup(response.content,'html.parser')
result_ = BeautifulSoup(response.text, 'lxml')
# print(result)
pass
else:
# 请求失败,输出错误信息
log.info(f'请求失败:{response.status_code} {response.text}')
result = ''
return result
result_ = ''
if result_:
pass
# 相对路径转化为绝对路径
# soup = paserUrl(result_, sourceAddress)
time.sleep(2)
content = result_.text.strip()
# del(result_)
# content = result_
# print(content)
time.sleep(2)
esMethod.updateaunn(esMethod.index_name, str(id), content, str(result_))
def main(esMethod):
redis_conn = redis.Redis(connection_pool=pool)
id_ = redis_conn.lpop('NianbaoUS:id')
id = id_.decode()
# id = "23101317164"
if id:
if id_:
pass
else:
log.info('已无数据')
return
return False
id = id_.decode()
result_ = esMethod.queryatt(index_name=esMethod.index_name,id=id)
result = result_['hits']['hits'][0]
num = 0
......@@ -135,17 +153,8 @@ def main(esMethod):
log.info(f'====={title}=={social_code}===正在更新===')
sourceAddress = result['_source']['sourceAddress']
ip_dic = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
soup = get_news(sourceAddress,ip_dic)
if soup:
pass
else:
return
# 相对路径转化为绝对路径
soup = paserUrl(soup, sourceAddress)
content = soup.text.strip()
esMethod.updateaunn(esMethod.index_name, str(id), content, str(soup))
return
get_news(sourceAddress,sourceAddress,id)
return True
def run_threads(num_threads,esMethod):
......@@ -164,6 +173,9 @@ if __name__ == '__main__':
while True:
esMethod = EsMethod()
start = time.time()
num_threads = 5
run_threads(num_threads,esMethod)
log.info(f'5线程 总耗时{time.time()-start}秒')
\ No newline at end of file
# num_threads = 5
# run_threads(num_threads,esMethod)
# log.info(f'5线程 总耗时{time.time()-start}秒')
result = main(esMethod)
if not result:
break
\ No newline at end of file
# 证监会沪市、gong深市 公司债券和企业债券采集
"""
证监会企业名单
"""
# 证监会沪市、深市 公司债券和企业债券采集
import time
import random
import requests
......@@ -25,7 +22,7 @@ cursor = baseCore.cursor
cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
taskType = '企业名单/证监会'
taskType = '企业债券/证监会'
def createDriver():
chrome_driver = r'D:\cmd100\chromedriver.exe'
......@@ -136,7 +133,8 @@ def SpiderByZJH(url, start_time): # dic_info 数据库中获取到的基本信
page = soup.find('div', class_='pages').find_all('li')[-1]
total = page.find('b').text
for i in range(1,int(total)+1):
# for i in range(1,int(total)+1):
for i in range(224, 225):
log.info(f'==========正在采集第{i}页=========')
if i == 1:
href = url
......@@ -241,7 +239,7 @@ if __name__ == '__main__':
# url_parms = ['201010', '201014']
# url_parms = ['201011', '201013']
url_parms = ['201411', '201414', '202011', '202014']
# url_parms = ['202011', '202014']
# url_parms = ['201411']
for url_parm in url_parms:
url = getUrl(url_parm)
......
import yfinance as yf
# 获取股票数据
stock = yf.Ticker("MET")
# 获取资产负债表数据
balance_sheet = stock.balance_sheet
# 获取报告日期
report_dates = balance_sheet.index
print(report_dates)
# 获取现金流量表数据
cashflow_statement = stock.cashflow
# 获取利润表数据
income_statement = stock.financials
print(balance_sheet)
print(cashflow_statement)
print(income_statement)
# import yfinance as yf
#
# # 获取股票数据
# stock = yf.Ticker("AAPL")
#
# # 获取历史价格数据
# historical_prices = stock.history(period="max")
#
# # 获取市值数据
# market_cap = stock.info["marketCap"]
#
# print(historical_prices)
# print(market_cap)
# import yfinance as yf
#
# # 获取股票数据
# stock = yf.Ticker("AAPL")
#
# # 获取历史价格数据
# historical_prices = stock.history(period="max")
#
# # 获取市值数据
# market_cap = stock.info["marketCap"]
#
# print(historical_prices)
# print(market_cap)
......@@ -57,8 +57,8 @@ def page_list():
'Content-Length': '25',
'x-tif-openid': 'ojyj-40u1IEK5a2CSK7_Pg31ySks',
'x-tif-did': 'u8Ajuqdyap',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x6309080f)XWEB/8501',
'x-tif-sid': '755e67ddc8f86552d3f8d356fe22721cc5',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x63090819)XWEB/8519',
'x-tif-sid': 'ee270e93c3636dc3f281da8e0603db6a63',
'Content-Type': 'application/json',
'xweb_xhr': '1',
'dgd-pre-release': '0',
......@@ -69,11 +69,11 @@ def page_list():
'Sec-Fetch-Site': 'cross-site',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://servicewechat.com/wxbebb3cdd9b331046/748/page-frame.html',
'Referer': 'https://servicewechat.com/wxbebb3cdd9b331046/750/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br'
}
url='https://xcx.www.gov.cn/ebus/gwymp/api/r/faqlib/GetPolicyList'
for i in range(1,453):
for i in range(1,2):
log.info(f'采集第{i}页数据')
k=i
da='{"filterType":"","departmentid":"","keyword":"","page_size":15,"page":[k]}'
......@@ -110,8 +110,8 @@ def detailpaser(dmsg):
'Content-Length': '25',
'x-tif-openid': 'ojyj-40u1IEK5a2CSK7_Pg31ySks',
'x-tif-did': 'u8Ajuqdyap',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x6309080f)XWEB/8501',
'x-tif-sid': '755e67ddc8f86552d3f8d356fe22721cc5',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x63090819)XWEB/8519',
'x-tif-sid': 'ee270e93c3636dc3f281da8e0603db6a63',
'Content-Type': 'application/json',
'xweb_xhr': '1',
'dgd-pre-release': '0',
......@@ -122,7 +122,7 @@ def detailpaser(dmsg):
'Sec-Fetch-Site': 'cross-site',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://servicewechat.com/wxbebb3cdd9b331046/748/page-frame.html',
'Referer': 'https://servicewechat.com/wxbebb3cdd9b331046/750/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br'
}
try:
......
import json
import time
import uuid
import pymysql
import redis
import requests
from kafka import KafkaProducer
import urllib3
urllib3.disable_warnings()
from obs import ObsClient
import fitz
import sys
sys.path.append('D:\\kkwork\\zzsn_spider\\base')
import BaseCore
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=5)
# cnx = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
cnx = baseCore.cnx_
obsClient = ObsClient(
access_key_id='VEHN7D0TJ9316H8AHCAV', # 你的华为云的ak码
secret_access_key='heR353lvSWVPNU8pe2QxDtd8GDsO5L6PGH5eUoQY', # 你的华为云的sk
server='https://obs.cn-north-1.myhuaweicloud.com' # 你的桶的地址
)
pathType = 'CrowDingZhi/'
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Cookie': 'ba17301551dcbaf9_gdp_user_key=; gdp_user_id=gioenc-9a36dgc8%2C6b5d%2C5265%2Ccdc5%2C2ea193d9g222; ba17301551dcbaf9_gdp_session_id_878c2669-93f0-43bd-91c1-cc30ca7136ef=true; ba17301551dcbaf9_gdp_session_id_194d0e44-fe9b-48e5-b10a-8ed88066d31e=true; ba17301551dcbaf9_gdp_session_id_6b4b8111-8bf8-454e-9095-e16e285874b9=true; ba17301551dcbaf9_gdp_session_id_1bb9733b-f7c9-4f8d-b375-d393646e7329=true; ba17301551dcbaf9_gdp_session_id_7c08264f-759e-4cf8-b60b-ba1894f4a647=true; ba17301551dcbaf9_gdp_session_id_cbae63ce-6754-4b86-80e8-435ec24dde71=true; ba17301551dcbaf9_gdp_session_id_371e25f6-19a8-4e37-b3a9-fafb0236b2ac=true; ba17301551dcbaf9_gdp_session_id_d5257d90-edc8-4bd6-9625-d671f80c853f=true; ba17301551dcbaf9_gdp_session_id_26c35bee-808e-4a4d-a3dd-25ad65896727=true; ba17301551dcbaf9_gdp_session_id=c1b0f1df-857f-413a-b51b-2f7fda8bb882; ba17301551dcbaf9_gdp_session_id_c1b0f1df-857f-413a-b51b-2f7fda8bb882=true; ba17301551dcbaf9_gdp_sequence_ids={%22globalKey%22:220%2C%22VISIT%22:11%2C%22PAGE%22:23%2C%22CUSTOM%22:69%2C%22VIEW_CLICK%22:118%2C%22VIEW_CHANGE%22:3}',
'Host': 'query.sse.com.cn',
'Referer': 'http://www.sse.com.cn/',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
def convert_size(size_bytes):
# 定义不同单位的转换值
units = ['bytes', 'KB', 'MB', 'GB', 'TB']
i = 0
while size_bytes >= 1024 and i < len(units) - 1:
size_bytes /= 1024
i += 1
return f"{size_bytes:.2f} {units[i]}"
def getuuid():
get_timestamp_uuid = uuid.uuid1() # 根据 时间戳生成 uuid , 保证全球唯一
return get_timestamp_uuid
# 数据入库,返回主键id传到kafka中
def tableUpdate(year, name_pdf, type_id, item_id, group_name, path, full_path, category, file_size, order_by, status,
create_by, create_time, come, page_size):
with cnx.cursor() as cursor:
Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,source,page_size,object_key,bucket_name) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
values = (
year, name_pdf, type_id, item_id, group_name, path, full_path, category, file_size, order_by, status, create_by,
create_time, come, page_size, full_path.split('https://zzsn.obs.cn-north-1.myhuaweicloud.com/')[1], 'zzsn')
# log.info(values)
cursor.execute(Upsql, values) # 插入
cnx.commit() # 提交
querySql = '''select id from clb_sys_attachment where type_id=15 and full_path = %s''' # and stock_code = "01786.HK"
cursor.execute(querySql, full_path)
selects = cursor.fetchone()
pdf_id = selects[0]
# cnx.close()
# log.info("更新完成:{}".format(pdf_id))
return pdf_id
def uptoOBS(pdf_url, name_pdf, type_id, pathType, category):
retData = {'state': False, 'type_id': type_id, 'group_name': '', 'path': '',
'full_path': '',
'category': category, 'file_size': '', 'status': 1, 'create_by': 'XueLingKun',
'create_time': '', 'page_size': '', 'content': ''}
for i in range(0, 3):
try:
ip = baseCore.get_proxy()
# response = requests.get(pdf_url, headers=headers, verify=False, timeout=20)
response = requests.get(pdf_url)
file_size = int(response.headers.get('Content-Length'))
break
except:
time.sleep(3)
continue
for i in range(0, 3):
try:
name = str(getuuid()) + '.' + category
now_time = time.strftime("%Y-%m")
result = obsClient.putContent('zzsn', pathType + name, content=response.content)
if category == 'pdf':
with fitz.open(stream=response.content, filetype='pdf') as doc:
page_size = doc.page_count
for page in doc.pages():
retData['content'] += page.get_text()
break
else:
page_size = 0
retData['content'] = ''
break
except Exception as e:
time.sleep(3)
continue
try:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
retData['state'] = True
retData['path'] = result['body']['objectUrl'].split('.com')[1]
retData['full_path'] = result['body']['objectUrl']
retData['file_size'] = convert_size(file_size)
retData['create_time'] = time_now
retData['page_size'] = page_size
except Exception as e:
log.info(f'error---{e}')
return retData
return retData
if __name__ == "__main__":
num = 0
t = int(time.time()*1000)
url_ = f'http://query.sse.com.cn/commonSoaQuery.do?&isPagination=true&pageHelp.pageSize=25&pageHelp.pageNo=1&pageHelp.beginPage=1&pageHelp.cacheSize=1&pageHelp.endPage=1&sqlId=BS_KCB_GGLL&siteId=28&channelId=10007%2C10008%2C10009%2C10010&type=&stockcode=&extTeacher=&extWTFL=&createTime=&createTimeEnd=&order=createTime%7Cdesc%2Cstockcode%7Casc&_={t}'
req_ = requests.get(url=url_, headers=headers)
data_json = req_.json()
print(data_json)
pageCount = data_json['pageHelp']['pageCount']
for i in range(1,int(pageCount + 1)):
url = f'http://query.sse.com.cn/commonSoaQuery.do?&isPagination=true&pageHelp.pageSize=25&pageHelp.pageNo={i}&pageHelp.beginPage={i}&pageHelp.cacheSize=1&pageHelp.endPage={i}&sqlId=BS_KCB_GGLL&siteId=28&channelId=10007%2C10008%2C10009%2C10010&type=&stockcode=&extTeacher=&extWTFL=&createTime=&createTimeEnd=&order=createTime%7Cdesc%2Cstockcode%7Casc&_={t}'
req = requests.get(url=url, headers=headers)
data_list = req.json()['result']
for info in data_list:
publishDate = info['cmsOpDate'] # 处理日期
year = publishDate[:4]
com = '上海证券交易所'
docTitle = info['docTitle'] # 处理事由
docType = info['docType'] # 文档类型
docURL = "http://" + info['docURL'] # 链接 http://www.sse.com.cn/disclosure/credibility/supervision/measures/focus/c/f409d7c0-2726-47d1-ac5e-120a9cdb0727.pdf
flag = r.sismember('IN-20231227-0001', docURL)
if flag:
log.info('信息已采集入库过')
continue
# 上传至obs
retData = uptoOBS(docURL, docTitle, 15, pathType, docType)
if retData['state']:
pass
else:
log.info(f'====pdf解析失败====')
continue
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
page_size = retData['page_size']
path = retData['path']
full_path = retData['full_path']
file_size = retData['file_size']
create_by = retData['create_by']
content = retData['content']
status = 1
num += 1
create_time = time_now
# 上传到附件表
att_id = tableUpdate(year, docTitle+'.'+docType, 15, '', '', path, full_path, docType, file_size, num, status, create_by, create_time, com, page_size)
if att_id:
pass
else:
continue
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
sid = '1739914218978594817'
info_code = "IN-20231227-0001"
dic_news = {
'attachmentIds': str(att_id),
'content': content,
'contentWithTag': '',
'id': '',
'origin': com,
'publishDate': publishDate,
'sid': sid,
'sourceAddress': docURL,
'title': docTitle,
'source':'16',
'type': ''
}
# 将相应字段通过kafka传输保存
try:
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
kafka_result = producer.send("crawlerInfo",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
log.info(kafka_result.get(timeout=10))
except Exception as e:
log.info(e)
log.info(f'传输失败:{dic_news["title"]}、{dic_news["publishDate"]}')
dic_result = {
'success': 'true',
'message': '操作成功',
'code': '200',
}
log.info(dic_result)
r.sadd(info_code, docURL)
continue
# 中央全面深化改革委员会会议
import json
import time
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from kafka import KafkaProducer
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Cookie': 'cna=HcAKHtgXUG4CAQHBO1G6ZJYK',
'Host': 'www.12371.cn',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
if __name__ == "__main__":
# 中央全面深化改革委员会会议
# 中央全面深化改革领导小组会议
# url_list = ['https://www.12371.cn/special/zyqmshggldxzhy19/', 'https://www.12371.cn/special/zyqmshggldxzhy19/']
url_list = ['https://www.12371.cn/special/zyqmshggldxzhy19/']
for url in url_list:
request = requests.get(url=url, headers=headers)
soup = BeautifulSoup(request.content, 'html.parser')
request.encoding = request.apparent_encoding
# print(soup)
info_html = soup.find('div', id='SUBD1663831285709121').find('ul', class_='ul_list')
ul_list = info_html.find_all('li')
for ul in ul_list:
publishDate_ = str(ul.find('span').text)
date_obj= datetime.strptime(publishDate_, "%Y年%m月%d日")
publishDate = date_obj.strftime('%Y-%m-%d')
year = int(publishDate[:4])
if year < 2023:
continue
newsUrl = ul.find('a')['href']
summary = ul.find('a').text
# todo: 链接判重
news_request = requests.get(url=newsUrl, headers=headers)
news_soup = BeautifulSoup(news_request.content, 'html.parser')
print(news_soup)
title = news_soup.find('h1', class_='big_title').text
source = news_soup.find('div', class_='title_bottom').find('i').text
contentwithTag = news_soup.find('div', class_='word')
content = contentwithTag.text
if url == 'https://www.12371.cn/special/zyqmshggldxzhy19/':
sid = '1691633319715676162'
else:
sid = '1691633869186277378'
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
dic_info ={
'id': '1681549361661489154' + str(int(time.time()*1000)),
'title': title,
'origin': source,
'contentWithTag': str(contentwithTag),
'content': content,
'summary': summary,
'publishDate': publishDate,
'sid': sid,
'subjectId': '1681549361661489154',
'sourceAddress':newsUrl,
'checkStatus': 1,
'deleteFlag': 0,
'createDate': time_now,
}
producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
try:
kafka_result = producer.send("research_center_fourth",
json.dumps(dic_info, ensure_ascii=False).encode('utf8'))
# r.sadd(info_code + '-test', sourceAddress)
print('发送kafka结束')
except Exception as e:
print(e)
print('发送kafka异常!')
finally:
producer.close()
\ No newline at end of file
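For the link de-duplication todo ('链接判重') in the loop above, a minimal sketch using a Redis set, following the same sismember/sadd pattern the SSE supervision-measures collector in this commit already uses; the set name 'ZYQMSHGG:newsUrl' and db number are assumptions:

import redis

r_dedup = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=5)
DEDUP_KEY = 'ZYQMSHGG:newsUrl'  # illustrative set name

def seen_before(news_url):
    # True if the link was already collected; call r_dedup.sadd(DEDUP_KEY, news_url)
    # only after the article has been sent to Kafka successfully.
    return r_dedup.sismember(DEDUP_KEY, news_url)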
......@@ -27,29 +27,19 @@ class EsMethod(object):
def __init__(self):
# 创建Elasticsearch对象,并提供账号信息
self.es = Elasticsearch(['http://114.116.19.92:9700'], http_auth=('elastic', 'zzsn9988'), timeout=300)
self.index_name = 'policy'
self.index_name = 'researchreportdata'
def queryatt(self,index_name,pnum):
body = {
"query": {
"bool": {
"must": [
{
"nested" : {
"query" : {
"bool" : {
"must" : [
{
"match_phrase" : {
"labels.relationId" : {
"query" : "1698"
}
}
"term": {
"sid.keyword": {
"value": "1662008524476948481"
}
]
}
},
"path" : "labels"
}
}
]
......@@ -112,7 +102,7 @@ def main(page, p, esMethod):
unique_document_ids = [bucket["duplicate_docs"]["hits"]["hits"][-1]["_id"] for bucket in documents]
# 删除重复的文档
for doc_id in unique_document_ids:
esMethod.delete(index_name="policy", id=doc_id)
esMethod.delete(index_name="researchreportdata", id=doc_id)
......
......@@ -121,7 +121,7 @@ def get_content2():
except Exception as e:
log.info(f'---{href}--------{e}-------')
continue
if '.ofd' in file_href or '.docx' in file_href or '.doc' in file_href or 'xls' in file_href or '.zip' in file_href or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href or '.pdf' in file_href:
if '.wps' in file_href or '.ofd' in file_href or '.docx' in file_href or '.doc' in file_href or 'xls' in file_href or '.zip' in file_href or '.rar' in file_href or '.ppt' in file_href or '.PDF' in file_href or '.DOC' in file_href or '.XLS' in file_href or '.ZIP' in file_href or '.RAR' in file_href or '.pdf' in file_href:
file_name = file.text.strip()
category = os.path.splitext(file_href)[1]
if category not in file_name:
......
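A minimal sketch of an equivalent, easier-to-extend form of the attachment-suffix check above (behaviour preserved: lower-casing covers the mixed-case variants, and the looser bare 'xls' match is kept explicitly):

DOC_SUFFIXES = ('.wps', '.ofd', '.docx', '.doc', '.zip', '.rar', '.ppt', '.pdf')

def is_doc_link(file_href: str) -> bool:
    href = file_href.lower()
    # the original condition also matches a bare 'xls' substring, so keep that looser rule
    return any(suffix in href for suffix in DOC_SUFFIXES) or 'xls' in href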
# 天眼查商标申请数量
# 接口 https://capi.tianyancha.com/cloud-intellectual-property/intellectualProperty/trademarkList?_=1703216298337
# 请求方式 POST
import requests,time,re,random
from base import BaseCore
import pandas as pd
from bs4 import BeautifulSoup as bs
from comData.Tyc.getTycId import getTycIdByXYDM
baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
taskType = '天眼查商标/国内上市'
header = {
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Host': 'capi.tianyancha.com',
'Origin': 'https://www.tianyancha.com',
'Referer': 'https://www.tianyancha.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTcwMjcxMjg4MywiZXhwIjoxNzA1MzA0ODgzfQ.mVTR6Wz7W_IBjf4rLYhKacG9CRxGTzIGKmlqrR9jN-_t0Z4vUYVYwOTMzo7vT9IClJELruhl4d31KBHX0bZ1NQ',
'X-TYCID': '6f6298905d3011ee96146793e725899d',
'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'version': 'TYC-Web'
}
if __name__ == "__main__":
while True:
start_time = time.time()
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
# social_code = baseCore.redicPullData('ShangBiao:gnshSocial_code')
social_code = '91130629MA0CG2DL51'
# 判断 如果Redis中已经没有数据,则等待
if social_code == None:
# time.sleep(20)
break
start = time.time()
try:
data = baseCore.getInfomation(social_code)
if len(data) != 0:
pass
else:
# 数据重新塞入redis
baseCore.rePutIntoR('ShangBiao:gnshSocial_code', social_code)
continue
id = data[0]
com_name = data[1]
xydm = data[2]
tycid = data[11]
if tycid == None or tycid == '':
try:
retData = getTycIdByXYDM(xydm)
if retData['tycData'] and retData['reput']:
tycid = retData['tycData']['id']
# todo:写入数据库
updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
cursor.execute(updateSql)
cnx.commit()
elif not retData['tycData'] and retData['reput']:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
log.info(f'======={social_code}====重新放入redis====')
baseCore.rePutIntoR('ShangBiao:gnshSocial_code', social_code)
continue
elif not retData['reput'] and not retData['tycData']:
continue
except:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('ShangBiao:gnshSocial_code', social_code)
continue
# count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理")
t = int(time.time()*1000)
# url = f'https://capi.tianyancha.com/cloud-intellectual-property/intellectualProperty/trademarkList?_={t}'
url = f'https://capi.tianyancha.com/cloud-intellectual-property/trademark/statistics?_={t}&cgid={tycid}'
# tycid = '209252214'
# payload = {"id": tycid, "ps": 10, "pn": 1, "int_cls": "-100", "status": "-100", "app_year": "-100",
# "regYear": "-100", "searchType": "-100", "category": "-100", "fullSearchText": "", "sortField": "",
# "sortType": "-100"}
request = requests.get(url=url, headers=header, verify=False)
# request = requests.post(url=url, headers=header, data=payload)
# print(request.text)
data_json = request.json()
# print(data_json)
try:
all_data = data_json['data']['applyYearGraph']['statisticGraphData']
except:
dic_info = {
'企业名称': com_name,
'统一信用代码': social_code,
}
selectSql = f"select count(1) from shangbiao_sh_tyc where social_code='{xydm}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}----已经存在---无商标数据")
continue
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into shangbiao_sh_tyc(com_name,social_code) values (%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}-----新增---无商标数据")
continue
for info in all_data:
year = info['desc']
num = info['num'] # 申请商标数量
dic_info = {
'企业名称': com_name,
'统一信用代码': social_code,
'年份': year,
'数量': num
}
selectSql = f"select count(1) from shangbiao_sh_tyc where social_code='{xydm}' and year='{year}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}-------{year}---已经存在")
continue
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into shangbiao_sh_tyc(com_name,social_code,year,num) values (%s,%s,%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}-------{year}---新增")
time.sleep(2)
# list_all_info.append(dic_info)
log.info(f"【{xydm}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
except Exception as e:
log.info(f'==={social_code}=====获取企业信息失败==={e}=')
# 重新塞入redis
baseCore.rePutIntoR('ShangBiao:gnshSocial_code', social_code)
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
time.sleep(5)
break
\ No newline at end of file
# 天眼查商标申请数量
# 接口 https://capi.tianyancha.com/cloud-intellectual-property/intellectualProperty/trademarkList?_=1703216298337
# 请求方式 POST
import requests,time,re,random
from base import BaseCore
import pandas as pd
from bs4 import BeautifulSoup as bs
from comData.Tyc.getTycId import getTycIdByXYDM
baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
taskType = '天眼查商标/中国500强'
header = {
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Host': 'capi.tianyancha.com',
'Origin': 'https://www.tianyancha.com',
'Referer': 'https://www.tianyancha.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxODcwMzc1MjYwMCIsImlhdCI6MTcwMjcxMjg4MywiZXhwIjoxNzA1MzA0ODgzfQ.mVTR6Wz7W_IBjf4rLYhKacG9CRxGTzIGKmlqrR9jN-_t0Z4vUYVYwOTMzo7vT9IClJELruhl4d31KBHX0bZ1NQ',
'X-TYCID': '6f6298905d3011ee96146793e725899d',
'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'version': 'TYC-Web'
}
if __name__ == "__main__":
while True:
start_time = time.time()
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
social_code = baseCore.redicPullData('ShangBiao:zg500shSocial_code')
# social_code = '91350700856994874M'
# 判断 如果Redis中已经没有数据,则等待
if social_code == None:
# time.sleep(20)
break
start = time.time()
try:
data = baseCore.getInfomation(social_code)
if len(data) != 0:
pass
else:
# 数据重新塞入redis
baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
continue
id = data[0]
com_name = data[1]
xydm = data[2]
tycid = data[11]
if tycid == None or tycid == '':
try:
retData = getTycIdByXYDM(xydm)
if retData['tycData'] and retData['reput']:
tycid = retData['tycData']['id']
# todo:写入数据库
updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
cursor.execute(updateSql)
cnx.commit()
elif not retData['tycData'] and retData['reput']:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
log.info(f'======={social_code}====重新放入redis====')
baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
continue
elif not retData['reput'] and not retData['tycData']:
continue
except:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
continue
# count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理")
t = int(time.time()*1000)
# url = f'https://capi.tianyancha.com/cloud-intellectual-property/intellectualProperty/trademarkList?_={t}'
url = f'https://capi.tianyancha.com/cloud-intellectual-property/trademark/statistics?_={t}&cgid={tycid}'
# tycid = '209252214'
# payload = {"id": tycid, "ps": 10, "pn": 1, "int_cls": "-100", "status": "-100", "app_year": "-100",
# "regYear": "-100", "searchType": "-100", "category": "-100", "fullSearchText": "", "sortField": "",
# "sortType": "-100"}
request = requests.get(url=url, headers=header, verify=False)
# request = requests.post(url=url, headers=header, data=payload)
# print(request.text)
data_json = request.json()
# print(data_json)
try:
all_data = data_json['data']['applyYearGraph']['statisticGraphData']
except:
dic_info = {
'企业名称': com_name,
'统一信用代码': social_code,
}
selectSql = f"select count(1) from shangbiao_sh_tyc where social_code='{xydm}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}----已经存在---无商标数据")
continue
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into shangbiao_sh_tyc(com_name,social_code) values (%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}-----新增---无商标数据")
continue
for info in all_data:
year = info['desc']
num = info['num'] # 申请商标数量
dic_info = {
'企业名称': com_name,
'统一信用代码': social_code,
'年份': year,
'数量': num
}
selectSql = f"select count(1) from shangbiao_sh_tyc where social_code='{xydm}' and year='{year}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}-------{year}---已经存在")
continue
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into shangbiao_sh_tyc(com_name,social_code,year,num) values (%s,%s,%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}-------{year}---新增")
time.sleep(2)
# list_all_info.append(dic_info)
log.info(f"【{xydm}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
except Exception as e:
log.info(f'==={social_code}=====获取企业信息失败==={e}=')
# 重新塞入redis
baseCore.rePutIntoR('ShangBiao:zg500shSocial_code', social_code)
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
time.sleep(5)
......@@ -56,7 +56,7 @@ if __name__=="__main__":
url = "https://mp.weixin.qq.com/"
browser.get(url)
# 可改动
time.sleep(10)
time.sleep(20)
s = requests.session()
#获取到token和cookies
......
......@@ -239,6 +239,8 @@ if __name__=="__main__":
list_all_info = []
while True:
#一次拿取一篇文章
# todo: 从redis拿数据 更新mysql状态
dict_json =getjsonInfo()
if dict_json:
if get_info(dict_json):
......
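For the todo above (pull the item from Redis and update its MySQL state), a minimal sketch under the assumption that article ids are queued in a Redis list and tracked in a MySQL table with a state column; the key 'WeiXinGZH:article' and the table/column names are illustrative:

def pull_and_mark(redis_conn, cursor_, cnx_):
    # Pop one queued article id and mark it as being processed (state = 1) in MySQL.
    item = redis_conn.lpop('WeiXinGZH:article')  # illustrative key
    if item is None:
        return None
    article_id = item.decode()
    cursor_.execute("update wx_article set state = %s where id = %s", (1, article_id))  # illustrative table/column
    cnx_.commit()
    return article_id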
......@@ -113,7 +113,7 @@ def insertWxList(dic_url,json_search,page):
cnx_.commit()
except Exception as e:
log.error(f"保存数据库失败:{e}")
# todo: 放入redis
log.info(f"---{dic_url['name']}--第{page}页----总数:{listCount}---重复数:{repetCount}---新增数:{insertCount}-------------")
if listCount==0:
#列表为空认为结束
......
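For the '放入redis' (push to Redis) todo above, a minimal sketch that queues each newly inserted link right after the INSERT commits, reusing the rePutIntoR helper from BaseCore earlier in this commit; the key name and helper are assumptions:

def queue_article(baseCore, article_link, key='WeiXinGZH:url'):
    # Push the newly saved link so the detail collector can pick it up later.
    baseCore.rePutIntoR(key, article_link)  # illustrative key name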
from bs4 import BeautifulSoup
import requests,time,re
from base import BaseCore
# import pandas as pd
baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
log = baseCore.getLogger()
taskType = '500强专利'
# headers = {
# "Cookie":"currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2Fdata%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%25202021%25202020%25202019%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26rnd%3D1663641959596; PGS=10; _pk_id.93.72ee=ee83303e45a089a1.1663061058.; org.springframework.web.servlet.i18n.CookieLocaleResolver.LOCALE=cn_EP; _pk_ses.93.72ee=1; LevelXLastSelectedDataSource=EPODOC; menuCurrentSearch=%2F%2Fworldwide.espacenet.com%2FsearchResults%3FAB%3D%26AP%3D%26CPC%3D%26DB%3DEPODOC%26IC%3D%26IN%3D%26PA%3Dapple%26PD%3D2022%26PN%3D%26PR%3D%26ST%3Dadvanced%26Submit%3D%E6%A3%80%E7%B4%A2%26TI%3D%26locale%3Dcn_EP; currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26Submit%3D%25E6%25A3%2580%25E7%25B4%25A2; JSESSIONID=qnLt7d5QgBtpUGjMmHuD-kwJ.espacenet_levelx_prod_2",
# "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
# }
# df_all = pd.read_excel('D:\\kkwork\\jupyter\\专利数量\\t1.xlsx')
# for i in range(2022,1890,-1):
# df_all[f'{i}'] = ''
# df_all['Espacenet专利检索'] = ''
headers = {
"Cookie": "currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2Fdata%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%25202021%25202020%25202019%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26rnd%3D1663641959596; PGS=10; _pk_id.93.72ee=ee83303e45a089a1.1663061058.; org.springframework.web.servlet.i18n.CookieLocaleResolver.LOCALE=cn_EP; _pk_ses.93.72ee=1; LevelXLastSelectedDataSource=EPODOC; menuCurrentSearch=%2F%2Fworldwide.espacenet.com%2FsearchResults%3FAB%3D%26AP%3D%26CPC%3D%26DB%3DEPODOC%26IC%3D%26IN%3D%26PA%3Dapple%26PD%3D2022%26PN%3D%26PR%3D%26ST%3Dadvanced%26Submit%3D%E6%A3%80%E7%B4%A2%26TI%3D%26locale%3Dcn_EP; currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26Submit%3D%25E6%25A3%2580%25E7%25B4%25A2; JSESSIONID=qnLt7d5QgBtpUGjMmHuD-kwJ.espacenet_levelx_prod_2",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
}
def name_handle(english_name_):
if 'INC.' in english_name_ or 'LTD.' in english_name_ or 'CO.' in english_name_ \
or 'CORP.' in english_name_ or 'GMBH' in english_name_ \
or ' AG' in english_name_ or 'SARL' in english_name_ or 'S.A.' in english_name_ \
or 'PTY' in english_name_ or 'LLC' in english_name_ or 'LLP' in english_name_ \
or ' AB' in english_name_ or ' NV' in english_name_ or 'N.V.' in english_name_ \
or 'A.S.' in english_name_ or ' SA' in english_name_ or ',Limited' in english_name_ \
or ' SE' in english_name_ or ' LPC' in english_name_ or 'S.P.A.' in english_name_:
english_name = english_name_.replace('INC.', '').replace('LTD.', '').replace('CO.', '').replace('CORP.', '') \
.replace('GMBH', '').replace(' AG', '').replace('SARL', '').replace('S.A.', '').replace('PTY', '') \
.replace('LLC', '').replace('LLP', '').replace(' AB', '').replace(' NV', '').replace(',', '') \
.replace('A.S.', '').replace(' SA', '').replace(',Limited', '').replace(' SE', '').replace(' PLC', '') \
.replace('N.V.', '').replace('S.P.A.', '').rstrip()
return english_name
else:
english_name = english_name_
return english_name
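# Worked examples (illustrative): name_handle('APPLE INC.') -> 'APPLE',
# name_handle('SIEMENS AG') -> 'SIEMENS'; a name without a recognised suffix is returned unchanged.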
if __name__ == '__main__':
while True:
start_time = time.time()
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
social_code = baseCore.redicPullData('ZhuanLi:gwSocial_code')
# social_code = '9111000071093123XX'
# 判断 如果Redis中已经没有数据,则等待
if social_code == None:
# time.sleep(20)
break
start = time.time()
try:
data = baseCore.getInfomation(social_code)
if len(data) != 0:
pass
else:
# 数据重新塞入redis
baseCore.rePutIntoR('ZhuanLi:gwSocial_code', social_code)
continue
id = data[0]
com_name = data[1]
xydm = data[2]
english_name_ = data[5]
place = data[6]
if place == 1:
log.info(f'{com_name}--国内')
baseCore.rePutIntoR('Zhuanli:gwSocial_code',social_code)
continue
if english_name_:
pass
else:
query = f"select * from sys_base_enterprise where social_credit_code ='{xydm}'"
cursor_.execute(query)
reslut = cursor_.fetchone()
english_name_ = reslut[32]
# todo:将该字段更新到144企业库
update_ = f"update EnterpriseInfo set EnglishName='{english_name_}' where SocialCode='{xydm}' "
cursor.execute(update_)
cnx.commit()
english_name_ = english_name_.upper()
english_name = name_handle(english_name_)
num_zhuanli = 0
# url1 = f'https://worldwide.espacenet.com/data/searchResults?ST=singleline&locale=cn_EP&submitted=true&DB=&query={com_name}&rnd=' + str(
# int(float(time.time()) * 1000))
#
# res1 = requests.get(url1, headers=headers)
# soup1 = BeautifulSoup(res1.content, 'html.parser')
#
# num_text = soup1.find('p', {'class': 'numResultsFoundMsg'}).text
#
# try:
# zhuanli = re.findall("约(.*?)个", num_text)[0].replace(',', '')
# except:
# zhuanli = re.findall("多于(.*?)个", num_text)[0].replace(',', '')
# if zhuanli:
for year in range(2023, 1900, -1):
url = f'https://worldwide.espacenet.com/data/searchResults?submitted=true&locale=cn_EP&DB=EPODOC&ST=advanced&TI=&AB=&PN=&AP=&PR=&PD={year}&PA={english_name}&IN=&CPC=&IC=&rnd=' + str(
int(float(time.time()) * 1000))
# url = 'https://worldwide.espacenet.com/data/searchResults?submitted=true&locale=cn_EP&DB=EPODOC&ST=advanced&TI=&AB=&PN=&AP=&PR=&PD=2022&PA=APPLE&IN=&CPC=&IC=&rnd=1703643229331'
ip = baseCore.get_proxy()
res = requests.get(url, headers=headers, proxies=ip)
soup = BeautifulSoup(res.content, 'html.parser')
num_text = soup.find('p', {'class': 'numResultsFoundMsg'}).text
try:
try:
zhuanli = int(re.findall("约(.*?)个", num_text)[0].replace(',', ''))
except:
zhuanli = int(re.findall("多于(.*?)个", num_text)[0].replace(',', ''))
except:
zhuanli = int(re.findall("找到(.*?)个", num_text)[0].replace(',', ''))
if zhuanli == 0:
dic_info = {
'com_name': com_name,
'social_code': social_code,
}
# 插入数据库表中
selectSql = f"select count(1) from zhuanli_500 where social_code='{xydm}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}-----已经存在--{year}--无专利信息")
break
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into zhuanli_500(com_name,social_code) values (%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}------新增----无专利信息")
break
dic_info = {
'com_name': com_name,
'social_code': social_code,
'year': year,
'num': zhuanli
}
# 插入数据库表中
selectSql = f"select count(1) from zhuanli_500 where social_code='{xydm}' and year='{year}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}-------{year}---已经存在")
continue
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into zhuanli_500(com_name,social_code,year,num) values (%s,%s,%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}-------{year}---新增")
except:
log.info("error!{}".format(social_code))
baseCore.rePutIntoR('ZhuanLi:gwSocial_code', social_code)
continue
\ No newline at end of file
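The nested try/except branches above parse three possible phrasings of Espacenet's result-count message (约/多于/找到). A minimal sketch of that parsing logic in one helper, not part of the original script:
import re

def parse_result_count(num_text):
    # The message may read "约 1,234 个", "多于 10,000 个" or "找到 56 个".
    for pattern in ("约(.*?)个", "多于(.*?)个", "找到(.*?)个"):
        match = re.search(pattern, num_text)
        if match:
            return int(match.group(1).replace(',', '').strip())
    raise ValueError('unrecognised result-count text: ' + num_text)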
import requests,re,time,os,datetime,random
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup
import redis
# headers = {
# "Cookie":"currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2Fdata%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%25202021%25202020%25202019%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26rnd%3D1663641959596; PGS=10; _pk_id.93.72ee=ee83303e45a089a1.1663061058.; org.springframework.web.servlet.i18n.CookieLocaleResolver.LOCALE=cn_EP; _pk_ses.93.72ee=1; LevelXLastSelectedDataSource=EPODOC; menuCurrentSearch=%2F%2Fworldwide.espacenet.com%2FsearchResults%3FAB%3D%26AP%3D%26CPC%3D%26DB%3DEPODOC%26IC%3D%26IN%3D%26PA%3Dapple%26PD%3D2022%26PN%3D%26PR%3D%26ST%3Dadvanced%26Submit%3D%E6%A3%80%E7%B4%A2%26TI%3D%26locale%3Dcn_EP; currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26Submit%3D%25E6%25A3%2580%25E7%25B4%25A2; JSESSIONID=qnLt7d5QgBtpUGjMmHuD-kwJ.espacenet_levelx_prod_2",
# "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
# }
df_all = pd.read_excel('D:\\kkwork\\jupyter\\专利数量\\t1.xlsx')
# for i in range(2022,1890,-1):
# df_all[f'{i}'] = ''
# df_all['Espacenet专利检索'] = ''
headers = {
"Cookie": "currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2Fdata%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%25202021%25202020%25202019%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26rnd%3D1663641959596; PGS=10; _pk_id.93.72ee=ee83303e45a089a1.1663061058.; org.springframework.web.servlet.i18n.CookieLocaleResolver.LOCALE=cn_EP; _pk_ses.93.72ee=1; LevelXLastSelectedDataSource=EPODOC; menuCurrentSearch=%2F%2Fworldwide.espacenet.com%2FsearchResults%3FAB%3D%26AP%3D%26CPC%3D%26DB%3DEPODOC%26IC%3D%26IN%3D%26PA%3Dapple%26PD%3D2022%26PN%3D%26PR%3D%26ST%3Dadvanced%26Submit%3D%E6%A3%80%E7%B4%A2%26TI%3D%26locale%3Dcn_EP; currentUrl=https%3A%2F%2Fworldwide.espacenet.com%2FsearchResults%3Fsubmitted%3Dtrue%26locale%3Dcn_EP%26DB%3DEPODOC%26ST%3Dadvanced%26TI%3D%26AB%3D%26PN%3D%26AP%3D%26PR%3D%26PD%3D2022%26PA%3Dapple%26IN%3D%26CPC%3D%26IC%3D%26Submit%3D%25E6%25A3%2580%25E7%25B4%25A2; JSESSIONID=qnLt7d5QgBtpUGjMmHuD-kwJ.espacenet_levelx_prod_2",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
}
for i in range(len(df_all['英文名称'])):
for num in range(0, 2):
try:
if '中国' not in df_all['企业所属国家'][i]:
com_name = df_all['英文名称'][i]
num_zhuanli = 0
url1 = f'https://worldwide.espacenet.com/data/searchResults?ST=singleline&locale=cn_EP&submitted=true&DB=&query={com_name}&rnd=' + str(
int(float(time.time()) * 1000))
res1 = requests.get(url1, headers=headers)
soup1 = BeautifulSoup(res1.content, 'html.parser')
num_text = soup1.find('p', {'class': 'numResultsFoundMsg'}).text
# try:
# zhuanli = re.findall("约(.*?)个", num_text)[0].replace(',', '')
# except:
# zhuanli = re.findall("多于(.*?)个", num_text)[0].replace(',', '')
zhuanli = '10000'
if zhuanli == '10000':
for year in range(2023, 1900, -1):
url = f'https://worldwide.espacenet.com/data/searchResults?submitted=true&locale=cn_EP&DB=EPODOC&ST=advanced&TI=&AB=&PN=&AP=&PR=&PD={year}&PA={com_name}&IN=&CPC=&IC=&rnd=' + str(
int(float(time.time()) * 1000))
# url = 'https://worldwide.espacenet.com/data/searchResults?submitted=true&locale=cn_EP&DB=EPODOC&ST=advanced&TI=&AB=&PN=&AP=&PR=&PD=2022&PA=APPLE&IN=&CPC=&IC=&rnd=1703643229331'
res = requests.get(url, headers=headers)
soup = BeautifulSoup(res.content, 'html.parser')
num_text = soup.find('p', {'class': 'numResultsFoundMsg'}).text
try:
try:
zhuanli2 = int(re.findall("约(.*?)个", num_text)[0].replace(',', ''))
except:
zhuanli2 = int(re.findall("多于(.*?)个", num_text)[0].replace(',', ''))
except:
zhuanli2 = int(re.findall("找到(.*?)个", num_text)[0].replace(',', ''))
if zhuanli2 == 0:
break
df_all[f'{year}'][i] = zhuanli2
# num_zhuanli = num_zhuanli + zhuanli2
num_zhuanli = num_zhuanli + zhuanli2
print(year)
time.sleep(random.uniform(1.5, 2))
else:
num_zhuanli = int(zhuanli)
time.sleep(random.uniform(1.5, 2))
df_all['Espacenet专利检索'][i] = num_zhuanli
print(f"{com_name} : {num_zhuanli}")
break
except:
if num == 0:
print("重试")
time.sleep(60)
continue
else:
print("error!{}".format(df_all['英文名称'][i]))
\ No newline at end of file
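Assignments such as df_all[f'{year}'][i] = zhuanli2 above use chained indexing, which pandas may apply to a temporary copy. A small illustrative sketch of the .loc form that always writes in place (not a change to the script above):
import pandas as pd

df = pd.DataFrame({'英文名称': ['APPLE'], '2022': [0]})
df.loc[0, '2022'] = 1234   # same intent as df['2022'][0] = 1234, without the chained-assignment pitfall
print(df)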
import requests,time,re,random
from base import BaseCore
import pandas as pd
from bs4 import BeautifulSoup as bs
from comData.Tyc.getTycId import getTycIdByXYDM
baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
taskType = '天眼查专利/国内上市'
def spider_zhuanli(com_name, social_code, tycid, page, list_all_info):
start_time = time.time()
log.info(f'===正在处理第{page}页===')
# list_all_info = []
t = int(time.time() * 1000)
header = {
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Host': 'capi.tianyancha.com',
'Origin': 'https://www.tianyancha.com',
'Referer': 'https://www.tianyancha.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzI3MzczNzEzMSIsImlhdCI6MTcwMzE1MjEzMSwiZXhwIjoxNzA1NzQ0MTMxfQ.3tF-UFhorC_mS4h2UIBOZamApfcaJEfjBbr8K11d2yHhELBM1pEvjd6yccxhLzVKRoyFdTn-1Cz6__ZpzgjnGg',
'X-TYCID': '6f6298905d3011ee96146793e725899d',
'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'version': 'TYC-Web'
}
url = f'https://capi.tianyancha.com/cloud-intellectual-property/patent/patentListV6?_={t}&id={tycid}&pageSize=100&pageNum={page}&type=-100&lprs=-100&applyYear=-100&pubYear=-100&fullSearchText=&sortField=&sortType=-100'
try:
ip = baseCore.get_proxy()
except:
time.sleep(2)
ip = baseCore.get_proxy()
try:
res_j = requests.get(url=url, headers=header, proxies=ip, verify=False).json()
except:
for i in range(3):
try:
res_j = requests.get(url=url, headers=header, verify=False).json()
except:
time.sleep(2)
continue
# print(res_j)
try:
list_all = res_j['data']['items']
except:
dic_info = {
'企业名称': com_name,
'统一信用代码': social_code
}
selectSql = f"select count(1) from zhuanli_sh_tyc where social_code='{social_code}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}---{social_code}---已经存在---无专利")
return 0
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into zhuanli_sh_tyc(com_name,social_code) values (%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}---{social_code}---新增---无专利")
return 0
# print(list_all)
if list_all:
for one_zhuanli in list_all:
title = one_zhuanli['title']
try:
shenqingri = one_zhuanli['applicationTime']
except:
shenqingri = ''
try:
shenqing_code = one_zhuanli['patentNum']
except:
shenqing_code = ''
try:
leixing = one_zhuanli['patentType']
except:
leixing = ''
try:
status = one_zhuanli['lprs']
except:
status = ''
try:
gongkairi = one_zhuanli['pubDate']
except:
gongkairi = ''
try:
gongkai_code = one_zhuanli['pubnumber']
except:
gongkai_code = ''
try:
famingren = one_zhuanli['inventor']
except:
famingren = ''
try:
shenqingren = one_zhuanli['applicantName']
except:
shenqingren = ''
try:
gongneng = one_zhuanli['cat']
except:
gongneng = ''
try:
uuid = one_zhuanli['uuid']
except:
uuid = ''
dic_info = {
'企业名称': com_name,
'统一信用代码': social_code,
'专利名称': title,
'申请日': shenqingri,
'申请号': shenqing_code,
'专利类型': leixing,
'专利状态': status,
'公开日': gongkairi,
'公开号': gongkai_code,
'发明人': famingren,
'申请人': shenqingren,
'功能': gongneng,
'天眼查详情id': uuid,
'年份': shenqingri[:4]
}
selectSql = f"select count(1) from zhuanli_sh_tyc where shenqing_code='{shenqing_code}' "
cursor.execute(selectSql)
count = cursor.fetchone()[0]
if count > 0:
log.info(f"{com_name}-------{shenqing_code}---已经存在")
continue
else:
values_tuple = tuple(dic_info.values())
# log.info(f"{gpdm}-------{companyname}---新增")
insertSql = f"insert into zhuanli_sh_tyc(com_name,social_code,title,shenqingri,shenqing_code,leixing,status,gongkairi,gongkai_code,famingren,shenqingren,gongneng,uuid,year) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
cursor.execute(insertSql, values_tuple)
cnx.commit()
log.info(f"{com_name}-------{shenqing_code}---新增")
time.sleep(2)
# list_all_info.append(dic_info)
log.info(f"【{page}】-----------end,耗时{baseCore.getTimeCost(start_time, time.time())}")
return page
else:
return 0
if __name__ == "__main__":
while True:
list_all_info = []
# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
social_code = baseCore.redicPullData('ZhuanLi:gnshSocial_code_zg500')
# social_code = '91350700856994874M'
# 判断 如果Redis中已经没有数据,则等待
if social_code is None:
# time.sleep(20)
break
start = time.time()
try:
data = baseCore.getInfomation(social_code)
if len(data) != 0:
pass
else:
# 数据重新塞入redis
baseCore.rePutIntoR('ZhuanLi:gnshSocial_code_zg500', social_code)
continue
id = data[0]
com_name = data[1]
xydm = data[2]
tycid = data[11]
if tycid is None or tycid == '':
try:
retData = getTycIdByXYDM(xydm)
if retData['tycData'] and retData['reput']:
tycid = retData['tycData']['id']
# todo:写入数据库
updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
cursor.execute(updateSql)
cnx.commit()
elif not retData['tycData'] and retData['reput']:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
log.info(f'======={social_code}====重新放入redis====')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
continue
elif not retData['reput'] and not retData['tycData']:
continue
except:
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', '获取天眼查id失败')
baseCore.rePutIntoR('NewsEnterprise:gnqy_socialCode', social_code)
continue
count = data[17]
log.info(f"{id}---{xydm}----{tycid}----开始处理")
page = 1
while True:
page = spider_zhuanli(com_name, xydm, tycid, page, list_all_info)
if page != 0:
page += 1
else:
# print(len(list_all_info))
# df_all_info = pd.DataFrame(list_all_info)
# df_all_info.to_excel('中国上市企业专利.xlsx', index=False)
log.info(f"{id}---{xydm}----{tycid}----结束处理")
break
except Exception as e:
log.info(f'==={social_code}=====获取企业信息失败==={e}=')
# 重新塞入redis
baseCore.rePutIntoR('ZhuanLi:gnshSocial_code_zg500', social_code)
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
time.sleep(5)
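For reference, the per-field try/except blocks in spider_zhuanli could be condensed with dict.get. This is only an illustrative sketch using the same Tianyancha field names, not the code in this commit:
def extract_patent(one_zhuanli):
    # Missing keys fall back to '' instead of raising, mirroring the try/except defaults above.
    return {
        'title': one_zhuanli.get('title', ''),
        'shenqingri': one_zhuanli.get('applicationTime', ''),
        'shenqing_code': one_zhuanli.get('patentNum', ''),
        'leixing': one_zhuanli.get('patentType', ''),
        'status': one_zhuanli.get('lprs', ''),
        'gongkairi': one_zhuanli.get('pubDate', ''),
        'gongkai_code': one_zhuanli.get('pubnumber', ''),
        'famingren': one_zhuanli.get('inventor', ''),
        'shenqingren': one_zhuanli.get('applicantName', ''),
        'gongneng': one_zhuanli.get('cat', ''),
        'uuid': one_zhuanli.get('uuid', ''),
    }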
......@@ -13,9 +13,10 @@ url=jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=ut
[kafka]
bootstrap_servers=114.115.159.144:9092
topic=keyWordsInfo
groupId=python_baidu_test
groupId=python_google
[selenium]
chrome_driver=C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe
binary_location=D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe
;chrome_driver=C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe
;binary_location=D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe
chrome_driver=D:\cmd100\chromedriver.exe
binary_location=D:\Google\Chrome\Application\chrome.exe
......@@ -168,6 +168,8 @@ class GoogleSpider(object):
try:
driver.get(url)
# 等待页面加载完成
time.sleep(3)
driver.refresh()
wait = WebDriverWait(driver, 20)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
html=driver.page_source
......@@ -256,6 +258,7 @@ class GoogleSpider(object):
self.driver.get(self.url)
# 等待页面加载完成
time.sleep(3)
self.driver.refresh()
wait = WebDriverWait(self.driver, 20)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
search_input = self.driver.find_element('xpath', '//textarea[@title="Google 搜索"]')
......@@ -265,7 +268,11 @@ class GoogleSpider(object):
time.sleep(3)
wait = WebDriverWait(self.driver, 20)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
try:
self.driver.find_element('xpath', '//div[@class="GKS7s"]/span[text()="新闻"]').click()
except:
self.driver.find_element('xpath', '//*[@id="hdtb-msb"]/div[1]/div/div[2]/a/span').click()
time.sleep(3)
self.driver.find_element('xpath', '//div[@id="hdtb-tls"]').click()
time.sleep(2)
......@@ -273,7 +280,8 @@ class GoogleSpider(object):
time.sleep(2)
self.driver.find_element('xpath', '//div[@class="YpcDnf OSrXXb HG1dvd"]/a[text()="按日期排序"]').click()
except Exception as e:
print(e)
self.logger.info(f'--点击按钮失效----{e}')
return
self.logger.info("开始抓取首页..." + self.searchkw )
time.sleep(5)
flag, lists = self.parse_page()
......@@ -446,7 +454,7 @@ class GoogleSpider(object):
detailurl=detailmsg['detailUrl']
title = detailmsg['title']
content,contentWithTag=self.extractorMsg(detailurl,title)
contentWithTag=self.rmTagattr(contentWithTag)
contentWithTag=self.rmTagattr(contentWithTag,detailurl)
except Exception as e:
content=''
contentWithTag=''
......
......@@ -40,7 +40,7 @@ class GoogleTaskJob(object):
try:
for record in consumer:
try:
logger.info("value:",record.value)
logger.info(f"value:{record.value}")
keymsg=record.value
if keymsg:
break
......@@ -176,7 +176,7 @@ if __name__ == '__main__':
continue
if kwList:
# 创建一个线程池,指定线程数量为4
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
# 提交任务给线程池,每个任务处理一个数据
results = [executor.submit(googleTaskJob.runSpider, data) for data in kwList]
# 获取任务的执行结果
......
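A self-contained sketch of the submit/collect pattern used with the thread pool above (function and keyword names are illustrative, not from the repo):
import concurrent.futures
import time

def run_spider(kw):
    time.sleep(0.1)           # stand-in for the real crawl
    return 'done: ' + kw

kw_list = ['REITs', 'Espacenet']
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
    futures = [executor.submit(run_spider, kw) for kw in kw_list]
    for future in concurrent.futures.as_completed(futures):
        print(future.result())   # re-raises any exception thrown inside the worker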
import requests
url = 'https://www.ctwant.com/article/308534'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'
}
req = requests.get(url, headers=headers)
print(req.text)
\ No newline at end of file
......@@ -113,23 +113,23 @@ if __name__=='__main__':
author = new.find('font', face='楷体').text.replace('/', '').replace('\u3000', ' ').replace('\xa0', '')
except:
continue
# if len(author)>4:
# continue
if len(author)>4:
continue
# if '(' in author or '本刊' in author or '国家' in author\
# or '中共' in author or '记者' in author or '新闻社' in author\
# or '党委' in author or '调研组' in author or '研究中心' in author\
# or '委员会' in author or '博物' in author or '大学' in author or '联合会' in author :
# if '(' in author or '本刊' in author \
# or '记者' in author or '新闻社' in author \
# or '”' in author\
# or '大学' in author or '洛桑江村' in author:
# continue
if '国资委党委' in author:
pass
else:
if '(' in author or '本刊' in author \
or '记者' in author or '新闻社' in author \
or '”' in author\
or '大学' in author or '洛桑江村' in author:
continue
# if '国资委党委' in author:
# pass
# else:
# continue
new_href = new.find('a')['href']
is_member = r.sismember('qiushileaderspeech::' + period_title, new_href)
is_member = r.sismember('qiushileaderspeech_two::' + period_title, new_href)
if is_member:
continue
new_title = new.find('a').text.replace('\u3000',' ').lstrip(' ').replace('——', '').replace('\xa0', '')
......@@ -165,7 +165,7 @@ if __name__=='__main__':
}
log.info(dic_news)
if sendKafka(dic_news):
r.sadd('qiushileaderspeech::' + period_title, new_href)
r.sadd('qiushileaderspeech_two::' + period_title, new_href)
log.info(f'采集成功----{dic_news["sourceAddress"]}')
......@@ -55,56 +55,56 @@ from obs import ObsClient
from kafka import KafkaProducer
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
def use_ocr(img):
ocr = ddddocr.DdddOcr()
with open(img, 'rb') as f:
image = f.read()
res = ocr.classification(image)
print(res)
return res
if __name__=="__main__":
requests.DEFAULT_RETRIES = 5
time_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
log.info(f'开始时间为:{time_start}')
requests.adapters.DEFAULT_RETRIES = 3
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
}
opt = webdriver.ChromeOptions()
opt.add_argument(
'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
opt.add_argument("--ignore-certificate-errors")
opt.add_argument("--ignore-ssl-errors")
opt.add_experimental_option("excludeSwitches", ["enable-automation"])
opt.add_experimental_option('excludeSwitches', ['enable-logging'])
opt.add_experimental_option('useAutomationExtension', False)
opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe'
chromedriver = r'D:/cmd100/chromedriver.exe'
browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
url = "http://zxgk.court.gov.cn/shixin/"
browser.get(url)
# 可改动
time.sleep(20)
page_source = browser.page_source
soup = BeautifulSoup(page_source, 'html.parser')
img_url = soup.select('img[id="captchaImg"]')[0]['src']
browser.find_element(By.ID, 'pName').send_keys('北京远翰国际教育咨询有限责任公司')
browser.find_element(By.ID, 'yzm').send_keys(yzm)
browser.find_element(By.ID, 'searchForm').click()
wait = WebDriverWait(browser, 30)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
# baseCore = BaseCore()
# log = baseCore.getLogger()
# cnx_ = baseCore.cnx
# cursor_ = baseCore.cursor
#
# def use_ocr(img):
# ocr = ddddocr.DdddOcr()
# with open(img, 'rb') as f:
# image = f.read()
# res = ocr.classification(image)
# print(res)
# return res
#
# if __name__=="__main__":
# requests.DEFAULT_RETRIES = 5
# time_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# log.info(f'开始时间为:{time_start}')
#
# requests.adapters.DEFAULT_RETRIES = 3
# headers = {
# 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
# }
#
# opt = webdriver.ChromeOptions()
# opt.add_argument(
# 'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
#
# opt.add_argument("--ignore-certificate-errors")
# opt.add_argument("--ignore-ssl-errors")
# opt.add_experimental_option("excludeSwitches", ["enable-automation"])
# opt.add_experimental_option('excludeSwitches', ['enable-logging'])
# opt.add_experimental_option('useAutomationExtension', False)
# opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe'
# chromedriver = r'D:/cmd100/chromedriver.exe'
# browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
# url = "http://zxgk.court.gov.cn/shixin/"
# browser.get(url)
# # 可改动
# time.sleep(20)
# page_source = browser.page_source
# soup = BeautifulSoup(page_source, 'html.parser')
# img_url = soup.select('img[id="captchaImg"]')[0]['src']
#
# browser.find_element(By.ID, 'pName').send_keys('北京远翰国际教育咨询有限责任公司')
#
#
# browser.find_element(By.ID, 'yzm').send_keys(yzm)
# browser.find_element(By.ID, 'searchForm').click()
# wait = WebDriverWait(browser, 30)
# wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
# screen_img_path = "D:/screen/xxx.png"
# out_img_path = "D:/out/xxx.png"
......@@ -112,3 +112,27 @@ if __name__=="__main__":
#
# code = use_ocr(out_img_path)
# 验证码输入框元素.send_keys(code)
import requests
headers = {
# 'Accept': '*/*',
# 'Accept-Encoding': 'gzip, deflate, br',
# 'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
# 'Cache-Control': 'no-cache',
# 'Connection': 'keep-alive',
# 'Host': 'search-api-web.eastmoney.com',
# 'Pragma': 'no-cache',
# 'Sec-Fetch-Dest': 'script',
# 'Sec-Fetch-Mode': 'no-cors',
# 'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
# 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
# 'sec-ch-ua-mobile': '?0',
# 'sec-ch-ua-platform': '"Windows"'
}
url = "https://www-private-oss.mob.com/academy_reports/2023/03/31/Mob%E7%A0%94%E7%A9%B6%E9%99%A2%E3%80%8A2023%E5%B9%B4%E4%B8%AD%E5%9B%BD%E6%96%87%E6%97%85%E4%BA%A7%E4%B8%9A%E5%8F%91%E5%B1%95%E8%B6%8B%E5%8A%BF%E6%8A%A5%E5%91%8A%E3%80%8B.pdf?response-content-disposition=attachment&OSSAccessKeyId=LTAI5t5mdPuMS9gNj93RPowJ&Expires=1703839064&Signature=X1mpakYGVaBffNokvhvW917UH%2Fk%3D"
# res = requests.get(url).text[1:-1]
res = requests.get(url=url, headers=headers)
with open('./a.pdf','wb') as f:
f.write(res.content)
\ No newline at end of file
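A minimal variant of the download above that streams the PDF to disk in chunks rather than holding it in memory; it reuses the url and headers defined in the snippet above, and stream=True is an assumption, not part of the original:
import requests

with requests.get(url, headers=headers, stream=True, timeout=30) as resp:
    resp.raise_for_status()
    with open('./a.pdf', 'wb') as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)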
#coding=utf-8
......@@ -25,7 +25,7 @@ from baseCore import BaseCore
import configparser
from smart_extractor import SmartExtractor
# baseCore=BaseCore()
class BaiduSpider(object):
def __init__(self,searchkw,wordsCode,sid):
......@@ -40,13 +40,15 @@ class BaiduSpider(object):
port=self.config.get('redis', 'port'),
password=self.config.get('redis', 'pass'), db=0)
self.page_num = 1
chrome_driver =self.config.get('selenium', 'chrome_driver')
self.kafka_bootstrap_servers = self.config.get('kafka', 'bootstrap_servers')
path = Service(chrome_driver)
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = self.config.get('selenium', 'binary_location')
self.driver = webdriver.Chrome(service=path,chrome_options=chrome_options)
# driver = webdriver.Chrome(chrome_options=chrome_options)
# chrome_driver =self.config.get('selenium', 'chrome_driver')
# self.kafka_bootstrap_servers = self.config.get('kafka', 'bootstrap_servers')
# path = Service(chrome_driver)
# chrome_options = webdriver.ChromeOptions()
# chrome_options.binary_location = self.config.get('selenium', 'binary_location')
# proxy = baseCore.get_proxy()
# chrome_options.add_argument('--proxy-server=' + proxy['http'].split('://')[1])
# self.driver = webdriver.Chrome(service=path,chrome_options=chrome_options)
# # driver = webdriver.Chrome(chrome_options=chrome_options)
self.qtitle = Queue()
self.qurl = Queue()
self.detailList = Queue()
......@@ -54,14 +56,16 @@ class BaiduSpider(object):
self.wordsCode = wordsCode
self.sid = sid
def createDriver(self):
chrome_driver =self.config.get('selenium', 'chrome_driver')
self.kafka_bootstrap_servers = self.config.get('kafka', 'bootstrap_servers')
path = Service(chrome_driver)
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location =self.config.get('selenium', 'binary_location')
# 设置代理
# proxy = "127.0.0.1:8080" # 代理地址和端口
# chrome_options.add_argument('--proxy-server=http://' + proxy)
chrome_options.binary_location = self.config.get('selenium', 'binary_location')
proxy = baseCore.get_proxy()
chrome_options.add_argument('--proxy-server=' + proxy['http'].split('://')[1])
self.driver = webdriver.Chrome(service=path,chrome_options=chrome_options)
# driver = webdriver.Chrome(chrome_options=chrome_options)
#将列表数据插入到表中 meta_search_result
def itemInsertToTable(self,items):
try:
......
# -*- coding: utf-8 -*-
......@@ -12,12 +12,16 @@ from kafka import KafkaProducer
from kafka import KafkaConsumer
import json
import itertools
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from baiduSpider import BaiduSpider
import concurrent.futures
from baseCore import BaseCore
from queue import Queue
import configparser
from tqdm import tqdm
class BaiduTaskJob(object):
def __init__(self):
......@@ -39,7 +43,7 @@ class BaiduTaskJob(object):
bootstrap_servers=[bootstrap_servers],
value_deserializer=lambda m: json.loads(m.decode('utf-8')))
try:
for record in consumer:
for record in tqdm(consumer, desc="Consuming messages"):
try:
logger.info("value:",record.value)
keymsg=record.value
......@@ -119,7 +123,15 @@ class BaiduTaskJob(object):
kwList=[]
if searchEngines:
if '3' in searchEngines:
keyword=keymsg['keyWord']
start_time = time.time()
keyword = keymsg['keyWord']
wordsName = keymsg['wordsName']
first = wordsName
if wordsName == first:
end_time = time.time()
if int(end_time - start_time) > 10:
logger.info(f'采集一轮{wordsName}关键词耗时{baseCore.getTimeCost(start_time,end_time)}')
logger.info(f"获取到关键词组:{wordsName}---{wordsCode}")
keymsglist=self.getkeywords(keyword)
for kw in keymsglist:
kwmsg={
......@@ -157,6 +169,25 @@ class BaiduTaskJob(object):
# finally:
# baiduSpider.driver.quit()
# logger.info("关键词采集结束!"+searchkw)
def createDriver(self):
chrome_driver = r'D:\cmd100\chromedriver.exe'
path = Service(chrome_driver)
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = r'D:\Google\Chrome\Application\chrome.exe'
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--start-maximized")
proxy = baseCore.get_proxy()
chrome_options.add_argument('--proxy-server=' + proxy['http'].split('://')[1])
chrome_options.add_argument(
'user-agent=' + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
# chrome_options.add_argument('--headless')
browser = webdriver.Chrome(service=path, chrome_options=chrome_options)
return browser
def runSpider(self,kwmsg):
searchkw=kwmsg['kw']
wordsCode=kwmsg['wordsCode']
......@@ -166,6 +197,8 @@ class BaiduTaskJob(object):
baiduSpider.get_page_html()
except Exception as e:
try:
baiduSpider.driver.quit()
baiduSpider.driver=self.createDriver()
baiduSpider.get_page_html()
except Exception as e:
logger.info('百度搜索异常'+searchkw)
......
# -*- coding: utf-8 -*-
......@@ -293,6 +293,7 @@ class BaseCore:
sql = "select proxy from clb_proxy"
self.__cursor_proxy.execute(sql)
proxy_lists = self.__cursor_proxy.fetchall()
self.__cnx_proxy.commit()
ip_list = []
for proxy_ in proxy_lists:
ip_list.append(str(proxy_).replace("('", '').replace("',)", ''))
......@@ -304,8 +305,8 @@ class BaseCore:
"port": str_ip_list[1],
}
proxy = {
"HTTP": proxyMeta,
"HTTPS": proxyMeta
"http": proxyMeta,
"https": proxyMeta
}
proxy_list.append(proxy)
return proxy_list[random.randint(0, len(proxy_list) - 1)]
......
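requests looks proxies up by the lowercase URL scheme, which is why the change from "HTTP"/"HTTPS" to "http"/"https" keys above matters. A minimal sketch with an illustrative proxy address:
import requests

proxy_meta = '127.0.0.1:8888'   # illustrative address, not from the repo
proxies = {'http': proxy_meta, 'https': proxy_meta}
resp = requests.get('http://httpbin.org/ip', proxies=proxies, timeout=10)
print(resp.text)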
[redis]
......@@ -16,6 +16,8 @@ topic=keyWordsInfo
groupId=python_baidu
[selenium]
chrome_driver=C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe
binary_location=D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe
;chrome_driver=C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe
;binary_location=D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe
chrome_driver=D:\cmd100\chromedriver.exe
binary_location=D:\Google\Chrome\Application\chrome.exe
# from baiduSpider import BaiduSpider
# searchkw, wordsCode, sid = '', '', ''
# baidu = BaiduSpider(searchkw, wordsCode, sid)
import requests
# url = 'https://baijiahao.baidu.com/s?id=1784907851792547880&wfr=spider&for=pc'
# title = '“一带一路”商学院联盟副秘书长解奕炯:临沂在国际化物流建设中一定能“先行一步”'
# try:
# detailurl = url
# title = title
# content, contentWithTag = baidu.extractorMsg(detailurl, title)
# contentWithTag = baidu.rmTagattr(contentWithTag, detailurl)
# except Exception as e:
# content = ''
# contentWithTag = ''
#
#
# detailmsg = {
# 'title': title,
# 'detailurl': url,
# 'content': content,
# 'contentHtml': contentWithTag,
# }
# print(detailmsg)
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Host': 'search-api-web.eastmoney.com',
'Pragma': 'no-cache',
'Sec-Fetch-Dest': 'script',
'Sec-Fetch-Mode': 'no-cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
url = 'https://search-api-web.eastmoney.com/search/jsonp?cb=jQuery35103326233792363984_1702455623969&param=%7B%22uid%22%3A%22%22%2C%22keyword%22%3A%22%E7%A7%91%E8%BE%BE%E8%87%AA%E6%8E%A7%22%2C%22type%22%3A%5B%22researchReport%22%5D%2C%22client%22%3A%22web%22%2C%22clientVersion%22%3A%22curr%22%2C%22clientType%22%3A%22web%22%2C%22param%22%3A%7B%22researchReport%22%3A%7B%22client%22%3A%22web%22%2C%22pageSize%22%3A10%2C%22pageIndex%22%3A1%7D%7D%7D&_=1702455623970'
# res = requests.get(url).text[1:-1]
res = requests.get(url=url, headers=headers)
res_json = res.text
print(res_json)
\ No newline at end of file
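The cb= parameter makes this endpoint return JSONP, so res_json comes back wrapped in a jQuery...(...) callback. A sketch for recovering the JSON payload, assuming that wrapper format:
import json
import re

match = re.search(r'^[^(]*\((.*)\)\s*$', res_json, re.S)
if match:
    payload = json.loads(match.group(1))
    print(list(payload.keys()))   # inspect the top-level fields before relying on any of them
else:
    print('unexpected response format:', res_json[:100])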