Merge remote-tracking branch 'origin/master'

dc808e12 · LiuLiYuan · d6410378 · 5f3288f3 · dc808e12 · dc808e12
--- a/612test.py
+++ b/612test.py
+
+# cache = {
+#     "company": {
+#         "上汽集团": 1,
+#         "欧盟委员会": 13,
+#         "欧盟": 8,
+#         "欧盟委员会总部大厦一角": 2
+#     },
+#     "person": {
+#         "梅赛德斯": 2,
+#         "齐普策": 8,
+#         "韦杜姆": 5,
+#         "朔尔茨": 7,
+#         "冯德莱恩": 1,
+#         "布尔茨": 4,
+#         "纳吉": 3,
+#         "哈贝克": 3,
+#         "特斯拉": 2,
+#         "莫里永": 1,
+#         "维辛": 1,
+#         "康松林": 1,
+#         "尼古拉斯普瓦捷NiclasPoitiers": 1,
+#         "李缘": 1,
+#         "伯格": 1,
+#         "阿斯拉克伯格AslakBerg": 1,
+#         "林燕": 1,
+#         "华盛顿": 1,
+#         "布鲁盖尔Bruegel": 1,
+#         "川普": 1,
+#         "哈桑-扎米特": 1,
+#         "小鹏": 2,
+#         "康逸": 3,
+#         "费尔": 1,
+#         "埃德加博高": 1,
+#         "蔚来": 3,
+#         "杜登赫费尔": 3,
+#         "赵丁喆": 3,
+#         "卢基斯": 1,
+#         "斯泰兰蒂斯": 2,
+#         "保罗博若思": 1,
+#         "海国": 1,
+#         "斯特凡德古阿拉": 1,
+#         "弗兰克": 1,
+#         "施沃佩": 1,
+#         "费迪南德": 3,
+#         "迪尔克扬杜拉": 1,
+#         "米扎克": 1,
+#         "帕沃尔安塔利奇": 1,
+#         "亚采克米扎克": 1,
+#         "弗尔季奥蒂洛": 1,
+#         "张晨霖": 1,
+#         "基塞伊佐尔坦": 1,
+#         "德古阿拉": 3,
+#         "明道加斯普": 1,
+#         "杜登赫": 1,
+#         "奥托尔巴吉": 1,
+#         "郭晨": 1,
+#         "波罗": 1,
+#         "尹栋逊": 1,
+#         "颜景辉": 1,
+#         "段思瑶": 1,
+#         "裴健如": 1,
+#         "陈庆": 1,
+#         "纳吉马顿": 2,
+#         "崔东树": 1,
+#         "PatrickHummel": 1,
+#         "如蔚": 1,
+#         "李斌": 1,
+#         "福尔克•维辛": 1,
+#         "蔚": 1,
+#         "TechWeb": 1,
+#         "Suky": 1,
+#         "陈继业": 1,
+#         "欧方": 1,
+#         "齐普策OliverZipse": 1,
+#         "康林松OlaKaellenius": 1,
+#         "PFA": 1,
+#         "ACEA": 1,
+#         "希尔德加德": 1,
+#         "穆勒HildegardMueller": 1,
+#         "阿道夫乌尔索阿道夫": 1,
+#         "乌尔索": 1,
+#         "马库斯费伯MarkusFerber": 1,
+#         "特蕾莎里贝拉TeresaRibera": 1,
+#         "福尔克维辛": 2,
+#         "辛婧": 1,
+#         "殷晓圣": 3,
+#         "李若佳": 1,
+#         "刘维佳": 1,
+#         "萨拉热窝": 1,
+#         "专员薇奥莱塔布尔茨": 4,
+#         "哈贝克RobertHabeck": 1,
+#         "布特克MaximilianButek": 1,
+#         "关乌": 1,
+#         "布特克": 2,
+#         "俄乌": 1,
+#         "哈桑": 2,
+#         "吕瑟尔斯海姆": 2,
+#         "何塞普戈梅斯": 3,
+#         "李学军": 2,
+#         "刘向": 2,
+#         "戈梅斯": 2,
+#         "马灿": 2,
+#         "克雷希米尔": 2,
+#         "康林松": 3,
+#         "于荣": 2,
+#         "霍尔格格尔克": 3,
+#         "陈斌杰": 2,
+#         "梁国勇": 3,
+#         "李博": 2,
+#         "乔纳森博格": 2,
+#         "胡加齐": 2,
+#         "单玮怡": 2,
+#         "林剑": 3,
+#         "马克西米利安布特克MaximilianButek": 1,
+#         "何亚东": 1,
+#         "吕骞": 1,
+#         "金瑞庭": 1,
+#         "罗知之": 1,
+#         "马铭博": 1,
+#         "马铭": 1,
+#         "梅赛德斯-奔驰": 1,
+#         "埃隆马斯克": 1,
+#         "罗伯特哈贝克RobertHabeck": 1,
+#         "奥拉夫朔尔茨OlafScholz": 1
+#     },
+#     "location": {
+#         "上海市": 4,
+#         "北京市": 2,
+#         "江西省": 1,
+#         "赣州市": 1,
+#         "常州市": 2,
+#         "武进区": 2,
+#         "江苏省": 2
+#     },
+#     "sentiment": {
+#         "负面": 4,
+#         "中性": 10,
+#         "正面": 10
+#     },
+#     "time": {
+#         "17.4": 2,
+#         "6月12日": 6,
+#         "12日": 2,
+#         "2024年06月14日": 1,
+#         "2024年6月3日": 1,
+#         "6月13日": 1,
+#         "7月4日": 1,
+#         "38.1": 1,
+#         "2023年2月15日": 1,
+#         "6月17日": 3,
+#         "60.7": 1,
+#         "6月14日": 1
+#     }
+# }
+#
+# top_keywords = {keyword_type: sorted(keyword_freq.items(), key=lambda x: x[1], reverse=True)[:10] for
+#                     keyword_type, keyword_freq in cache.items()}
+# # print(top_keywords)
+#
+# # 提取前十的关键词
+# top_keywords_dict = {keyword_type: [keyword for keyword, freq in keywords] for keyword_type, keywords in
+#                          top_keywords.items()}
+# print(top_keywords_dict)
+#
+# industry_result = top_keywords_dict['industry'] if 'industry' in top_keywords_dict else []
+# insert_industry = ",".join(industry_result)
+# company_result = top_keywords_dict["company"] if "company" in top_keywords_dict else []
+# person_result = top_keywords_dict["person"] if "person" in top_keywords_dict else []
+# sentiment_result = top_keywords_dict["sentiment"] if "sentiment" in top_keywords_dict else []
+# location_result = top_keywords_dict["location"] if "location" in top_keywords_dict else []
+# time_result = top_keywords_dict["time"] if "time" in top_keywords_dict else []
+# print(f"insert_industry:{insert_industry}")
+# insert_company = ",".join(company_result)
+# insert_person = ",".join(person_result)
+# insert_sentiment = ",".join(sentiment_result)
+# insert_location = ",".join(location_result)
+# insert_time = ",".join(time_result)
+# print(f"insert_company:{insert_company}")
+# print(f"insert_person:{insert_person}")
+# print(f"insert_sentiment:{insert_sentiment}")
+# print(f"insert_location:{insert_location}")
+# print(f"insert_time:{insert_time}")
+# print(type(insert_industry))
+#
+# test_none = None
+# test_set = set(test_none)
+# print(test_set)
+
+
+# set1 = {'万家小新,迎春,李虹萦,张灏然,王宏志,袁野,谭作钧,习近平,鄂维南,苟坪'}
+# set2 = {'孟晚舟,李虹萦,张灏然,习近平,王宏志,鄂维南,张玉卓,谭作钧'}
+# # 使用 & 运算符找到交集
+# intersection_set = set1 & set2
+#
+# print(intersection_set)
+
+
+import json
+
+import json
+import pandas as pd
+
+# Demo frame with two integer columns.
+df = pd.DataFrame({'id': [1, 2, 3], 'value': [4, 5, 6]})
+
+# Pin the id column to int64, then cast it again with the plain 'int' dtype
+# alias before serialising (both casts are part of the experiment).
+df['id'] = df['id'].astype('int64').astype('int')
+
+# Turn the frame into a list of row dicts and dump that list as JSON.
+records = df.to_dict(orient='records')
+json_str = json.dumps(records)
+
+print(json_str)
+
+
+
+
--- a/base/RedisPPData.py
+++ b/base/RedisPPData.py
@@ -372,8 +372,9 @@ def AnnualEnterpriseXueQ_task():
 def AnnualEnterpriseUS():
    cnx,cursor = connectSql()
    # 获取美股企业
-    us_query = "select SocialCode from EnterpriseInfo where Place = '2' and SecuritiesType = '美股' and SecuritiesCode is not null and  CreateTime='2023-08-15 14:00:00'"
+    # us_query = "select SocialCode from EnterpriseInfo where Place = '2' and SecuritiesType = '美股' and SecuritiesCode is not null and  CreateTime='2023-08-15 14:00:00'"
    # us_query = "select SocialCode from EnterpriseInfo where Place = '2' and SecuritiesType = '美股' and SecuritiesCode = 'BP' "
+    us_query = "select cik from mgzqyjwyh_list where state=2 "
    #ZZSN22080900000025
    cursor.execute(us_query)
    us_result = cursor.fetchall()
@@ -381,7 +382,7 @@ def AnnualEnterpriseUS():
    us_social_list = [item[0] for item in us_result]
    print('=======')
    for item in us_social_list:
-        r.rpush('AnnualEnterprise:usqy_socialCode', item)
+        r.rpush('Sec_cik_US:uscik_annualReport', item)
    closeSql(cnx,cursor)

 #国外企业基本信息 redis中放入id
@@ -659,12 +660,14 @@ if __name__ == "__main__":
    # zhuangjingtexind()
    # NoticeEnterprise()
    # NoticeDF()
+    AnnualEnterpriseUS()
    # AnnualEnterpriseIPO()
    # AnnualEnterprise()
    # BaseInfoEnterprise()
    # BaseInfoEnterpriseAbroad()
    # NewsEnterprise_task()
    # NewsEnterprise()
+    # NoticeEnterprise()
    # CorPerson()
    # china100()
    # global100()
@@ -678,7 +681,7 @@ if __name__ == "__main__":
    # SEC_CIK()
    # dujioashou()
    # omeng()
-    AnnualEnterprise()
+    # AnnualEnterprise()
    # AnnualEnterpriseUS()
    # NoticeEnterprise_task()
    # AnnualEnterprise_task()

--- a/comData/Synchronize_data/同步数据.py
+++ b/comData/Synchronize_data/同步数据.py
-"""
+"""
@@ -47,93 +47,100 @@ def update_table(update_sql, cursor_c, cnx_c):

 if __name__ == "__main__":
    key = 'Synchronize_data:info'
-    # result = search_formal_table('social_credit_code, name, english_name', 'sys_base_enterprise', 'yn_domestic', '1', cursor)
-    # for row in result:
-    #     social_credit_code = row[0]
-    #     name = row[1]
-    #     english_name = row[2]
-    #     if not english_name:
-    #         english_name = ''
-    #     item = social_credit_code + '|' + name + '|' + english_name
-    #     baseCore.rePutIntoR(key, item)
-    while True:
-        info = baseCore.redicPullData(key)
-        if info == None:
-            break
-        else:
-            pass
-        com_code = info.split('|')[0]
-        com_name = info.split('|')[1]
-        com_english_name = info.split('|')[2]
-        result = search_formal_table('CompanyName, SocialCode, EnglishName, SecuritiesCode, SecuritiesShortName, Place, isIPO, SecuritiesType, Category, Exchange, countryName', 'EnterpriseInfo',
-                                     'SocialCode', com_code, cursor_)
-        u_name, u_code, u_ename, u_short_name, u_type, u_category, u_exchange = '', '', '', '', '', '', ''
-        # 更新语句
-        update_sql = """update EnterpriseInfo set {} where SocialCode = {}"""
-        fields = ''
-        if result:
-            # 判断这几个值是否为空
-            if result[0][0] != com_name:
-                u_name = com_name
-                fields = f'CompanyName = "{com_name}", '
-            if not result[0][2] and com_english_name:
-                u_ename = com_english_name
-                fields += f'EnglishName = "{com_english_name}", '
-            if not result[0][5]:
-                u_place = '1'
-                fields += f'Place = "{u_place}", '
-            if not result[0][10]:
-                u_countryname = '中国内地'
-                fields += f'countryName = "{u_countryname}", '
-            if not result[0][3]:
-                result_ipo = search_formal_table('social_credit_code, securities_code, securities_short_name, securities_type, category,exchange', 'sys_base_enterprise_ipo', 'social_credit_code',
-                                                 com_code, cursor)
-                if result_ipo:
-                    # 是上市企业
-                    if not result[0][6]:
-                        u_ipo = '1'
-                        fields += f'IsIPO = "{u_ipo}", '
-                    if result_ipo[0][1]:
-                        u_code = result_ipo[0][1]
-                        fields += f'SecuritiesCode = "{u_code}", '
-                    if not result[0][4] and result_ipo[0][2]:
-                        u_short_name = result_ipo[0][2]
-                        fields += f'SecuritiesShortName = "{u_short_name}", '
-                    if not result[0][7] and result_ipo[0][3]:
-                        u_type = result_ipo[0][3]
-                        fields += f'SecuritiesType = "{u_type}", '
-                    if not result[0][8] and result_ipo[0][4]:
-                        u_category = result_ipo[0][4]
-                        fields += f'Category = "{u_category}", '
-                    if not result[0][9] and result_ipo[0][5]:
-                        u_exchange = result_ipo[0][5]
-                        fields += f'Exchange = "{u_exchange}", '
-                else:  # 可能不是上市企业
+    # Seed the Redis list `key` with one 'code|name|english_name' entry per row
+    # returned by search_formal_table (presumably domestic enterprises - confirm).
+    rows = search_formal_table('social_credit_code, name, english_name', 'sys_base_enterprise', 'yn_domestic', '1', cursor)
+    for record in rows:
+        # Column order follows the select list: code, name, English name.
+        # A falsy English name is written as an empty third field.
+        eng_name = record[2] if record[2] else ''
+        entry = record[0] + '|' + record[1] + '|' + eng_name
+        baseCore.rePutIntoR(key, entry)
+
+    # while True:
+    #     # info = baseCore.redicPullData(key)
+    #     info = ""
+    #     if info == None:
+    #         break
+    #     else:
+    #         pass
+    #     log.info(f"当前企业---{info}---")
+    #     com_code = info.split('|')[0]
+    #     com_name = info.split('|')[1]
+    #     com_english_name = info.split('|')[2]
+    #     result = search_formal_table('CompanyName, SocialCode, EnglishName, SecuritiesCode, SecuritiesShortName, Place, isIPO, SecuritiesType, Category, Exchange, countryName', 'EnterpriseInfo',
+    #                                  'SocialCode', com_code, cursor_)
+    #     u_name, u_code, u_ename, u_short_name, u_type, u_category, u_exchange = '', '', '', '', '', '', ''
+    #     # 更新语句
+    #     update_sql = """update EnterpriseInfo set {} where SocialCode = {}"""
+    #     fields = ''
+    #     if result:
+    #         # 判断这几个值是否为空
+    #         if result[0][0] != com_name:
+    #             u_name = com_name
+    #             fields = f'CompanyName = "{com_name}", '
+    #         if not result[0][2] and com_english_name:
+    #             u_ename = com_english_name
+    #             fields += f'EnglishName = "{com_english_name}", '
+    #         if not result[0][5]:
+    #             u_place = '1'
+    #             fields += f'Place = "{u_place}", '
+    #         if not result[0][10]:
+    #             u_countryname = '中国内地'
+    #             fields += f'countryName = "{u_countryname}", '
+    #         if not result[0][3]:
+    #             result_ipo = search_formal_table('social_credit_code, securities_code, securities_short_name, securities_type, category,exchange', 'sys_base_enterprise_ipo', 'social_credit_code',
+    #                                              com_code, cursor)
+    #             if len(result_ipo) == 1:
+    #                 # 是上市企业
+    #                 if not result[0][6]:
+    #                     u_ipo = '1'
+    #                     fields += f'IsIPO = "{u_ipo}", '
+    #                 if result_ipo[0][1]:
+    #                     u_code = result_ipo[0][1]
+    #                     fields += f'SecuritiesCode = "{u_code}", '
+    #                 if not result[0][4] and result_ipo[0][2]:
+    #                     u_short_name = result_ipo[0][2]
+    #                     fields += f'SecuritiesShortName = "{u_short_name}", '
+    #                 if not result[0][7] and result_ipo[0][3]:
+    #                     u_type = result_ipo[0][3]
+    #                     fields += f'SecuritiesType = "{u_type}", '
+    #                 if not result[0][8] and result_ipo[0][4]:
+    #                     u_category = result_ipo[0][4]
+    #                     fields += f'Category = "{u_category}", '
+    #                 if not result[0][9] and result_ipo[0][5]:
+    #                     u_exchange = result_ipo[0][5]
+    #                     fields += f'Exchange = "{u_exchange}", '
+    #             else:
+    #                 if len(result_ipo) > 1:
+    #                     # 记录下
+    #                     baseCore.rePutIntoR(key, "Synchronize_data:More")
+    #                 # 可能不是上市企业
+    #                 # if fields:
+    #                 #     update_sql = update_sql.format(fields.rstrip(', '), f'"{com_code}"')
+    #                 #     print(update_sql)
+    #                 #     continue
+    #                 pass
+    #
    #         if fields:
    #             update_sql = update_sql.format(fields.rstrip(', '), f'"{com_code}"')
-                    #     print(update_sql)
-                    #     continue
-                    pass
-
-            if fields:
-                update_sql = update_sql.format(fields.rstrip(', '), f'"{com_code}"')
-                log.info(f'更新的sql语句--{update_sql}')
-                update_table(update_sql, cursor_, cnx_)
-        else:
-            result_ipo = search_formal_table('social_credit_code, securities_code, securities_short_name, securities_type, category,exchange', 'sys_base_enterprise_ipo', 'social_credit_code',
-                                             com_code, cursor)
-            if result_ipo:
-                SecuritiesCode = result_ipo[1]
-                SecuritiesShortName = result_ipo[2]
-                securities_type = result_ipo[3]
-                Category = result_ipo[4]
-                exchange = result_ipo[5]
-                sqlInsert = 'insert into EnterpriseInfo(CompanyName, SocialCode, EnglishName, SecuritiesCode, SecuritiesShortName, Place, isIPO, SecuritiesType, Category, Exchange, countryName) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
-                baseCore.cursor.execute(sqlInsert, (com_name, com_code, com_english_name, SecuritiesCode, 1, 1, securities_type, Category, exchange, '中国内地'))
-                baseCore.cnx.commit()
-                log.info(f'{com_name}==={com_name}===上市企业===插入成功')
-            else:
-                sqlInsert = 'insert into EnterpriseInfo(CompanyName, SocialCode, EnglishName, Place, isIPO, countryName) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
-                baseCore.cursor.execute(sqlInsert, (com_name, com_code, com_english_name, 1, 1, '中国内地'))
-                baseCore.cnx.commit()
-                log.info(f'{com_name}==={com_name}===非上市企业===插入成功')
\ No newline at end of file
+    #             log.info(f'更新的sql语句--{update_sql}')
+    #             update_table(update_sql, cursor_, cnx_)
+    #     else:
+    #         result_ipo = search_formal_table('social_credit_code, securities_code, securities_short_name, securities_type, category,exchange', 'sys_base_enterprise_ipo', 'social_credit_code',
+    #                                          com_code, cursor)
+    #         if result_ipo:
+    #             SecuritiesCode = result_ipo[1]
+    #             SecuritiesShortName = result_ipo[2]
+    #             securities_type = result_ipo[3]
+    #             Category = result_ipo[4]
+    #             exchange = result_ipo[5]
+    #             sqlInsert = 'insert into EnterpriseInfo(CompanyName, SocialCode, EnglishName, SecuritiesCode, SecuritiesShortName, Place, isIPO, SecuritiesType, Category, Exchange, countryName) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
+    #             baseCore.cursor.execute(sqlInsert, (com_name, com_code, com_english_name, SecuritiesCode, 1, 1, securities_type, Category, exchange, '中国内地'))
+    #             baseCore.cnx.commit()
+    #             log.info(f'{com_name}==={com_name}===上市企业===插入成功')
+    #         else:
+    #             sqlInsert = 'insert into EnterpriseInfo(CompanyName, SocialCode, EnglishName, Place, isIPO, countryName) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
+    #             baseCore.cursor.execute(sqlInsert, (com_name, com_code, com_english_name, 1, 1, '中国内地'))
+    #             baseCore.cnx.commit()
+    #             log.info(f'{com_name}==={com_name}===非上市企业===插入成功')
\ No newline at end of file
--- a/comData/Tyc/classtool.py
+++ b/comData/Tyc/classtool.py
@@ -16,7 +16,7 @@ cursor = baseCore.cursor
 db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017/', username='admin', password='ZZsn@9988').ZZSN[
    '天眼查登录信息']
 db_storage2 = pymongo.MongoClient('mongodb://114.115.221.202:27017/', username='admin', password='ZZsn@9988').ZZSN[
-    '股东信息']
+    '股东信息0621']


 class File():
@@ -164,6 +164,20 @@ class Info():
        db_storage2.update_one({'序号': str(no)}, {
            '$set': {'股东企业信用代码': dic_info['股东企业信用代码'], '股东企业标签': dic_info['股东企业标签']}})
        pass
+    def insert_into(self, dic_info):
+        """Upsert one shareholder row, or insert a plain row when it has no holder index."""
+        holder_no = dic_info['股东序号序号']
+        if holder_no:
+            # Match on the raw value: $set stores it unconverted, so a str() filter
+            # never matched a stored number and every re-run inserted a duplicate.
+            db_storage2.find_one_and_update(
+                {'序号': str(dic_info['序号']), '股东序号序号': holder_no},
+                {'$set': dic_info}, upsert=True)
+        else:
+            # A falsy holder index cannot serve as an upsert key, so always insert.
+            # NOTE: pymongo's insert_one adds an '_id' key to dic_info in place.
+            result = db_storage2.insert_one(dic_info)
+            print(result)

 if __name__ == '__main__':
    # token = Token()

--- a/comData/Tyc/getTycId.py
+++ b/comData/Tyc/getTycId.py
@@ -64,7 +64,7 @@ taskType = '天眼查企业id/天眼查'

 @retry(tries=5, delay=3)
 def getTycIdByXYDM(com_name, s):
-    retData={'state':False, 'tycData':None, 'reput':True}
+    retData={'state': False, 'tycData': None, 'reput': True}
    url=f"https://capi.tianyancha.com/cloud-tempest/search/suggest/v3?_={baseCore.getNowTime(3)}"
    # url=f"https://capi.tianyancha.com/cloud-tempest/search/suggest/v3"
    ip = baseCore.get_proxy()

--- a/comData/Tyc/shareHolderInfo.py
+++ b/comData/Tyc/shareHolderInfo.py
@@ -25,7 +25,7 @@ taskType = '天眼查/股东信息'
 from classtool import Token, Info

 token = Token()
-info = Info()
+Info = Info()

 @retry(tries=3, delay=1)
 def get_html(tycid, driver, dic_info):
@@ -90,22 +90,6 @@ def get_page(url, s, headers):
 @retry(tries=5, delay=3)
 def get_page1(url, s, headers):
    ip = baseCore.get_proxy()
-    header = {
-        'Accept': 'application/json, text/plain, */*',
-        'Accept-Language': 'zh-CN,zh;q=0.9',
-        'Connection': 'keep-alive',
-        'Content-Type': 'application/json',
-        'Sec-Fetch-Dest': 'empty',
-        'Sec-Fetch-Mode': 'cors',
-        'Sec-Fetch-Site': 'same-site',
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
-        'X-AUTH-TOKEN': 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzYzNjcxMTc0NiIsImlhdCI6MTcxNDk1Njg3MywiZXhwIjoxNzE3NTQ4ODczfQ.qMEvtETT7RS3Rhwq9idu5H2AKMxc2cjtr5bDDW6C6yOFKR-ErgDwT4SOBX9PB2LWDexAG2hNaeAvn6swr-n6VA',
-        'X-TYCID': 'dad485900fcc11ee8c0de34479b5b939',
-        'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
-        'sec-ch-ua-mobile': '?0',
-        'sec-ch-ua-platform': '"Windows"',
-        'version': 'TYC-Web'
-    }
    res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
    if res.status_code != 200:
        raise
@@ -181,7 +165,7 @@ def doJob():
    for i in range(1000):
        # while True:
        # todo:设置cookies的使用
-        dic_info = {}
+
        headers = {
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Content-Type': 'application/json',
@@ -196,7 +180,7 @@ def doJob():
            continue
        # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
        item = baseCore.redicPullData('shareHolderInfo')
-        # item = '1|914401010885128005'
+        # item = '900|微创心律管理|None|罗七一|健康科技|￥ 90 亿|￥ 90 亿|￥ 92 亿|823|861|911|ZZSN231108150127681|MicroPort Cardiac Rhythm Management International Limited|中国|None'
        # 判断 如果Redis中已经没有数据，则等待
        # social_code = '91110108780992804C'
        if item == None:
@@ -204,8 +188,31 @@ def doJob():
            continue
        start = time.time()
        no = item.split('|')[0]
-        social_code = item.split('|')[1]
-
+        social_code = item.split('|')[11]
+
+        recept_name = item.split('|')[12]
+        dic_info = {"序号": item.split('|')[0],
+                    "企业名称（榜单公布）": item.split('|')[1],
+                    "企业别称": item.split('|')[2],
+                    "门人/联合创始": item.split('|')[3],
+                    "行业": item.split('|')[4],
+                    "企业估值（2022年）": item.split('|')[5],
+                    "企业估值（2023年）": item.split('|')[6],
+                    "企业估值（2024年）": item.split('|')[7],
+                    "2022年独角兽排名": item.split('|')[8],
+                    "2023年独角兽排名": item.split('|')[9],
+                    "2024年独角兽排名": item.split('|')[10],
+                    "企业信用代码（中国内地企业需填写信用代码）": item.split('|')[11],
+                    "企业名称（企查查）": item.split('|')[12],
+                    "所属国家": item.split('|')[13]
+                    }
+        if "ZZSN" in social_code:
+            dic_info['前十大股东名称'] = ''
+            dic_info['持股比例'] = ''
+            dic_info['认缴出资额'] = ''
+            dic_info['股东序号序号'] = ''
+            Info.insert_into(dic_info)
+            break
        try:
            try:
                data = baseCore.getInfomation(social_code)
@@ -237,7 +244,8 @@ def doJob():
                    tycid = ''
            if tycid == None or tycid == '':
                try:
-                    retData = getTycIdByXYDM(xydm, s)
+                    retData = getTycIdByXYDM(recept_name, s)
+                    # retData = getTycIdByXYDM("极星汽车销售有限公司", s)
                    if retData['state']:
                        tycid = retData['tycData']['id']

@@ -269,16 +277,20 @@ def doJob():
                baseCore.rePutIntoR('shareHolderInfo', item)
                log.info(f"{no}---{xydm}----{tycid}----请求失败----重新放入redis")
                time.sleep(3)
-                continue
+                break
            elif charge == -2:
                # 该企业没有股东信息

                token.updateTokeen(id_cookie, 2)
-                baseCore.rePutIntoR('shareHolderInfo', item)
+                # baseCore.rePutIntoR('shareHolderInfo', item)
                log.info(f"{no}---{xydm}----{tycid}----没有股东信息或需要滑动验证----重新放入redis")
                time.sleep(5)
-                # log.info(f"{id}---{xydm}----{tycid}----没有核心人员")
-                continue
+                dic_info['前十大股东名称'] = ''
+                dic_info['持股比例'] = ''
+                dic_info['认缴出资额'] = ''
+                dic_info['股东序号序号'] = ''
+                Info.insert_into(dic_info)
+                break

            else:
                log.info(f"{no}---{xydm}----{tycid}")
@@ -310,7 +322,7 @@ def doJob():
                    flag = 1
                else:
                    if total_page3 == charge:
-                        url = 'https://capi.tianyancha.com/cloud-listed-company/listed/holder/topTen?_={}&gid={}&pageSize=20&pageNum={}&percentLevel=-100&type=1'
+                        url = 'https://capi.tianyancha.com/cloud-listed-company/listed/holder/topTen?&gid={}&pageSize=20&pageNum={}&percentLevel=-100&type=1'
                        total_page = total_page3
                        data_page_one = data_page3
                        flag = 3
@@ -325,15 +337,52 @@ def doJob():
                baseCore.rePutIntoR('shareHolderInfo', item)
                log.info(f'==={social_code}=====总数请求失败===重新放入redis====')
                continue
-            # # todo:获取页数
-            # total_page = 34
-            # flag = 2
-            # todo: 测试程序是否执行到这一步
+            # todo:获取页数
            log.info(f'总数为{total_page}')
-
+            if int(total_page % 20) == 0:
+                maxpage = int((total_page / 20) + 1)
+            else:
+                maxpage = int((total_page / 20) + 1) + 1
+            for page in range(1, maxpage):
+                if page == 1:
                    data_page = data_page_one
                    errorCode = data_page['errorCode']
-
+                else:
+                    res = None
+                    for d in range(3):
+                        ip = baseCore.get_proxy()
+                        if flag == 1:
+                            url_ = url
+                            payload = {"gid": f"{tycid}", "pageSize": 10, "pageNum": f"{page}", "sortField": "",
+                                       "sortType": "-100", "historyType": 1}
+                            try:
+                                res = s.post(url=url_, headers=headers, data=json.dumps(payload), proxies=ip,
+                                             timeout=(5, 10))
+                            except requests.exceptions.RequestException as e:
+                                log.info(e)
+                                time.sleep(1)
+                                continue
+                            data_page = res.json()
+                            errorCode = res.json()['errorCode']
+                            if errorCode != 0:
+                                continue
+                            else:
+                                break
+                        else:
+                            url_ = url.format(tycid, page)
+                            try:
+                                res = s.get(url_, headers=headers, proxies=ip, timeout=(5, 10))  # ,verify=False
+                            except requests.exceptions.RequestException as e:
+                                log.info(e)
+                                time.sleep(1)
+                                continue
+                            data_page = res.json()
+                            errorCode = res.json()['errorCode']
+                            if errorCode != 0:
+                                continue
+                            else:
+                                break
+                    res.close()
                if errorCode == 0:
                    pass
                else:
@@ -359,27 +408,40 @@ def doJob():
                # res.close()
                log.info(f'----flag:{flag}----')
                log.info(f'-----list_all:{len(list_all)}----')
+
+                for idx,holder_info in enumerate(list_all):
                    shareHolderName, percent = '', ''
                    if flag == 1:
-                holder_info = list_all[0]
                        shareHolderName = holder_info['shareHolderName']
                        percent = holder_info['percent']
+                        capitalTotal = holder_info['capitalTotal']

                    elif flag == 3:
-                holder_info = list_all[0]
                        shareHolderName = holder_info['name']
                        percent = holder_info['proportion']
+                        capitalTotal = ''

                    else:
-                holder_info = list_all[0]
                        shareHolderName = holder_info['holder_name']
                        percent = holder_info['longHeldRatioWithUnit']
+                        capitalTotal = ''
                    if shareHolderName and percent:
-                dic_info['最大持股名称'] = shareHolderName
+                        if page == 1:
+                            dic_info['股东序号序号'] = idx + 1
+                        else:
+                            dic_info['股东序号序号'] = idx + 1 + (10 * (page-1))
+                        dic_info['前十大股东名称'] = shareHolderName
                        dic_info['持股比例'] = percent
-            # todo: 更新字段
-            # info.update_holder(no, dic_info)
+                        dic_info['认缴出资额'] = capitalTotal
+                        # todo: 插入一条新纪录
+                        log.info(dic_info)
+                        try:
+                            del dic_info['_id']
+                        except:
+                            pass
+                        Info.insert_into(dic_info)
                        log.info('=========成功======')
+
                token.updateTokeen(id_cookie, 3)
                # time.sleep(randint(5,10))
                time.sleep(5)
@@ -395,7 +457,7 @@ def doJob():
            takeTime = baseCore.getTimeCost(start, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, '', f'获取企业信息失败--{e}')
            time.sleep(5)
-        break
+        # break




--- a/comData/Tyc/test.py
+++ b/comData/Tyc/test.py
-import json
-
+import openpyxl
 import redis
-from bs4 import BeautifulSoup
-import langid
-
-from base.BaseCore import BaseCore
-baseCore =BaseCore()
-import pymysql
-# print(baseCore.detect_language("是对jhjjhjhhjjhjhjh的浮点数"))
-# cnx_ = baseCore.cnx
-# cursor_ = baseCore.cursor
-cnx_ = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
-                                   charset='utf8mb4')
-cursor_ = cnx_.cursor()
-# updateBeginSql = f"update Tfbs set state3=%s where col3=%s "
-# # print(updateBeginSql)
-# cursor_.execute(updateBeginSql,(200,'91350000158142711F'))
-# cnx_.commit()
-
-import time
-# from getTycId import getTycIdByXYDM
-# social_code = '91440101231247350J'
-# data = baseCore.getInfomation(social_code)
-# tycid = data[11]
-# if tycid == None:
-#     print(data)
-#     retData = getTycIdByXYDM(social_code)
-#     tycid = retData['tycData']['id']
-#     print(tycid)
-
-# time_struct = time.localtime(int(1692762780000 / 1000))  # 首先把时间戳转换为结构化时间
-# time_format = time.strftime("%Y-%m-%d %H-%M-%S", time_struct)  # 把结构化时间转换为格式化时间
-# print(time_format)
-
-# r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=6)
-# #原键名
-# key1 = 'CorPersonEnterpriseFbs:gnqy_socialCode'
-# #目标键名
-# key2 = 'NewsEnterpriseFbs:gnqy_socialCode'
-# values = r.lrange(key1,0,-1)
-# for value in values:
-#     r.rpush(key2, value)
-#
-# # 关闭Redis连接
-# r.close()
-
-
-list_all = []
-if list_all:
-    print(len(list_all))
-else:
-    print('---')
-
+# Collect the "top 2000" and unicorn company lists first.
+# Connect to the Redis server.
+# NOTE(review): host and password are hard-coded; consider loading them from configuration.
+redis_client = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn', db=6)
+
+# Open the Excel workbook (exactly one of the source lists below is active).
+# workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2022年福布斯2000强榜单（已排除2023年）.xlsx')
+# workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2023年福布斯2000强.xlsx')
+# workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2023年独角兽企业（已排除2024年）.xlsx')
+# workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2024胡润独角兽(4).xlsx')
+# workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2022年世界500强企业39家（已排除23年上榜企业）2.xlsx')
+# workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2023年世界500强名单.xlsx')
+# workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2023年欧盟2500（已排除2022年）.xlsx')
+workbook = openpyxl.load_workbook(r'D:\kkwork\企业数据\数据组提供\企业裁员数据\2022年欧盟2500强.xlsx')
+
+# Pick the worksheet to read.
+worksheet = workbook['Sheet1']
+# worksheet = workbook['sheet1']
+
+# Walk every row and push one '|'-joined item per company onto the Redis list.
+for row in worksheet.iter_rows(values_only=True):
+    # Check the row width explicitly: a handler that re-reads row[1] after an
+    # IndexError would only raise the same error again instead of skipping.
+    if len(row) < 2:
+        print(row)
+        continue
+    # Skip the header row, recognised by the label in its first cell.
+    if row[0] in ('序列', '序号', '排序'):
+        continue
+
+    # 309
+    # item = ""+ "|"+str(row[3]) + "|" + str(row[2])+ "|" + str(row[1])+ "|" + "2022年福布斯2000强"
+    # item = str(row[2])+ "|"+str(row[5]) + "|" + str(row[3])+ "|" + str(row[1])+ "|" + "2023年福布斯2000强"
+    # item = str(row[2])+ "|"+str(row[3]) + "|" + str(row[3])+ "|" + str(row[1])+ "|" + "2023年独角兽"
+    # item = str(row[2])+ "|"+str(row[4]) + "|" + str(row[4])+ "|" + str(row[0])+ "|" + "2024年独角兽"
+    # item = str(row[1])+ "|"+str(row[5]) + "|" + str(row[4])+ "|" + str(row[0])+ "|" + "2022年世界500强"
+    # item = str(row[1])+ "|"+str(row[3]) + "|" + str(row[5])+ "|" + str(row[6])+ "|" + "2023年世界500强"
+    # item = ""+ "|"+str(row[3]) + "|" + str(row[2])+ "|" + str(row[1])+ "|" + "2023年欧盟2500"
+    item = str(row[2])+ "|"+str(row[5]) + "|" + str(row[4])+ "|" + str(row[1])+ "|" + "2022年欧盟2500"
+    redis_client.rpush('GOOGLE_KEYWORDS:COMPANY_NAME', item)
+    # redis_client.rpush('BAIDU_KEYWORDS:COMPANY_NAME', item)
+    print(item)
+    # break
+# Close the Excel workbook.
+workbook.close()
--- a/comData/weixin_solo/wxList.py
+++ b/comData/weixin_solo/wxList.py
@@ -261,6 +261,7 @@ def getPageData(dic_url, page, dic_user_count):
        return True, dic_user_count
    # 修改token使用时间
    updateTokeen(token, 3)
+    pagecount = json_search['app_msg_cnt']   # 837
    # 保存数据到数据库
    return insertWxList(dic_url, json_search, page, user_name), dic_user_count

@@ -280,6 +281,7 @@ def getWxList(infoSourceCode, dic_user_count):
    origin = dic_url['name']
    biz = dic_url['biz']

+    # retFlag, dic_user_count = getPageData(dic_url, 1, dic_user_count)
    for page in range(1, 6):
        retFlag, dic_user_count = getPageData(dic_url, page, dic_user_count)
        time.sleep(random.randint(60, 181))
@@ -311,12 +313,12 @@ def getnumber_redis():


 if __name__ == "__main__":
-    getFromSql()
+    # getFromSql()

    # numbers = getnumber_redis()
    # log.info("当前批次采集公众号个数{}".format(numbers))
    # time.sleep(3)
-    # dic_user_count = {}
+    dic_user_count = {}
    # # dic_user_count = {
    # #     'name': '',
    # #     'use_count': 0,
@@ -344,5 +346,5 @@ if __name__ == "__main__":
    #     for key, value in dic_user_count.items():
    #         log.info(f"====账号{key}，使用次数{value}")
    #     # break
-    # # infoSourceCode = 'IN-20220917-0159'
-    # # getWxList(infoSourceCode)
+    infoSourceCode = 'IN-20231110-0003'
+    getWxList(infoSourceCode, dic_user_count)