中英对照标签调整

41a25e25 · XveLingKun · bf7737d6 · 41a25e25 · 41a25e25 · 41a25e25
--- a/comData/Tyc/get_tyc_cookies.py
+++ b/comData/Tyc/get_tyc_cookies.py
@@ -26,7 +26,7 @@ if __name__ == "__main__":
    name = input('所属用户:')
    driver = create_driver()
    driver.get(url)
-    time.sleep(60)
+    time.sleep(80)

    cookies = driver.get_cookies()
    # print(driver.get_cookies())

--- a/comData/caiwushuju/东方财富网财务数据.py
+++ b/comData/caiwushuju/东方财富网财务数据.py
-"""
+"""
@@ -15,7 +15,7 @@ log = baseCore.getLogger()

 # 判断股票代码是否存在
 def check_code(com_code):
-    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
+    r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn',db=3)
    res = r.exists('com_caiwushuju_code::'+com_code)
    #如果key存在 则不是第一次采集该企业， res = 1
    if res:
@@ -24,7 +24,7 @@ def check_code(com_code):
        return True #表示是第一次采集

 def check_date(com_code,info_date):
-    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
+    r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn', db=3)
    res = r.sismember('com_caiwushuju_code::'+com_code, info_date)  # 注意是 保存set的方式
    if res:
        return True
@@ -33,7 +33,7 @@ def check_date(com_code,info_date):

 # 将采集后的股票代码对应的报告期保存进redis
 def add_date(com_code,date_list):
-    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
+    r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn',db=3)
    #遍历date_list 放入redis
    for date in date_list:
        res = r.sadd('com_caiwushuju_code::'+com_code,date)
@@ -158,7 +158,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
            for one_info in soup_zcfzb.find_all('tr')[2:]:
                if 'value.' not in one_info.text:
                    continue
-                info_name = one_info.find('span').text
+                # 2024-04-12: the row may contain hidden <span> tags; skip those and
+                # take the text of the first span not styled with display:none.
+                info_tag_list = one_info.find_all('span')
+                info_name = ''
+                for info_tag in info_tag_list:
+                    # A span without a style attribute yields None from .get();
+                    # fall back to '' so the membership test cannot raise TypeError.
+                    if 'display:none' in (info_tag.get('style') or ''):
+                        continue
+                    info_name = info_tag.text
+                    break
+                # No visible, non-empty label in this row: nothing to record.
+                if not info_name:
+                    continue
                if '审计意见' in info_name:
                    continue
                info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
@@ -212,7 +225,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
                    break
                if 'value.' not in one_info.text:
                    continue
-                info_name = one_info.find('span').text
+                # 2024-04-12: the row may contain hidden <span> tags; skip those and
+                # take the text of the first span not styled with display:none.
+                info_tag_list = one_info.find_all('span')
+                info_name = ''
+                for info_tag in info_tag_list:
+                    # A span without a style attribute yields None from .get();
+                    # fall back to '' so the membership test cannot raise TypeError.
+                    if 'display:none' in (info_tag.get('style') or ''):
+                        continue
+                    info_name = info_tag.text
+                    break
+                # No visible, non-empty label in this row: nothing to record.
+                if not info_name:
+                    continue
                if '审计意见' in info_name:
                    continue
                info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
@@ -273,7 +299,7 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
                soup_name = BeautifulSoup(res_name.content, 'html.parser')
                # 第一个表
                try:
-                    script_zcfzb = soup_name.find('script', {'id': 'zcfzb_qy'})
+                    script_zcfzb = soup_name.find('script', {'id': 'cccccccccc_qy'})
                    if script_zcfzb:
                        soup_zcfzb = BeautifulSoup(script_zcfzb.text.strip(), 'lxml')
                    else:
@@ -341,7 +367,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
                for one_info in soup_zcfzb.find_all('tr')[2:]:
                    if 'value.' not in one_info.text:
                        continue
-                    info_name = one_info.find('span').text
+                    # 2024-04-12: the row may contain hidden <span> tags; skip those and
+                    # take the text of the first span not styled with display:none.
+                    info_tag_list = one_info.find_all('span')
+                    info_name = ''
+                    for info_tag in info_tag_list:
+                        # A span without a style attribute yields None from .get();
+                        # fall back to '' so the membership test cannot raise TypeError.
+                        if 'display:none' in (info_tag.get('style') or ''):
+                            continue
+                        info_name = info_tag.text
+                        break
+                    # No visible, non-empty label in this row: nothing to record.
+                    if not info_name:
+                        continue
                    if '审计意见' in info_name:
                        continue
                    info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
@@ -368,7 +407,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
                for one_info in soup_lrb.find_all('tr')[2:]:
                    if 'value.' not in one_info.text:
                        continue
-                    info_name = one_info.find('span').text
+                    # 2024-04-12: the row may contain hidden <span> tags; skip those and
+                    # take the text of the first span not styled with display:none.
+                    info_tag_list = one_info.find_all('span')
+                    info_name = ''
+                    for info_tag in info_tag_list:
+                        # A span without a style attribute yields None from .get();
+                        # fall back to '' so the membership test cannot raise TypeError.
+                        if 'display:none' in (info_tag.get('style') or ''):
+                            continue
+                        info_name = info_tag.text
+                        break
+                    # No visible, non-empty label in this row: nothing to record.
+                    if not info_name:
+                        continue
                    if '审计意见' in info_name:
                        continue
                    info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
@@ -395,7 +447,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
                        break
                    if 'value.' not in one_info.text:
                        continue
-                    info_name = one_info.find('span').text
+                    # 2024-04-12: the row may contain hidden <span> tags; skip those and
+                    # take the text of the first span not styled with display:none.
+                    info_tag_list = one_info.find_all('span')
+                    info_name = ''
+                    for info_tag in info_tag_list:
+                        # A span without a style attribute yields None from .get();
+                        # fall back to '' so the membership test cannot raise TypeError.
+                        if 'display:none' in (info_tag.get('style') or ''):
+                            continue
+                        info_name = info_tag.text
+                        break
+                    # No visible, non-empty label in this row: nothing to record.
+                    if not info_name:
+                        continue
                    if '审计意见' in info_name:
                        continue
                    info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
@@ -455,7 +520,7 @@ def getReportTime():

    list_month = ['-12-31', '-09-30', '-06-30', '-03-31']
    for year in range(year, 2018, -1):
-        for month in list_month:
+        for month in list_month[::-1]:
            date = str(year) + month
            #todo:判断拼接的报告期是否大于当前日期，如果大于当前日期，怎说明还没到这个时间，跳过
            current_date = current_date_.strftime('%Y-%m-%d')
@@ -513,6 +578,8 @@ def job(taskType,cnx,cursor):
        #         securities_code = code
        #     else:
        #         continue
+        # 测试：
+        # securities_code = '601179'
        if exchange == 1:
            com_code = 'bj' + securities_code
        if exchange == 2:

--- a/comData/dingzhi/trading_economics.py
+++ b/comData/dingzhi/trading_economics.py
+
+import requests
+
+# Browser-like User-Agent header sent with the request below.
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
+}
+
+if __name__ == "__main__":
+    url = 'https://tradingeconomics.com/united-states/indicators'
+    # Keep the response object: the HTTP status lives on it. The previous check
+    # compared the requests.status_codes module to 200, which is never true.
+    # The timeout keeps the script from hanging forever on a stalled connection.
+    response = requests.get(url, headers=headers, timeout=30)
+    if response.status_code == 200:
+        pass
\ No newline at end of file
--- a/comData/weixin_solo/wxList.py
+++ b/comData/weixin_solo/wxList.py
@@ -311,38 +311,38 @@ def getnumber_redis():


 if __name__ == "__main__":
-    # getFromSql()
-
-    numbers = getnumber_redis()
-    log.info("当前批次采集公众号个数{}".format(numbers))
-    time.sleep(3)
-    dic_user_count = {}
-    # dic_user_count = {
-    #     'name': '',
-    #     'use_count': 0,
-    #     'gzh_count': 0
-    # }
-    start = time.time()
-    log.info(f"开始时间{baseCore.getNowTime(1)}")
-    while True:
-
-        infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
-        # infoSourceCode = 'IN-20220609-57899'
-        if infoSourceCode == 'None' or infoSourceCode == None:
-            log.info("redis已经没有数据了，重新放置数据")
-            log.info(f"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}")
-
-            # time.sleep(60)
-            # numbers = getnumber_redis()
-            # log.info("当前批次采集公众号个数{}".format(numbers))
-            break
-            # infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
-            # continue
-
-        getWxList(infoSourceCode, dic_user_count)
-    if dic_user_count:
-        for key, value in dic_user_count.items():
-            log.info(f"====账号{key}，使用次数{value}")
-        # break
-    # infoSourceCode = 'IN-20220917-0159'
-    # getWxList(infoSourceCode)
+    getFromSql()
+
+    # numbers = getnumber_redis()
+    # log.info("当前批次采集公众号个数{}".format(numbers))
+    # time.sleep(3)
+    # dic_user_count = {}
+    # # dic_user_count = {
+    # #     'name': '',
+    # #     'use_count': 0,
+    # #     'gzh_count': 0
+    # # }
+    # start = time.time()
+    # log.info(f"开始时间{baseCore.getNowTime(1)}")
+    # while True:
+    #
+    #     infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
+    #     # infoSourceCode = 'IN-20220609-57899'
+    #     if infoSourceCode == 'None' or infoSourceCode == None:
+    #         log.info("redis已经没有数据了，重新放置数据")
+    #         log.info(f"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}")
+    #
+    #         # time.sleep(60)
+    #         # numbers = getnumber_redis()
+    #         # log.info("当前批次采集公众号个数{}".format(numbers))
+    #         break
+    #         # infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
+    #         # continue
+    #
+    #     getWxList(infoSourceCode, dic_user_count)
+    # if dic_user_count:
+    #     for key, value in dic_user_count.items():
+    #         log.info(f"====账号{key}，使用次数{value}")
+    #     # break
+    # # infoSourceCode = 'IN-20220917-0159'
+    # # getWxList(infoSourceCode)