提交 41a25e25 作者: XveLingKun

中英对照标签调整

上级 bf7737d6
...@@ -26,7 +26,7 @@ if __name__ == "__main__": ...@@ -26,7 +26,7 @@ if __name__ == "__main__":
name = input('所属用户:') name = input('所属用户:')
driver = create_driver() driver = create_driver()
driver.get(url) driver.get(url)
time.sleep(60) time.sleep(80)
cookies = driver.get_cookies() cookies = driver.get_cookies()
# print(driver.get_cookies()) # print(driver.get_cookies())
......
""" """
...@@ -15,7 +15,7 @@ log = baseCore.getLogger() ...@@ -15,7 +15,7 @@ log = baseCore.getLogger()
# 判断股票代码是否存在 # 判断股票代码是否存在
def check_code(com_code): def check_code(com_code):
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3) r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn',db=3)
res = r.exists('com_caiwushuju_code::'+com_code) res = r.exists('com_caiwushuju_code::'+com_code)
#如果key存在 则不是第一次采集该企业, res = 1 #如果key存在 则不是第一次采集该企业, res = 1
if res: if res:
...@@ -24,7 +24,7 @@ def check_code(com_code): ...@@ -24,7 +24,7 @@ def check_code(com_code):
return True #表示是第一次采集 return True #表示是第一次采集
def check_date(com_code,info_date): def check_date(com_code,info_date):
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3) r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn', db=3)
res = r.sismember('com_caiwushuju_code::'+com_code, info_date) # 注意是 保存set的方式 res = r.sismember('com_caiwushuju_code::'+com_code, info_date) # 注意是 保存set的方式
if res: if res:
return True return True
...@@ -33,7 +33,7 @@ def check_date(com_code,info_date): ...@@ -33,7 +33,7 @@ def check_date(com_code,info_date):
# 将采集后的股票代码对应的报告期保存进redis # 将采集后的股票代码对应的报告期保存进redis
def add_date(com_code,date_list): def add_date(com_code,date_list):
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3) r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn',db=3)
#遍历date_list 放入redis #遍历date_list 放入redis
for date in date_list: for date in date_list:
res = r.sadd('com_caiwushuju_code::'+com_code,date) res = r.sadd('com_caiwushuju_code::'+com_code,date)
...@@ -158,7 +158,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -158,7 +158,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for one_info in soup_zcfzb.find_all('tr')[2:]: for one_info in soup_zcfzb.find_all('tr')[2:]:
if 'value.' not in one_info.text: if 'value.' not in one_info.text:
continue continue
info_name = one_info.find('span').text # todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name: if '审计意见' in info_name:
continue continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0] info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
...@@ -212,7 +225,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -212,7 +225,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
break break
if 'value.' not in one_info.text: if 'value.' not in one_info.text:
continue continue
info_name = one_info.find('span').text # todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name: if '审计意见' in info_name:
continue continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0] info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
...@@ -273,7 +299,7 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -273,7 +299,7 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
soup_name = BeautifulSoup(res_name.content, 'html.parser') soup_name = BeautifulSoup(res_name.content, 'html.parser')
# 第一个表 # 第一个表
try: try:
script_zcfzb = soup_name.find('script', {'id': 'zcfzb_qy'}) script_zcfzb = soup_name.find('script', {'id': 'cccccccccc_qy'})
if script_zcfzb: if script_zcfzb:
soup_zcfzb = BeautifulSoup(script_zcfzb.text.strip(), 'lxml') soup_zcfzb = BeautifulSoup(script_zcfzb.text.strip(), 'lxml')
else: else:
...@@ -341,7 +367,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -341,7 +367,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for one_info in soup_zcfzb.find_all('tr')[2:]: for one_info in soup_zcfzb.find_all('tr')[2:]:
if 'value.' not in one_info.text: if 'value.' not in one_info.text:
continue continue
info_name = one_info.find('span').text # todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name: if '审计意见' in info_name:
continue continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0] info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
...@@ -368,7 +407,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -368,7 +407,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for one_info in soup_lrb.find_all('tr')[2:]: for one_info in soup_lrb.find_all('tr')[2:]:
if 'value.' not in one_info.text: if 'value.' not in one_info.text:
continue continue
info_name = one_info.find('span').text # todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name: if '审计意见' in info_name:
continue continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0] info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
...@@ -395,7 +447,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType) ...@@ -395,7 +447,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
break break
if 'value.' not in one_info.text: if 'value.' not in one_info.text:
continue continue
info_name = one_info.find('span').text # todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name: if '审计意见' in info_name:
continue continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0] info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
...@@ -455,7 +520,7 @@ def getReportTime(): ...@@ -455,7 +520,7 @@ def getReportTime():
list_month = ['-12-31', '-09-30', '-06-30', '-03-31'] list_month = ['-12-31', '-09-30', '-06-30', '-03-31']
for year in range(year, 2018, -1): for year in range(year, 2018, -1):
for month in list_month: for month in list_month[::-1]:
date = str(year) + month date = str(year) + month
#todo:判断拼接的报告期是否大于当前日期,如果大于当前日期,则说明还没到这个时间,跳过 #todo:判断拼接的报告期是否大于当前日期,如果大于当前日期,则说明还没到这个时间,跳过
current_date = current_date_.strftime('%Y-%m-%d') current_date = current_date_.strftime('%Y-%m-%d')
...@@ -513,6 +578,8 @@ def job(taskType,cnx,cursor): ...@@ -513,6 +578,8 @@ def job(taskType,cnx,cursor):
# securities_code = code # securities_code = code
# else: # else:
# continue # continue
# 测试:
# securities_code = '601179'
if exchange == 1: if exchange == 1:
com_code = 'bj' + securities_code com_code = 'bj' + securities_code
if exchange == 2: if exchange == 2:
......
import requests
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
}

if __name__ == "__main__":
    # Fetch the indicators page and check that the request succeeded.
    url = 'https://tradingeconomics.com/united-states/indicators'
    # Keep the response object: the HTTP status lives on the response, not on
    # the `requests` package (`requests.status_codes` is a submodule of the
    # library, so comparing it with 200 was always False).
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        # Placeholder: the original script takes no action on success yet.
        pass
\ No newline at end of file
...@@ -311,38 +311,38 @@ def getnumber_redis(): ...@@ -311,38 +311,38 @@ def getnumber_redis():
if __name__ == "__main__": if __name__ == "__main__":
# getFromSql() getFromSql()
numbers = getnumber_redis() # numbers = getnumber_redis()
log.info("当前批次采集公众号个数{}".format(numbers)) # log.info("当前批次采集公众号个数{}".format(numbers))
time.sleep(3) # time.sleep(3)
dic_user_count = {} # dic_user_count = {}
# dic_user_count = { # # dic_user_count = {
# 'name': '', # # 'name': '',
# 'use_count': 0, # # 'use_count': 0,
# 'gzh_count': 0 # # 'gzh_count': 0
# } # # }
start = time.time() # start = time.time()
log.info(f"开始时间{baseCore.getNowTime(1)}") # log.info(f"开始时间{baseCore.getNowTime(1)}")
while True: # while True:
#
infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode') # infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# infoSourceCode = 'IN-20220609-57899' # # infoSourceCode = 'IN-20220609-57899'
if infoSourceCode == 'None' or infoSourceCode == None: # if infoSourceCode == 'None' or infoSourceCode == None:
log.info("redis已经没有数据了,重新放置数据") # log.info("redis已经没有数据了,重新放置数据")
log.info(f"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}") # log.info(f"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}")
#
# time.sleep(60) # # time.sleep(60)
# numbers = getnumber_redis() # # numbers = getnumber_redis()
# log.info("当前批次采集公众号个数{}".format(numbers)) # # log.info("当前批次采集公众号个数{}".format(numbers))
break # break
# infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode') # # infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# continue # # continue
#
getWxList(infoSourceCode, dic_user_count) # getWxList(infoSourceCode, dic_user_count)
if dic_user_count: # if dic_user_count:
for key, value in dic_user_count.items(): # for key, value in dic_user_count.items():
log.info(f"====账号{key},使用次数{value}") # log.info(f"====账号{key},使用次数{value}")
# break # # break
# infoSourceCode = 'IN-20220917-0159' # # infoSourceCode = 'IN-20220917-0159'
# getWxList(infoSourceCode) # # getWxList(infoSourceCode)
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论