提交 41a25e25 作者: XveLingKun

中英对照标签调整

上级 bf7737d6
......@@ -26,7 +26,7 @@ if __name__ == "__main__":
name = input('所属用户:')
driver = create_driver()
driver.get(url)
time.sleep(60)
time.sleep(80)
cookies = driver.get_cookies()
# print(driver.get_cookies())
......
"""
"""
......@@ -15,7 +15,7 @@ log = baseCore.getLogger()
# 判断股票代码是否存在
def check_code(com_code):
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn',db=3)
res = r.exists('com_caiwushuju_code::'+com_code)
#如果key存在 则不是第一次采集该企业, res = 1
if res:
......@@ -24,7 +24,7 @@ def check_code(com_code):
return True #表示是第一次采集
def check_date(com_code,info_date):
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn', db=3)
res = r.sismember('com_caiwushuju_code::'+com_code, info_date) # 注意是 保存set的方式
if res:
return True
......@@ -33,7 +33,7 @@ def check_date(com_code,info_date):
# 将采集后的股票代码对应的报告期保存进redis
def add_date(com_code,date_list):
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn',db=3)
r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn',db=3)
#遍历date_list 放入redis
for date in date_list:
res = r.sadd('com_caiwushuju_code::'+com_code,date)
......@@ -158,7 +158,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for one_info in soup_zcfzb.find_all('tr')[2:]:
if 'value.' not in one_info.text:
continue
info_name = one_info.find('span').text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name:
continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
......@@ -212,7 +225,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
break
if 'value.' not in one_info.text:
continue
info_name = one_info.find('span').text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name:
continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
......@@ -273,7 +299,7 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
soup_name = BeautifulSoup(res_name.content, 'html.parser')
# 第一个表
try:
script_zcfzb = soup_name.find('script', {'id': 'zcfzb_qy'})
script_zcfzb = soup_name.find('script', {'id': 'cccccccccc_qy'})
if script_zcfzb:
soup_zcfzb = BeautifulSoup(script_zcfzb.text.strip(), 'lxml')
else:
......@@ -341,7 +367,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for one_info in soup_zcfzb.find_all('tr')[2:]:
if 'value.' not in one_info.text:
continue
info_name = one_info.find('span').text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name:
continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
......@@ -368,7 +407,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
for one_info in soup_lrb.find_all('tr')[2:]:
if 'value.' not in one_info.text:
continue
info_name = one_info.find('span').text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name:
continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
......@@ -395,7 +447,20 @@ def get_info(social_code, com_code,info_date,delist_all,info_date_list,taskType)
break
if 'value.' not in one_info.text:
continue
info_name = one_info.find('span').text
# todo:24-4-12 添加判断标签是否是隐藏标签,是则跳过,找下一个标签
info_tag_list = one_info.find_all('span')
info_name = ''
for info_tag in info_tag_list:
if 'display:none' in info_tag.get('style'):
continue
else:
info_name = info_tag.text
break
if info_name:
pass
else:
continue
# info_name = one_info.find('span').text
if '审计意见' in info_name:
continue
info_name_en = re.findall('value\.(.*?)\)}}', one_info.text)[0]
......@@ -455,7 +520,7 @@ def getReportTime():
list_month = ['-12-31', '-09-30', '-06-30', '-03-31']
for year in range(year, 2018, -1):
for month in list_month:
for month in list_month[::-1]:
date = str(year) + month
#todo:判断拼接的报告期是否大于当前日期,如果大于当前日期,则说明还没到这个时间,跳过
current_date = current_date_.strftime('%Y-%m-%d')
......@@ -513,6 +578,8 @@ def job(taskType,cnx,cursor):
# securities_code = code
# else:
# continue
# 测试:
# securities_code = '601179'
if exchange == 1:
com_code = 'bj' + securities_code
if exchange == 2:
......
import requests
# Request headers carrying a desktop-browser (Edge/Chrome on Windows)
# User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
}

if __name__ == "__main__":
    url = 'https://tradingeconomics.com/united-states/indicators'
    # Keep the Response object: the HTTP status is an attribute of the
    # response, not of the `requests` package. The previous check compared
    # the `requests.status_codes` submodule with 200, which is never true.
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        # Placeholder: processing of the fetched page is not implemented yet.
        pass
\ No newline at end of file
......@@ -311,38 +311,38 @@ def getnumber_redis():
if __name__ == "__main__":
# getFromSql()
numbers = getnumber_redis()
log.info("当前批次采集公众号个数{}".format(numbers))
time.sleep(3)
dic_user_count = {}
# dic_user_count = {
# 'name': '',
# 'use_count': 0,
# 'gzh_count': 0
# }
start = time.time()
log.info(f"开始时间{baseCore.getNowTime(1)}")
while True:
infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# infoSourceCode = 'IN-20220609-57899'
if infoSourceCode == 'None' or infoSourceCode == None:
log.info("redis已经没有数据了,重新放置数据")
log.info(f"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}")
# time.sleep(60)
# numbers = getnumber_redis()
# log.info("当前批次采集公众号个数{}".format(numbers))
break
# infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# continue
getWxList(infoSourceCode, dic_user_count)
if dic_user_count:
for key, value in dic_user_count.items():
log.info(f"====账号{key},使用次数{value}")
# break
# infoSourceCode = 'IN-20220917-0159'
# getWxList(infoSourceCode)
getFromSql()
# numbers = getnumber_redis()
# log.info("当前批次采集公众号个数{}".format(numbers))
# time.sleep(3)
# dic_user_count = {}
# # dic_user_count = {
# # 'name': '',
# # 'use_count': 0,
# # 'gzh_count': 0
# # }
# start = time.time()
# log.info(f"开始时间{baseCore.getNowTime(1)}")
# while True:
#
# infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# # infoSourceCode = 'IN-20220609-57899'
# if infoSourceCode == 'None' or infoSourceCode == None:
# log.info("redis已经没有数据了,重新放置数据")
# log.info(f"采集完一轮公众号耗时{baseCore.getTimeCost(start, time.time())}")
#
# # time.sleep(60)
# # numbers = getnumber_redis()
# # log.info("当前批次采集公众号个数{}".format(numbers))
# break
# # infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
# # continue
#
# getWxList(infoSourceCode, dic_user_count)
# if dic_user_count:
# for key, value in dic_user_count.items():
# log.info(f"====账号{key},使用次数{value}")
# # break
# # infoSourceCode = 'IN-20220917-0159'
# # getWxList(infoSourceCode)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论