提交 864508c6 作者: 刘伟刚

Merge remote-tracking branch 'origin/master'

...@@ -27,7 +27,7 @@ headers = { ...@@ -27,7 +27,7 @@ headers = {
'Cache-Control': 'no-cache', 'Cache-Control': 'no-cache',
'Pragma': 'no-cache' 'Pragma': 'no-cache'
} }
taskType = '企业动态/新浪财经' taskType = '企业动态/新浪财经/国内'
pattern = r"\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}" pattern = r"\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}"
# 获取响应页面 # 获取响应页面
......
...@@ -28,7 +28,7 @@ headers = { ...@@ -28,7 +28,7 @@ headers = {
'Cache-Control': 'no-cache', 'Cache-Control': 'no-cache',
'Pragma': 'no-cache' 'Pragma': 'no-cache'
} }
taskType = '企业动态/新浪财经' taskType = '企业动态/新浪财经/香港'
# 判断时间是否是正确格式 # 判断时间是否是正确格式
...@@ -51,7 +51,7 @@ def format_time(time_str): ...@@ -51,7 +51,7 @@ def format_time(time_str):
def getrequests(url): def getrequests(url):
ip = baseCore.get_proxy() ip = baseCore.get_proxy()
req = requests.get(url, headers=headers,proxies=ip) req = requests.get(url, headers=headers,proxies=ip)
req.encoding = req.apparent_encoding req.encoding = 'gbk'
soup = BeautifulSoup(req.text, 'html.parser') soup = BeautifulSoup(req.text, 'html.parser')
return soup return soup
...@@ -117,7 +117,7 @@ def getDic(social_code, title, href, pub_time): ...@@ -117,7 +117,7 @@ def getDic(social_code, title, href, pub_time):
# state = 0 # state = 0
# takeTime = baseCore.getTimeCost(start_time, time.time()) # takeTime = baseCore.getTimeCost(start_time, time.time())
# baseCore.recordLog(social_code, taskType, state, takeTime, href, f'{href}===发送Kafka失败') # baseCore.recordLog(social_code, taskType, state, takeTime, href, f'{href}===发送Kafka失败')
# return 1 return 1
# 数据发送至Kafka # 数据发送至Kafka
...@@ -165,10 +165,10 @@ def selectUrl(url, social_code): ...@@ -165,10 +165,10 @@ def selectUrl(url, social_code):
def doJob(): def doJob():
# while True: while True:
start_time = time.time() start_time = time.time()
# social_code = baseCore.redicPullData('NewsEnterprise:xgqy_nyse_socialCode') social_code = baseCore.redicPullData('NewsEnterprise:xgqy_nyse_socialCode')
social_code = '91330000747735638J' # social_code = '91330000747735638J'
if not social_code or social_code == 'None': if not social_code or social_code == 'None':
time.sleep(20) time.sleep(20)
data = baseCore.getInfomation(social_code) data = baseCore.getInfomation(social_code)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论