提交 864508c6 作者: 刘伟刚

Merge remote-tracking branch 'origin/master'

......@@ -27,7 +27,7 @@ headers = {
'Cache-Control': 'no-cache',
'Pragma': 'no-cache'
}
taskType = '企业动态/新浪财经'
taskType = '企业动态/新浪财经/国内'
pattern = r"\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}"
# 获取响应页面
......
......@@ -28,7 +28,7 @@ headers = {
'Cache-Control': 'no-cache',
'Pragma': 'no-cache'
}
taskType = '企业动态/新浪财经'
taskType = '企业动态/新浪财经/香港'
# 判断时间是否是正确格式
......@@ -51,7 +51,7 @@ def format_time(time_str):
def getrequests(url):
ip = baseCore.get_proxy()
req = requests.get(url, headers=headers,proxies=ip)
req.encoding = req.apparent_encoding
req.encoding = 'gbk'
soup = BeautifulSoup(req.text, 'html.parser')
return soup
......@@ -117,7 +117,7 @@ def getDic(social_code, title, href, pub_time):
# state = 0
# takeTime = baseCore.getTimeCost(start_time, time.time())
# baseCore.recordLog(social_code, taskType, state, takeTime, href, f'{href}===发送Kafka失败')
# return 1
return 1
# 数据发送至Kafka
......@@ -165,10 +165,10 @@ def selectUrl(url, social_code):
def doJob():
# while True:
while True:
start_time = time.time()
# social_code = baseCore.redicPullData('NewsEnterprise:xgqy_nyse_socialCode')
social_code = '91330000747735638J'
social_code = baseCore.redicPullData('NewsEnterprise:xgqy_nyse_socialCode')
# social_code = '91330000747735638J'
if not social_code or social_code == 'None':
time.sleep(20)
data = baseCore.getInfomation(social_code)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论