提交 faf5ca56 作者: 薛凌堃

中国证券报·中证网

上级 841ed4b6
......@@ -51,7 +51,7 @@ def zzcx():
driver = create_driver()
driver.maximize_window()
url = 'https://zzcx.cs.com.cn/dist/publishManuscript/listES'
payload = {"pageNo": 1, "pageSize": 15, "statusList": [0], "keyword": ""}
# payload = {"pageNo": 1, "pageSize": 15, "statusList": [0], "keyword": ""}
headers = {
'Accept': 'application/json',
'Accept-Encoding': 'gzip, deflate, br',
......@@ -69,10 +69,11 @@ def zzcx():
'Origin': 'https://zzcx.cs.com.cn',
'Referer': 'https://zzcx.cs.com.cn/app/zzb/list?spm=0.0.0.0.wjnSUZ'
}
payload = json.dumps(payload)
result_json = requests.post(url=url, data=payload, headers=headers).json()
print(result_json)
pages = result_json['data']['pages']
# payload = json.dumps(payload)
# result_json = requests.post(url=url, data=payload, headers=headers).json()
# print(result_json)
# pages = result_json['data']['pages']
pages = 5
for page in range(1, int(pages) + 1):
payload_page = {"pageNo": page, "pageSize": 15, "statusList": [0], "keyword": ""}
payload_page = json.dumps(payload_page)
......@@ -81,6 +82,7 @@ def zzcx():
for news in records:
title = news['title']
news_url = 'https://zzcx.cs.com.cn/app/zzb/detail?id=' + news['manuscriptId']
# news_url = 'https://zzcx.cs.com.cn/app/zzb/detail?id=3ec65751b63e40d7813a0c6bbe9b3135'
try:
flag = r.sismember('IN-20240129-0001', news_url)
......@@ -96,6 +98,7 @@ def zzcx():
div_photo = driver.find_elements(By.ID, 'line')
for png_ in div_photo:
try:
div = png_.find_element(By.XPATH, './/div/div[1]/div')
# div = png_.find_element(By.CLASS_NAME, 'ant-col ant-col-17')
# todo:滚轮需要滑动
......@@ -122,6 +125,8 @@ def zzcx():
"var newElement = document.createElement('img'); newElement.src = 'http://zzsn.luyuen.com" + path + "'; arguments[0].insertBefore(newElement, arguments[0].firstChild);",
append_tag)
os.remove(file_path)
except:
continue
# div_undefined_line = driver.find_elements(By.ID, 'k-line-undefined')
div_undefined_line = driver.find_elements(By.ID, 'KLineSubscription')
......@@ -235,6 +240,7 @@ def zzcx():
log.info(e)
finally:
producer.close()
if __name__ == "__main__":
pathType = 'PhotoDingzhi/'
r = redis.Redis(host='114.115.236.206', port=6379, password='clbzzsn', db=5)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论