提交 faf5ca56 作者: 薛凌堃

中国证券报·中证网

上级 841ed4b6
...@@ -51,7 +51,7 @@ def zzcx(): ...@@ -51,7 +51,7 @@ def zzcx():
driver = create_driver() driver = create_driver()
driver.maximize_window() driver.maximize_window()
url = 'https://zzcx.cs.com.cn/dist/publishManuscript/listES' url = 'https://zzcx.cs.com.cn/dist/publishManuscript/listES'
payload = {"pageNo": 1, "pageSize": 15, "statusList": [0], "keyword": ""} # payload = {"pageNo": 1, "pageSize": 15, "statusList": [0], "keyword": ""}
headers = { headers = {
'Accept': 'application/json', 'Accept': 'application/json',
'Accept-Encoding': 'gzip, deflate, br', 'Accept-Encoding': 'gzip, deflate, br',
...@@ -69,10 +69,11 @@ def zzcx(): ...@@ -69,10 +69,11 @@ def zzcx():
'Origin': 'https://zzcx.cs.com.cn', 'Origin': 'https://zzcx.cs.com.cn',
'Referer': 'https://zzcx.cs.com.cn/app/zzb/list?spm=0.0.0.0.wjnSUZ' 'Referer': 'https://zzcx.cs.com.cn/app/zzb/list?spm=0.0.0.0.wjnSUZ'
} }
payload = json.dumps(payload) # payload = json.dumps(payload)
result_json = requests.post(url=url, data=payload, headers=headers).json() # result_json = requests.post(url=url, data=payload, headers=headers).json()
print(result_json) # print(result_json)
pages = result_json['data']['pages'] # pages = result_json['data']['pages']
pages = 5
for page in range(1, int(pages) + 1): for page in range(1, int(pages) + 1):
payload_page = {"pageNo": page, "pageSize": 15, "statusList": [0], "keyword": ""} payload_page = {"pageNo": page, "pageSize": 15, "statusList": [0], "keyword": ""}
payload_page = json.dumps(payload_page) payload_page = json.dumps(payload_page)
...@@ -81,6 +82,7 @@ def zzcx(): ...@@ -81,6 +82,7 @@ def zzcx():
for news in records: for news in records:
title = news['title'] title = news['title']
news_url = 'https://zzcx.cs.com.cn/app/zzb/detail?id=' + news['manuscriptId'] news_url = 'https://zzcx.cs.com.cn/app/zzb/detail?id=' + news['manuscriptId']
# news_url = 'https://zzcx.cs.com.cn/app/zzb/detail?id=3ec65751b63e40d7813a0c6bbe9b3135'
try: try:
flag = r.sismember('IN-20240129-0001', news_url) flag = r.sismember('IN-20240129-0001', news_url)
...@@ -96,32 +98,35 @@ def zzcx(): ...@@ -96,32 +98,35 @@ def zzcx():
div_photo = driver.find_elements(By.ID, 'line') div_photo = driver.find_elements(By.ID, 'line')
for png_ in div_photo: for png_ in div_photo:
div = png_.find_element(By.XPATH, './/div/div[1]/div') try:
# div = png_.find_element(By.CLASS_NAME, 'ant-col ant-col-17') div = png_.find_element(By.XPATH, './/div/div[1]/div')
# todo:滚轮需要滑动 # div = png_.find_element(By.CLASS_NAME, 'ant-col ant-col-17')
driver.execute_script("arguments[0].scrollIntoView();", div) # todo:滚轮需要滑动
time.sleep(1) driver.execute_script("arguments[0].scrollIntoView();", div)
#todo:保存成临时文件 time.sleep(1)
temp_file =NamedTemporaryFile(delete=False, suffix=".png") #todo:保存成临时文件
temp_file.close() temp_file =NamedTemporaryFile(delete=False, suffix=".png")
div.screenshot(temp_file.name) temp_file.close()
file_path = temp_file.name div.screenshot(temp_file.name)
# todo:保存到obs链接及标签替换 file_path = temp_file.name
name = str(baseCore.getuuid()) # todo:保存到obs链接及标签替换
result = getOBSres(pathType, name, file_path) name = str(baseCore.getuuid())
path = result['body']['objectUrl'].split('.com')[1] result = getOBSres(pathType, name, file_path)
full_path = result['body']['objectUrl'] path = result['body']['objectUrl'].split('.com')[1]
full_path = result['body']['objectUrl']
#todo:替换标签 删除标签 #todo:替换标签 删除标签
dele_tag = png_.find_element(By.XPATH, './/div/div[1]//div') dele_tag = png_.find_element(By.XPATH, './/div/div[1]//div')
driver.execute_script("arguments[0].remove()", dele_tag) driver.execute_script("arguments[0].remove()", dele_tag)
#todo:将图片塞进去 新建一个new_tag #todo:将图片塞进去 新建一个new_tag
append_tag = png_.find_element(By.XPATH, './/div/div[1]') append_tag = png_.find_element(By.XPATH, './/div/div[1]')
driver.execute_script( driver.execute_script(
"var newElement = document.createElement('img'); newElement.src = 'http://zzsn.luyuen.com" + path + "'; arguments[0].insertBefore(newElement, arguments[0].firstChild);", "var newElement = document.createElement('img'); newElement.src = 'http://zzsn.luyuen.com" + path + "'; arguments[0].insertBefore(newElement, arguments[0].firstChild);",
append_tag) append_tag)
os.remove(file_path) os.remove(file_path)
except:
continue
# div_undefined_line = driver.find_elements(By.ID, 'k-line-undefined') # div_undefined_line = driver.find_elements(By.ID, 'k-line-undefined')
div_undefined_line = driver.find_elements(By.ID, 'KLineSubscription') div_undefined_line = driver.find_elements(By.ID, 'KLineSubscription')
...@@ -235,6 +240,7 @@ def zzcx(): ...@@ -235,6 +240,7 @@ def zzcx():
log.info(e) log.info(e)
finally: finally:
producer.close() producer.close()
if __name__ == "__main__": if __name__ == "__main__":
pathType = 'PhotoDingzhi/' pathType = 'PhotoDingzhi/'
r = redis.Redis(host='114.115.236.206', port=6379, password='clbzzsn', db=5) r = redis.Redis(host='114.115.236.206', port=6379, password='clbzzsn', db=5)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论