提交 01e8140c 作者: LiJunMing

9/19

上级 88707bfa
...@@ -26,7 +26,7 @@ from DBUtils.PooledDB import PooledDB ...@@ -26,7 +26,7 @@ from DBUtils.PooledDB import PooledDB
# sys.path.append('D://zzsn_spider//base//fdfs_client') # sys.path.append('D://zzsn_spider//base//fdfs_client')
from fdfs_client.client import get_tracker_conf, Fdfs_client from fdfs_client.client import get_tracker_conf, Fdfs_client
tracker_conf = get_tracker_conf('E:\\kkwork\\zzsn_spider\\base\\client.conf') tracker_conf = get_tracker_conf('D:\\kkwork\\zzsn_spider\\base\\client.conf')
client = Fdfs_client(tracker_conf) client = Fdfs_client(tracker_conf)
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源 # 注意 程序退出前 调用BaseCore.close() 关闭相关资源
......
"""
"""
证监会公告采集,只能按照搜索企业来采,从上市库里拿企业数据,sys_enterprise_ipo_copy1
craw_state:已采集过表示为True,未采集表示为0,拿取数据表示为ing,解析失败表示为400
update_state:为1 表示需要更新,用来增量循环
如何统计出来该报告采到了没有,dt_error库统计失败的信息
"""
import json import json
import random import random
import re import re
......
...@@ -50,9 +50,8 @@ if __name__=="__main__": ...@@ -50,9 +50,8 @@ if __name__=="__main__":
opt.add_experimental_option("excludeSwitches", ["enable-automation"]) opt.add_experimental_option("excludeSwitches", ["enable-automation"])
opt.add_experimental_option('excludeSwitches', ['enable-logging']) opt.add_experimental_option('excludeSwitches', ['enable-logging'])
opt.add_experimental_option('useAutomationExtension', False) opt.add_experimental_option('useAutomationExtension', False)
# opt.binary_location =r'D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe' opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe'
# chromedriver = r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe' chromedriver = r'D:/cmd100/chromedriver.exe'
chromedriver = r'D:/chrome/chromedriver.exe'
browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver) browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
url = "https://mp.weixin.qq.com/" url = "https://mp.weixin.qq.com/"
browser.get(url) browser.get(url)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论