提交 f4dce399 作者: LiuLiYuan

外内企业动态自动化

上级 cfbb60b7
......@@ -9,7 +9,6 @@ import logbook.more
import pymysql
from selenium import webdriver
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
from selenium.webdriver.chrome.service import Service
......@@ -18,7 +17,7 @@ class BaseCore:
# 序列号
__seq = 0
# 代理池 数据库连接
__cnx_proxy =None
__cnx_proxy = None
__cursor_proxy = None
# 基本信息 数据库连接
__cnx_infomation = None
......@@ -222,18 +221,20 @@ class BaseCore:
self.__cnx_proxy.close()
self.__cursor_infomation.close()
self.__cnx_infomation.close()
except :
except:
pass
def __init__(self):
self.__cnx_proxy = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='clb_project',
charset='utf8mb4')
self.__cursor_proxy= self.__cnx_proxy.cursor()
self.__cnx_infomation = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji', charset='utf8mb4')
self.__cursor_proxy = self.__cnx_proxy.cursor()
self.__cnx_infomation = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',
charset='utf8mb4')
self.__cursor_infomation = self.__cnx_infomation.cursor()
pass
# 计算耗时
def getTimeCost(self,start, end):
def getTimeCost(self, start, end):
seconds = int(end - start)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
......@@ -246,6 +247,7 @@ class BaseCore:
else:
ms = int((end - start) * 1000)
return "%d毫秒" % (ms)
# 当前时间格式化
# 1 : 2001-01-01 12:00:00 %Y-%m-%d %H:%M:%S
# 2 : 010101120000 %y%m%d%H%M%S
......@@ -275,7 +277,7 @@ class BaseCore:
return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
# 日志格式
def logFormate(self,record, handler):
def logFormate(self, record, handler):
formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
date=record.time, # 日志时间
level=record.level_name, # 日志等级
......@@ -285,8 +287,9 @@ class BaseCore:
msg=record.message # 日志内容
)
return formate
# 获取logger
def getLogger(self,fileLogFlag=True, stdOutFlag=True):
def getLogger(self, fileLogFlag=True, stdOutFlag=True):
dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
dirname = os.path.join(dirname, "logs")
filename = filename.replace(".py", "") + ".log"
......@@ -335,23 +338,23 @@ class BaseCore:
proxy_list.append(proxy)
return proxy_list[random.randint(0, 3)]
#字符串截取
def getSubStr(self,str,beginStr,endStr):
if beginStr=='':
# 字符串截取
def getSubStr(self, str, beginStr, endStr):
if beginStr == '':
pass
else:
begin=str.find(beginStr)
if begin==-1:
begin=0
str=str[begin:]
if endStr=='':
begin = str.find(beginStr)
if begin == -1:
begin = 0
str = str[begin:]
if endStr == '':
pass
else:
end=str.rfind(endStr)
if end==-1:
end = str.rfind(endStr)
if end == -1:
pass
else:
str = str[0:end+1]
str = str[0:end + 1]
return str
# 获得脚本进程PID
......@@ -364,11 +367,11 @@ class BaseCore:
IP = socket.gethostbyname(socket.gethostname())
return IP
# 生成模拟浏览器 必须传入值为googledriver位置信息
# 生成google模拟浏览器 必须传入值为googledriver位置信息
# headless用于决定是否为无头浏览器,初始默认为无头浏览器
# 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集
# 无头浏览器用于后续对信息采集时不会有浏览器一直弹出,
def buildDriver(self,path,headless=True):
def buildDriver(self, path, headless=True):
service = Service(path)
chrome_options = webdriver.ChromeOptions()
if headless:
......@@ -381,25 +384,33 @@ class BaseCore:
chrome_options.add_argument(
'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
driver = webdriver.Chrome(chrome_options=chrome_options, service=service)
return driver
with open('./stealth.min.js') as f:
js = f.read()
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": js
})
return driver
def getInfomation(self,social_code):
# 根据社会信用代码获取企业信息
def getInfomation(self, social_code):
sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'"
self.__cursor_infomation.execute(sql)
data = self.__cursor_infomation.fetchone()
return data
def updateRun(self,social_code,runType,count):
# 更新企业采集次数
def updateRun(self, social_code, runType, count):
sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'"
self.__cursor_infomation.excute(sql_update)
self.__cnx_infomation.commit()
def recordLog(self,xydm,taskType,state,takeTime,url,e):
# 保存日志入库
def recordLog(self, xydm, taskType, state, takeTime, url, e):
createTime = self.getNowTime(1)
ip = self.getIP()
pid = self.getPID()
sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
values = [xydm,taskType,state,takeTime,url,createTime,ip,pid,e]
self.__cursor_infomation.excute(sql,values)
self.__cnx_infomation.commit()
\ No newline at end of file
values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
self.__cursor_infomation.excute(sql, values)
self.__cnx_infomation.commit()
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论