提交 f4dce399 作者: LiuLiYuan

外内企业动态自动化

上级 cfbb60b7
...@@ -9,7 +9,6 @@ import logbook.more ...@@ -9,7 +9,6 @@ import logbook.more
import pymysql import pymysql
from selenium import webdriver from selenium import webdriver
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源 # 注意 程序退出前 调用BaseCore.close() 关闭相关资源
from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.service import Service
...@@ -18,7 +17,7 @@ class BaseCore: ...@@ -18,7 +17,7 @@ class BaseCore:
# 序列号 # 序列号
__seq = 0 __seq = 0
# 代理池 数据库连接 # 代理池 数据库连接
__cnx_proxy =None __cnx_proxy = None
__cursor_proxy = None __cursor_proxy = None
# 基本信息 数据库连接 # 基本信息 数据库连接
__cnx_infomation = None __cnx_infomation = None
...@@ -222,18 +221,20 @@ class BaseCore: ...@@ -222,18 +221,20 @@ class BaseCore:
self.__cnx_proxy.close() self.__cnx_proxy.close()
self.__cursor_infomation.close() self.__cursor_infomation.close()
self.__cnx_infomation.close() self.__cnx_infomation.close()
except : except:
pass pass
def __init__(self):
    """Open the two MySQL connections used by this helper and create their cursors.

    One connection targets the ``clb_project`` database (proxy pool), the
    other targets the ``caiji`` database (enterprise information).

    Raises:
        Whatever ``pymysql.connect`` / ``cursor()`` raise. Connections that
        were already opened are closed before the error propagates.
    """
    # NOTE(review): host, user and password are hard-coded in source control;
    # they should be moved to configuration or environment variables.
    cnx_proxy = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='clb_project',
                                charset='utf8mb4')
    try:
        cursor_proxy = cnx_proxy.cursor()
        cnx_infomation = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',
                                         charset='utf8mb4')
    except Exception:
        # Do not leak the first connection when the second step fails.
        cnx_proxy.close()
        raise
    try:
        cursor_infomation = cnx_infomation.cursor()
    except Exception:
        cnx_infomation.close()
        cnx_proxy.close()
        raise
    # Publish the handles only once everything has been acquired.
    self.__cnx_proxy = cnx_proxy
    self.__cursor_proxy = cursor_proxy
    self.__cnx_infomation = cnx_infomation
    self.__cursor_infomation = cursor_infomation
# 计算耗时 # 计算耗时
def getTimeCost(self,start, end): def getTimeCost(self, start, end):
seconds = int(end - start) seconds = int(end - start)
m, s = divmod(seconds, 60) m, s = divmod(seconds, 60)
h, m = divmod(m, 60) h, m = divmod(m, 60)
...@@ -246,6 +247,7 @@ class BaseCore: ...@@ -246,6 +247,7 @@ class BaseCore:
else: else:
ms = int((end - start) * 1000) ms = int((end - start) * 1000)
return "%d毫秒" % (ms) return "%d毫秒" % (ms)
# 当前时间格式化 # 当前时间格式化
# 1 : 2001-01-01 12:00:00 %Y-%m-%d %H:%M:%S # 1 : 2001-01-01 12:00:00 %Y-%m-%d %H:%M:%S
# 2 : 010101120000 %y%m%d%H%M%S # 2 : 010101120000 %y%m%d%H%M%S
...@@ -275,7 +277,7 @@ class BaseCore: ...@@ -275,7 +277,7 @@ class BaseCore:
return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3) return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
# 日志格式 # 日志格式
def logFormate(self,record, handler): def logFormate(self, record, handler):
formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format( formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
date=record.time, # 日志时间 date=record.time, # 日志时间
level=record.level_name, # 日志等级 level=record.level_name, # 日志等级
...@@ -285,8 +287,9 @@ class BaseCore: ...@@ -285,8 +287,9 @@ class BaseCore:
msg=record.message # 日志内容 msg=record.message # 日志内容
) )
return formate return formate
# 获取logger # 获取logger
def getLogger(self,fileLogFlag=True, stdOutFlag=True): def getLogger(self, fileLogFlag=True, stdOutFlag=True):
dirname, filename = os.path.split(os.path.abspath(sys.argv[0])) dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
dirname = os.path.join(dirname, "logs") dirname = os.path.join(dirname, "logs")
filename = filename.replace(".py", "") + ".log" filename = filename.replace(".py", "") + ".log"
...@@ -335,23 +338,23 @@ class BaseCore: ...@@ -335,23 +338,23 @@ class BaseCore:
proxy_list.append(proxy) proxy_list.append(proxy)
return proxy_list[random.randint(0, 3)] return proxy_list[random.randint(0, 3)]
#字符串截取 # 字符串截取
def getSubStr(self, str, beginStr, endStr):
    """Cut ``str`` down to the part delimited by two marker strings.

    Args:
        str: Text to cut. The name shadows the builtin but is kept because
            it is part of the method's signature.
        beginStr: Start marker. The result starts at its first occurrence
            (marker included). Empty or not found: keep the text from its
            beginning.
        endStr: End marker, searched from the right in the already-trimmed
            text. The result ends one character past the position where the
            marker starts, so only the first character of ``endStr`` is
            kept. Empty or not found: keep the text to its end.

    Returns:
        The trimmed text.
    """
    if beginStr != '':
        # find() returns -1 when the marker is absent; treat that as "from the start".
        str = str[max(str.find(beginStr), 0):]
    if endStr != '':
        end = str.rfind(endStr)
        if end != -1:
            str = str[:end + 1]
    return str
# 获得脚本进程PID # 获得脚本进程PID
...@@ -364,11 +367,11 @@ class BaseCore: ...@@ -364,11 +367,11 @@ class BaseCore:
IP = socket.gethostbyname(socket.gethostname()) IP = socket.gethostbyname(socket.gethostname())
return IP return IP
# 生成模拟浏览器 必须传入值为googledriver位置信息 # 生成google模拟浏览器 必须传入值为googledriver位置信息
# headless用于决定是否为无头浏览器,初始默认为无头浏览器 # headless用于决定是否为无头浏览器,初始默认为无头浏览器
# 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集 # 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集
# 无头浏览器用于后续对信息采集时不会有浏览器一直弹出, # 无头浏览器用于后续对信息采集时不会有浏览器一直弹出,
def buildDriver(self,path,headless=True): def buildDriver(self, path, headless=True):
service = Service(path) service = Service(path)
chrome_options = webdriver.ChromeOptions() chrome_options = webdriver.ChromeOptions()
if headless: if headless:
...@@ -381,25 +384,33 @@ class BaseCore: ...@@ -381,25 +384,33 @@ class BaseCore:
chrome_options.add_argument( chrome_options.add_argument(
'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36') 'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
driver = webdriver.Chrome(chrome_options=chrome_options, service=service) driver = webdriver.Chrome(chrome_options=chrome_options, service=service)
return driver with open('./stealth.min.js') as f:
js = f.read()
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": js
})
return driver
def getInfomation(self, social_code):
    """Fetch one ``EnterpriseInfo`` row by social credit code.

    Args:
        social_code: Value matched against the ``SocialCode`` column.

    Returns:
        Whatever the cursor's ``fetchone()`` yields for the query.
    """
    # Bind the value instead of interpolating it into the SQL text, so quotes
    # in ``social_code`` can neither break nor alter the statement.
    sql = "SELECT * FROM EnterpriseInfo WHERE SocialCode = %s"
    self.__cursor_infomation.execute(sql, (social_code,))
    return self.__cursor_infomation.fetchone()
def updateRun(self, social_code, runType, count):
    """Set one column of an ``EnterpriseInfo`` row to ``count`` and commit.

    Args:
        social_code: Value matched against the ``SocialCode`` column.
        runType: Name of the column to update.
        count: New value for that column.

    Raises:
        ValueError: If ``runType`` is not a plain identifier. A column name
            cannot be sent as a bound parameter, so it is validated before
            being placed in the SQL text.
    """
    column = str(runType)
    if not column.isidentifier():
        raise ValueError(f"invalid column name: {runType!r}")
    sql_update = f"UPDATE EnterpriseInfo SET `{column}` = %s WHERE SocialCode = %s"
    # The original called the misspelled ``excute``, which does not exist on a cursor.
    self.__cursor_infomation.execute(sql_update, (count, social_code))
    self.__cnx_infomation.commit()
def recordLog(self, xydm, taskType, state, takeTime, url, e):
    """Insert one row into ``LogTable`` and commit.

    The creation time, host IP and process id are taken from
    ``self.getNowTime(1)``, ``self.getIP()`` and ``self.getPID()``.

    Args:
        xydm: Stored in the ``SocialCode`` column.
        taskType: Stored in the ``TaskType`` column.
        state: Stored in the ``state`` column.
        takeTime: Stored in the ``TakeTime`` column.
        url: Stored in the ``url`` column.
        e: Stored in the ``Exception`` column.
            NOTE(review): presumably an error message string; confirm with callers.
    """
    sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    values = [
        xydm,
        taskType,
        state,
        takeTime,
        url,
        self.getNowTime(1),
        self.getIP(),
        self.getPID(),
        e,
    ]
    # The original called the misspelled ``excute``, so no log row was ever written.
    self.__cursor_infomation.execute(sql, values)
    self.__cnx_infomation.commit()
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论