提交 caa3a936 作者: LiuLiYuan

Merge remote-tracking branch 'origin/master'

# Conflicts:
#	base/BaseCore.py
......@@ -15,15 +15,15 @@ from selenium.webdriver.chrome.service import Service
from openpyxl import Workbook
import langid
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class BaseCore:
# 序列号
__seq = 0
# 代理池 数据库连接
__cnx_proxy = None
__cnx_proxy =None
__cursor_proxy = None
# agent 池
__USER_AGENT_LIST = [
......@@ -218,9 +218,8 @@ class BaseCore:
'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'
]
# Android agent池
__USER_PHONE_AGENT_LIST = [
'Mozilla/5.0 (Linux; Android 7.1.1; OPPO R9sk) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36']
#Android agent池
__USER_PHONE_AGENT_LIST = ['Mozilla/5.0 (Linux; Android 7.1.1; OPPO R9sk) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36']
def __init__(self):
self.__cnx_proxy = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='clb_project',
......@@ -239,11 +238,12 @@ class BaseCore:
self.__cnx_proxy.close()
self.cursor.close()
self.cnx.close()
except:
except :
pass
# 计算耗时
def getTimeCost(self, start, end):
def getTimeCost(self,start, end):
seconds = int(end - start)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
......@@ -256,7 +256,6 @@ class BaseCore:
else:
ms = int((end - start) * 1000)
return "%d毫秒" % (ms)
# 当前时间格式化
# 1 : 2001-01-01 12:00:00 %Y-%m-%d %H:%M:%S
# 2 : 010101120000 %y%m%d%H%M%S
......@@ -286,7 +285,7 @@ class BaseCore:
return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
# 日志格式
def logFormate(self, record, handler):
def logFormate(self,record, handler):
formate = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
date=record.time, # 日志时间
level=record.level_name, # 日志等级
......@@ -296,9 +295,8 @@ class BaseCore:
msg=record.message # 日志内容
)
return formate
# 获取logger
def getLogger(self, fileLogFlag=True, stdOutFlag=True):
def getLogger(self,fileLogFlag=True, stdOutFlag=True):
dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
dirname = os.path.join(dirname, "logs")
filename = filename.replace(".py", "") + ".log"
......@@ -347,34 +345,34 @@ class BaseCore:
proxy_list.append(proxy)
return proxy_list[random.randint(0, 3)]
# Substring extraction between two marker strings
def getSubStr(self, str, beginStr, endStr):
    """Trim ``str`` to the span delimited by ``beginStr`` and ``endStr``.

    Args:
        str: Text to trim. (The name shadows the builtin; it is kept so
            keyword callers keep working.)
        beginStr: Start marker. When non-empty, everything before its first
            occurrence is dropped. If it does not occur, nothing is dropped.
        endStr: End marker. When non-empty, the text is cut after the first
            character of the marker's last occurrence. If it does not
            occur, nothing is cut.

    Returns:
        The trimmed text, or the input unchanged when both markers are
        empty or absent.
    """
    if beginStr != '':
        begin = str.find(beginStr)
        if begin == -1:
            # Marker not found: keep the text from its very beginning.
            begin = 0
        str = str[begin:]
    if endStr != '':
        end = str.rfind(endStr)
        if end != -1:
            # NOTE(review): only the first character of endStr is retained;
            # for a multi-character marker `end + len(endStr)` may have been
            # intended - confirm against callers before changing.
            str = str[0:end + 1]
    return str
# Traditional Chinese -> Simplified Chinese
def hant_2_hans(self, hant_str: str):
    """Convert ``hant_str`` from Traditional to Simplified Chinese.

    Args:
        hant_str: Text to convert.

    Returns:
        Whatever ``zhconv.convert`` returns for the ``'zh-hans'`` target.
    """
    return zhconv.convert(hant_str, 'zh-hans')
# 判断字符串里是否含数字
def str_have_num(self, str_num):
def str_have_num(self,str_num):
panduan = False
for str_1 in str_num:
......@@ -394,7 +392,7 @@ class BaseCore:
# return gw_item.decode() if gw_item else None
# Fetch and remove one element from a Redis list
def redicPullData(self, key):
    """Pop one element from the list stored at ``key`` and decode it.

    Args:
        key: Name of the list passed to ``self.r.lpop``.

    Returns:
        The popped value decoded with ``.decode()``, or ``None`` when
        ``lpop`` returns a falsy value (for example, nothing to pop).
    """
    item = self.r.lpop(key)
    return item.decode() if item else None
......@@ -417,7 +415,7 @@ class BaseCore:
chrome_options = webdriver.ChromeOptions()
if headless:
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_experimental_option(
"excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
......@@ -460,16 +458,16 @@ class BaseCore:
print(e)
self.cnx.commit()
# Fetch the Qichacha (QCC) token
def GetToken(self):
    """Read the current token from the ``QCC_token`` table.

    The token used to be hard-coded and had to be refreshed by hand roughly
    every two hours; it is now read through ``self.cursor``.

    Returns:
        The first column of the first row returned by the query, or
        ``None`` when the query returns no row.
    """
    query = "select token from QCC_token "
    self.cursor.execute(query)
    row = self.cursor.fetchone()
    # fetchone() gives None when there is no row; indexing it directly
    # would raise TypeError, so report "no token" explicitly instead.
    return row[0] if row else None
# 检测语言
#检测语言
def detect_language(self, text):
# 使用langid.py判断文本的语言
result = langid.classify(text)
......@@ -479,11 +477,11 @@ class BaseCore:
return 'cn'
return result[0]
# 追加接入excel
def writerToExcel(self, detailList, filename):
#追加接入excel
def writerToExcel(self,detailList,filename):
# filename='baidu搜索.xlsx'
# 读取已存在的xlsx文件
existing_data = pd.read_excel(filename, engine='openpyxl')
existing_data = pd.read_excel(filename,engine='openpyxl')
# 创建新的数据
new_data = pd.DataFrame(data=detailList)
# 将新数据添加到现有数据的末尾
......@@ -492,6 +490,8 @@ class BaseCore:
combined_data.to_excel(filename, index=False)
# return combined_data
# Put an enterprise that failed or was interrupted back into Redis
def rePutIntoR(self, item):
    """Append ``item`` to the ``NewsEnterprise:gwqy_socialCode`` list.

    Args:
        item: Value handed to ``self.r.rpush`` unchanged.
    """
    self.r.rpush('NewsEnterprise:gwqy_socialCode', item)
import pandas as pd
def writeaa():
    """Pass one hard-coded sample row to the module-level ``writerToExcel``."""
    detailList = [{'id': 3, 'name': 'qqqwe'}]
    writerToExcel(detailList)
# def writeaa():
# detailList=[]
# aa={
# 'id':3,
# 'name':'qqqwe'
# }
# detailList.append(aa)
# writerToExcel(detailList)
# 将数据追加到excel
def writerToExcel(detailList):
    """Append rows to an existing xlsx file.

    The target path is not a parameter: it is read from the module-level
    name ``filename`` at call time, so that name must be defined before
    this function is called.

    Args:
        detailList: Row data passed to ``pd.DataFrame(data=...)``.
    """
    # Load what the workbook already contains.
    existing_data = pd.read_excel(filename, engine='openpyxl')
    # Build a frame from the new rows.
    new_data = pd.DataFrame(data=detailList)
    # DataFrame.append was removed in pandas 2.0; pd.concat performs the
    # same row-wise append with a fresh index.
    combined_data = pd.concat([existing_data, new_data], ignore_index=True)
    # Write the combined rows back over the same file.
    combined_data.to_excel(filename, index=False)
# def writerToExcel(detailList):
# # filename='baidu搜索.xlsx'
# # 读取已存在的xlsx文件
# existing_data = pd.read_excel(filename,engine='openpyxl')
# # 创建新的数据
# new_data = pd.DataFrame(data=detailList)
# # 将新数据添加到现有数据的末尾
# combined_data = existing_data.append(new_data, ignore_index=True)
# # 将结果写入到xlsx文件
# combined_data.to_excel(filename, index=False)
#
# from openpyxl import Workbook
#
# if __name__ == '__main__':
# filename='test1.xlsx'
# # # 创建一个工作簿
# workbook = Workbook(filename)
# workbook.save(filename)
# writeaa()
from openpyxl import Workbook
# Ad-hoc manual check, run only when this file is executed as a script.
# NOTE(review): indentation is not visible in this view, so how far the
# guard's body extends and which `if` the trailing `else` belongs to
# cannot be determined here - confirm against the real file.
if __name__ == '__main__':
# Module-level name that writerToExcel() reads as its target path.
filename='test1.xlsx'
# # Create a workbook
# NOTE(review): openpyxl's Workbook constructor takes `write_only` as its
# first parameter, so the file name is presumably being passed as that
# flag rather than as a path - confirm.
workbook = Workbook(filename)
workbook.save(filename)
writeaa()
# Scratch check on a sample code: when the value contains 'HK' and the part
# before the first '.' is 5 characters long, the first character is dropped.
gpdm = '01109.HK'
if 'HK' in str(gpdm):
tmp_g = str(gpdm).split('.')[0]
if len(tmp_g) == 5:
gpdm = str(gpdm)[1:]
print(gpdm)
else:
pass
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论