提交 6146dfc0 作者: LiuLiYuan

Merge remote-tracking branch 'origin/master'

...@@ -15,6 +15,11 @@ from selenium.webdriver.chrome.service import Service ...@@ -15,6 +15,11 @@ from selenium.webdriver.chrome.service import Service
from openpyxl import Workbook from openpyxl import Workbook
import langid import langid
#创建连接池
import pymysql
from pymysql import connections
from DBUtils.PooledDB import PooledDB
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源 # 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class BaseCore: class BaseCore:
...@@ -233,6 +238,20 @@ class BaseCore: ...@@ -233,6 +238,20 @@ class BaseCore:
# 连接到Redis # 连接到Redis
self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6) self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
self.pool_caiji = PooledDB(
creator=pymysql,
maxconnections=5,
mincached=2,
maxcached=5,
blocking=True,
host='114.115.159.144',
port=3306,
user='root',
password='zzsn9988',
database='caiji',
charset='utf8mb4'
)
def close(self): def close(self):
try: try:
self.__cursor_proxy.close() self.__cursor_proxy.close()
...@@ -434,32 +453,66 @@ class BaseCore: ...@@ -434,32 +453,66 @@ class BaseCore:
# 根据社会信用代码获取企业信息 # 根据社会信用代码获取企业信息
def getInfomation(self, social_code): def getInfomation(self, social_code):
sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'" data = []
self.cursor.execute(sql) try:
data = self.cursor.fetchone() sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'"
# self.cursor.execute(sql)
# data = self.cursor.fetchone()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql)
data = cursor.fetchone()
data = list(data)
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('=========数据库操作失败========')
return data return data
# 更新企业采集次数 # 更新企业采集次数
def updateRun(self, social_code, runType, count): def updateRun(self, social_code, runType, count):
sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'" try:
self.cursor.execute(sql_update) sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'"
self.cnx.commit() # self.cursor.execute(sql_update)
# self.cnx.commit()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql_update)
conn.commit()
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('======更新数据库失败======')
# 保存日志入库 # 保存日志入库
def recordLog(self, xydm, taskType, state, takeTime, url, e): def recordLog(self, xydm, taskType, state, takeTime, url, e):
createTime = self.getNowTime(1)
ip = self.getIP()
pid = self.getPID()
sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
try: try:
self.cursor.execute(sql, values) createTime = self.getNowTime(1)
except Exception as e: ip = self.getIP()
print(e) pid = self.getPID()
self.cnx.commit() sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
# try:
# self.cursor.execute(sql, values)
# except Exception as e:
# print(e)
# self.cnx.commit()
cnn = self.pool_caiji.connection()
cursor = cnn.cursor()
cursor.execute(sql,values)
cnn.commit()
cursor.close()
cnn.close()
except:
log = self.getLogger()
log.info('======保存日志失败=====')
#获取企查查token #获取企查查token
def GetToken(self): def GetToken(self):
#获取企查查token #获取企查查token
query = "select token from QCC_token " query = "select token from QCC_token "
# token = '67ec7402166df1da84ae83c4b95cefc0' # 需要隔两个小时左右抓包修改 # token = '67ec7402166df1da84ae83c4b95cefc0' # 需要隔两个小时左右抓包修改
...@@ -491,8 +544,8 @@ class BaseCore: ...@@ -491,8 +544,8 @@ class BaseCore:
# return combined_data # return combined_data
#对失败或者断掉的企业 重新放入redis #对失败或者断掉的企业 重新放入redis
def rePutIntoR(self,item): def rePutIntoR(self,key,item):
self.r.rpush('NewsEnterprise:gwqy_socialCode', item) self.r.rpush(key, item)
#增加计数器的值并返回增加后的值 #增加计数器的值并返回增加后的值
def incrSet(self,key): def incrSet(self,key):
...@@ -518,3 +571,10 @@ class BaseCore: ...@@ -518,3 +571,10 @@ class BaseCore:
...@@ -7,14 +7,17 @@ from apscheduler.schedulers.blocking import BlockingScheduler ...@@ -7,14 +7,17 @@ from apscheduler.schedulers.blocking import BlockingScheduler
basecore = BaseCore.BaseCore() basecore = BaseCore.BaseCore()
log = basecore.getLogger() log = basecore.getLogger()
#144数据库
cnx = basecore.cnx
cursor = basecore.cursor
r = basecore.r r = basecore.r
#11数据库 def cnn11():
cnx_ = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4') #11数据库
cursor_ = cnx_.cursor() cnx_ = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
cursor_ = cnx_.cursor()
return cnx_,cursor_
def close11(cnx_,cursor_):
cnx_.close()
cursor_.close()
# # 连接到Redis # # 连接到Redis
# r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6) # r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
...@@ -37,7 +40,20 @@ cursor_ = cnx_.cursor() ...@@ -37,7 +40,20 @@ cursor_ = cnx_.cursor()
# return gn_social_list,gw_social_list # return gn_social_list,gw_social_list
#企业动态 #企业动态
#创建数据库连接
def connectSql():
cnx = basecore.cnx
cursor = basecore.cursor
return cnx,cursor
#关闭数据库连接
def closeSql(cnx,cursor):
cnx.close()
cursor.close()
def NewsEnterprise(): def NewsEnterprise():
cnx,cursor = connectSql()
# #获取国内企业 # #获取国内企业
gn_query = "select SocialCode from EnterpriseInfo where Place = '1'" gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
cursor.execute(gn_query) cursor.execute(gn_query)
...@@ -51,26 +67,24 @@ def NewsEnterprise(): ...@@ -51,26 +67,24 @@ def NewsEnterprise():
#todo:打印长度 #todo:打印长度
# print(len(gw_social_list)) # print(len(gw_social_list))
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
print('=======') print('=======')
#将数据插入到redis中 #将数据插入到redis中
# for item in gn_social_list: for item in gn_social_list:
# r.rpush('NewsEnterprise:gnqy_socialCode', item) r.rpush('NewsEnterprise:gnqy_socialCode', item)
# count = 0
for item in gw_social_list: for item in gw_social_list:
r.rpush('NewsEnterprise:gwqy_socialCode', item) r.rpush('NewsEnterprise:gwqy_socialCode', item)
# count+=1 closeSql(cnx,cursor)
# print(item)
# print(count)
#企业动态定时任务 #企业动态定时任务
def NewsEnterprise_task(): def NewsEnterprise_task():
# 实例化一个调度器 # 实例化一个调度器
scheduler = BlockingScheduler() scheduler = BlockingScheduler()
# 每天执行一次 # 每天执行一次
scheduler.add_job(NewsEnterprise, 'cron', hour=12,minute=0,max_instances=2) scheduler.add_job(NewsEnterprise, 'cron', hour=0,minute=0,max_instances=2)
try: try:
# redisPushData # 定时开始前执行一次 # redisPushData # 定时开始前执行一次
NewsEnterprise()
scheduler.start() scheduler.start()
except Exception as e: except Exception as e:
print('定时采集异常', e) print('定时采集异常', e)
...@@ -78,14 +92,17 @@ def NewsEnterprise_task(): ...@@ -78,14 +92,17 @@ def NewsEnterprise_task():
#企业公告 #企业公告
def NoticeEnterprise(): def NoticeEnterprise():
cnx,cursor = connectSql()
# 获取国内企业 # 获取国内企业
gn_query = "select SocialCode from EnterpriseInfo where Place = '1' and SecuritiesCode is not null limit 10 " gn_query = "select SocialCode from EnterpriseInfo where Place = '1' and SecuritiesCode is not null "
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
print('=======') print('=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('NoticeEnterprise:gnqy_socialCode', item) r.rpush('NoticeEnterprise:gnqy_socialCode', item)
closeSql(cnx,cursor)
#企业公告定时任务 #企业公告定时任务
def NoticeEnterprise_task(): def NoticeEnterprise_task():
# 实例化一个调度器 # 实例化一个调度器
...@@ -101,14 +118,16 @@ def NoticeEnterprise_task(): ...@@ -101,14 +118,16 @@ def NoticeEnterprise_task():
#企业年报 #企业年报
def AnnualEnterprise(): def AnnualEnterprise():
cnx,cursor = connectSql()
# 获取国内企业 # 获取国内企业
gn_query = "select SocialCode from EnterpriseInfo where Place = '1' and SecuritiesCode is not null limit 10" gn_query = "select SocialCode from EnterpriseInfo where Place = '1' and SecuritiesCode is not null"
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
print('=======') print('=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('AnnualEnterprise:gnqy_socialCode', item) r.rpush('AnnualEnterprise:gnqy_socialCode', item)
closeSql(cnx,cursor)
#企业年报定时任务 #企业年报定时任务
def AnnualEnterprise_task(): def AnnualEnterprise_task():
...@@ -117,7 +136,8 @@ def AnnualEnterprise_task(): ...@@ -117,7 +136,8 @@ def AnnualEnterprise_task():
# 每年执行一次 # 每年执行一次
scheduler.add_job(AnnualEnterprise, 'cron', second='*/10') scheduler.add_job(AnnualEnterprise, 'cron', second='*/10')
try: try:
# redisPushData # 定时开始前执行一次 # 定时开始前执行一次
AnnualEnterprise()
scheduler.start() scheduler.start()
except Exception as e: except Exception as e:
print('定时采集异常', e) print('定时采集异常', e)
...@@ -125,30 +145,36 @@ def AnnualEnterprise_task(): ...@@ -125,30 +145,36 @@ def AnnualEnterprise_task():
#企业基本信息 #企业基本信息
def BaseInfoEnterprise(): def BaseInfoEnterprise():
cnx,cursor = connectSql()
# 获取国内企业 # 获取国内企业
gn_query = "select SocialCode from EnterpriseInfo where Place = '1' limit 10 " gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
print('=======') print('=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('BaseInfoEnterprise:gnqy_socialCode', item) r.rpush('BaseInfoEnterprise:gnqy_socialCode', item)
closeSql(cnx,cursor)
#企业基本信息定时任务 #企业基本信息定时任务
def BaseInfoEnterprise_task(): def BaseInfoEnterprise_task():
# 实例化一个调度器 # 实例化一个调度器
scheduler = BlockingScheduler() scheduler = BlockingScheduler()
# 每年执行一次 # 每年执行一次
scheduler.add_job(BaseInfoEnterprise, 'cron', second='*/10') scheduler.add_job(BaseInfoEnterprise, 'cron', month=1, day=1, hour=0, minute=0)
try: try:
# redisPushData # 定时开始前执行一次 # 定时开始前执行一次
BaseInfoEnterprise()
scheduler.start() scheduler.start()
except Exception as e: except Exception as e:
print('定时采集异常', e) print('定时采集异常', e)
pass pass
#企业核心人员
#东方财富网财务数据 #东方财富网财务数据
def FinanceFromEast(): def FinanceFromEast():
cnx_,cursor_ = cnn11()
#从上市企业库中读取数据 #从上市企业库中读取数据
sql_sel = '''select social_credit_code from sys_base_enterprise_ipo where category = '1' limit 10 ''' sql_sel = '''select social_credit_code from sys_base_enterprise_ipo where category = '1' limit 10 '''
cursor_.execute(sql_sel) cursor_.execute(sql_sel)
...@@ -157,14 +183,16 @@ def FinanceFromEast(): ...@@ -157,14 +183,16 @@ def FinanceFromEast():
print('=======') print('=======')
for item in finance_list: for item in finance_list:
r.rpush('FinanceFromEast:finance_socialCode', item) r.rpush('FinanceFromEast:finance_socialCode', item)
close11(cnx_,cursor_)
#东方财富网财务数据定时任务
def FinanceFromEase_task(): def FinanceFromEase_task():
# 实例化一个调度器 # 实例化一个调度器
scheduler = BlockingScheduler() scheduler = BlockingScheduler()
# 每个季度执行一次 # 每个季度执行一次
scheduler.add_job(FinanceFromEast, 'cron', month='1-12/3', day='1',hour=0, minute=0) scheduler.add_job(FinanceFromEast, 'cron', month='1-12/3', day='1',hour=0, minute=0)
try: try:
# redisPushData # 定时开始前执行一次 # 定时开始前执行一次
scheduler.start() scheduler.start()
except Exception as e: except Exception as e:
print('定时采集异常', e) print('定时采集异常', e)
...@@ -172,29 +200,33 @@ def FinanceFromEase_task(): ...@@ -172,29 +200,33 @@ def FinanceFromEase_task():
#微信公众号 #微信公众号
def WeiXingetFromSql(): def WeiXingetFromSql():
cnx_,cursor_=cnn11()
selectSql = "SELECT info_source_code from info_source where site_uri like '%mp.weixin.qq.com%'" selectSql = "SELECT info_source_code from info_source where site_uri like '%mp.weixin.qq.com%'"
cursor.execute(selectSql) cursor_.execute(selectSql)
results = cursor.fetchall() results = cursor_.fetchall()
result_list = [item[0] for item in results] result_list = [item[0] for item in results]
#放入redis #放入redis
for item in result_list: for item in result_list:
r.rpush('WeiXinGZH:infoSourceCode', item) r.rpush('WeiXinGZH:infoSourceCode', item)
close11(cnx_,cursor_)
#微信公众号定时任务 #微信公众号定时任务
def weixin_task(): def weixin_task():
# 实例化一个调度器 # 实例化一个调度器
scheduler = BlockingScheduler() scheduler = BlockingScheduler()
# 每天执行一次 # 每天执行一次
scheduler.add_job(WeiXingetFromSql, 'cron', hour=12,minute=0) scheduler.add_job(WeiXingetFromSql, 'cron', hour=0,minute=0)
try: try:
# redisPushData # 定时开始前执行一次 # 定时开始前执行一次
scheduler.start() scheduler.start()
except Exception as e: except Exception as e:
print('定时采集异常', e) print('定时采集异常', e)
pass pass
# 企业年报——雪球网 # 企业年报——雪球网
def AnnualEnterpriseIPO(): def AnnualEnterpriseXueQ():
cnx,cursor = connectSql()
# 获取国内上市企业 # 获取国内上市企业
gn_query = "select SocialCode from EnterpriseInfo where Place = '1' and SecuritiesCode is not null and isIPO = 1 limit 10" gn_query = "select SocialCode from EnterpriseInfo where Place = '1' and SecuritiesCode is not null and isIPO = 1 limit 10"
cursor.execute(gn_query) cursor.execute(gn_query)
...@@ -203,19 +235,50 @@ def AnnualEnterpriseIPO(): ...@@ -203,19 +235,50 @@ def AnnualEnterpriseIPO():
print('=======') print('=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('AnnualEnterprise:gnshqy_socialCode', item) r.rpush('AnnualEnterprise:gnshqy_socialCode', item)
closeSql(cnx,cursor)
#雪球网年报定时任务
def AnnualEnterpriseXueQ_task():
# 实例化一个调度器
scheduler = BlockingScheduler()
# 每年执行一次
scheduler.add_job(AnnualEnterpriseXueQ, 'cron', month=1, day=1, hour=0, minute=0)
try:
# 定时开始前执行一次
AnnualEnterpriseXueQ()
scheduler.start()
except Exception as e:
print('定时采集异常', e)
pass
#国外企业基本信息 #国外企业基本信息
def BaseInfoEnterpriseAbroad(): def BaseInfoEnterpriseAbroad():
cnx,cursor = connectSql()
# 获取国外企业 # 获取国外企业
gn_query = "select id from EnterpriseInfo where Place = '2' limit 10 " gn_query = "select SocialCode from EnterpriseInfo where Place = '2' "
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
print('=======') print('=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('BaseInfoEnterprise:gwqy_socialCode', item) r.rpush('BaseInfoEnterprise:gwqy_socialCode', item)
closeSql(cnx,cursor)
#国外基本信息定时任务
def BaseInfoAbroad_task():
# 实例化一个调度器
scheduler = BlockingScheduler()
# 每个月执行一次
scheduler.add_job(BaseInfoEnterpriseAbroad, 'cron', day=1,hour=0, minute=0)
try:
# redisPushData # 定时开始前执行一次
BaseInfoEnterpriseAbroad()
scheduler.start()
except Exception as e:
print('定时采集异常', e)
pass
##福布斯=====从数据库中读取信息放入redis #福布斯=====从数据库中读取信息放入redis
def FBS(): def FBS():
cnx,cursor = connectSql()
# todo:调整为获取福布斯的数据库 # todo:调整为获取福布斯的数据库
# gw_query = "select id from EnterpriseInfo where ext1='fbs2000' and ext2='1' and Place=2" # gw_query = "select id from EnterpriseInfo where ext1='fbs2000' and ext2='1' and Place=2"
# cursor.execute(gw_query) # cursor.execute(gw_query)
...@@ -228,13 +291,45 @@ def FBS(): ...@@ -228,13 +291,45 @@ def FBS():
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
# gw_social_list = [item[0] for item in gw_result] # gw_social_list = [item[0] for item in gw_result]
#
# for item in gw_social_list: # for item in gw_social_list:
# r.rpush('NewsEnterpriseFbs:gwqy_socialCode', item) # r.rpush('NewsEnterpriseFbs:gwqy_socialCode', item)
for item in gn_social_list: for item in gn_social_list:
if not r.exists(item): if not r.exists(item):
r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item) r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item)
closeSql(cnx,cursor)
#将IPO的国外股票代码放到redis中
def yahooCodeFromSql():
cnx_,cursor_=cnn11()
try:
gn_query = "select securities_code from sys_base_enterprise_ipo where category in ('4','5','6') "
cursor_.execute(gn_query)
gn_result = cursor_.fetchall()
gn_social_list = [item[0] for item in gn_result]
print('=======')
for item in gn_social_list:
r.rpush('NoticeEnterprise:securities_code', item)
except Exception as e:
log.info("数据查询异常")
finally:
close11(cnx_,cursor_)
#雅虎财务数据3天一次
def yahooCode_task():
# 实例化一个调度器
scheduler = BlockingScheduler()
# 每天执行一次
# scheduler.add_job(yahooCodeFromSql, 'cron', hour=0,minute=0)
#3天执行一次
scheduler.add_job(yahooCodeFromSql, 'interval', days=3)
try:
yahooCodeFromSql() # 定时开始前执行一次
scheduler.start()
except Exception as e:
print('定时采集异常', e)
pass
if __name__ == "__main__": if __name__ == "__main__":
...@@ -242,7 +337,7 @@ if __name__ == "__main__": ...@@ -242,7 +337,7 @@ if __name__ == "__main__":
# NoticeEnterprise() # NoticeEnterprise()
# AnnualEnterpriseIPO() # AnnualEnterpriseIPO()
# AnnualEnterprise() # AnnualEnterprise()
# BaseInfoEnterpriseAbroad() BaseInfoEnterpriseAbroad()
# NewsEnterprise_task() # NewsEnterprise_task()
# NewsEnterprise() # NewsEnterprise()
# BaseInfoEnterprise() # BaseInfoEnterprise()
...@@ -250,8 +345,6 @@ if __name__ == "__main__": ...@@ -250,8 +345,6 @@ if __name__ == "__main__":
# NoticeEnterprise_task() # NoticeEnterprise_task()
# AnnualEnterprise_task() # AnnualEnterprise_task()
# NoticeEnterprise() # NoticeEnterprise()
FinanceFromEast() # FinanceFromEast()
log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===') log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===')
cnx_.close()
cursor_.close()
# basecore.close()
# Core toolkit
import os
import random
import socket
import sys
import time
import logbook
import logbook.more
import pandas as pd
import zhconv
import pymysql
import redis
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from openpyxl import Workbook
import langid
# NOTE: call BaseCore.close() before the program exits to release the related resources
class BaseCore:
# 序列号
__seq = 0
# 代理池 数据库连接
__cnx_proxy =None
__cursor_proxy = None
# agent 池
__USER_AGENT_LIST = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/531.4 (KHTML, like Gecko) Chrome/3.0.194.0 Safari/531.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.50 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Lunascape 5.0 alpha2)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.7 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon;',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.169.0 Safari/530.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040614 Firefox/0.9',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.500.0 Safari/534.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; TencentTraveler)',
'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.4 (KHTML, like Gecko) Chrome/6.0.481.0 Safari/534.4',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.370.0 Safari/533.4',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.31 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like Firefox/3.x) SeaMonkey/2.0.12',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE) Chrome/4.0.223.3 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.42 Safari/525.19',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.3 (KHTML, like Gecko) Chrome/4.0.227.0 Safari/532.3',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.8',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.463.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.9 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.9',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5',
'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.4 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.0 (KHTML, like Gecko) Chrome/2.0.182.0 Safari/531.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.9 (KHTML, like Gecko) Chrome/7.0.531.0 Safari/534.9',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.811.0 Safari/535.1',
'ozilla/5.0 (Windows; U; Windows NT 5.0; de-DE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.127 Safari/533.4',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; zh-cn) Opera 8.50',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/7.0.0 Safari/700.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20041107 Firefox/0.9.2 StumbleUpon/1.994',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.7.5) Gecko/20041110 Firefox/1.0',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.5',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.1 (KHTML, like Gecko) Chrome/2.0.168.0 Safari/530.1',
'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20040913 Firefox/0.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.8 (KHTML, like Gecko) Chrome/2.0.177.1 Safari/530.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.40 Safari/530.5',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.24 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.2 Safari/528.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2',
'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; T312461)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.461.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.0; rv:1.7.3) Gecko/20041001 Firefox/0.10.1',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; de-DE) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.2 Safari/531.3',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2',
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
'Mozilla/5.0 (Windows NT 5.1; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.548.0 Safari/534.10',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10 ChromePlus/1.5.2.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.0 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.2 Safari/533.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.4 Safari/532.1',
'Mozilla/5.0 (Windows NT 6.0; rv:2.1.1) Gecko/20110415 Firefox/4.0.2pre Fennec/4.0.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.0 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; sv-SE; rv:1.7.5) Gecko/20041108 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.7.5) Gecko/20041122 Firefox/1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; uZardWeb/1.0; Server_JP)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; HCI0449; .NET CLR 1.0.3705)',
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt); Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1);',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.23 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.176.0 Safari/530.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; InfoPath.1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.55 Safari/525.19',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0a1) Gecko/20110623 Firefox/7.0a1 Fennec/7.0a1',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.724.100 Safari/534.30',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; WOW64; SV1; uZardWeb/1.0; Server_HK)',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3',
'Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.2 (KHTML, like Gecko) Chrome/3.0.191.3 Safari/531.2',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.39 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.38 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8b) Gecko/20050118 Firefox/1.0+',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP; rv:1.7) Gecko/20040707 Firefox/0.9.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.4 (KHTML, like Gecko) Chrome/2.0.171.0 Safari/530.4',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; nl-NL; rv:1.7.5) Gecko/20041202 Firefox/1.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.6 Safari/532.2',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/1.0.156.0 Safari/528.8',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.43 Safari/534.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13',
'Mozilla/5.0 (ipad Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.498.0 Safari/534.6',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-CA) AppleWebKit/534.13 (KHTML like Gecko) Chrome/9.0.597.98 Safari/534.13',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6',
'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.3.154.6 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.599.0 Safari/534.13',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.8 (KHTML, like Gecko) Chrome/7.0.521.0 Safari/534.8',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1b2pre) Gecko/20081015 Fennec/1.0a1',
'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'
]
#Android agent池
__USER_PHONE_AGENT_LIST = ['Mozilla/5.0 (Linux; Android 7.1.1; OPPO R9sk) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36']
def __init__(self):
    """Open the MySQL connections and the Redis client used by the helper methods.

    Two MySQL connections are created on the same server: a private one on the
    ``clb_project`` schema (used by ``get_proxy``) and a public one on the
    ``caiji`` schema (``self.cnx`` / ``self.cursor``). ``self.r`` is a Redis
    client on database 6. Call ``close()`` before the program exits.
    """
    # NOTE(review): hosts and passwords are hard-coded here; consider moving
    # them to configuration.
    # Settings shared by both MySQL connections; only the schema differs.
    mysql_settings = dict(host='114.115.159.144', user='root', password='zzsn9988', charset='utf8mb4')

    self.__cnx_proxy = pymysql.connect(db='clb_project', **mysql_settings)
    self.__cursor_proxy = self.__cnx_proxy.cursor()

    self.cnx = pymysql.connect(db='caiji', **mysql_settings)
    self.cursor = self.cnx.cursor()

    # Redis client
    self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
def close(self):
    """Release the MySQL cursors and connections opened by ``__init__``.

    Each resource is closed independently, so a failure on one (for example a
    cursor whose connection already dropped) does not prevent the remaining
    ones from being closed. Errors are deliberately ignored: this is a
    best-effort shutdown helper and never raises.
    """
    releasers = (
        lambda: self.__cursor_proxy.close(),
        lambda: self.__cnx_proxy.close(),
        lambda: self.cursor.close(),
        lambda: self.cnx.close(),
    )
    for release in releasers:
        try:
            release()
        except Exception:
            # Best effort: keep going so the other handles are still released.
            pass
# 计算耗时
def getTimeCost(self, start, end):
    """Format the time elapsed between ``start`` and ``end`` as Chinese text.

    Both arguments are timestamps in seconds (e.g. from ``time.time()``).
    The largest non-zero unit decides the format: hours/minutes/seconds,
    minutes/seconds, or seconds. Durations that truncate to zero whole
    seconds are reported in milliseconds.
    """
    elapsed = end - start
    seconds = int(elapsed)
    minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)

    if hours > 0:
        return f"{hours}小时{minutes}分钟{secs}秒"
    if minutes > 0:
        return f"{minutes}分钟{secs}秒"
    if seconds > 0:
        return f"{secs}秒"
    # Less than one whole second: fall back to milliseconds.
    return f"{int(elapsed * 1000)}毫秒"
# Format the current time; the argument selects the output:
# 1 : 2001-01-01 12:00:00 (%Y-%m-%d %H:%M:%S)
# 2 : 010101120000 (%y%m%d%H%M%S)
# 3 : 1690179526555 (Unix timestamp as an int, in milliseconds)
def getNowTime(self, type):
    """Return the current local time in the representation selected by ``type``.

    ``1`` -> ``"%Y-%m-%d %H:%M:%S"`` string, ``2`` -> ``"%y%m%d%H%M%S"`` string,
    ``3`` -> integer Unix timestamp in milliseconds. Any other value yields
    an empty string.
    """
    if type == 3:
        return int(time.time() * 1000)
    if type == 2:
        return time.strftime("%y%m%d%H%M%S")
    if type == 1:
        return time.strftime("%Y-%m-%d %H:%M:%S")
    # Unknown selector: callers get an empty string rather than an error.
    return ""
# 获取流水号
def getNextSeq(self):
    """Return the next serial number: ``yymmddHHMMSS`` followed by a 3-digit counter.

    The counter lives in ``self.__seq`` (defined outside this method) and is
    incremented on every call, wrapping back to 0 after 999 so the suffix is
    always exactly three digits.
    """
    self.__seq += 1
    # Wrap at 999, not 1000: "1000" would overflow the zfill(3) width and
    # produce a serial number one character longer than all the others.
    if self.__seq > 999:
        self.__seq = 0
    return self.getNowTime(2) + str(self.__seq).zfill(3)
# 获取信用代码
def getNextXydm(self):
    """Return a generated credit code: ``"ZZSN"`` + ``yymmddHHMMSS`` + 3-digit counter.

    Uses the ``self.__seq`` counter (defined outside this method), which is
    incremented on every call and wraps back to 0 after 999 so the suffix is
    always exactly three digits.
    """
    self.__seq += 1
    # Wrap at 999, not 1000: "1000" would overflow the zfill(3) width and
    # make the generated code one character longer than all the others.
    if self.__seq > 999:
        self.__seq = 0
    return "ZZSN" + self.getNowTime(2) + str(self.__seq).zfill(3)
# 日志格式
def logFormate(self, record, handler):
    """Render a log record as ``[time] [level] [file] [function] [line] message``.

    ``record`` supplies ``time``, ``level_name``, ``filename``, ``func_name``,
    ``lineno`` and ``message``; only the base name of ``filename`` is shown.
    ``handler`` is accepted for the formatter signature and is not used.
    """
    source_file = os.path.split(record.filename)[-1]
    return (
        f"[{record.time}] [{record.level_name}] [{source_file}] "
        f"[{record.func_name}] [{record.lineno}] {record.message}"
    )
# 获取logger
def getLogger(self, fileLogFlag=True, stdOutFlag=True):
    """Build a logbook logger named after the running script.

    Log files go to a ``logs`` directory next to the script
    (``sys.argv[0]``), in a file named ``<script>.log`` handled by a
    ``TimedRotatingFileHandler``.

    :param fileLogFlag: attach the rotating file handler when true.
    :param stdOutFlag: attach the colorized stderr handler when true.
    :return: the configured ``logbook.Logger``.
    """
    script_dir, script_name = os.path.split(os.path.abspath(sys.argv[0]))
    log_dir = os.path.join(script_dir, "logs")
    log_name = script_name.replace(".py", "") + ".log"
    # exist_ok avoids the check-then-create race when several crawler
    # processes start at the same time and all find the directory missing.
    os.makedirs(log_dir, exist_ok=True)

    logbook.set_datetime_format('local')
    logger = logbook.Logger(log_name)
    logger.handlers = []
    if fileLogFlag:  # write log records to the rotating file
        log_file = logbook.TimedRotatingFileHandler(os.path.join(log_dir, log_name), date_format='%Y-%m-%d',
                                                    bubble=True, encoding='utf-8')
        log_file.formatter = self.logFormate
        logger.handlers.append(log_file)
    if stdOutFlag:  # echo log records to the console
        log_std = logbook.more.ColorizedStderrHandler(bubble=True)
        log_std.formatter = self.logFormate
        logger.handlers.append(log_std)
    return logger
# 获取随机的userAgent
def getRandomUserAgent(self):
    """Return one user-agent string chosen at random from ``__USER_AGENT_LIST``."""
    agent_pool = self.__USER_AGENT_LIST
    return random.choice(agent_pool)
# 获取代理
def get_proxy(self):
    """Return one randomly chosen proxy mapping from the ``clb_proxy`` table.

    Each row's ``proxy`` column is expected to look like ``host-port``; rows
    without a ``-`` separator are skipped. The result is a dict with the
    keys ``"HTTP"`` and ``"HTTPS"``, both pointing at ``http://host:port``.

    :raises IndexError: if the table yields no usable proxy.
    """
    self.__cursor_proxy.execute("select proxy from clb_proxy")
    proxy_list = []
    for row in self.__cursor_proxy.fetchall():
        # Read the column value directly instead of stripping the tuple repr.
        parts = str(row[0]).split('-')
        if len(parts) < 2:
            continue
        proxy_meta = f"http://{parts[0]}:{parts[1]}"
        # NOTE(review): requests matches proxy keys against the lowercase URL
        # scheme, so these uppercase keys may never be applied — confirm with
        # the callers before changing the key names.
        proxy_list.append({"HTTP": proxy_meta, "HTTPS": proxy_meta})
    # Pick from the whole pool; the previous fixed index range 0..3 failed
    # with fewer than four proxies and never used the fifth and later ones.
    return random.choice(proxy_list)
#字符串截取
def getSubStr(self, str, beginStr, endStr):
    """Cut ``str`` down to the span between two markers.

    The result starts at the first occurrence of ``beginStr`` (marker
    included) and ends at the first character of the last occurrence of
    ``endStr`` in what remains. An empty marker, or one that is not found,
    leaves that side of the string untouched.
    """
    text = str
    if beginStr != '':
        start = text.find(beginStr)
        # A missing start marker (-1) means "keep from the beginning".
        text = text[max(start, 0):]
    if endStr != '':
        stop = text.rfind(endStr)
        if stop != -1:
            # Keeps only the first character of the end marker.
            text = text[:stop + 1]
    return text
# 繁体字转简体字
def hant_2_hans(self, hant_str: str):
    """Convert the Traditional Chinese text in ``hant_str`` to Simplified Chinese."""
    target_locale = 'zh-hans'
    return zhconv.convert(hant_str, target_locale)
# 判断字符串里是否含数字
def str_have_num(self, str_num):
    """Return ``True`` if at least one character of ``str_num`` is a digit.

    Stops at the first digit found instead of scanning the whole string.
    An empty string yields ``False``.
    """
    return any(char.isdigit() for char in str_num)
# # 从Redis的List中获取并移除一个元素
# def redicPullData(self,type,key):
# #1 表示国内 2 表示国外
# if type == 1:
# gn_item = self.r.lpop(key)
# return gn_item.decode() if gn_item else None
# if type == 2:
# gw_item = self.r.lpop(key)
# return gw_item.decode() if gw_item else None
# 从Redis的List中获取并移除一个元素
def redicPullData(self, key):
    """Pop one element from the right end of the Redis list ``key``.

    Returns the element decoded to ``str``, or ``None`` when nothing (or an
    empty value) was popped.
    """
    raw = self.r.rpop(key)
    if not raw:
        return None
    return raw.decode()
# 获得脚本进程PID
def getPID(self):
    """Return the process ID of the running script."""
    return os.getpid()
# 获取本机IP
def getIP(self):
    """Return the IP address that the local host name resolves to."""
    hostname = socket.gethostname()
    return socket.gethostbyname(hostname)
# 生成google模拟浏览器 必须传入值为googledriver位置信息
# headless用于决定是否为无头浏览器,初始默认为无头浏览器
# 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集
# 无头浏览器用于后续对信息采集时不会有浏览器一直弹出,
def buildDriver(self, path, headless=True):
    """Create a Chrome WebDriver configured for crawling.

    :param path: filesystem location of the chromedriver executable.
    :param headless: run Chrome without a visible window (default). Pass
        ``False`` when a page has to be inspected interactively or the site
        does not render correctly in headless mode.
    :return: the started ``webdriver.Chrome`` instance; the caller is
        responsible for quitting it.
    """
    service = Service(path)
    chrome_options = webdriver.ChromeOptions()
    if headless:
        chrome_options.add_argument('--headless')
        # NOTE(review): the reviewed source had lost its indentation;
        # --disable-gpu is grouped with the headless switch here — confirm.
        chrome_options.add_argument('--disable-gpu')
    # Hide the usual automation markers from the page.
    chrome_options.add_experimental_option(
        "excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('lang=zh-CN,zh,zh-TW,en-US,en')
    chrome_options.add_argument('user-agent=' + self.getRandomUserAgent())
    # ``options=`` is the supported keyword alongside ``service=``; the old
    # ``chrome_options=`` keyword is deprecated and rejected by newer
    # Selenium 4 releases.
    driver = webdriver.Chrome(options=chrome_options, service=service)
    return driver
# 根据社会信用代码获取企业信息
def getInfomation(self, social_code):
    """Fetch the ``EnterpriseInfo`` row for a social credit code.

    :param social_code: value matched against the ``SocialCode`` column.
    :return: the first matching row as returned by ``cursor.fetchone()``,
        or ``None`` when no row matches.
    """
    # Bind the value as a query parameter instead of formatting it into the
    # SQL text, so quotes in social_code cannot alter the statement.
    sql = "SELECT * FROM EnterpriseInfo WHERE SocialCode = %s"
    self.cursor.execute(sql, (social_code,))
    return self.cursor.fetchone()
# 更新企业采集次数
def updateRun(self, social_code, runType, count):
    """Set the crawl counter column ``runType`` to ``count`` for one enterprise.

    :param social_code: value matched against the ``SocialCode`` column.
    :param runType: name of the counter column to update.
    :param count: new value for that column.
    :raises ValueError: if ``runType`` is not a plain identifier.
    """
    # A column name cannot be sent as a bound parameter, so it is validated
    # before being placed into the statement text.
    column = str(runType)
    if not column.isidentifier():
        raise ValueError(f"invalid column name: {runType!r}")
    sql_update = f"UPDATE EnterpriseInfo SET {column} = %s WHERE SocialCode = %s"
    # The values themselves are bound as parameters rather than formatted in.
    self.cursor.execute(sql_update, (count, social_code))
    self.cnx.commit()
# 保存日志入库
def recordLog(self, xydm, taskType, state, takeTime, url, e):
    """Insert one crawl-log row into ``LogTable`` and commit.

    The creation time, host IP and process ID are filled in from
    ``getNowTime(1)``, ``getIP()`` and ``getPID()``. A failing insert is
    printed rather than raised; the commit is issued either way.

    :param xydm: social credit code of the enterprise.
    :param taskType: kind of crawl task being logged.
    :param state: outcome of the task.
    :param takeTime: how long the task took.
    :param url: URL that was crawled.
    :param e: exception text to store with the row.
    """
    created_at = self.getNowTime(1)
    host_ip = self.getIP()
    process_id = self.getPID()
    insert_sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    row = [xydm, taskType, state, takeTime, url, created_at, host_ip, process_id, e]
    try:
        self.cursor.execute(insert_sql, row)
    except Exception as err:
        print(err)
    self.cnx.commit()
#获取企查查token
def GetToken(self):
    """Return the token stored in the first row of the ``QCC_token`` table.

    Raises ``TypeError`` if the table is empty, because ``fetchone()`` then
    returns ``None``.
    """
    token_query = "select token from QCC_token "
    self.cursor.execute(token_query)
    first_row = self.cursor.fetchone()
    return first_row[0]
#检测语言
def detect_language(self, text):
    """Return the language code that ``langid.classify`` assigns to ``text``.

    Falls back to ``'cn'`` when the classifier result, or its language
    field, is an empty string.
    """
    result = langid.classify(text)
    if result == '' or result[0] == '':
        return 'cn'
    return result[0]
#追加接入excel
def writerToExcel(self, detailList, filename):
    """Append rows to an existing ``.xlsx`` file.

    The file is read in full, the new rows are added at the end, and the
    combined table is written back to the same path without an index column.

    :param detailList: data accepted by ``pandas.DataFrame(data=...)``.
    :param filename: path of an existing xlsx file; it is overwritten.
    """
    existing_data = pd.read_excel(filename, engine='openpyxl')
    new_data = pd.DataFrame(data=detailList)
    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    # and gives the same result with ignore_index=True.
    combined_data = pd.concat([existing_data, new_data], ignore_index=True)
    combined_data.to_excel(filename, index=False)
# return combined_data
#对失败或者断掉的企业 重新放入redis
def rePutIntoR(self, item):
    """Push ``item`` back onto the right end of the ``NewsEnterprise:gwqy_socialCode`` Redis list.

    Used to re-queue an enterprise whose processing failed or was interrupted.
    """
    retry_queue = 'NewsEnterprise:gwqy_socialCode'
    self.r.rpush(retry_queue, item)
#增加计数器的值并返回增加后的值
def incrSet(self, key):
    """Increment the Redis counter ``key``, print the new value and return it."""
    updated = self.r.incr(key)
    print("增加后的值:", updated)
    return updated
#获取key剩余的过期时间
def getttl(self,key):
# Reset the counter stored at ``key`` once it no longer has a positive TTL.
# Returns nothing; the TTL that was read is only printed.
# Read the remaining time-to-live of the key from Redis.
ttl = self.r.ttl(key)
print("剩余过期时间:", ttl)
# A negative TTL is treated as "expired".
# NOTE(review): Redis also reports a negative TTL for a key that exists
# without an expiry — confirm that resetting such a key is intended.
if ttl < 0:
# Expired: reset the value to 0 and give the key a fresh one-hour expiry.
self.r.set(key, 0)
self.r.expire(key, 3600)
# NOTE(review): indentation was lost in the reviewed source, so it is unclear
# whether this pause runs only after a reset or on every call — confirm.
time.sleep(2)
import time
from base import BaseCore
from apscheduler.schedulers.blocking import BlockingScheduler
import pymysql
# Module-level helpers shared by the functions below: one BaseCore instance,
# the logger it builds, and its Redis client (the ``r`` attribute).
basecore = BaseCore.BaseCore()
log = basecore.getLogger()
r = basecore.r
def conn11():
    """Open a new MySQL connection to the ``clb_project`` schema.

    :return: ``(connection, cursor)``; the caller must close both.
    """
    # NOTE(review): host and password are hard-coded here; consider moving
    # them to configuration.
    connection = pymysql.Connect(
        host='114.116.44.11',
        port=3306,
        user='root',
        passwd='f7s0&7qqtK',
        db='clb_project',
        charset='utf8',
    )
    return connection, connection.cursor()
#企业公告
def yahooCodeFromSql():
    """Load securities codes from MySQL and queue them in Redis.

    Selects ``securities_code`` from ``sys_base_enterprise_ipo`` for
    categories 4, 5 and 6 and pushes every code onto the
    ``NoticeEnterprise:securities_code`` Redis list. Failures are logged and
    not re-raised; the cursor and connection are always closed.
    """
    conn, cursor = conn11()
    try:
        gn_query = "select securities_code from sys_base_enterprise_ipo where category in ('4','5','6') "
        cursor.execute(gn_query)
        securities_codes = [row[0] for row in cursor.fetchall()]
        print('=======')
        for code in securities_codes:
            r.rpush('NoticeEnterprise:securities_code', code)
    except Exception as e:
        # Log the cause as well; the bare message alone made failures
        # impossible to diagnose.
        log.error(f"数据查询异常: {e}")
    finally:
        cursor.close()
        conn.close()
def yahooCode_task():
    """Queue the securities codes once, then repeat every three days.

    ``BlockingScheduler.start()`` blocks the calling thread, so this function
    does not return while the scheduler is running. Any exception from the
    first run or from the scheduler is printed and swallowed.
    """
    job_scheduler = BlockingScheduler()
    # Interval trigger: run every 3 days (a daily cron trigger was used before).
    job_scheduler.add_job(yahooCodeFromSql, 'interval', days=3)
    try:
        # Run once immediately instead of waiting for the first interval.
        yahooCodeFromSql()
        job_scheduler.start()
    except Exception as err:
        print('定时采集异常', err)
if __name__ == "__main__":
    # Wall-clock start, used for the elapsed-time log line below.
    start = time.time()
    # Only the securities-code scheduler is enabled. The other entry points
    # that used to be listed here (NoticeEnterprise, AnnualEnterpriseIPO,
    # AnnualEnterprise, BaseInfoEnterpriseAbroad, NewsEnterprise_task,
    # NewsEnterprise, BaseInfoEnterprise, FBS, NoticeEnterprise_task,
    # AnnualEnterprise_task) are not called.
    yahooCode_task()
    finished_at = basecore.getNowTime(1)
    time_cost = basecore.getTimeCost(start, time.time())
    log.info(f'====={finished_at}=====添加数据成功======耗时:{time_cost}===')
    # basecore.close() is not called here.
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
...@@ -16,6 +16,9 @@ urllib3.disable_warnings() ...@@ -16,6 +16,9 @@ urllib3.disable_warnings()
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
import re import re
from BaseCore import BaseCore
import configparser
import redis
""" """
将请求链接分成三个 将请求链接分成三个
https://finance.yahoo.com/quote/WMT/financials?p=WMT https://finance.yahoo.com/quote/WMT/financials?p=WMT
...@@ -38,311 +41,334 @@ create_time ...@@ -38,311 +41,334 @@ create_time
""" """
class YahooCaiwu(object):
def __init__(self):
self.config = configparser.ConfigParser()
# 读取配置文件
self.config.read('config.ini')
baseCore=BaseCore()
self.logger=baseCore.getLogger()
self.r = redis.Redis(host=self.config.get('redis', 'host'),
port=self.config.get('redis', 'port'),
password=self.config.get('redis', 'pass'), db=6)
self.driver=self.get_webdriver()
# 雅虎财经处理表格 # 雅虎财经处理表格
def deal_table(doc_resp): def deal_table(self,doc_resp):
all_dict = {} all_dict = {}
resp1_table = doc_resp('#Col1-1-Financials-Proxy section div:nth-child(3)>div>div').children() resp1_table = doc_resp('#Col1-1-Financials-Proxy section div:nth-child(3)>div>div').children()
catalogue_title = pq(resp1_table[0]).text().split('\n')
doc_items = pq(resp1_table[1]).children()
if len(doc_items)<1:
resp1_table = doc_resp('#Col1-1-Financials-Proxy section div:nth-child(4)>div>div').children()
catalogue_title = pq(resp1_table[0]).text().split('\n') catalogue_title = pq(resp1_table[0]).text().split('\n')
doc_items = pq(resp1_table[1]).children() doc_items = pq(resp1_table[1]).children()
catalogue_dict = {} if len(doc_items)<1:
content_dict = {} resp1_table = doc_resp('#Col1-1-Financials-Proxy section div:nth-child(4)>div>div').children()
for doc_item in doc_items: catalogue_title = pq(resp1_table[0]).text().split('\n')
if pq(doc_item).text() == '': doc_items = pq(resp1_table[1]).children()
continue catalogue_dict = {}
a = pq(pq(doc_item).children()[0]).text().split('\n')[0] content_dict = {}
a_list = pq(pq(doc_item).children()[0]).text().split('\n')[1:] for doc_item in doc_items:
content_dict[a] = a_list if pq(doc_item).text() == '':
b_dict = {}
for doc_item1 in pq(doc_item).children()[1]:
b = pq(pq(doc_item1).children()[0]).text().split('\n')[0]
if not b:
continue continue
b_list = pq(pq(doc_item1).children()[0]).text().split('\n')[1:] a = pq(pq(doc_item).children()[0]).text().split('\n')[0]
content_dict[b] = b_list a_list = pq(pq(doc_item).children()[0]).text().split('\n')[1:]
c_dict = {} content_dict[a] = a_list
for doc_item2 in pq(doc_item1).children()[1]: b_dict = {}
c = pq(pq(doc_item2).children()[0]).text().split('\n')[0] for doc_item1 in pq(doc_item).children()[1]:
if not c: b = pq(pq(doc_item1).children()[0]).text().split('\n')[0]
if not b:
continue continue
c_list = pq(pq(doc_item2).children()[0]).text().split('\n')[1:] b_list = pq(pq(doc_item1).children()[0]).text().split('\n')[1:]
content_dict[c] = c_list content_dict[b] = b_list
d_dict = {} c_dict = {}
for doc_item3 in pq(doc_item2).children()[1]: for doc_item2 in pq(doc_item1).children()[1]:
d = pq(pq(doc_item3).children()[0]).text().split('\n')[0] c = pq(pq(doc_item2).children()[0]).text().split('\n')[0]
if not d: if not c:
continue continue
d_list = pq(pq(doc_item3).children()[0]).text().split('\n')[1:] c_list = pq(pq(doc_item2).children()[0]).text().split('\n')[1:]
content_dict[d] = d_list content_dict[c] = c_list
e_dict = {} d_dict = {}
for doc_item4 in pq(doc_item3).children()[1]: for doc_item3 in pq(doc_item2).children()[1]:
e = pq(pq(doc_item4).children()[0]).text().split('\n')[0] d = pq(pq(doc_item3).children()[0]).text().split('\n')[0]
if not e: if not d:
continue continue
e_list = pq(pq(doc_item4).children()[0]).text().split('\n')[1:] d_list = pq(pq(doc_item3).children()[0]).text().split('\n')[1:]
content_dict[e] = e_list content_dict[d] = d_list
f_dict = {} e_dict = {}
for doc_item5 in pq(doc_item4).children()[1]: for doc_item4 in pq(doc_item3).children()[1]:
f = pq(pq(doc_item5).children()[0]).text().split('\n')[0] e = pq(pq(doc_item4).children()[0]).text().split('\n')[0]
if not f: if not e:
continue continue
f_list = pq(pq(doc_item5).children()[0]).text().split('\n')[1:] e_list = pq(pq(doc_item4).children()[0]).text().split('\n')[1:]
content_dict[f] = f_list content_dict[e] = e_list
g_dict = {} f_dict = {}
for doc_item6 in pq(doc_item5).children()[1]: for doc_item5 in pq(doc_item4).children()[1]:
g = pq(pq(doc_item6).children()[0]).text().split('\n')[0] f = pq(pq(doc_item5).children()[0]).text().split('\n')[0]
if not g: if not f:
continue continue
g_list = pq(pq(doc_item6).children()[0]).text().split('\n')[1:] f_list = pq(pq(doc_item5).children()[0]).text().split('\n')[1:]
content_dict[g] = g_list content_dict[f] = f_list
g_dict[g] = {} g_dict = {}
f_dict[f] = g_dict for doc_item6 in pq(doc_item5).children()[1]:
e_dict[e] = f_dict g = pq(pq(doc_item6).children()[0]).text().split('\n')[0]
d_dict[d] = e_dict if not g:
c_dict[c] = d_dict continue
b_dict[b] = c_dict g_list = pq(pq(doc_item6).children()[0]).text().split('\n')[1:]
catalogue_dict[a] = b_dict content_dict[g] = g_list
all_dict['表头'] = catalogue_title g_dict[g] = {}
all_dict['目录'] = catalogue_dict f_dict[f] = g_dict
all_dict['内容'] = content_dict e_dict[e] = f_dict
return all_dict d_dict[d] = e_dict
c_dict[c] = d_dict
b_dict[b] = c_dict
catalogue_dict[a] = b_dict
all_dict['表头'] = catalogue_title
all_dict['目录'] = catalogue_dict
all_dict['内容'] = content_dict
return all_dict
def get_webdriver(): def get_webdriver(self):
chrome_options = webdriver.ChromeOptions() chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--disable-gpu') chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--ignore-certificate-errors') chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation']) chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_options.add_argument("--disable-blink-features=AutomationControlled") chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--start-maximized") chrome_options.add_argument("--start-maximized")
chrome_options.binary_location = r"D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe" # chrome_options.add_argument('--headless')
# chrome_options.add_argument('--headless') chrome_options.binary_location = self.config.get('selenium', 'binary_location')
executable_path = r'D:\crawler\baidu_crawler\baidu_crawler1\cmd100\chromedriver.exe' executable_path =self.config.get('selenium', 'chrome_driver')
# 创建Chrome浏览器选项对象 driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
# 设置浏览器应用程序路径 return driver
driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
return driver def conn11(self):
# 雅虎财经 conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
def get_content2(): charset='utf8')
driver=get_webdriver() cursor = conn.cursor()
conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project', return conn,cursor
charset='utf8')
cursor = conn.cursor() def getCodeFromRedis(self):
#todo:新的接口需要用信用代码 securitiescode=self.r.lpop('NoticeEnterprise:securities_code')
# sql1 = """SELECT id, stock_code, content from config_finance_data_sync WHERE origin_type = 1""" # and stock_code = "SYNH" securitiescode = securitiescode.decode('utf-8')
sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where category in ('4') """ # and stock_code = "SYNH" return securitiescode
# sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where securities_code in ('005930.KS','1802.T','1803.T','028260.KS') """ # and stock_code = "SYNH" # 雅虎财经
cursor.execute(sql1) def get_content2(self,securitiescode):
result_data = cursor.fetchall() self.logger.info(f"需要采集的股票代码{securitiescode}")
for data in result_data: conn,cursor=self.conn11()
try: try:
data_list = list(data) # sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where category in ('4') """ # and stock_code = "SYNH"
print(data_list) sql1 = f"select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where securities_code='{securitiescode}' " # and stock_code = "SYNH"
social_credit_code = data_list[0] cursor.execute(sql1)
stock = data_list[1] result_data = cursor.fetchall()
securities_short_name = data_list[2] if data_list[2] is not None else "" except Exception as e:
# content_sql = '' self.logger.info("数据查询异常!")
stock2=str(stock)
if stock2.upper().endswith("HK") and stock2.upper().startswith("0") : for data in result_data:
stock2=stock2[1:]
url = f'https://finance.yahoo.com/quote/{stock2}/financials?p={stock2}'
try: try:
print(f'正在采集:{url}') data_list = list(data)
driver.get(url) print(data_list)
# 等待页面加载完成 social_credit_code = data_list[0]
wait = WebDriverWait(driver, 300) stock = data_list[1]
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body"))) securities_short_name = data_list[2] if data_list[2] is not None else ""
time.sleep(2) # content_sql = ''
stock2=str(stock)
if stock2.upper().endswith("HK") and stock2.upper().startswith("0") :
stock2=stock2[1:]
url = f'https://finance.yahoo.com/quote/{stock2}/financials?p={stock2}'
try: try:
# driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click() self.logger.info(f'正在采集:{url}')
driver.find_element(By.XPATH,'//div[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span[text()="Expand All"]').click() self.driver.get(url)
wait = WebDriverWait(driver, 60) # 等待页面加载完成
wait = WebDriverWait(self.driver, 300)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body"))) wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
time.sleep(2) time.sleep(2)
except Exception as e: try:
print(e) # driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
pass self.driver.find_element(By.XPATH,'//div[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span[text()="Expand All"]').click()
doc_resp1 = pq(driver.page_source) wait = WebDriverWait(self.driver, 60)
unit=get_unit(doc_resp1) wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
financials1 = deal_table(doc_resp1) time.sleep(2)
driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click() except Exception as e:
time.sleep(5) print(e)
try: pass
driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click() doc_resp1 = pq(self.driver.page_source)
unit=self.get_unit(doc_resp1)
financials1 = self.deal_table(doc_resp1)
self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
time.sleep(5) time.sleep(5)
except Exception as e: try:
print(e) self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
pass time.sleep(5)
doc_resp2 = pq(driver.page_source) except Exception as e:
financials2 = deal_table(doc_resp2) print(e)
driver.find_element(By.XPATH,'//div/span[text()="Balance Sheet"]').click() pass
time.sleep(5) doc_resp2 = pq(self.driver.page_source)
try: #//*[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span financials2 = self.deal_table(doc_resp2)
driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click() self.driver.find_element(By.XPATH,'//div/span[text()="Balance Sheet"]').click()
time.sleep(5) time.sleep(5)
except Exception as e: try: #//*[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span
print(e) self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
pass time.sleep(5)
doc_resp3 = pq(driver.page_source) except Exception as e:
financials3 = deal_table(doc_resp3) print(e)
driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click() pass
time.sleep(5) doc_resp3 = pq(self.driver.page_source)
try: financials3 = self.deal_table(doc_resp3)
driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click() self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
time.sleep(5) time.sleep(5)
except Exception as e: try:
print(e) self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
pass time.sleep(5)
doc_resp4 = pq(driver.page_source) except Exception as e:
financials4 = deal_table(doc_resp4) print(e)
driver.find_element(By.XPATH,'//div/span[text()="Cash Flow"]').click() pass
time.sleep(5) doc_resp4 = pq(self.driver.page_source)
try: financials4 = self.deal_table(doc_resp4)
driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click() self.driver.find_element(By.XPATH,'//div/span[text()="Cash Flow"]').click()
time.sleep(5) time.sleep(5)
except Exception as e: try:
print(e) self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
pass time.sleep(5)
doc_resp5 = pq(driver.page_source) except Exception as e:
financials5 = deal_table(doc_resp5) print(e)
driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click() pass
time.sleep(5) doc_resp5 = pq(self.driver.page_source)
try: financials5 = self.deal_table(doc_resp5)
driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click() self.driver.find_element(By.XPATH,'//div/span[text()="Quarterly"]').click()
time.sleep(5) time.sleep(5)
except Exception as e: try:
print(e) self.driver.find_element(By.XPATH,'//div/span[text()="Expand All"]').click()
pass time.sleep(5)
doc_resp6 = pq(driver.page_source) except Exception as e:
financials6 = deal_table(doc_resp6) print(e)
financials_dict = { pass
'表1': financials1, doc_resp6 = pq(self.driver.page_source)
'表2': financials2, financials6 = self.deal_table(doc_resp6)
'表3': financials3, financials_dict = {
'表4': financials4, '表1': financials1,
'表5': financials5, '表2': financials2,
'表6': financials6, '表3': financials3,
} '表4': financials4,
mu_lus = '' '表5': financials5,
for i in range(1, 7): '表6': financials6,
mu_lu = financials_dict[f'表{i}']['目录'] }
mu_lu = json.dumps(mu_lu, ensure_ascii=False, indent=4) mu_lus = ''
mu_lus += mu_lu + '&&&&' for i in range(1, 7):
level_relation = mu_lus[:-4] mu_lu = financials_dict[f'表{i}']['目录']
financials = '' mu_lu = json.dumps(mu_lu, ensure_ascii=False, indent=4)
for i in range(1, 7): mu_lus += mu_lu + '&&&&'
a_list = financials_dict[f'表{i}']['表头'] level_relation = mu_lus[:-4]
for a in a_list: financials = ''
financials += a + '\n' for i in range(1, 7):
b_dict = financials_dict[f'表{i}']['内容'] a_list = financials_dict[f'表{i}']['表头']
for key, values in b_dict.items(): for a in a_list:
financials += key + '\n' financials += a + '\n'
for b in values: b_dict = financials_dict[f'表{i}']['内容']
financials += b + '\n' for key, values in b_dict.items():
financials += '&&&&' + '\n' financials += key + '\n'
financials = financials.strip() for b in values:
content = financials[:-4].strip().replace('\n&&&&\n', '&&&&') financials += b + '\n'
financials += '&&&&' + '\n'
financials = financials.strip()
content = financials[:-4].strip().replace('\n&&&&\n', '&&&&')
# if content[:100] in str(content_sql).replace("\\n","\n"): # if content[:100] in str(content_sql).replace("\\n","\n"):
# print(f"{orc_id}:无最新数据") # print(f"{orc_id}:无最新数据")
# continue # continue
# sql = "UPDATE config_finance_data_sync SET level_relation=%s, content=%s WHERE ID = %s" # sql = "UPDATE config_finance_data_sync SET level_relation=%s, content=%s WHERE ID = %s"
# val = (level_relation, content, orc_id) # val = (level_relation, content, orc_id)
# cursor.execute(sql, val) # cursor.execute(sql, val)
# conn.commit() # conn.commit()
ipo_data={ ipo_data={
'social_credit_code':social_credit_code, 'social_credit_code':social_credit_code,
'stock':stock, 'stock':stock,
'securities_short_name':securities_short_name, 'securities_short_name':securities_short_name,
'content':content, 'content':content,
'level_relation':level_relation, 'level_relation':level_relation,
'unit':unit, 'unit':unit,
'origin_type':1 'origin_type':1
} }
flag=dataToSql(conn,cursor,ipo_data) flag=self.dataToSql(conn,cursor,ipo_data)
if flag: if flag:
#todo:更换接口,需要用信用代码 social_credit_code # get_url = f'http://192.168.1.49:8088/sync/finance/yh?securitiesCode={stock}'
# get_url = f'http://192.168.1.49:8088/sync/finance/yh?securitiesCode={stock}' get_url = f'http://114.115.236.206:8088/sync/finance/yh?securitiesCode={stock}'
get_url = f'http://114.115.236.206:8088/sync/finance/yh?securitiesCode={stock}' try:
try: resp = requests.get(get_url)
resp = requests.get(get_url) print(resp.text)
print(resp.text) self.logger.info('调用接口成功!!')
print('调用接口成功!!') except:
except: with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f:
with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f: f.write(stock + '\n')
f.write(stock + '\n') except Exception as e:
except Exception as e: print(e)
print(e) self.logger.info(f'采集:{url}失败')
print(f'采集:{url}失败') self.driver.quit()
driver.quit() time.sleep(10)
self.driver=self.get_webdriver()
except:
# time.sleep(60 * 10)
self.driver.quit()
time.sleep(10) time.sleep(10)
driver=get_webdriver() driver=self.get_webdriver()
except: self.logger.info('出错,重试中!')
# time.sleep(60 * 10) continue
driver.quit() # driver.close()
time.sleep(10)
driver=get_webdriver()
print('出错,重试中!')
continue
driver.close()
def dataToSql(conn,cursor,ipo_data):
    """Insert or update one financial-data record in ``config_finance_data_sync``.

    The row is looked up by ``stock_code``. If a row exists, its
    ``social_credit_code``, ``content``, ``level_relation`` and ``origin_type``
    columns are overwritten; otherwise a new row is inserted (which also
    stores ``unit``). The transaction is committed on success.

    Args:
        conn: Open DB-API connection; must provide ``commit()``. ``rollback()``
            is called on failure when available.
        cursor: Cursor belonging to ``conn`` using the ``%s`` parameter style.
        ipo_data: Mapping with the keys ``social_credit_code``, ``stock``,
            ``content``, ``level_relation``, ``unit`` and ``origin_type``.

    Returns:
        True when the row was written and committed. False when ``unit``,
        ``content`` or ``level_relation`` is empty, when a required key is
        missing, or when any database call raises.
    """
    # Function-scope import: the file's top-level import block is not visible
    # from this block, so the dependency is kept local.
    import logging

    try:
        social_credit_code = ipo_data['social_credit_code']
        stock = str(ipo_data['stock'])
        content = ipo_data['content']
        level_relation = ipo_data['level_relation']
        unit = ipo_data['unit']
        origin_type = ipo_data['origin_type']

        # Refuse to store an incomplete record.
        if not unit or not content or not level_relation:
            return False

        # Existence check. The stock code is passed as a bound parameter so a
        # quote character in it can neither break nor alter the statement.
        select_sql = "SELECT 1 FROM config_finance_data_sync WHERE stock_code=%s LIMIT 1"
        cursor.execute(select_sql, (stock,))
        if cursor.fetchone():
            # Row already exists: refresh it in place.
            update_param = (social_credit_code, content, level_relation, origin_type, stock)
            update_sql = "UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s WHERE stock_code=%s "
            cursor.execute(update_sql, update_param)
        else:
            # No row yet: create one.
            insert_param = (social_credit_code, content, level_relation, unit, stock, origin_type)
            insert_sql = "INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,unit,stock_code,origin_type) VALUES ( %s, %s, %s, %s, %s, %s)"
            cursor.execute(insert_sql, insert_param)
        conn.commit()
    except Exception:
        # Callers only look at the boolean result, so keep the best-effort
        # contract, but leave a trace and do not leave a transaction open.
        logging.getLogger(__name__).exception('dataToSql failed')
        try:
            conn.rollback()
        except Exception:
            # The connection itself may be unusable; nothing more to do here.
            pass
        return False
    return True
result = re.findall(r'(?<=Currency in\s).*', currency)
def get_unit(doc_resp):
    """Extract the currency unit shown in the quote header of a parsed page.

    Args:
        doc_resp: Callable document object that accepts a CSS selector and
            returns the matching elements (presumably a PyQuery document —
            confirm against the caller).

    Returns:
        The text following ``"Currency in "`` with the suffix ``'(千)'``
        ("thousands") appended. If the header text does not contain
        ``"Currency in"`` the header text is returned unchanged. An empty
        string is returned when the element is missing, nothing follows the
        marker, or any other error occurs.
    """
    try:
        header_spans = doc_resp('#quote-header-info >div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span')
        currency = pq(header_spans[0]).text()
        if 'Currency in' in currency:
            # One anchored search; the no-match case is handled explicitly
            # instead of relying on an IndexError reaching the except below.
            match = re.search(r'Currency in\s(.*)', currency)
            if match is None:
                return ''
            currency = match.group(1) + '(千)'
    except Exception:
        # Best-effort: a page without the expected header yields no unit.
        currency = ''
    return currency
if __name__ == '__main__': if __name__ == '__main__':
# parse_excel() # parse_excel()
#get_content1() #get_content1()
get_content2() yahoo=YahooCaiwu()
# conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project', while True:
# charset='utf8') try:
# cursor = conn.cursor() securitiescode=yahoo.getCodeFromRedis()
# ipo_data={'social_credit_code': 'ZZSN22080900000014', 'stock': '005930.KS', 'securities_short_name': '', 'content': "Breakdown\nttm\n12/30/2022\n12/30/2021\n12/30/2020\n12/30/2019\nTotal Revenue\n302,231,360,000\n302,231,360,000\n279,604,799,000\n236,806,988,000\n230,400,881,000\nOperating Revenue\n302,231,360,000\n302,231,360,000\n279,604,799,000\n236,806,988,000\n230,400,881,000\nCost of Revenue\n190,041,770,000\n190,041,770,000\n166,411,342,000\n144,488,296,000\n147,239,549,000\nGross Profit\n112,189,590,000\n112,189,590,000\n113,193,457,000\n92,318,692,000\n83,161,332,000\nOperating Expense\n68,812,960,000\n68,812,960,000\n61,559,601,000\n56,324,816,000\n55,392,823,000\nSelling General and Administrative\n33,560,956,000\n33,560,956,000\n29,528,020,000\n26,084,793,000\n27,022,180,000\nGeneral & Administrative Expense\n5,993,246,000\n5,993,246,000\n4,840,946,000\n4,688,270,000\n5,172,398,000\nSelling & Marketing Expense\n27,567,710,000\n27,567,710,000\n24,687,074,000\n26,084,793,000\n27,022,180,000\nResearch & Development\n24,919,198,000\n24,919,198,000\n22,401,726,000\n21,111,490,000\n19,907,236,000\nOperating Income\n43,376,630,000\n43,376,630,000\n51,633,856,000\n35,993,876,000\n27,768,509,000\nNet Non Operating Interest Income Expense\n1,957,464,000\n1,957,464,000\n846,738,000\n1,391,445,000\n1,973,668,000\nInterest Income Non Operating\n2,720,479,000\n2,720,479,000\n1,278,278,000\n1,974,458,000\n2,660,024,000\nInterest Expense Non Operating\n763,015,000\n763,015,000\n431,540,000\n583,013,000\n686,356,000\nPretax Income\n46,440,474,000\n46,440,474,000\n53,351,827,000\n36,345,117,000\n30,432,189,000\nTax Provision\n-9,213,603,000\n-9,213,603,000\n13,444,377,000\n9,937,285,000\n8,693,324,000\nNet Income Common Stockholders\n54,730,018,000\n54,730,018,000\n39,243,791,000\n22,929,390,000\n18,899,137,000\nNet Income\n54,730,018,000\n54,730,018,000\n39,243,791,000\n26,090,846,000\n21,505,054,000\nNet Income Including Non-Controlling 
Interests\n55,654,077,000\n55,654,077,000\n39,907,450,000\n26,407,832,000\n21,738,865,000\nNet Income Continuous Operations\n55,654,077,000\n55,654,077,000\n39,907,450,000\n26,407,832,000\n21,738,865,000\nMinority Interests\n-924,059,000\n-924,059,000\n-663,659,000\n-316,986,000\n-233,811,000\nPreferred Stock Dividends\n-\n-\n-\n3,161,456,000\n2,605,917,000\nOtherunder Preferred Stock Dividend\n-\n-\n-\n0\n0\nDiluted NI Available to Com Stockholders\n-\n-\n-\n22,929,390,000\n18,899,137,000\nBasic EPS\n-\n8.06k\n5.78k\n3.84k\n3.17k\nDiluted EPS\n-\n8.06k\n5.78k\n3.84k\n3.17k\nBasic Average Shares\n-\n5,969,783\n6,793,109\n6,792,722\n6,792,500\nDiluted Average Shares\n-\n5,969,783\n6,793,109\n6,792,722\n6,792,500\nTotal Operating Income as Reported\n43,376,630,000\n43,376,630,000\n51,633,856,000\n35,993,876,000\n27,768,509,000\nTotal Expenses\n258,854,730,000\n258,854,730,000\n227,970,943,000\n200,813,112,000\n202,632,372,000\nNet Income from Continuing & Discontinued Operation\n54,730,018,000\n54,730,018,000\n39,243,791,000\n26,090,846,000\n21,505,054,000\nNormalized Income\n54,785,448,400\n54,785,448,400\n39,284,034,434\n25,897,644,758\n21,359,049,769\nInterest Income\n2,720,479,000\n2,720,479,000\n1,278,278,000\n1,974,458,000\n2,660,024,000\nInterest Expense\n763,015,000\n763,015,000\n431,540,000\n583,013,000\n686,356,000\nNet Interest Income\n1,957,464,000\n1,957,464,000\n846,738,000\n1,391,445,000\n1,973,668,000\nEBIT\n47,203,489,000\n47,203,489,000\n53,783,367,000\n36,928,130,000\n31,118,545,000\nEBITDA\n86,311,148,000\n-\n-\n-\n-\nReconciled Cost of Revenue\n190,041,770,000\n190,041,770,000\n166,411,342,000\n144,488,296,000\n147,239,549,000\nReconciled Depreciation\n39,107,659,000\n39,107,659,000\n34,247,361,000\n30,335,616,000\n29,597,638,000\nNet Income from Continuing Operation Net Minority Interest\n54,730,018,000\n54,730,018,000\n39,243,791,000\n26,090,846,000\n21,505,054,000\nTotal Unusual Items Excluding 
Goodwill\n-92,384,000\n-92,384,000\n-53,801,000\n265,903,000\n204,391,000\nTotal Unusual Items\n-92,384,000\n-92,384,000\n-53,801,000\n265,903,000\n204,391,000\nNormalized EBITDA\n86,403,532,000\n86,403,532,000\n88,084,529,000\n66,997,843,000\n60,511,792,000\nTax Rate for Calcs\n0\n0\n0\n0\n0\nTax Effect of Unusual Items\n-36,953,600\n-36,953,600\n-13,557,566\n72,701,758\n58,386,769&&&&Breakdown\nttm\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Revenue\n302,231,360,000\n63,745,371,000\n70,464,575,000\n76,781,680,000\n77,203,607,000\nOperating Revenue\n302,231,360,000\n63,745,371,000\n70,464,575,000\n76,781,680,000\n77,203,607,000\nCost of Revenue\n190,041,770,000\n46,007,093,000\n48,627,728,000\n48,072,237,000\n46,269,748,000\nGross Profit\n112,189,590,000\n17,738,278,000\n21,836,847,000\n28,709,443,000\n30,933,859,000\nOperating Expense\n68,812,960,000\n17,098,100,000\n17,530,716,000\n17,857,398,000\n16,836,814,000\nSelling General and Administrative\n33,560,956,000\n7,804,860,000\n8,474,365,000\n8,961,809,000\n8,078,864,000\nGeneral & Administrative Expense\n5,993,246,000\n1,446,500,000\n1,627,015,000\n1,576,562,000\n1,516,418,000\nSelling & Marketing Expense\n27,567,710,000\n6,358,360,000\n6,847,350,000\n7,385,247,000\n6,562,446,000\nResearch & Development\n24,919,198,000\n6,578,734,000\n6,472,511,000\n6,269,581,000\n6,254,874,000\nOperating Income\n43,376,630,000\n640,178,000\n4,306,131,000\n10,852,045,000\n14,097,045,000\nNet Non Operating Interest Income Expense\n1,957,464,000\n768,769,000\n790,554,000\n582,979,000\n364,130,000\nInterest Income Non Operating\n2,720,479,000\n1,060,446,000\n1,064,811,000\n799,442,000\n487,194,000\nInterest Expense Non Operating\n763,015,000\n291,677,000\n274,257,000\n216,463,000\n123,064,000\nTotal Other Finance Cost\n-\n-\n-\n-657,417,000\n-240,906,000\nPretax Income\n46,440,474,000\n1,826,380,000\n5,054,918,000\n11,854,958,000\n14,460,758,000\nTax 
Provision\n-9,213,603,000\n251,780,000\n-18,786,528,000\n2,465,760,000\n3,361,953,000\nNet Income Common Stockholders\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nNet Income\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nNet Income Including Non-Controlling Interests\n55,654,077,000\n1,574,600,000\n23,841,446,000\n9,389,198,000\n11,098,805,000\nNet Income Continuous Operations\n55,654,077,000\n1,574,600,000\n23,841,446,000\n9,389,198,000\n11,098,805,000\nMinority Interests\n-924,059,000\n-173,348,000\n-338,937,000\n-245,298,000\n-144,290,000\nBasic EPS\n-\n206.00\n-\n1.35k\n1.61k\nDiluted EPS\n-\n206.00\n-\n1.35k\n1.61k\nBasic Average Shares\n-\n5,969,783\n-\n5,969,783\n5,969,783\nDiluted Average Shares\n-\n5,969,783\n-\n5,969,783\n5,969,783\nTotal Operating Income as Reported\n43,376,630,000\n640,178,000\n4,306,131,000\n10,852,045,000\n14,097,045,000\nTotal Expenses\n258,854,730,000\n63,105,193,000\n66,158,444,000\n65,929,635,000\n63,106,562,000\nNet Income from Continuing & Discontinued Operation\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nNormalized Income\n54,785,448,400\n1,305,451,577\n23,577,614,600\n9,179,200,500\n10,939,510,141\nInterest Income\n2,720,479,000\n1,060,446,000\n1,064,811,000\n799,442,000\n487,194,000\nInterest Expense\n763,015,000\n291,677,000\n274,257,000\n216,463,000\n123,064,000\nNet Interest Income\n1,957,464,000\n768,769,000\n790,554,000\n582,979,000\n364,130,000\nEBIT\n47,203,489,000\n2,118,057,000\n5,329,175,000\n12,071,421,000\n14,583,822,000\nEBITDA\n86,311,148,000\n-\n-\n-\n-\nReconciled Cost of Revenue\n190,041,770,000\n46,007,093,000\n48,627,728,000\n48,072,237,000\n46,269,748,000\nReconciled Depreciation\n39,107,659,000\n9,586,493,000\n9,649,058,000\n9,837,727,000\n9,841,366,000\nNet Income from Continuing Operation Net Minority Interest\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nTotal Unusual 
Items Excluding Goodwill\n-92,384,000\n111,119,000\n-125,176,000\n-44,571,000\n19,550,000\nTotal Unusual Items\n-92,384,000\n111,119,000\n-125,176,000\n-44,571,000\n19,550,000\nNormalized EBITDA\n86,403,532,000\n11,593,431,000\n15,103,409,000\n21,953,719,000\n24,405,638,000\nTax Rate for Calcs\n0\n0\n0\n0\n0\nTax Effect of Unusual Items\n-36,953,600\n15,318,577\n-50,070,400\n-9,270,500\n4,545,141&&&&Breakdown\n12/30/2022\n12/30/2021\n12/30/2020\n12/30/2019\nTotal Assets\n448,424,507,000\n426,621,158,000\n378,235,718,000\n352,564,497,000\nCurrent Assets\n218,470,581,000\n218,163,185,000\n198,215,579,000\n181,385,260,000\nCash, Cash Equivalents & Short Term Investments\n115,227,286,000\n124,150,192,000\n124,652,843,000\n108,779,703,000\nCash And Cash Equivalents\n49,680,710,000\n39,031,415,000\n29,382,578,000\n26,885,999,000\nCash\n-\n-\n-\n32,861,000\nCash Equivalents\n49,680,710,000\n39,031,415,000\n29,382,578,000\n26,853,138,000\nOther Short Term Investments\n65,546,576,000\n85,118,777,000\n95,270,265,000\n81,893,704,000\nInventory\n52,187,866,000\n41,384,404,000\n32,043,145,000\n26,766,464,000\nRaw Materials\n16,268,974,000\n14,864,486,000\n10,837,169,000\n8,764,714,000\nWork in Process\n21,612,965,000\n13,967,331,000\n11,818,090,000\n9,886,634,000\nFinished Goods\n18,625,019,000\n14,445,566,000\n9,387,886,000\n8,115,116,000\nInventories Adjustments Allowances\n-4,319,092,000\n-1,892,979,000\n-1,324,492,000\n-1,424,906,000\nPrepaid Assets\n2,867,823,000\n2,336,252,000\n2,266,100,000\n3,833,053,000\nAssets Held for Sale Current\n-\n-\n929,432,000\n-\nOther Current Assets\n6,316,834,000\n5,081,665,000\n7,359,001,000\n6,874,697,000\nTotal non-current assets\n229,953,926,000\n208,457,973,000\n180,020,139,000\n171,179,237,000\nNet PPE\n168,045,388,000\n149,928,539,000\n128,952,892,000\n119,825,474,000\nGross PPE\n427,595,058,000\n377,471,994,000\n328,606,313,000\n304,075,769,000\nProperties\n0\n0\n0\n0\nLand And 
Improvements\n10,024,569,000\n9,943,570,000\n9,850,942,000\n9,828,309,000\nBuildings And Improvements\n67,713,808,000\n62,651,459,000\n55,026,369,000\n48,839,439,000\nMachinery Furniture Equipment\n303,000,627,000\n274,909,571,000\n233,056,501,000\n211,416,021,000\nOther Properties\n13,248,490,000\n11,958,070,000\n10,496,584,000\n10,061,981,000\nConstruction in Progress\n33,607,564,000\n18,009,324,000\n20,175,917,000\n23,930,019,000\nAccumulated Depreciation\n-259,549,670,000\n-227,543,455,000\n-199,653,421,000\n-184,250,295,000\nGoodwill And Other Intangible Assets\n20,217,754,000\n20,236,244,000\n18,468,502,000\n20,703,504,000\nGoodwill\n6,014,422,000\n5,844,259,000\n5,673,642,000\n6,250,439,000\nOther Intangible Assets\n14,203,332,000\n14,391,985,000\n12,794,860,000\n14,453,065,000\nDefined Pension Benefit\n5,851,972,000\n2,809,590,000\n1,355,502,000\n589,832,000\nOther Non Current Assets\n6,012,671,000\n5,571,099,000\n5,113,279,000\n7,994,050,000\nTotal Liabilities Net Minority Interest\n93,674,903,000\n121,721,227,000\n102,287,702,000\n89,684,076,000\nCurrent Liabilities\n78,344,852,000\n88,117,133,000\n75,604,351,000\n63,782,764,000\nCurrent Provisions\n5,844,907,000\n5,372,872,000\n4,349,563,000\n4,068,627,000\nCurrent Debt And Capital Lease Obligation\n6,236,477,000\n15,017,761,000\n17,269,528,000\n15,239,558,000\nCurrent Debt\n5,362,458,000\n14,205,859,000\n16,558,747,000\n14,393,468,000\nCurrent Capital Lease Obligation\n874,019,000\n811,902,000\n710,781,000\n805,068,000\nOther Current Liabilities\n1,951,354,000\n1,492,239,000\n1,127,719,000\n1,037,030,000\nTotal Non Current Liabilities Net Minority Interest\n15,330,051,000\n33,604,094,000\n26,683,351,000\n25,901,312,000\nLong Term Provisions\n1,928,518,000\n2,306,994,000\n1,051,428,000\n611,100,000\nLong Term Debt And Capital Lease Obligation\n4,096,765,000\n3,374,388,000\n2,947,853,000\n3,172,479,000\nLong Term Debt\n569,939,000\n509,732,000\n948,137,000\n975,298,000\nLong Term Capital Lease 
Obligation\n3,526,826,000\n2,864,656,000\n1,999,716,000\n2,197,181,000\nTradeand Other Payables Non Current\n2,753,305,000\n2,991,440,000\n1,682,910,000\n2,184,249,000\nOther Non Current Liabilities\n1,171,761,000\n1,267,183,000\n1,725,857,000\n2,408,896,000\nTotal Equity Gross Minority Interest\n354,749,604,000\n304,899,931,000\n275,948,016,000\n262,880,421,000\nStockholders' Equity\n345,186,142,000\n296,237,697,000\n267,670,331,000\n254,915,472,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n897,514,000\n897,514,000\nAdditional Paid in Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n337,946,407,000\n293,064,763,000\n271,068,211,000\n254,582,894,000\nOther Equity Interest\n-\n-\n26,726,000\n60,429,000\nMinority Interest\n9,563,462,000\n8,662,234,000\n8,277,685,000\n7,964,949,000\nTotal Capitalization\n345,756,081,000\n296,747,429,000\n268,618,468,000\n255,890,770,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n345,066,675,000\n296,118,230,000\n267,670,331,000\n254,915,472,000\nCapital Lease Obligations\n4,400,845,000\n3,676,558,000\n2,710,497,000\n2,197,181,000\nNet Tangible Assets\n324,968,388,000\n276,001,453,000\n249,201,829,000\n234,211,968,000\nWorking Capital\n140,125,729,000\n130,046,052,000\n122,611,228,000\n117,602,496,000\nInvested Capital\n350,999,072,000\n310,833,821,000\n285,177,215,000\n270,284,238,000\nTangible Book Value\n324,848,921,000\n275,881,986,000\n249,201,829,000\n234,211,968,000\nTotal Debt\n10,333,242,000\n18,392,149,000\n20,217,381,000\n18,412,037,000\nShare Issued\n6,792,669\n5,969,783\n5,969,783\n5,970,148\nOrdinary Shares Number\n6,792,669\n5,969,783\n5,969,783\n5,969,783\nPreferred Shares Number\n822,887\n822,887\n822,887\n822,887\nTreasury Shares 
Number\n0\n0\n0\n0&&&&Breakdown\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Assets\n454,091,777,000\n448,424,507,000\n470,278,409,000\n448,040,650,000\nCurrent Assets\n214,442,141,000\n218,470,581,000\n250,880,637,000\n236,287,491,000\nCash, Cash Equivalents & Short Term Investments\n108,182,472,000\n115,227,286,000\n128,816,099,000\n125,320,664,000\nCash And Cash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nCash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nOther Short Term Investments\n35,233,095,000\n65,546,576,000\n84,300,678,000\n85,737,523,000\nInventory\n54,419,586,000\n52,187,866,000\n57,319,848,000\n52,092,241,000\nRaw Materials\n16,195,959,000\n16,268,974,000\n20,342,418,000\n18,865,194,000\nWork in Process\n25,120,646,000\n21,612,965,000\n19,542,019,000\n16,005,040,000\nFinished Goods\n19,498,692,000\n18,625,019,000\n21,498,093,000\n20,192,551,000\nInventories Adjustments Allowances\n-6,395,711,000\n-4,319,092,000\n-4,062,682,000\n-2,970,544,000\nPrepaid Assets\n3,416,279,000\n2,867,823,000\n3,837,266,000\n3,141,748,000\nOther Current Assets\n6,063,246,000\n6,316,834,000\n7,514,181,000\n6,016,127,000\nTotal non-current assets\n239,649,636,000\n229,953,926,000\n219,397,772,000\n211,753,159,000\nNet PPE\n171,857,516,000\n168,045,388,000\n160,343,568,000\n154,254,576,000\nGross PPE\n171,857,516,000\n427,595,058,000\n160,343,568,000\n154,254,576,000\nProperties\n-\n0\n-\n-\nLand And Improvements\n-\n10,024,569,000\n-\n-\nBuildings And Improvements\n-\n67,713,808,000\n-\n-\nMachinery Furniture Equipment\n-\n303,000,627,000\n-\n-\nOther Properties\n171,857,516,000\n13,248,490,000\n160,343,568,000\n154,254,576,000\nConstruction in Progress\n-\n33,607,564,000\n-\n-\nAccumulated Depreciation\n-\n-259,549,670,000\n-\n-\nGoodwill And Other Intangible Assets\n23,617,703,000\n20,217,754,000\n21,484,857,000\n20,096,926,000\nGoodwill\n-\n6,014,422,000\n-\n-\nOther Intangible 
Assets\n23,617,703,000\n14,203,332,000\n21,484,857,000\n20,096,926,000\nDefined Pension Benefit\n5,268,877,000\n5,851,972,000\n1,805,994,000\n2,131,474,000\nOther Non Current Assets\n7,935,941,000\n6,012,671,000\n6,635,316,000\n6,780,128,000\nTotal Liabilities Net Minority Interest\n94,292,361,000\n93,674,903,000\n125,371,520,000\n120,133,986,000\nCurrent Liabilities\n76,057,448,000\n78,344,852,000\n85,285,669,000\n83,362,268,000\nCurrent Provisions\n7,011,788,000\n5,844,907,000\n5,965,963,000\n5,995,790,000\nCurrent Debt And Capital Lease Obligation\n5,640,162,000\n6,236,477,000\n8,671,917,000\n13,941,999,000\nCurrent Debt\n4,692,764,000\n5,362,458,000\n7,766,222,000\n13,093,570,000\nCurrent Capital Lease Obligation\n947,398,000\n874,019,000\n905,695,000\n848,429,000\nOther Current Liabilities\n1,895,698,000\n1,951,354,000\n2,236,141,000\n1,859,476,000\nTotal Non Current Liabilities Net Minority Interest\n18,234,913,000\n15,330,051,000\n40,085,851,000\n36,771,718,000\nLong Term Provisions\n2,180,502,000\n1,928,518,000\n2,367,173,000\n2,278,231,000\nLong Term Debt And Capital Lease Obligation\n4,301,816,000\n4,096,765,000\n3,790,088,000\n3,496,991,000\nLong Term Debt\n4,301,816,000\n569,939,000\n622,640,000\n562,283,000\nLong Term Capital Lease Obligation\n-\n3,526,826,000\n3,167,448,000\n2,934,708,000\nTradeand Other Payables Non Current\n5,108,064,000\n2,753,305,000\n3,128,781,000\n2,871,992,000\nOther Non Current Liabilities\n1,771,822,000\n1,171,761,000\n1,327,785,000\n1,265,979,000\nTotal Equity Gross Minority Interest\n359,799,416,000\n354,749,604,000\n344,906,889,000\n327,906,664,000\nStockholders' Equity\n350,019,928,000\n345,186,142,000\n335,470,176,000\n318,830,612,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n778,047,000\n778,047,000\nAdditional Paid in 
Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n336,881,852,000\n337,946,407,000\n316,890,272,000\n310,216,785,000\nMinority Interest\n9,779,488,000\n9,563,462,000\n9,436,713,000\n9,076,052,000\nTotal Capitalization\n354,321,744,000\n345,756,081,000\n336,092,816,000\n319,392,895,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n349,900,461,000\n345,066,675,000\n335,350,709,000\n318,711,145,000\nCapital Lease Obligations\n947,398,000\n4,400,845,000\n4,073,143,000\n3,783,137,000\nNet Tangible Assets\n326,402,225,000\n324,968,388,000\n313,985,319,000\n298,733,686,000\nWorking Capital\n138,384,693,000\n140,125,729,000\n165,594,968,000\n152,925,223,000\nInvested Capital\n358,895,041,000\n350,999,072,000\n343,739,571,000\n332,366,998,000\nTangible Book Value\n326,282,758,000\n324,848,921,000\n313,865,852,000\n298,614,219,000\nTotal Debt\n9,941,978,000\n10,333,242,000\n12,462,005,000\n17,438,990,000\nShare Issued\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nOrdinary Shares Number\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nPreferred Shares Number\n0\n822,887\n822,887\n822,887\nTreasury Shares Number\n0\n0\n0\n0&&&&Breakdown\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Assets\n454,091,777,000\n448,424,507,000\n470,278,409,000\n448,040,650,000\nCurrent Assets\n214,442,141,000\n218,470,581,000\n250,880,637,000\n236,287,491,000\nCash, Cash Equivalents & Short Term Investments\n108,182,472,000\n115,227,286,000\n128,816,099,000\n125,320,664,000\nCash And Cash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nCash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nOther Short Term Investments\n35,233,095,000\n65,546,576,000\n84,300,678,000\n85,737,523,000\nInventory\n54,419,586,000\n52,187,866,000\n57,319,848,000\n52,092,241,000\nRaw Materials\n16,195,959,000\n16,268,974,000\n20,342,418,000\n18,865,194,000\nWork in 
Process\n25,120,646,000\n21,612,965,000\n19,542,019,000\n16,005,040,000\nFinished Goods\n19,498,692,000\n18,625,019,000\n21,498,093,000\n20,192,551,000\nInventories Adjustments Allowances\n-6,395,711,000\n-4,319,092,000\n-4,062,682,000\n-2,970,544,000\nPrepaid Assets\n3,416,279,000\n2,867,823,000\n3,837,266,000\n3,141,748,000\nOther Current Assets\n6,063,246,000\n6,316,834,000\n7,514,181,000\n6,016,127,000\nTotal non-current assets\n239,649,636,000\n229,953,926,000\n219,397,772,000\n211,753,159,000\nNet PPE\n171,857,516,000\n168,045,388,000\n160,343,568,000\n154,254,576,000\nGross PPE\n171,857,516,000\n427,595,058,000\n160,343,568,000\n154,254,576,000\nProperties\n-\n0\n-\n-\nLand And Improvements\n-\n10,024,569,000\n-\n-\nBuildings And Improvements\n-\n67,713,808,000\n-\n-\nMachinery Furniture Equipment\n-\n303,000,627,000\n-\n-\nOther Properties\n171,857,516,000\n13,248,490,000\n160,343,568,000\n154,254,576,000\nConstruction in Progress\n-\n33,607,564,000\n-\n-\nAccumulated Depreciation\n-\n-259,549,670,000\n-\n-\nGoodwill And Other Intangible Assets\n23,617,703,000\n20,217,754,000\n21,484,857,000\n20,096,926,000\nGoodwill\n-\n6,014,422,000\n-\n-\nOther Intangible Assets\n23,617,703,000\n14,203,332,000\n21,484,857,000\n20,096,926,000\nDefined Pension Benefit\n5,268,877,000\n5,851,972,000\n1,805,994,000\n2,131,474,000\nOther Non Current Assets\n7,935,941,000\n6,012,671,000\n6,635,316,000\n6,780,128,000\nTotal Liabilities Net Minority Interest\n94,292,361,000\n93,674,903,000\n125,371,520,000\n120,133,986,000\nCurrent Liabilities\n76,057,448,000\n78,344,852,000\n85,285,669,000\n83,362,268,000\nCurrent Provisions\n7,011,788,000\n5,844,907,000\n5,965,963,000\n5,995,790,000\nCurrent Debt And Capital Lease Obligation\n5,640,162,000\n6,236,477,000\n8,671,917,000\n13,941,999,000\nCurrent Debt\n4,692,764,000\n5,362,458,000\n7,766,222,000\n13,093,570,000\nCurrent Capital Lease Obligation\n947,398,000\n874,019,000\n905,695,000\n848,429,000\nOther Current 
Liabilities\n1,895,698,000\n1,951,354,000\n2,236,141,000\n1,859,476,000\nTotal Non Current Liabilities Net Minority Interest\n18,234,913,000\n15,330,051,000\n40,085,851,000\n36,771,718,000\nLong Term Provisions\n2,180,502,000\n1,928,518,000\n2,367,173,000\n2,278,231,000\nLong Term Debt And Capital Lease Obligation\n4,301,816,000\n4,096,765,000\n3,790,088,000\n3,496,991,000\nLong Term Debt\n4,301,816,000\n569,939,000\n622,640,000\n562,283,000\nLong Term Capital Lease Obligation\n-\n3,526,826,000\n3,167,448,000\n2,934,708,000\nTradeand Other Payables Non Current\n5,108,064,000\n2,753,305,000\n3,128,781,000\n2,871,992,000\nOther Non Current Liabilities\n1,771,822,000\n1,171,761,000\n1,327,785,000\n1,265,979,000\nTotal Equity Gross Minority Interest\n359,799,416,000\n354,749,604,000\n344,906,889,000\n327,906,664,000\nStockholders' Equity\n350,019,928,000\n345,186,142,000\n335,470,176,000\n318,830,612,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n778,047,000\n778,047,000\nAdditional Paid in Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n336,881,852,000\n337,946,407,000\n316,890,272,000\n310,216,785,000\nMinority Interest\n9,779,488,000\n9,563,462,000\n9,436,713,000\n9,076,052,000\nTotal Capitalization\n354,321,744,000\n345,756,081,000\n336,092,816,000\n319,392,895,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n349,900,461,000\n345,066,675,000\n335,350,709,000\n318,711,145,000\nCapital Lease Obligations\n947,398,000\n4,400,845,000\n4,073,143,000\n3,783,137,000\nNet Tangible Assets\n326,402,225,000\n324,968,388,000\n313,985,319,000\n298,733,686,000\nWorking Capital\n138,384,693,000\n140,125,729,000\n165,594,968,000\n152,925,223,000\nInvested Capital\n358,895,041,000\n350,999,072,000\n343,739,571,000\n332,366,998,000\nTangible Book 
Value\n326,282,758,000\n324,848,921,000\n313,865,852,000\n298,614,219,000\nTotal Debt\n9,941,978,000\n10,333,242,000\n12,462,005,000\n17,438,990,000\nShare Issued\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nOrdinary Shares Number\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nPreferred Shares Number\n0\n822,887\n822,887\n822,887\nTreasury Shares Number\n0\n0\n0\n0&&&&Breakdown\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Assets\n454,091,777,000\n448,424,507,000\n470,278,409,000\n448,040,650,000\nCurrent Assets\n214,442,141,000\n218,470,581,000\n250,880,637,000\n236,287,491,000\nCash, Cash Equivalents & Short Term Investments\n108,182,472,000\n115,227,286,000\n128,816,099,000\n125,320,664,000\nCash And Cash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nCash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nOther Short Term Investments\n35,233,095,000\n65,546,576,000\n84,300,678,000\n85,737,523,000\nInventory\n54,419,586,000\n52,187,866,000\n57,319,848,000\n52,092,241,000\nRaw Materials\n16,195,959,000\n16,268,974,000\n20,342,418,000\n18,865,194,000\nWork in Process\n25,120,646,000\n21,612,965,000\n19,542,019,000\n16,005,040,000\nFinished Goods\n19,498,692,000\n18,625,019,000\n21,498,093,000\n20,192,551,000\nInventories Adjustments Allowances\n-6,395,711,000\n-4,319,092,000\n-4,062,682,000\n-2,970,544,000\nPrepaid Assets\n3,416,279,000\n2,867,823,000\n3,837,266,000\n3,141,748,000\nOther Current Assets\n6,063,246,000\n6,316,834,000\n7,514,181,000\n6,016,127,000\nTotal non-current assets\n239,649,636,000\n229,953,926,000\n219,397,772,000\n211,753,159,000\nNet PPE\n171,857,516,000\n168,045,388,000\n160,343,568,000\n154,254,576,000\nGross PPE\n171,857,516,000\n427,595,058,000\n160,343,568,000\n154,254,576,000\nProperties\n-\n0\n-\n-\nLand And Improvements\n-\n10,024,569,000\n-\n-\nBuildings And Improvements\n-\n67,713,808,000\n-\n-\nMachinery Furniture Equipment\n-\n303,000,627,000\n-\n-\nOther 
Properties\n171,857,516,000\n13,248,490,000\n160,343,568,000\n154,254,576,000\nConstruction in Progress\n-\n33,607,564,000\n-\n-\nAccumulated Depreciation\n-\n-259,549,670,000\n-\n-\nGoodwill And Other Intangible Assets\n23,617,703,000\n20,217,754,000\n21,484,857,000\n20,096,926,000\nGoodwill\n-\n6,014,422,000\n-\n-\nOther Intangible Assets\n23,617,703,000\n14,203,332,000\n21,484,857,000\n20,096,926,000\nDefined Pension Benefit\n5,268,877,000\n5,851,972,000\n1,805,994,000\n2,131,474,000\nOther Non Current Assets\n7,935,941,000\n6,012,671,000\n6,635,316,000\n6,780,128,000\nTotal Liabilities Net Minority Interest\n94,292,361,000\n93,674,903,000\n125,371,520,000\n120,133,986,000\nCurrent Liabilities\n76,057,448,000\n78,344,852,000\n85,285,669,000\n83,362,268,000\nCurrent Provisions\n7,011,788,000\n5,844,907,000\n5,965,963,000\n5,995,790,000\nCurrent Debt And Capital Lease Obligation\n5,640,162,000\n6,236,477,000\n8,671,917,000\n13,941,999,000\nCurrent Debt\n4,692,764,000\n5,362,458,000\n7,766,222,000\n13,093,570,000\nCurrent Capital Lease Obligation\n947,398,000\n874,019,000\n905,695,000\n848,429,000\nOther Current Liabilities\n1,895,698,000\n1,951,354,000\n2,236,141,000\n1,859,476,000\nTotal Non Current Liabilities Net Minority Interest\n18,234,913,000\n15,330,051,000\n40,085,851,000\n36,771,718,000\nLong Term Provisions\n2,180,502,000\n1,928,518,000\n2,367,173,000\n2,278,231,000\nLong Term Debt And Capital Lease Obligation\n4,301,816,000\n4,096,765,000\n3,790,088,000\n3,496,991,000\nLong Term Debt\n4,301,816,000\n569,939,000\n622,640,000\n562,283,000\nLong Term Capital Lease Obligation\n-\n3,526,826,000\n3,167,448,000\n2,934,708,000\nTradeand Other Payables Non Current\n5,108,064,000\n2,753,305,000\n3,128,781,000\n2,871,992,000\nOther Non Current Liabilities\n1,771,822,000\n1,171,761,000\n1,327,785,000\n1,265,979,000\nTotal Equity Gross Minority Interest\n359,799,416,000\n354,749,604,000\n344,906,889,000\n327,906,664,000\nStockholders' 
Equity\n350,019,928,000\n345,186,142,000\n335,470,176,000\n318,830,612,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n778,047,000\n778,047,000\nAdditional Paid in Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n336,881,852,000\n337,946,407,000\n316,890,272,000\n310,216,785,000\nMinority Interest\n9,779,488,000\n9,563,462,000\n9,436,713,000\n9,076,052,000\nTotal Capitalization\n354,321,744,000\n345,756,081,000\n336,092,816,000\n319,392,895,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n349,900,461,000\n345,066,675,000\n335,350,709,000\n318,711,145,000\nCapital Lease Obligations\n947,398,000\n4,400,845,000\n4,073,143,000\n3,783,137,000\nNet Tangible Assets\n326,402,225,000\n324,968,388,000\n313,985,319,000\n298,733,686,000\nWorking Capital\n138,384,693,000\n140,125,729,000\n165,594,968,000\n152,925,223,000\nInvested Capital\n358,895,041,000\n350,999,072,000\n343,739,571,000\n332,366,998,000\nTangible Book Value\n326,282,758,000\n324,848,921,000\n313,865,852,000\n298,614,219,000\nTotal Debt\n9,941,978,000\n10,333,242,000\n12,462,005,000\n17,438,990,000\nShare Issued\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nOrdinary Shares Number\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nPreferred Shares Number\n0\n822,887\n822,887\n822,887\nTreasury Shares Number\n0\n0\n0\n0", 'level_relation': '{\n "Total Revenue": {\n "Operating Revenue": {}\n },\n "Cost of Revenue": {},\n "Gross Profit": {},\n "Operating Expense": {\n "Selling General and Administrative": {\n "General & Administrative Expense": {},\n "Selling & Marketing Expense": {}\n },\n "Research & Development": {}\n },\n "Operating Income": {},\n "Net Non Operating Interest Income Expense": {\n "Interest Income Non Operating": {},\n "Interest Expense Non Operating": {}\n },\n "Pretax Income": 
{},\n "Tax Provision": {},\n "Net Income Common Stockholders": {\n "Net Income": {\n "Net Income Including Non-Controlling Interests": {\n "Net Income Continuous Operations": {}\n },\n "Minority Interests": {}\n },\n "Preferred Stock Dividends": {},\n "Otherunder Preferred Stock Dividend": {}\n },\n "Diluted NI Available to Com Stockholders": {},\n "Basic EPS": {},\n "Diluted EPS": {},\n "Basic Average Shares": {},\n "Diluted Average Shares": {},\n "Total Operating Income as Reported": {},\n "Total Expenses": {},\n "Net Income from Continuing & Discontinued Operation": {},\n "Normalized Income": {},\n "Interest Income": {},\n "Interest Expense": {},\n "Net Interest Income": {},\n "EBIT": {},\n "EBITDA": {},\n "Reconciled Cost of Revenue": {},\n "Reconciled Depreciation": {},\n "Net Income from Continuing Operation Net Minority Interest": {},\n "Total Unusual Items Excluding Goodwill": {},\n "Total Unusual Items": {},\n "Normalized EBITDA": {},\n "Tax Rate for Calcs": {},\n "Tax Effect of Unusual Items": {}\n}&&&&{\n "Total Revenue": {\n "Operating Revenue": {}\n },\n "Cost of Revenue": {},\n "Gross Profit": {},\n "Operating Expense": {\n "Selling General and Administrative": {\n "General & Administrative Expense": {},\n "Selling & Marketing Expense": {}\n },\n "Research & Development": {}\n },\n "Operating Income": {},\n "Net Non Operating Interest Income Expense": {\n "Interest Income Non Operating": {},\n "Interest Expense Non Operating": {},\n "Total Other Finance Cost": {}\n },\n "Pretax Income": {},\n "Tax Provision": {},\n "Net Income Common Stockholders": {\n "Net Income": {\n "Net Income Including Non-Controlling Interests": {\n "Net Income Continuous Operations": {}\n },\n "Minority Interests": {}\n }\n },\n "Basic EPS": {},\n "Diluted EPS": {},\n "Basic Average Shares": {},\n "Diluted Average Shares": {},\n "Total Operating Income as Reported": {},\n "Total Expenses": {},\n "Net Income from Continuing & Discontinued Operation": {},\n "Normalized Income": 
{},\n "Interest Income": {},\n "Interest Expense": {},\n "Net Interest Income": {},\n "EBIT": {},\n "EBITDA": {},\n "Reconciled Cost of Revenue": {},\n "Reconciled Depreciation": {},\n "Net Income from Continuing Operation Net Minority Interest": {},\n "Total Unusual Items Excluding Goodwill": {},\n "Total Unusual Items": {},\n "Normalized EBITDA": {},\n "Tax Rate for Calcs": {},\n "Tax Effect of Unusual Items": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash": {},\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Assets Held for Sale Current": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n "Tradeand Other Payables Non Current": {},\n "Other Non Current Liabilities": {}\n }\n },\n "Total Equity Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n 
"Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {},\n "Other Equity Interest": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n "Tradeand Other Payables Non Current": {},\n "Other Non Current Liabilities": {}\n }\n },\n "Total Equity 
Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n "Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n "Tradeand Other Payables Non Current": {},\n "Other Non 
Current Liabilities": {}\n }\n },\n "Total Equity Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n "Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n 
"Tradeand Other Payables Non Current": {},\n "Other Non Current Liabilities": {}\n }\n },\n "Total Equity Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n "Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}', 'unit': 'KRW(千)'} yahoo.get_content2(securitiescode)
# dataToSql(conn,cursor,ipo_data) except Exception as e:
yahoo.r.rpush('NoticeEnterprise:securities_code',securitiescode)
[redis]
host=114.115.236.206
port=6379
pass=clbzzsn
[mysql]
host=114.115.159.144
username=root
password=zzsn9988
database=caiji
url=jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
[kafka]
bootstrap_servers=114.115.159.144:9092
topic=keyWordsInfo
groupId=python_baidu
[selenium]
chrome_driver=C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe
binary_location=D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe
"""
"""
修改东方财富网财务数据 存储redis的方式 修改成功
"""
import requests, json, time, re, random, pymysql, redis
from datetime import datetime,timedelta
import pandas as pd
from bs4 import BeautifulSoup
from base.BaseCore import BaseCore
# Shared crawler helper object; this script uses it for logging, the Redis task
# queue (redicPullData), timing (getTimeCost) and run records (recordLog).
baseCore = BaseCore()
# Dedicated MySQL connection to the `clb_project` database, queried by job()
# to resolve a social credit code into a securities code and exchange.
# NOTE(review): host, user and password are hard-coded in source control;
# consider loading them from configuration or the environment.
cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
cursor = cnx.cursor()
# Aliases of the connection/cursor owned by BaseCore. They are not referenced
# anywhere in the visible part of this script.
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor
# Module-wide logger obtained from BaseCore.
log = baseCore.getLogger()
# 判断股票代码是否存在
def check_code(com_code):
    """Tell whether this stock code is being collected for the first time.

    Looks up the Redis key ``com_caiwushuju_code::<com_code>`` in db 3.

    :param com_code: exchange-prefixed stock code used as the key suffix.
    :return: True when the key does not exist (first collection),
             False when it already exists.
    """
    client = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    already_seen = client.exists('com_caiwushuju_code::' + com_code)
    # A truthy EXISTS reply means the company was collected before.
    return False if already_seen else True
def check_date(com_code, info_date):
    """Tell whether a report period is already recorded for a stock code.

    Tests membership of ``info_date`` in the Redis set
    ``com_caiwushuju_date::<com_code>`` (db 3).

    :param com_code: exchange-prefixed stock code used as the key suffix.
    :param info_date: report period to look up.
    :return: True when the period is a member of the set, otherwise False.
    """
    client = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    # The periods are stored as a Redis set, hence SISMEMBER.
    is_member = client.sismember('com_caiwushuju_date::' + com_code, info_date)
    return bool(is_member)
# 将采集后的股票代码对应的报告期保存进redis
def add_date(com_code, date_list):
    """Add every element of ``date_list`` to the Redis set of a stock code.

    The set lives at ``com_caiwushuju_code::<com_code>`` in db 3.

    NOTE(review): check_date() reads ``com_caiwushuju_date::<com_code>`` while
    this function writes to the ``..._code::`` key; confirm which key is the
    intended home of the collected report periods.

    :param com_code: exchange-prefixed stock code used as the key suffix.
    :param date_list: iterable whose elements are added one by one.
    """
    client = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    # One SADD per element, in iteration order.
    for report_date in date_list:
        client.sadd('com_caiwushuju_code::' + com_code, report_date)
# 根据信用代码、股票代码、报告时间采集三张表的数据
def _call_with_retry(func, *args, attempts=3, delay=1):
    """Call ``func(*args)`` up to ``attempts`` times, sleeping after each failure.

    :return: ``(True, result)`` on the first successful call, or
             ``(False, None)`` when every attempt raised.
    """
    for _ in range(attempts):
        try:
            return True, func(*args)
        except Exception:
            time.sleep(delay)
    return False, None


def _load_statement_template(soup_name, prefix):
    """Parse the first ``<script id="<prefix>_<kind>">`` template on the page.

    Kinds are probed in the order qy, qs, yh, bx (presumably one template per
    company category - TODO confirm). Raises AttributeError when none of the
    four script tags exists.
    """
    script = None
    for kind in ('qy', 'qs', 'yh', 'bx'):
        script = soup_name.find('script', {'id': f'{prefix}_{kind}'})
        if script:
            break
    # ``script`` is None only when even the last lookup found nothing.
    return BeautifulSoup(script.text.strip(), 'lxml')


def _extract_statement_items(soup_table, data_json, stop_marker=None):
    """Pair each row label of a statement template with its value in ``data_json``.

    :param soup_table: parsed template whose ``<tr>`` rows carry ``value.<key>`` bindings.
    :param data_json: mapping of field key to value for one report period.
    :param stop_marker: when given, parsing stops at the first row containing it.
    :return: list of ``{"name", "enName", "value"}`` dicts, in row order.
    """
    key_pattern = re.compile(r'value\.(.*?)\)}}')
    items = []
    # The first two rows are skipped, as in the original parsing.
    for one_info in soup_table.find_all('tr')[2:]:
        row_text = one_info.text
        if stop_marker is not None and stop_marker in row_text:
            break
        if 'value.' not in row_text:
            continue
        info_name = one_info.find('span').text
        if '审计意见' in info_name:
            continue
        info_name_en = key_pattern.findall(row_text)[0]
        if not info_name_en:
            continue
        try:
            info_data = data_json[info_name_en]
        except (KeyError, TypeError):
            # The template lists a field that the JSON payload does not carry.
            continue
        items.append({
            "name": info_name,
            'enName': info_name_en,
            # Falsy values are reported as the '--' placeholder.
            "value": info_data if info_data else '--',
        })
    return items


def _collect_statements(social_code, com_code, info_date, dates, url_name, ynFirst, taskType, start_time):
    """Download and assemble the three statements for one ``dates`` query value.

    Tries companyType 1..19 until the balance-sheet endpoint returns parsable
    data. Returns the assembled payload dict, or None when no companyType
    produced parsable data. Any other failure (network error, missing
    template, malformed row) is raised to the caller.
    """
    api_root = 'https://emweb.eastmoney.com/PC_HSF10/NewFinanceAnalysis'
    timeout = 30  # seconds; prevents a stalled connection from hanging the crawler
    statement_labels = (('zcfzb', '资产负债表'), ('lrb', '利润表'), ('xjllb', '现金流量表'))
    for i in range(1, 20):
        query = f'companyType={i}&reportDateType=0&reportType=1&dates={dates}&code={com_code}'
        # Balance sheet first: the other two endpoints are only worth
        # requesting once this companyType is known to be the right one.
        res_data_zcfzb = requests.get(f'{api_root}/zcfzbAjaxNew?{query}', timeout=timeout)
        try:
            data_json_zcfzb = res_data_zcfzb.json()['data'][0]
        except Exception:
            continue
        log.info(f'----{com_code}---{info_date}--第{i}次解析成功-----')
        # Income statement and cash-flow statement for the same companyType.
        data_json_lrb = requests.get(f'{api_root}/lrbAjaxNew?{query}', timeout=timeout).json()['data'][0]
        data_json_xjllb = requests.get(f'{api_root}/xjllbAjaxNew?{query}', timeout=timeout).json()['data'][0]

        # The HTML page embeds the row templates that map labels to JSON keys.
        res_name = requests.get(url_name, timeout=timeout)
        soup_name = BeautifulSoup(res_name.content, 'html.parser')
        templates = {}
        for prefix, label in statement_labels:
            try:
                templates[prefix] = _load_statement_template(soup_name, prefix)
            except Exception:
                log.info(f'---error: {social_code}, {com_code}---')
                state = 0
                takeTime = baseCore.getTimeCost(start_time, time.time())
                baseCore.recordLog(social_code, taskType, state, takeTime, url_name, f'{info_date}{label}失败')
        if len(templates) != len(statement_labels):
            # Explicit failure instead of relying on an unbound-variable error.
            raise LookupError(f'statement template missing for {com_code}')

        list_zcfzb = _extract_statement_items(templates['zcfzb'], data_json_zcfzb)
        log.info(f'----list_zcfzb:采集条数{len(list_zcfzb)}----')
        list_lrb = _extract_statement_items(templates['lrb'], data_json_lrb)
        # Cash-flow rows stop at the supplementary-information section.
        list_xjllb = _extract_statement_items(templates['xjllb'], data_json_xjllb, stop_marker='补充资料')
        return {
            "socialCreditCode": social_code,
            "securitiesCode": com_code[2:],
            "date": info_date,
            "debt": list_zcfzb,
            "profit": list_lrb,
            "cash": list_xjllb,
            "ynFirst": ynFirst,
        }
    # TODO: report periods that still yield nothing after every companyType.
    return None


def get_info(social_code, com_code, info_date, delist_all, info_date_list, taskType):
    """Collect balance sheet, income statement and cash flow for one report period.

    :param social_code: social credit code of the company.
    :param com_code: exchange-prefixed stock code; the first two characters
                     are stripped for the ``securitiesCode`` field.
    :param info_date: report period to collect.
    :param delist_all: list mutated in place; ``info_date`` is appended when
                       both the primary and the fallback attempt fail.
    :param info_date_list: list mutated in place; ``info_date`` is appended
                           when data was collected.
    :param taskType: task label forwarded to ``baseCore.recordLog``.
    :return: the payload dict on success; an empty dict when the period was
             already collected or nothing could be collected. The function
             never returns None, so callers may safely apply ``len()``.
    """
    dic_info = {}
    # Is this the first time the stock code is collected?
    ok, ynFirst = _call_with_retry(check_code, com_code)
    if not ok:
        # Without this flag the payload cannot be built; skip this period.
        log.info(f'---error: redis check_code failed: {social_code}, {com_code}---')
        return dic_info
    # Skip periods that are already recorded. If Redis cannot be reached the
    # period is treated as not collected yet.
    ok, already_collected = _call_with_retry(check_date, com_code, info_date)
    if ok and already_collected:
        return dic_info

    # Page URL, used to read the field-name templates.
    url_name = f'https://emweb.eastmoney.com/PC_HSF10/NewFinanceAnalysis/Index?type=web&code={com_code}'
    # NOTE(review): the fallback asks for a fixed set of periods and, like the
    # primary attempt, takes the first entry of ``data`` while still labelling
    # the result with ``info_date`` - confirm this is intended.
    fallback_dates = '2023-03-31%2C2022-12-31%2C2022-09-30%2C2022-06-30%2C2022-03-31'

    start_time = time.time()
    try:
        result = _collect_statements(social_code, com_code, info_date, info_date,
                                     url_name, ynFirst, taskType, start_time)
    except Exception:
        start_time = time.time()
        try:
            result = _collect_statements(social_code, com_code, info_date, fallback_dates,
                                         url_name, ynFirst, taskType, start_time)
        except Exception:
            log.info(f'---{info_date}报告期无数据,股票代码:{com_code}----')
            state = 0
            takeTime = baseCore.getTimeCost(start_time, time.time())
            baseCore.recordLog(social_code, taskType, state, takeTime, url_name, f'{info_date}--报告期无数据--{com_code}')
            # Remember the period that has no data.
            delist_all.append(info_date)
            return dic_info
    if result is None:
        return dic_info
    # Record the period as collected.
    info_date_list.append(info_date)
    return result
def getReportTime(latest_year=2022, earliest_year=2019):
    """Build the list of report periods to collect.

    The first element is yesterday's date (``YYYY-MM-DD``), followed by the
    four quarter-end dates of every year from ``latest_year`` down to
    ``earliest_year`` inclusive, newest first.

    :param latest_year: most recent year whose quarter ends are listed.
    :param earliest_year: oldest year whose quarter ends are listed.
    :return: list of date strings.
    """
    yesterday = datetime.now() - timedelta(days=1)
    list_date = [yesterday.strftime('%Y-%m-%d')]
    quarter_ends = ('-12-31', '-09-30', '-06-30', '-03-31')
    for year in range(latest_year, earliest_year - 1, -1):
        list_date.extend(f'{year}{suffix}' for suffix in quarter_ends)
    return list_date
def job(taskType):
    """Run the collection loop: pull companies from Redis and crawl their reports.

    For every social credit code pulled from the Redis queue the matching
    securities code and exchange are read from MySQL, every report period
    from getReportTime() is collected with get_info(), each non-empty result
    is posted to the storage service, and the successfully stored periods are
    written back to Redis with add_date().

    The loop runs until an exception (including KeyboardInterrupt) escapes;
    database and BaseCore resources are released on the way out.

    :param taskType: task label forwarded to get_info().
    """
    exchange_prefix = {1: 'bj', 2: 'sh', 3: 'sz'}
    url_baocun = 'http://114.115.236.206:8088/sync/finance/df'
    # Parameterized so the value pulled from the queue is never spliced into SQL.
    sql_sel = '''select securities_code,exchange from sys_base_enterprise_ipo where category = '1' and social_credit_code=%s '''
    try:
        while True:
            # Pull the next social credit code from Redis.
            social_code = baseCore.redicPullData('FinanceFromEast:finance_socialCode')
            # Queue is empty: wait and poll again.
            if social_code is None:
                time.sleep(20)
                continue
            cursor.execute(sql_sel, (social_code,))
            row = cursor.fetchone()
            if row is None:
                log.info(f'---error: no securities code found for {social_code}---')
                continue
            securities_code, exchange = row[0], row[1]
            prefix = exchange_prefix.get(exchange)
            if prefix is None:
                # Avoid reusing a stale code from a previous company.
                log.info(f'---error: unknown exchange {exchange} for {social_code}---')
                continue
            com_code = prefix + securities_code

            date_list = []  # report periods of this company that were stored
            start_time = time.time()
            # Collect every report period separately.
            for info_date in getReportTime():
                delist_all = []
                info_date_list = []
                dic_info = get_info(social_code, com_code, info_date, delist_all, info_date_list, taskType)
                # Covers both an empty dict and a None result.
                if not dic_info:
                    continue
                # Hand the data to the storage service.
                data = json.dumps(dic_info)
                res_baocun = None
                for _ in range(3):
                    try:
                        res_baocun = requests.post(url_baocun, data=data)
                        break
                    except Exception:
                        time.sleep(1)
                if res_baocun is None:
                    # Do not mark the period as collected if it was not stored.
                    log.info(f'---error: failed to save {com_code} {info_date}---')
                    continue
                print(res_baocun.text)
                date_list.extend(info_date_list)

            print(date_list)
            # Store the collected periods in Redis. The list itself is passed:
            # a str() of it would be iterated character by character.
            if date_list:
                for _ in range(3):
                    try:
                        add_date(com_code, date_list)
                        break
                    except Exception:
                        time.sleep(1)
            end_time = time.time()
            log.info(f'===={com_code}====该企业耗时{end_time-start_time}===')
    finally:
        # Cursor first, then its connection, then BaseCore's own resources.
        cursor.close()
        cnx.close()
        baseCore.close()
if __name__ == '__main__':
    # Task label that job() forwards to get_info() for its run records.
    task_type = '财务数据/东方财富网'
    job(task_type)
...@@ -9,111 +9,125 @@ import requests ...@@ -9,111 +9,125 @@ import requests
from base.BaseCore import BaseCore from base.BaseCore import BaseCore
baseCore = BaseCore() class Gpdm(object):
log = baseCore.getLogger() def __int__(self):
headers={ pass
'X-AUTH-TOKEN':'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzY4MzgxNjk4NCIsImlhdCI6MTY5MDE3ODYyOCwiZXhwIjoxNjkyNzcwNjI4fQ.VV3Zoa4RM5nVN8UXBc0-81KMGqLzTOme6rButeETGfFQi7p5h4ydg8CFrEsizr_iFwB3_BVaKR2o2xR-M4ipbQ', baseCore = BaseCore()
'X-TYCID':'77e997401d5f11ee9e91d5a0fd3c0b83', log = baseCore.getLogger()
'version':'TYC-Web', headers={
'Content-Type':'application/json;charset=UTF-8' 'X-AUTH-TOKEN':'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzY4MzgxNjk4NCIsImlhdCI6MTY5MDE3ODYyOCwiZXhwIjoxNjkyNzcwNjI4fQ.VV3Zoa4RM5nVN8UXBc0-81KMGqLzTOme6rButeETGfFQi7p5h4ydg8CFrEsizr_iFwB3_BVaKR2o2xR-M4ipbQ',
} 'X-TYCID':'77e997401d5f11ee9e91d5a0fd3c0b83',
cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4') 'version':'TYC-Web',
cursor= cnx.cursor() 'Content-Type':'application/json;charset=UTF-8'
}
cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4')
cursor= cnx.cursor()
taskType = '股票代码/东方财富网' taskType = '股票代码/东方财富网'
def getTotal(pageSize,start): def getTotal(self,pageSize,start):
total=0 total=0
for num in range(3): for num in range(3):
try: try:
url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn=1&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={baseCore.getNowTime(3)}"; url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn=1&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={self.baseCore.getNowTime(3)}"
ip = baseCore.get_proxy() ip = self.baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent() self.headers['User-Agent'] = self.baseCore.getRandomUserAgent()
response = requests.get(url, headers=headers, verify=False, proxies=ip) response = requests.get(url, headers=self.headers, verify=False, proxies=ip)
time.sleep(random.randint(3, 5)) time.sleep(random.randint(3, 5))
# jQuery1124020359136113854692_1688967721474({"rc":0,"rt":6,"svr":182993358,"lt":1,"full":1,"dlmkts":"","data":{"total":5488,"diff":[{"f1":2,"f2":35.37,"f3":130.87,"f4":20.05,"f5":505082,"f6":1561753667.0,"f7":72.85,"f8":73.63,"f9":79.87,"f10":"-","f11":-0.34,"f12":"603119","f13":1,"f14":"N\xe6\xb5\x99\xe8\x8d\xa3","f15":37.54,"f16":26.38,"f17":28.88,"f18":15.32,"f20":9903600000,"f21":2426214099,"f22":-0.03,"f23":6.46,"f24":130.87,"f25":130.87,"f62":503279629.0,"f115":70.77,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":70.7,"f3":26.98,"f4":15.02,"f5":278191,"f6":2015432017.69,"f7":19.83,"f8":73.92,"f9":44.38,"f10":"-","f11":0.41,"f12":"301371","f13":0,"f14":"N\xe6\x95\xb7\xe5\xb0\x94\xe4\xbd\xb3","f15":80.04,"f16":69.0,"f17":80.0,"f18":55.68,"f20":28285656000,"f21":2660599297,"f22":0.11,"f23":5.64,"f24":26.98,"f25":26.98,"f62":476657031.0,"f115":33.47,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.6,"f3":20.0,"f4":4.6,"f5":135775,"f6":348360366.27,"f7":21.04,"f8":33.94,"f9":212.8,"f10":3.1,"f11":0.0,"f12":"301316","f13":0,"f14":"\xe6\x85\xa7\xe5\x8d\x9a\xe4\xba\x91\xe9\x80\x9a","f15":27.6,"f16":22.76,"f17":23.11,"f18":23.0,"f20":11040276000,"f21":1104274261,"f22":0.0,"f23":11.68,"f24":18.1,"f25":44.43,"f62":107348086.0,"f115":124.43,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":43.62,"f3":20.0,"f4":7.27,"f5":75204,"f6":311935188.44,"f7":21.79,"f8":29.67,"f9":56.11,"f10":13.27,"f11":0.0,"f12":"301289","f13":0,"f14":"\xe5\x9b\xbd\xe7\xbc\x86\xe6\xa3\x80\xe6\xb5\x8b","f15":43.62,"f16":35.7,"f17":36.61,"f18":36.35,"f20":3402360000,"f21":1105762682,"f22":0.0,"f23":3.86,"f24":28.26,"f25":35.55,"f62":80534335.0,"f115":47.25,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":40.98,"f3":20.0,"f4":6.83,"f5":118733,"f6":464542197.42,"f7":20.73,"f8":40.73,"f9":56.02,"f10":2.57,"f11":0.0,"f12":"300881","f13":0,"f14":"\xe7\x9b\x9b\xe5\xbe\xb7\xe9\x91\xab\xe6\xb3\xb0","f15":40.98,"f16":33.9,"f17":33.9
,"f18":34.15,"f20":4507800000,"f21":1194567000,"f22":0.0,"f23":5.48,"f24":23.81,"f25":42.05,"f62":16802132.0,"f115":56.01,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":21.0,"f3":19.45,"f4":3.42,"f5":50301,"f6":97244231.42,"f7":16.1,"f8":16.87,"f9":46.64,"f10":1.95,"f11":1.35,"f12":"873576","f13":0,"f14":"\xe5\xa4\xa9\xe5\x8a\x9b\xe5\xa4\x8d\xe5\x90\x88","f15":21.0,"f16":18.17,"f17":18.18,"f18":17.58,"f20":2247000000,"f21":626162250,"f22":0.72,"f23":5.16,"f24":50.21,"f25":50.21,"f62":11286257.0,"f115":29.96,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":76.8,"f3":16.21,"f4":10.71,"f5":153518,"f6":1100431330.98,"f7":23.24,"f8":73.58,"f9":190.79,"f10":1.6,"f11":0.27,"f12":"301315","f13":0,"f14":"\xe5\xa8\x81\xe5\xa3\xab\xe9\xa1\xbf","f15":79.31,"f16":63.95,"f17":63.95,"f18":66.09,"f20":6758400000,"f21":1602347750,"f22":0.17,"f23":7.03,"f24":137.84,"f25":137.84,"f62":112419255.0,"f115":102.68,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":72.99,"f3":16.17,"f4":10.16,"f5":106236,"f6":714127513.24,"f7":23.68,"f8":52.41,"f9":123.41,"f10":1.71,"f11":0.4,"f12":"301141","f13":0,"f14":"\xe4\xb8\xad\xe7\xa7\x91\xe7\xa3\x81\xe4\xb8\x9a","f15":74.88,"f16":60.0,"f17":62.85,"f18":62.83,"f20":6466528467,"f21":1479619267,"f22":0.07,"f23":3.14,"f24":96.74,"f25":78.02,"f62":-26422445.0,"f115":87.31,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.3,"f3":12.81,"f4":3.1,"f5":171865,"f6":442577004.48,"f7":15.25,"f8":7.3,"f9":-156.2,"f10":0.94,"f11":-0.15,"f12":"300551","f13":0,"f14":"\xe5\x8f\xa4\xe9\xb3\x8c\xe7\xa7\x91\xe6\x8a\x80","f15":27.55,"f16":23.86,"f17":24.2,"f18":24.2,"f20":9439055235,"f21":6427896275,"f22":-0.11,"f23":8.93,"f24":48.37,"f25":133.73,"f62":16013778.0,"f115":-126.12,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":84.3,"f3":12.18,"f4":9.15,"f5":124022,"f6":989104033.4,"f7":17.33,"f8":64.35,"f9":99.53,"f10":1.15,"f11":0.19,"f12":"301398","f13":0,"f14":"\xe6\x98\x9f\xe
6\xba\x90\xe5\x8d\x93\xe9\x95\x81","f15":86.5,"f16":73.48,"f17":75.48,"f18":75.15,"f20":6744000000,"f21":1624735481,"f22":-0.04,"f23":6.81,"f24":157.88,"f25":173.35,"f62":-26812467.0,"f115":105.29,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":34.85,"f3":10.95,"f4":3.44,"f5":27626,"f6":95746251.0,"f7":9.87,"f8":7.18,"f9":-37.27,"f10":9.74,"f11":-0.03,"f12":"688622","f13":1,"f14":"\xe7\xa6\xbe\xe4\xbf\xa1\xe4\xbb\xaa\xe5\x99\xa8","f15":36.0,"f16":32.9,"f17":35.0,"f18":31.41,"f20":2439416569,"f21":1341637317,"f22":-0.03,"f23":4.74,"f24":-5.76,"f25":7.23,"f62":18152096.0,"f115":-36.22,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":87.8,"f3":10.66,"f4":8.46,"f5":22037,"f6":184811228.0,"f7":11.33,"f8":6.52,"f9":116.36,"f10":4.84,"f11":1.09,"f12":"688776","f13":1,"f14":"\xe5\x9b\xbd\xe5\x85\x89\xe7\x94\xb5\xe6\xb0\x94","f15":87.99,"f16":79.0,"f17":79.0,"f18":79.34,"f20":9516064188,"f21":2968587801,"f22":-0.22,"f23":5.39,"f24":-5.88,"f25":-29.79,"f62":2907315.0,"f115":65.69,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.05,"f3":10.22,"f4":0.19,"f5":3258788,"f6":657251653.18,"f7":9.68,"f8":6.48,"f9":-12.82,"f10":3.95,"f11":0.0,"f12":"000413","f13":0,"f14":"\xe4\xb8\x9c\xe6\x97\xad\xe5\x85\x89\xe7\x94\xb5","f15":2.05,"f16":1.87,"f17":1.87,"f18":1.86,"f20":11547137393,"f21":10310048690,"f22":0.0,"f23":0.52,"f24":17.82,"f25":15.82,"f62":213263692.0,"f115":-8.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.7,"f3":10.2,"f4":0.25,"f5":1107878,"f6":291343381.08,"f7":11.84,"f8":7.94,"f9":-19.65,"f10":2.01,"f11":0.0,"f12":"002256","f13":0,"f14":"\xe5\x85\x86\xe6\x96\xb0\xe8\x82\xa1\xe4\xbb\xbd","f15":2.7,"f16":2.41,"f17":2.44,"f18":2.45,"f20":5082512054,"f21":3769280384,"f22":0.0,"f23":4.31,"f24":11.11,"f25":12.97,"f62":96164236.0,"f115":-99.3,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.92,"f3":10.19,"f4":0.27,"f5":1178068,"f6":333498626.0,"f7":9.06,"f8":7.34,"f9":7.63,"f10
":1.4,"f11":0.0,"f12":"600239","f13":1,"f14":"\xe4\xba\x91\xe5\x8d\x97\xe5\x9f\x8e\xe6\x8a\x95","f15":2.92,"f16":2.68,"f17":2.69,"f18":2.65,"f20":4688605774,"f21":4688605774,"f22":0.0,"f23":2.89,"f24":28.07,"f25":51.3,"f62":27795948.0,"f115":-16.59,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.15,"f3":10.14,"f4":0.29,"f5":2973491,"f6":920586623.66,"f7":8.74,"f8":28.9,"f9":-7.07,"f10":4.18,"f11":0.0,"f12":"002630","f13":0,"f14":"\xe5\x8d\x8e\xe8\xa5\xbf\xe8\x83\xbd\xe6\xba\x90","f15":3.15,"f16":2.9,"f17":2.95,"f18":2.86,"f20":3719520000,"f21":3240482440,"f22":0.0,"f23":4.9,"f24":26.51,"f25":7.14,"f62":-18293260.0,"f115":-5.07,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.79,"f3":10.11,"f4":0.44,"f5":1857359,"f6":864538200.0,"f7":10.8,"f8":9.31,"f9":24.64,"f10":9.05,"f11":0.0,"f12":"600577","f13":1,"f14":"\xe7\xb2\xbe\xe8\xbe\xbe\xe8\x82\xa1\xe4\xbb\xbd","f15":4.79,"f16":4.32,"f17":4.35,"f18":4.35,"f20":9959122877,"f21":9559956211,"f22":0.0,"f23":2.07,"f24":14.05,"f25":16.26,"f62":161845983.0,"f115":26.21,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.36,"f3":10.1,"f4":0.4,"f5":617159,"f6":264661451.0,"f7":11.62,"f8":2.74,"f9":122.48,"f10":3.79,"f11":0.0,"f12":"601777","f13":1,"f14":"\xe5\x8a\x9b\xe5\xb8\x86\xe7\xa7\x91\xe6\x8a\x80","f15":4.36,"f16":3.9,"f17":3.95,"f18":3.96,"f20":19931840280,"f21":9811962000,"f22":0.0,"f23":1.94,"f24":24.22,"f25":12.95,"f62":41966291.0,"f115":137.9,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.27,"f3":10.1,"f4":0.3,"f5":290547,"f6":93712867.6,"f7":8.08,"f8":2.28,"f9":1394.52,"f10":1.03,"f11":0.0,"f12":"002175","f13":0,"f14":"\xe4\xb8\x9c\xe6\x96\xb9\xe6\x99\xba\xe9\x80\xa0","f15":3.27,"f16":3.03,"f17":3.04,"f18":2.97,"f20":4175072977,"f21":4175040277,"f22":0.0,"f23":8.53,"f24":13.54,"f25":-8.66,"f62":52561839.0,"f115":41.98,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.51,"f3":10.09,"f4":0.23,"f5":1715205,"f6":423246793.0
,"f7":10.96,"f8":5.97,"f9":-4.84,"f10":2.8,"f11":0.0,"f12":"600569","f13":1,"f14":"\xe5\xae\x89\xe9\x98\xb3\xe9\x92\xa2\xe9\x93\x81","f15":2.51,"f16":2.26,"f17":2.26,"f18":2.28,"f20":7209777679,"f21":7209777679,"f22":0.0,"f23":1.02,"f24":17.84,"f25":21.26,"f62":88473646.0,"f115":-2.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2}]}}); # jQuery1124020359136113854692_1688967721474({"rc":0,"rt":6,"svr":182993358,"lt":1,"full":1,"dlmkts":"","data":{"total":5488,"diff":[{"f1":2,"f2":35.37,"f3":130.87,"f4":20.05,"f5":505082,"f6":1561753667.0,"f7":72.85,"f8":73.63,"f9":79.87,"f10":"-","f11":-0.34,"f12":"603119","f13":1,"f14":"N\xe6\xb5\x99\xe8\x8d\xa3","f15":37.54,"f16":26.38,"f17":28.88,"f18":15.32,"f20":9903600000,"f21":2426214099,"f22":-0.03,"f23":6.46,"f24":130.87,"f25":130.87,"f62":503279629.0,"f115":70.77,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":70.7,"f3":26.98,"f4":15.02,"f5":278191,"f6":2015432017.69,"f7":19.83,"f8":73.92,"f9":44.38,"f10":"-","f11":0.41,"f12":"301371","f13":0,"f14":"N\xe6\x95\xb7\xe5\xb0\x94\xe4\xbd\xb3","f15":80.04,"f16":69.0,"f17":80.0,"f18":55.68,"f20":28285656000,"f21":2660599297,"f22":0.11,"f23":5.64,"f24":26.98,"f25":26.98,"f62":476657031.0,"f115":33.47,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.6,"f3":20.0,"f4":4.6,"f5":135775,"f6":348360366.27,"f7":21.04,"f8":33.94,"f9":212.8,"f10":3.1,"f11":0.0,"f12":"301316","f13":0,"f14":"\xe6\x85\xa7\xe5\x8d\x9a\xe4\xba\x91\xe9\x80\x9a","f15":27.6,"f16":22.76,"f17":23.11,"f18":23.0,"f20":11040276000,"f21":1104274261,"f22":0.0,"f23":11.68,"f24":18.1,"f25":44.43,"f62":107348086.0,"f115":124.43,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":43.62,"f3":20.0,"f4":7.27,"f5":75204,"f6":311935188.44,"f7":21.79,"f8":29.67,"f9":56.11,"f10":13.27,"f11":0.0,"f12":"301289","f13":0,"f14":"\xe5\x9b\xbd\xe7\xbc\x86\xe6\xa3\x80\xe6\xb5\x8b","f15":43.62,"f16":35.7,"f17":36.61,"f18":36.35,"f20":3402360000,"f21":1105762682,"f22":0.0,"f23":3.86
,"f24":28.26,"f25":35.55,"f62":80534335.0,"f115":47.25,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":40.98,"f3":20.0,"f4":6.83,"f5":118733,"f6":464542197.42,"f7":20.73,"f8":40.73,"f9":56.02,"f10":2.57,"f11":0.0,"f12":"300881","f13":0,"f14":"\xe7\x9b\x9b\xe5\xbe\xb7\xe9\x91\xab\xe6\xb3\xb0","f15":40.98,"f16":33.9,"f17":33.9,"f18":34.15,"f20":4507800000,"f21":1194567000,"f22":0.0,"f23":5.48,"f24":23.81,"f25":42.05,"f62":16802132.0,"f115":56.01,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":21.0,"f3":19.45,"f4":3.42,"f5":50301,"f6":97244231.42,"f7":16.1,"f8":16.87,"f9":46.64,"f10":1.95,"f11":1.35,"f12":"873576","f13":0,"f14":"\xe5\xa4\xa9\xe5\x8a\x9b\xe5\xa4\x8d\xe5\x90\x88","f15":21.0,"f16":18.17,"f17":18.18,"f18":17.58,"f20":2247000000,"f21":626162250,"f22":0.72,"f23":5.16,"f24":50.21,"f25":50.21,"f62":11286257.0,"f115":29.96,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":76.8,"f3":16.21,"f4":10.71,"f5":153518,"f6":1100431330.98,"f7":23.24,"f8":73.58,"f9":190.79,"f10":1.6,"f11":0.27,"f12":"301315","f13":0,"f14":"\xe5\xa8\x81\xe5\xa3\xab\xe9\xa1\xbf","f15":79.31,"f16":63.95,"f17":63.95,"f18":66.09,"f20":6758400000,"f21":1602347750,"f22":0.17,"f23":7.03,"f24":137.84,"f25":137.84,"f62":112419255.0,"f115":102.68,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":72.99,"f3":16.17,"f4":10.16,"f5":106236,"f6":714127513.24,"f7":23.68,"f8":52.41,"f9":123.41,"f10":1.71,"f11":0.4,"f12":"301141","f13":0,"f14":"\xe4\xb8\xad\xe7\xa7\x91\xe7\xa3\x81\xe4\xb8\x9a","f15":74.88,"f16":60.0,"f17":62.85,"f18":62.83,"f20":6466528467,"f21":1479619267,"f22":0.07,"f23":3.14,"f24":96.74,"f25":78.02,"f62":-26422445.0,"f115":87.31,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.3,"f3":12.81,"f4":3.1,"f5":171865,"f6":442577004.48,"f7":15.25,"f8":7.3,"f9":-156.2,"f10":0.94,"f11":-0.15,"f12":"300551","f13":0,"f14":"\xe5\x8f\xa4\xe9\xb3\x8c\xe7\xa7\x91\xe6\x8a\x80","f15":27.55,"f16":23.86,"f17":24.2,"f
18":24.2,"f20":9439055235,"f21":6427896275,"f22":-0.11,"f23":8.93,"f24":48.37,"f25":133.73,"f62":16013778.0,"f115":-126.12,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":84.3,"f3":12.18,"f4":9.15,"f5":124022,"f6":989104033.4,"f7":17.33,"f8":64.35,"f9":99.53,"f10":1.15,"f11":0.19,"f12":"301398","f13":0,"f14":"\xe6\x98\x9f\xe6\xba\x90\xe5\x8d\x93\xe9\x95\x81","f15":86.5,"f16":73.48,"f17":75.48,"f18":75.15,"f20":6744000000,"f21":1624735481,"f22":-0.04,"f23":6.81,"f24":157.88,"f25":173.35,"f62":-26812467.0,"f115":105.29,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":34.85,"f3":10.95,"f4":3.44,"f5":27626,"f6":95746251.0,"f7":9.87,"f8":7.18,"f9":-37.27,"f10":9.74,"f11":-0.03,"f12":"688622","f13":1,"f14":"\xe7\xa6\xbe\xe4\xbf\xa1\xe4\xbb\xaa\xe5\x99\xa8","f15":36.0,"f16":32.9,"f17":35.0,"f18":31.41,"f20":2439416569,"f21":1341637317,"f22":-0.03,"f23":4.74,"f24":-5.76,"f25":7.23,"f62":18152096.0,"f115":-36.22,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":87.8,"f3":10.66,"f4":8.46,"f5":22037,"f6":184811228.0,"f7":11.33,"f8":6.52,"f9":116.36,"f10":4.84,"f11":1.09,"f12":"688776","f13":1,"f14":"\xe5\x9b\xbd\xe5\x85\x89\xe7\x94\xb5\xe6\xb0\x94","f15":87.99,"f16":79.0,"f17":79.0,"f18":79.34,"f20":9516064188,"f21":2968587801,"f22":-0.22,"f23":5.39,"f24":-5.88,"f25":-29.79,"f62":2907315.0,"f115":65.69,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.05,"f3":10.22,"f4":0.19,"f5":3258788,"f6":657251653.18,"f7":9.68,"f8":6.48,"f9":-12.82,"f10":3.95,"f11":0.0,"f12":"000413","f13":0,"f14":"\xe4\xb8\x9c\xe6\x97\xad\xe5\x85\x89\xe7\x94\xb5","f15":2.05,"f16":1.87,"f17":1.87,"f18":1.86,"f20":11547137393,"f21":10310048690,"f22":0.0,"f23":0.52,"f24":17.82,"f25":15.82,"f62":213263692.0,"f115":-8.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.7,"f3":10.2,"f4":0.25,"f5":1107878,"f6":291343381.08,"f7":11.84,"f8":7.94,"f9":-19.65,"f10":2.01,"f11":0.0,"f12":"002256","f13":0,"f14":"\xe5\x85\x86\x
e6\x96\xb0\xe8\x82\xa1\xe4\xbb\xbd","f15":2.7,"f16":2.41,"f17":2.44,"f18":2.45,"f20":5082512054,"f21":3769280384,"f22":0.0,"f23":4.31,"f24":11.11,"f25":12.97,"f62":96164236.0,"f115":-99.3,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.92,"f3":10.19,"f4":0.27,"f5":1178068,"f6":333498626.0,"f7":9.06,"f8":7.34,"f9":7.63,"f10":1.4,"f11":0.0,"f12":"600239","f13":1,"f14":"\xe4\xba\x91\xe5\x8d\x97\xe5\x9f\x8e\xe6\x8a\x95","f15":2.92,"f16":2.68,"f17":2.69,"f18":2.65,"f20":4688605774,"f21":4688605774,"f22":0.0,"f23":2.89,"f24":28.07,"f25":51.3,"f62":27795948.0,"f115":-16.59,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.15,"f3":10.14,"f4":0.29,"f5":2973491,"f6":920586623.66,"f7":8.74,"f8":28.9,"f9":-7.07,"f10":4.18,"f11":0.0,"f12":"002630","f13":0,"f14":"\xe5\x8d\x8e\xe8\xa5\xbf\xe8\x83\xbd\xe6\xba\x90","f15":3.15,"f16":2.9,"f17":2.95,"f18":2.86,"f20":3719520000,"f21":3240482440,"f22":0.0,"f23":4.9,"f24":26.51,"f25":7.14,"f62":-18293260.0,"f115":-5.07,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.79,"f3":10.11,"f4":0.44,"f5":1857359,"f6":864538200.0,"f7":10.8,"f8":9.31,"f9":24.64,"f10":9.05,"f11":0.0,"f12":"600577","f13":1,"f14":"\xe7\xb2\xbe\xe8\xbe\xbe\xe8\x82\xa1\xe4\xbb\xbd","f15":4.79,"f16":4.32,"f17":4.35,"f18":4.35,"f20":9959122877,"f21":9559956211,"f22":0.0,"f23":2.07,"f24":14.05,"f25":16.26,"f62":161845983.0,"f115":26.21,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.36,"f3":10.1,"f4":0.4,"f5":617159,"f6":264661451.0,"f7":11.62,"f8":2.74,"f9":122.48,"f10":3.79,"f11":0.0,"f12":"601777","f13":1,"f14":"\xe5\x8a\x9b\xe5\xb8\x86\xe7\xa7\x91\xe6\x8a\x80","f15":4.36,"f16":3.9,"f17":3.95,"f18":3.96,"f20":19931840280,"f21":9811962000,"f22":0.0,"f23":1.94,"f24":24.22,"f25":12.95,"f62":41966291.0,"f115":137.9,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.27,"f3":10.1,"f4":0.3,"f5":290547,"f6":93712867.6,"f7":8.08,"f8":2.28,"f9":1394.52,"f10":1.03,"f11":0.0,"f12":"002
175","f13":0,"f14":"\xe4\xb8\x9c\xe6\x96\xb9\xe6\x99\xba\xe9\x80\xa0","f15":3.27,"f16":3.03,"f17":3.04,"f18":2.97,"f20":4175072977,"f21":4175040277,"f22":0.0,"f23":8.53,"f24":13.54,"f25":-8.66,"f62":52561839.0,"f115":41.98,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.51,"f3":10.09,"f4":0.23,"f5":1715205,"f6":423246793.0,"f7":10.96,"f8":5.97,"f9":-4.84,"f10":2.8,"f11":0.0,"f12":"600569","f13":1,"f14":"\xe5\xae\x89\xe9\x98\xb3\xe9\x92\xa2\xe9\x93\x81","f15":2.51,"f16":2.26,"f17":2.26,"f18":2.28,"f20":7209777679,"f21":7209777679,"f22":0.0,"f23":1.02,"f24":17.84,"f25":21.26,"f62":88473646.0,"f115":-2.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2}]}});
content = response.content.decode('utf-8') content = response.content.decode('utf-8')
content = baseCore.getSubStr(content, '{', '}') content = self.baseCore.getSubStr(content, '{', '}')
retJson = json.loads(content) retJson = json.loads(content)
total = retJson['data']['total'] total = retJson['data']['total']
response.close() response.close()
break break
except Exception as e: except Exception as e:
log.info(f"------第{num}次出错---{e}") self.log.info(f"------第{num}次出错---{e}")
continue continue
exception = '链接失败' exception = '链接失败'
state = 0 state = 0
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = self.baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception) self.baseCore.recordLog('', self.taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
return total
return total
def getPageDta(pageIndex,pageSize,totalPage):
    """Fetch one page of the stock list and insert unseen (gpdm, name) pairs.

    The page is requested through a proxy with a random User-Agent. Each
    entry's ``f12`` (stored as ``gpdm``) and ``f14`` (stored as ``name``) is
    looked up in the ``gpdm`` table and inserted when no matching row exists.
    The whole page is attempted up to three times; a failure is written
    through ``baseCore.recordLog`` only when every attempt raised.

    Args:
        pageIndex: 1-based page number sent as the ``pn`` query parameter.
        pageSize: number of entries per page, sent as ``pz``.
        totalPage: total page count; used for progress logging only.
    """
    # Parameterized statements: names come from a remote API and may contain
    # quotes, which would break (or inject into) string-built SQL.
    selectSql = "select count(1) from gpdm where gpdm=%s and name=%s"
    insertSql = "insert into gpdm(gpdm,name,state,create_date) values (%s,%s,1,now())"
    for num in range(3):
        start = time.time()
        try:
            log.info(f"【{pageIndex}/{totalPage}】-----------begin")
            url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn={pageIndex}&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={baseCore.getNowTime(3)}"
            ip = baseCore.get_proxy()
            headers['User-Agent'] = baseCore.getRandomUserAgent()
            response = requests.get(url, headers=headers, verify=False, proxies=ip)
            try:
                time.sleep(random.randint(3, 5))
                content = response.content.decode('utf-8')
            finally:
                # Release the connection even when decoding fails.
                response.close()
            # The body is a JSONP call; getSubStr presumably strips the
            # callback wrapper down to the JSON object -- TODO confirm.
            content = baseCore.getSubStr(content, '{', '}')
            retJson = json.loads(content)
            for item in retJson['data']['diff']:
                gpdm = item['f12']
                name = item['f14']
                cursor.execute(selectSql, (gpdm, name))
                count = cursor.fetchone()[0]
                if count > 0:
                    log.info(f"{gpdm}-------{name}---已经存在")
                    continue
                log.info(f"{gpdm}-------{name}---新增")
                cursor.execute(insertSql, (gpdm, name))
                cnx.commit()
            log.info(f"【{pageIndex}/{totalPage}】-----------end,耗时{baseCore.getTimeCost(start, time.time())}")
            break
        except Exception as e:
            log.info(f"------第{num}次出错---{e}")
            continue
    else:
        # Reached only when no attempt hit ``break``: record the page as failed.
        exception = f'第{pageIndex}页链接失败'
        state = 0
        takeTime = baseCore.getTimeCost(start, time.time())
        baseCore.recordLog('', taskType, state, takeTime, '', exception)
def doJob(): def getPageDta(self,pageIndex,pageSize,totalPage,gpdmList):
pageSize=20 gpdmListPage = []
start_time = time.time() for num in range(3):
total=getTotal(pageSize,start_time) try:
if total==0: start = time.time()
exception = '股票代码总数为零' self.log.info(f"【{pageIndex}/{totalPage}】-----------begin")
url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn={pageIndex}&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={self.baseCore.getNowTime(3)}"
ip = self.baseCore.get_proxy()
self.headers['User-Agent'] = self.baseCore.getRandomUserAgent()
response = requests.get(url, headers=self.headers, verify=False, proxies=ip)
time.sleep(random.randint(3, 5))
content = response.content.decode('utf-8')
content = self.baseCore.getSubStr(content, '{', '}')
retJson = json.loads(content)
dataList= retJson['data']['diff']
for dataIndex in range(len(dataList)):
gpdm=dataList[dataIndex]['f12']
name=dataList[dataIndex]['f14']
gpdmListPage.append(gpdm)
selectSql = f"select count(1) from gpdm where gpdm='{gpdm}' and name='{name}'"
self.cursor.execute(selectSql)
count = self.cursor.fetchone()[0]
if count>0:
self.log.info(f"{gpdm}-------{name}---已经存在")
continue
else:
self.log.info(f"{gpdm}-------{name}---新增")
insertSql= f"insert into gpdm(gpdm,name,state,create_date) values ('{gpdm}','{name}',1,now())"
self.cursor.execute(insertSql)
self.cnx.commit()
response.close()
self.log.info(f"【{pageIndex}/{totalPage}】-----------end,耗时{self.baseCore.getTimeCost(start, time.time())}")
# break
return gpdmListPage
except Exception as e:
self.log.info(f"------第{num}次出错---{e}")
continue
exception = f'第{pageIndex}页链接失败'
state = 0 state = 0
takeTime = baseCore.getTimeCost(start_time, time.time()) takeTime = self.baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception) self.baseCore.recordLog('', self.taskType, state, takeTime, '', exception)
log.info(f"股票代码总数-----------{total},请检查") return gpdmListPage
return
log.info(f"股票代码总数-----------{total}")
if (total % pageSize == 0):
totalPage = total // pageSize
else:
totalPage = total // pageSize + 1
for pageIndex in range(1, totalPage + 1):
getPageDta(pageIndex,pageSize,totalPage)
state = 1
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', '')
# 释放资源
cursor.close()
cnx.close()
baseCore.close()
def doJob(self):
    """Walk every page of the stock list and return all collected codes.

    Asks ``getTotal`` for the number of entries, derives the page count for
    a page size of 20, calls ``getPageDta`` for each page and concatenates
    the per-page code lists. The outcome is written through
    ``baseCore.recordLog`` (state 0 when the total is zero, state 1 after all
    pages were visited).

    Returns:
        list: codes from all pages in page order. Empty when ``getTotal``
        reports 0, so callers can always iterate the result.
    """
    pageSize = 20
    start_time = time.time()
    gpdmList = []
    try:
        total = self.getTotal(pageSize, start_time)
        if total == 0:
            exception = '股票代码总数为零'
            state = 0
            takeTime = self.baseCore.getTimeCost(start_time, time.time())
            self.baseCore.recordLog('', self.taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
            self.log.info(f"股票代码总数-----------{total},请检查")
            return gpdmList
        self.log.info(f"股票代码总数-----------{total}")
        # Ceiling division: a partially filled last page still counts.
        totalPage = (total + pageSize - 1) // pageSize
        for pageIndex in range(1, totalPage + 1):
            gpdmListPage = self.getPageDta(pageIndex, pageSize, totalPage, gpdmList)
            if gpdmListPage:
                gpdmList.extend(gpdmListPage)
        state = 1
        takeTime = self.baseCore.getTimeCost(start_time, time.time())
        self.baseCore.recordLog('', self.taskType, state, takeTime, '', '')
        return gpdmList
    finally:
        # Release resources on every exit path, including the early return.
        self.cursor.close()
        self.cnx.close()
        self.baseCore.close()
#
# if __name__ == '__main__':
# doJob()
"""
企业上市信息:只有上市的企业才能入企业库,未上市企业跳过采集步骤。退市企业标注为0
"""
import json
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import urllib3
from base.BaseCore import BaseCore
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from gpdm import Gpdm
# Collect listing information for every stock code returned by Gpdm.doJob():
# the quote page (via Selenium) gives the listing status, two JSON endpoints
# give the company details. One dict per company is appended to list_all_info.
baseCore = BaseCore()
chromedriver = "./chromedriver"
browser = webdriver.Chrome(chromedriver)
taskType = '上市信息/东方财富网'
gpdm = Gpdm()
gpdmList = gpdm.doJob()
log = baseCore.getLogger()
error_list = []
list_all_info = []

# Exchange prefix for the URLs, keyed by the first character of the code:
# 0/2/3 -> Shenzhen, 6/9 -> Shanghai, 8/4 -> Beijing. Anything else is skipped.
_PREFIX_BY_FIRST_CHAR = {
    '0': 'sz', '2': 'sz', '3': 'sz',
    '6': 'sh', '9': 'sh',
    '8': 'bj', '4': 'bj',
}
# Value of the "exchange" field by keyword found in the exchange name, and by
# URL prefix as a last resort.
_EXCHANGE_BY_KEYWORD = (('上海', '2'), ('深圳', '3'), ('北京', '1'))
_EXCHANGE_BY_PREFIX = {'sh': '2', 'sz': '3', 'bj': '1'}
# Stock category (1 = A share, 2 = B share, 3 = NEEQ, 4 = H share); when
# several keywords match, the later entry wins.
_CATEGORY_BY_KEYWORD = (('A', '1'), ('B', '2'), ('新三板', '3'), ('H', '4'))
_REQUEST_TIMEOUT = 30  # seconds; keeps one stalled request from hanging the run


def _record_failure(code, started, log_message, detail):
    """Log a failed company, remember its code and store the failure record."""
    error_list.append(code)
    log.info(log_message)
    takeTime = baseCore.getTimeCost(started, time.time())
    baseCore.recordLog('', taskType, 0, takeTime, '', detail)


def _last_match(text, table):
    """Return the value of the last (keyword, value) pair whose keyword is in text, else None."""
    matched = None
    if text:
        for keyword, value in table:
            if keyword in text:
                matched = value
    return matched


def _exchange_code(trade_market, security_type, prefix):
    """Resolve the exchange code from the market name, then the security type, then the prefix."""
    code = _last_match(trade_market, _EXCHANGE_BY_KEYWORD)
    if code is None and security_type and '北京' in security_type:
        code = '1'
    if code is None:
        code = _EXCHANGE_BY_PREFIX[prefix]
    return code


# NOTE(review): kept from the original; confirm that assigning this after
# import actually changes the retry behaviour of requests.get().
requests.adapters.DEFAULT_RETRIES = 5

try:
    # doJob() may hand back None when no codes were found; treat it as empty.
    for com_code1 in gpdmList or []:
        start = time.time()
        prefix = _PREFIX_BY_FIRST_CHAR.get(com_code1[:1])
        if prefix is None:
            # Unknown first character (including 'A'): there is no valid URL
            # to query, and reusing the previous company's code would be wrong.
            log.info(f'======{com_code1}====无法识别交易所前缀,跳过=====')
            continue
        com_code = prefix + com_code1
        log.info(f'======开始采集{com_code}======')
        url = f'https://quote.eastmoney.com/{com_code}.html'
        url_1 = f'https://emweb.eastmoney.com/PC_HSF10/CompanySurvey/PageAjax?code={com_code}'
        url_2 = f'https://emweb.eastmoney.com/PC_HSF10/BusinessAnalysis/PageAjax?code={com_code}'

        # Listing status comes from the rendered quote page.
        try:
            browser.get(url)
            # Fixed wait before reading the page source (presumably for
            # client-side rendering).
            time.sleep(8)
            soup_t = BeautifulSoup(browser.page_source, 'html.parser')
            result = soup_t.find('div', class_='quote_quotenums').text
        except Exception as e:
            _record_failure(com_code, start, f'={com_code}===解析上市状态失败=====',
                            f'{com_code}解析上市状态失败--e:{e}')
            print('error')
            # Without a status there is no valid "listed" flag for this company.
            continue
        if result == '未上市':
            continue
        tag = 0 if result == '已退市' else 1

        try:
            json_1 = requests.get(url_1, verify=False, timeout=_REQUEST_TIMEOUT).json()
            json_2 = requests.get(url_2, verify=False, timeout=_REQUEST_TIMEOUT).json()
        except Exception as e:
            _record_failure(com_code, start, f'====={com_code}=====请求企业信息失败======',
                            f'{com_code}请求企业信息失败--e:{e}')
            continue

        try:
            profile = json_1['jbzl'][0]
            jys = profile['TRADE_MARKET']
        except Exception as e:
            _record_failure(com_code, start, f'====={com_code}=====解析交易所失败======',
                            f'{com_code}解析交易所失败--e:{e}')
            continue

        try:
            short_name = profile['STR_NAMEA']
            zhengquan_type = profile['SECURITY_TYPE']
            id_code = profile['REG_NUM']
            dongcai = profile['EM2016']
            zhengjian = profile['INDUSTRYCSRC1']
            zhuyingfanwei = json_2['zyfw'][0]['BUSINESS_SCOPE']
        except (KeyError, IndexError, TypeError) as e:
            _record_failure(com_code, start, f'====={com_code}=====解析企业信息失败======',
                            f'{com_code}解析企业信息失败--e:{e}')
            continue

        jys_code = _exchange_code(jys, zhengquan_type, prefix)
        category = _last_match(zhengquan_type, _CATEGORY_BY_KEYWORD)
        if category is None:
            # Sent as null rather than repeating the previous company's category.
            log.info(f'====={com_code}=====未识别的证券类型:{zhengquan_type}======')

        try:
            shangshishijian = json_1['fxxg'][0]['LISTING_DATE'][:10]
        except (KeyError, IndexError, TypeError):
            shangshishijian = ''

        dic_cwsj = {
            "exchange": jys_code,
            "category": category,  # stock type (1 = A, 2 = B, 3 = NEEQ, 4 = H)
            'listed': tag,
            "listingDate": shangshishijian,
            "securitiesCode": com_code1,
            "securitiesShortName": short_name,
            "securitiesType": zhengquan_type,
            "socialCreditCode": id_code,
            "businessScope": zhuyingfanwei,
            "eastIndustry": dongcai,
            "csrcIndustry": zhengjian
        }
        list_all_info.append(dic_cwsj)
        log.info(f'======{com_code}====采集成功=====')
finally:
    # Always shut the browser down, even when the loop aborts.
    browser.quit()
# Save the collected records through the HTTP API, 100 records per request.
try:
    for num in range(0, len(list_all_info), 100):
        json_updata = json.dumps(list_all_info[num:num+100])
        try:
            response = requests.post('http://114.115.236.206:8088/sync/enterpriseIpo', data=json_updata, timeout=300,
                                     verify=False)
        except Exception as e:
            # Keep going with the next batch; there is no response to print.
            print(e)
            print("{}:到:{}".format(num, num + 100))
        else:
            print("{}:到:{}".format(num, num + 100))
            print(response.text)
finally:
    # BaseCore asks callers to invoke close() before the program exits.
    baseCore.close()
...@@ -212,76 +212,30 @@ def get_info(sid,json_search,origin,url_,info_source_code,page): ...@@ -212,76 +212,30 @@ def get_info(sid,json_search,origin,url_,info_source_code,page):
continue continue
return list_all_info,num_caiji return list_all_info,num_caiji
def job(count,key): def RequestUrl(dic_url,token,key):
# 刷新浏览器并获取当前token和cookie
token, cookies = flushAndGetToken(list_b)
log.info('===========获取公众号============')
start_ = time.time() start_ = time.time()
#todo:redis中数据 pop一条
infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
if infoSourceCode == 'None' or infoSourceCode == None:
#当一次采集完之后,重新插入数据并等待插入完成
getFromSql()
time.sleep(20)
log.info(f'========本次公众号已采集完毕,共采集{count}个公众号=========总耗时:{baseCore.getTimeCost(start_,time.time())}')
return count
sql = f"SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = '{infoSourceCode}' "
# '一带一路百人论坛'
# sql = f"-- SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = 'IN-20220609-57436' "
cursor.execute(sql)
row = cursor.fetchone()
dic_url = {
'url_': row[0],
'sid': row[1],
'name': row[2],
'info_source_code': row[3],
'biz': ''
}
log.info('===========获取biz==========')
s.cookies.update(cookies)
s.keep_alive = False
url_ = dic_url['url_'] url_ = dic_url['url_']
origin = dic_url['name'] origin = dic_url['name']
info_source_code = dic_url['info_source_code'] info_source_code = dic_url['info_source_code']
sid = dic_url['sid'] sid = dic_url['sid']
try: biz = dic_url['biz']
biz = url_.split('__biz=')[1].split('==&')[0].split('=')[0]
dic_url['biz'] = biz
except Exception as e:
log.info(f'---公众号--{origin}---biz错误')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
error = [
origin,
url_,
info_source_code,
e,
'biz错误',
time_now
]
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(error))
cnx_.commit()
return count
fakeid = biz + '==' fakeid = biz + '=='
url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1' url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1'
#获取页数 ret = -1
json_search = ''
# 获取页数
try: try:
# ip = baseCore.get_proxy() # ip = baseCore.get_proxy()
json_search = s.get(url_search, headers=headers, json_search = s.get(url_search, headers=headers,
verify=False).json() # , proxies=ip, verify=False verify=False).json() # , proxies=ip, verify=False
str_t = json.dumps(json_search) str_t = json.dumps(json_search)
time.sleep(1) time.sleep(1)
except Exception as e: except Exception as e:
log.error(f'===公众号{origin}请求失败!当前时间:{baseCore.getNowTime(1)}======={e}===') log.error(f'===公众号{origin}请求失败!当前时间:{baseCore.getNowTime(1)}======={e}===')
rePutIntoR(info_source_code) rePutIntoR(info_source_code)
time.sleep(20) time.sleep(20)
return count return json_search,ret
ret = json_search['base_resp']['ret'] ret = json_search['base_resp']['ret']
# {"base_resp": {"ret": 200003, "err_msg": "invalid session"}} # {"base_resp": {"ret": 200003, "err_msg": "invalid session"}}
# TODO:需要判断返回值,根据返回值判断是封号还是biz错误 # TODO:需要判断返回值,根据返回值判断是封号还是biz错误
...@@ -304,7 +258,7 @@ def job(count,key): ...@@ -304,7 +258,7 @@ def job(count,key):
# browser_run.refresh() # browser_run.refresh()
r.set(key, 50) r.set(key, 50)
r.expire(key, 5400) r.expire(key, 5400)
return count return json_search,ret
elif ret == 200002: elif ret == 200002:
# 公众号链接错误 保存库里 记录错误信息及错误类型 # 公众号链接错误 保存库里 记录错误信息及错误类型
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
...@@ -320,7 +274,7 @@ def job(count,key): ...@@ -320,7 +274,7 @@ def job(count,key):
cursor_.execute(insertSql, tuple(error)) cursor_.execute(insertSql, tuple(error))
cnx_.commit() cnx_.commit()
log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}') log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}')
return count return json_search,ret
elif ret == 200003: elif ret == 200003:
# 无效的session # 无效的session
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
...@@ -336,7 +290,7 @@ def job(count,key): ...@@ -336,7 +290,7 @@ def job(count,key):
cursor_.execute(insertSql, tuple(error)) cursor_.execute(insertSql, tuple(error))
cnx_.commit() cnx_.commit()
log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}') log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}')
return count return json_search,ret
else: else:
log.info(f'----其他情况-----{json_search}---公众号{origin}------') log.info(f'----其他情况-----{json_search}---公众号{origin}------')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
...@@ -351,80 +305,132 @@ def job(count,key): ...@@ -351,80 +305,132 @@ def job(count,key):
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)" insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(error)) cursor_.execute(insertSql, tuple(error))
cnx_.commit() cnx_.commit()
return json_search,ret
def job(count,key):
# 刷新浏览器并获取当前token和cookie
token, cookies = flushAndGetToken(list_b)
log.info('===========获取公众号============')
start_ = time.time()
#todo:redis中数据 pop一条
infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
if infoSourceCode == 'None' or infoSourceCode == None:
#当一次采集完之后,重新插入数据并等待插入完成
getFromSql()
time.sleep(20)
log.info(f'========本次公众号已采集完毕,共采集{count}个公众号=========总耗时:{baseCore.getTimeCost(start_,time.time())}')
return count return count
try:
Max_data = int(json_search['app_msg_cnt'])
Max_page = int(int(json_search['app_msg_cnt']) / 5)
if int(json_search['app_msg_cnt']) % 5 != 0:
Max_page = Max_page + 1
else:
Max_page = Max_page
except:
Max_page = 1
Max_data = 5
log.info(f'开始采集{origin}-----共{Max_page}页---{Max_data}条数据-----')
for i in range(0, Max_data, 5):
url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin={i}&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1'
# url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1'
# https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid=MzAwNDA5Njc1Mg==&type=9&query=&token=550883192&lang=zh_CN&f=json&ajax=1
try:
# ip = get_proxy()[random.randint(0, 3)]
json_search = s.get(url_search, headers=headers,
verify=False).json() # , proxies=ip, verify=False
str_t = json.dumps(json_search)
time.sleep(2)
except Exception as e:
log.error(f'===公众号{origin}请求失败!当前时间:{baseCore.getNowTime(1)}======={e}===')
rePutIntoR(info_source_code)
return count
list_all = json_search['app_msg_list'] sql = f"SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = '{infoSourceCode}' "
# '一带一路百人论坛'
# sql = f"-- SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = 'IN-20220609-57436' "
cursor.execute(sql)
row = cursor.fetchone()
dic_url = {
'url_': row[0],
'sid': row[1],
'name': row[2],
'info_source_code': row[3],
'biz': ''
}
log.info('===========获取biz==========')
s.cookies.update(cookies)
s.keep_alive = False
url_ = dic_url['url_']
origin = dic_url['name']
info_source_code = dic_url['info_source_code']
sid = dic_url['sid']
try:
biz = url_.split('__biz=')[1].split('==&')[0].split('=')[0]
dic_url['biz'] = biz
except Exception as e:
log.info(f'---公众号--{origin}---biz错误')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
error = [
origin,
url_,
info_source_code,
e,
'biz错误',
time_now
]
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(error))
cnx_.commit()
return count
json_search,ret = RequestUrl(dic_url,token,key)
if ret == 0:
try: try:
#开始采集每一页文章信息 Max_data = int(json_search['app_msg_cnt'])
page = int(i/5+1) Max_page = int(int(json_search['app_msg_cnt']) / 5)
log.info(f'---{origin}---------开始采集第{page}个分页-----------') if int(json_search['app_msg_cnt']) % 5 != 0:
list_all_info,num_caiji= get_info(sid,json_search,origin,url_,info_source_code,page) Max_page = Max_page + 1
print(f'----第{page}页采集到文章个数-----{len(list_all_info)}------{num_caiji}-------') else:
time.sleep(2) Max_page = Max_page
if len(list_all_info) != 0: except:
count += 1 Max_page = 1
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) Max_data = 5
success = [ log.info(f'开始采集{origin}-----共{Max_page}页---{Max_data}条数据-----')
origin, for i in range(0, Max_data, 5):
url_, json_search,ret = RequestUrl(dic_url,token,key)
info_source_code, if ret == 0:
'采集成功', pass
num_caiji,
time_now,
]
#成功信息保存
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,success_info,success_num,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(success))
cnx_.commit()
# 该公众号的所有文章采集完成
log.info(f'---第{page}页采集到文章个数---{len(list_all_info)}---{num_caiji}---耗时{baseCore.getTimeCost(start_,time.time())}')
else: else:
log.info(f'----第{page}页采集到文章个数{num_caiji}--网址已存在!-----耗时{baseCore.getTimeCost(start_,time.time())}')
return count return count
except Exception as e: if json_search != '':
# json解析该公众号成功但采集数据失败 # list_all = json_search['app_msg_list']
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) try:
false = [ #开始采集每一页文章信息
origin, page = int(i/5+1)
url_, log.info(f'---{origin}---------开始采集第{page}个分页-----------')
info_source_code, list_all_info,num_caiji= get_info(sid,json_search,origin,url_,info_source_code,page)
e, print(f'----第{page}页采集到文章个数-----{len(list_all_info)}------{num_caiji}-------')
'采集失败', time.sleep(2)
time_now if len(list_all_info) != 0:
] count += 1
# 失败信息保存 time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)" success = [
cursor_.execute(insertSql, tuple(false)) origin,
cnx_.commit() url_,
log.info(f'{fakeid}、公众号:{origin}采集失败!!!!!!耗时{baseCore.getTimeCost(start_, time.time())}') info_source_code,
count += 1 '采集成功',
log.info(f'{fakeid}、公众号{origin}:采集成功!、已采集{count}个公众号、耗时{baseCore.getTimeCost(start_, time.time())}') num_caiji,
time_now,
]
#成功信息保存
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,success_info,success_num,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(success))
cnx_.commit()
# 该公众号的所有文章采集完成
log.info(f'---第{page}页采集到文章个数---{len(list_all_info)}---{num_caiji}---耗时{baseCore.getTimeCost(start_,time.time())}')
else:
log.info(f'----第{page}页采集到文章个数{num_caiji}--网址已存在!-----耗时{baseCore.getTimeCost(start_,time.time())}')
return count
except Exception as e:
# json解析该公众号成功但采集数据失败
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
false = [
origin,
url_,
info_source_code,
e,
'采集失败',
time_now
]
# 失败信息保存
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(false))
cnx_.commit()
log.info(f'{biz}、公众号:{origin}采集失败!!!!!!耗时{baseCore.getTimeCost(start_, time.time())}')
count += 1
log.info(f'{biz}、公众号{origin}:采集成功!、已采集{count}个公众号、耗时{baseCore.getTimeCost(start_, time.time())}')
return count
else:
return count
time.sleep(2) time.sleep(2)
return count return count
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论