提交 4c6ee47a 作者: 刘伟刚

Merge remote-tracking branch 'origin/master'

...@@ -15,6 +15,11 @@ from selenium.webdriver.chrome.service import Service ...@@ -15,6 +15,11 @@ from selenium.webdriver.chrome.service import Service
from openpyxl import Workbook from openpyxl import Workbook
import langid import langid
#创建连接池
import pymysql
from pymysql import connections
from DBUtils.PooledDB import PooledDB
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源 # 注意 程序退出前 调用BaseCore.close() 关闭相关资源
class BaseCore: class BaseCore:
...@@ -233,6 +238,20 @@ class BaseCore: ...@@ -233,6 +238,20 @@ class BaseCore:
# 连接到Redis # 连接到Redis
self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6) self.r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
self.pool_caiji = PooledDB(
creator=pymysql,
maxconnections=5,
mincached=2,
maxcached=5,
blocking=True,
host='114.115.159.144',
port=3306,
user='root',
password='zzsn9988',
database='caiji',
charset='utf8mb4'
)
def close(self): def close(self):
try: try:
self.__cursor_proxy.close() self.__cursor_proxy.close()
...@@ -434,32 +453,66 @@ class BaseCore: ...@@ -434,32 +453,66 @@ class BaseCore:
# 根据社会信用代码获取企业信息 # 根据社会信用代码获取企业信息
def getInfomation(self, social_code): def getInfomation(self, social_code):
data = []
try:
sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'" sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'"
self.cursor.execute(sql) # self.cursor.execute(sql)
data = self.cursor.fetchone() # data = self.cursor.fetchone()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql)
data = cursor.fetchone()
data = list(data)
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('=========数据库操作失败========')
return data return data
# 更新企业采集次数 # 更新企业采集次数
def updateRun(self, social_code, runType, count): def updateRun(self, social_code, runType, count):
try:
sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'" sql_update = f"UPDATE EnterpriseInfo SET {runType} = {count} WHERE SocialCode = '{social_code}'"
self.cursor.execute(sql_update) # self.cursor.execute(sql_update)
self.cnx.commit() # self.cnx.commit()
conn = self.pool_caiji.connection()
cursor = conn.cursor()
cursor.execute(sql_update)
conn.commit()
cursor.close()
conn.close()
except:
log = self.getLogger()
log.info('======更新数据库失败======')
# 保存日志入库 # 保存日志入库
def recordLog(self, xydm, taskType, state, takeTime, url, e): def recordLog(self, xydm, taskType, state, takeTime, url, e):
try:
createTime = self.getNowTime(1) createTime = self.getNowTime(1)
ip = self.getIP() ip = self.getIP()
pid = self.getPID() pid = self.getPID()
sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)" sql = "INSERT INTO LogTable(SocialCode,TaskType,state,TakeTime,url,CreateTime,ProcessIp,PID,Exception) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e] values = [xydm, taskType, state, takeTime, url, createTime, ip, pid, e]
try: # try:
self.cursor.execute(sql, values) # self.cursor.execute(sql, values)
except Exception as e: # except Exception as e:
print(e) # print(e)
self.cnx.commit() # self.cnx.commit()
cnn = self.pool_caiji.connection()
cursor = cnn.cursor()
cursor.execute(sql,values)
cnn.commit()
cursor.close()
cnn.close()
except:
log = self.getLogger()
log.info('======保存日志失败=====')
#获取企查查token #获取企查查token
def GetToken(self): def GetToken(self):
#获取企查查token #获取企查查token
query = "select token from QCC_token " query = "select token from QCC_token "
# token = '67ec7402166df1da84ae83c4b95cefc0' # 需要隔两个小时左右抓包修改 # token = '67ec7402166df1da84ae83c4b95cefc0' # 需要隔两个小时左右抓包修改
...@@ -491,8 +544,8 @@ class BaseCore: ...@@ -491,8 +544,8 @@ class BaseCore:
# return combined_data # return combined_data
#对失败或者断掉的企业 重新放入redis #对失败或者断掉的企业 重新放入redis
def rePutIntoR(self,item): def rePutIntoR(self,key,item):
self.r.rpush('NewsEnterprise:gwqy_socialCode', item) self.r.rpush(key, item)
#增加计数器的值并返回增加后的值 #增加计数器的值并返回增加后的值
def incrSet(self,key): def incrSet(self,key):
...@@ -518,3 +571,10 @@ class BaseCore: ...@@ -518,3 +571,10 @@ class BaseCore:
...@@ -170,6 +170,8 @@ def BaseInfoEnterprise_task(): ...@@ -170,6 +170,8 @@ def BaseInfoEnterprise_task():
print('定时采集异常', e) print('定时采集异常', e)
pass pass
#企业核心人员
#东方财富网财务数据 #东方财富网财务数据
def FinanceFromEast(): def FinanceFromEast():
cnx_,cursor_ = cnn11() cnx_,cursor_ = cnn11()
...@@ -183,6 +185,7 @@ def FinanceFromEast(): ...@@ -183,6 +185,7 @@ def FinanceFromEast():
r.rpush('FinanceFromEast:finance_socialCode', item) r.rpush('FinanceFromEast:finance_socialCode', item)
close11(cnx_,cursor_) close11(cnx_,cursor_)
#东方财富网财务数据定时任务
def FinanceFromEase_task(): def FinanceFromEase_task():
# 实例化一个调度器 # 实例化一个调度器
scheduler = BlockingScheduler() scheduler = BlockingScheduler()
...@@ -251,7 +254,7 @@ def AnnualEnterpriseXueQ_task(): ...@@ -251,7 +254,7 @@ def AnnualEnterpriseXueQ_task():
def BaseInfoEnterpriseAbroad(): def BaseInfoEnterpriseAbroad():
cnx,cursor = connectSql() cnx,cursor = connectSql()
# 获取国外企业 # 获取国外企业
gn_query = "select id from EnterpriseInfo where Place = '2' limit 10 " gn_query = "select SocialCode from EnterpriseInfo where Place = '2' "
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
...@@ -334,13 +337,13 @@ if __name__ == "__main__": ...@@ -334,13 +337,13 @@ if __name__ == "__main__":
# NoticeEnterprise() # NoticeEnterprise()
# AnnualEnterpriseIPO() # AnnualEnterpriseIPO()
# AnnualEnterprise() # AnnualEnterprise()
# BaseInfoEnterpriseAbroad() BaseInfoEnterpriseAbroad()
NewsEnterprise_task() # NewsEnterprise_task()
# NewsEnterprise() # NewsEnterprise()
# BaseInfoEnterprise() # BaseInfoEnterprise()
# FBS() # FBS()
NoticeEnterprise_task() # NoticeEnterprise_task()
AnnualEnterprise_task() # AnnualEnterprise_task()
# NoticeEnterprise() # NoticeEnterprise()
# FinanceFromEast() # FinanceFromEast()
log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===') log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===')
......
...@@ -9,111 +9,125 @@ import requests ...@@ -9,111 +9,125 @@ import requests
from base.BaseCore import BaseCore from base.BaseCore import BaseCore
baseCore = BaseCore() class Gpdm(object):
log = baseCore.getLogger() def __int__(self):
headers={ pass
baseCore = BaseCore()
log = baseCore.getLogger()
headers={
'X-AUTH-TOKEN':'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzY4MzgxNjk4NCIsImlhdCI6MTY5MDE3ODYyOCwiZXhwIjoxNjkyNzcwNjI4fQ.VV3Zoa4RM5nVN8UXBc0-81KMGqLzTOme6rButeETGfFQi7p5h4ydg8CFrEsizr_iFwB3_BVaKR2o2xR-M4ipbQ', 'X-AUTH-TOKEN':'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzY4MzgxNjk4NCIsImlhdCI6MTY5MDE3ODYyOCwiZXhwIjoxNjkyNzcwNjI4fQ.VV3Zoa4RM5nVN8UXBc0-81KMGqLzTOme6rButeETGfFQi7p5h4ydg8CFrEsizr_iFwB3_BVaKR2o2xR-M4ipbQ',
'X-TYCID':'77e997401d5f11ee9e91d5a0fd3c0b83', 'X-TYCID':'77e997401d5f11ee9e91d5a0fd3c0b83',
'version':'TYC-Web', 'version':'TYC-Web',
'Content-Type':'application/json;charset=UTF-8' 'Content-Type':'application/json;charset=UTF-8'
} }
cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4') cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',charset='utf8mb4')
cursor= cnx.cursor() cursor= cnx.cursor()
taskType = '股票代码/东方财富网' taskType = '股票代码/东方财富网'
def getTotal(pageSize,start): def getTotal(self,pageSize,start):
total=0 total=0
for num in range(3): for num in range(3):
try: try:
url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn=1&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={baseCore.getNowTime(3)}"; url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn=1&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={self.baseCore.getNowTime(3)}"
ip = baseCore.get_proxy() ip = self.baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent() self.headers['User-Agent'] = self.baseCore.getRandomUserAgent()
response = requests.get(url, headers=headers, verify=False, proxies=ip) response = requests.get(url, headers=self.headers, verify=False, proxies=ip)
time.sleep(random.randint(3, 5)) time.sleep(random.randint(3, 5))
# jQuery1124020359136113854692_1688967721474({"rc":0,"rt":6,"svr":182993358,"lt":1,"full":1,"dlmkts":"","data":{"total":5488,"diff":[{"f1":2,"f2":35.37,"f3":130.87,"f4":20.05,"f5":505082,"f6":1561753667.0,"f7":72.85,"f8":73.63,"f9":79.87,"f10":"-","f11":-0.34,"f12":"603119","f13":1,"f14":"N\xe6\xb5\x99\xe8\x8d\xa3","f15":37.54,"f16":26.38,"f17":28.88,"f18":15.32,"f20":9903600000,"f21":2426214099,"f22":-0.03,"f23":6.46,"f24":130.87,"f25":130.87,"f62":503279629.0,"f115":70.77,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":70.7,"f3":26.98,"f4":15.02,"f5":278191,"f6":2015432017.69,"f7":19.83,"f8":73.92,"f9":44.38,"f10":"-","f11":0.41,"f12":"301371","f13":0,"f14":"N\xe6\x95\xb7\xe5\xb0\x94\xe4\xbd\xb3","f15":80.04,"f16":69.0,"f17":80.0,"f18":55.68,"f20":28285656000,"f21":2660599297,"f22":0.11,"f23":5.64,"f24":26.98,"f25":26.98,"f62":476657031.0,"f115":33.47,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.6,"f3":20.0,"f4":4.6,"f5":135775,"f6":348360366.27,"f7":21.04,"f8":33.94,"f9":212.8,"f10":3.1,"f11":0.0,"f12":"301316","f13":0,"f14":"\xe6\x85\xa7\xe5\x8d\x9a\xe4\xba\x91\xe9\x80\x9a","f15":27.6,"f16":22.76,"f17":23.11,"f18":23.0,"f20":11040276000,"f21":1104274261,"f22":0.0,"f23":11.68,"f24":18.1,"f25":44.43,"f62":107348086.0,"f115":124.43,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":43.62,"f3":20.0,"f4":7.27,"f5":75204,"f6":311935188.44,"f7":21.79,"f8":29.67,"f9":56.11,"f10":13.27,"f11":0.0,"f12":"301289","f13":0,"f14":"\xe5\x9b\xbd\xe7\xbc\x86\xe6\xa3\x80\xe6\xb5\x8b","f15":43.62,"f16":35.7,"f17":36.61,"f18":36.35,"f20":3402360000,"f21":1105762682,"f22":0.0,"f23":3.86,"f24":28.26,"f25":35.55,"f62":80534335.0,"f115":47.25,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":40.98,"f3":20.0,"f4":6.83,"f5":118733,"f6":464542197.42,"f7":20.73,"f8":40.73,"f9":56.02,"f10":2.57,"f11":0.0,"f12":"300881","f13":0,"f14":"\xe7\x9b\x9b\xe5\xbe\xb7\xe9\x91\xab\xe6\xb3\xb0","f15":40.98,"f16":33.9,"f17":33.9,"f18":34.15,"f20":4507800000,"f21":1194567000,"f22":0.0,"f23":5.48,"f24":23.81,"f25":42.05,"f62":16802132.0,"f115":56.01,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":21.0,"f3":19.45,"f4":3.42,"f5":50301,"f6":97244231.42,"f7":16.1,"f8":16.87,"f9":46.64,"f10":1.95,"f11":1.35,"f12":"873576","f13":0,"f14":"\xe5\xa4\xa9\xe5\x8a\x9b\xe5\xa4\x8d\xe5\x90\x88","f15":21.0,"f16":18.17,"f17":18.18,"f18":17.58,"f20":2247000000,"f21":626162250,"f22":0.72,"f23":5.16,"f24":50.21,"f25":50.21,"f62":11286257.0,"f115":29.96,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":76.8,"f3":16.21,"f4":10.71,"f5":153518,"f6":1100431330.98,"f7":23.24,"f8":73.58,"f9":190.79,"f10":1.6,"f11":0.27,"f12":"301315","f13":0,"f14":"\xe5\xa8\x81\xe5\xa3\xab\xe9\xa1\xbf","f15":79.31,"f16":63.95,"f17":63.95,"f18":66.09,"f20":6758400000,"f21":1602347750,"f22":0.17,"f23":7.03,"f24":137.84,"f25":137.84,"f62":112419255.0,"f115":102.68,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":72.99,"f3":16.17,"f4":10.16,"f5":106236,"f6":714127513.24,"f7":23.68,"f8":52.41,"f9":123.41,"f10":1.71,"f11":0.4,"f12":"301141","f13":0,"f14":"\xe4\xb8\xad\xe7\xa7\x91\xe7\xa3\x81\xe4\xb8\x9a","f15":74.88,"f16":60.0,"f17":62.85,"f18":62.83,"f20":6466528467,"f21":1479619267,"f22":0.07,"f23":3.14,"f24":96.74,"f25":78.02,"f62":-26422445.0,"f115":87.31,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.3,"f3":12.81,"f4":3.1,"f5":171865,"f6":442577004.48,"f7":15.25,"f8":7.3,"f9":-156.2,"f10":0.94,"f11":-0.15,"f12":"300551","f13":0,"f14":"\xe5\x8f\xa4\xe9\xb3\x8c\xe7\xa7\x91\xe6\x8a\x80","f15":27.55,"f16":23.86,"f17":24.2,"f18":24.2,"f20":9439055235,"f21":6427896275,"f22":-0.11,"f23":8.93,"f24":48.37,"f25":133.73,"f62":16013778.0,"f115":-126.12,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":84.3,"f3":12.18,"f4":9.15,"f5":124022,"f6":989104033.4,"f7":17.33,"f8":64.35,"f9":99.53,"f10":1.15,"f11":0.19,"f12":"301398","f13":0,"f14":"\xe6\x98\x9f\xe6\xba\x90\xe5\x8d\x93\xe9\x95\x81","f15":86.5,"f16":73.48,"f17":75.48,"f18":75.15,"f20":6744000000,"f21":1624735481,"f22":-0.04,"f23":6.81,"f24":157.88,"f25":173.35,"f62":-26812467.0,"f115":105.29,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":34.85,"f3":10.95,"f4":3.44,"f5":27626,"f6":95746251.0,"f7":9.87,"f8":7.18,"f9":-37.27,"f10":9.74,"f11":-0.03,"f12":"688622","f13":1,"f14":"\xe7\xa6\xbe\xe4\xbf\xa1\xe4\xbb\xaa\xe5\x99\xa8","f15":36.0,"f16":32.9,"f17":35.0,"f18":31.41,"f20":2439416569,"f21":1341637317,"f22":-0.03,"f23":4.74,"f24":-5.76,"f25":7.23,"f62":18152096.0,"f115":-36.22,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":87.8,"f3":10.66,"f4":8.46,"f5":22037,"f6":184811228.0,"f7":11.33,"f8":6.52,"f9":116.36,"f10":4.84,"f11":1.09,"f12":"688776","f13":1,"f14":"\xe5\x9b\xbd\xe5\x85\x89\xe7\x94\xb5\xe6\xb0\x94","f15":87.99,"f16":79.0,"f17":79.0,"f18":79.34,"f20":9516064188,"f21":2968587801,"f22":-0.22,"f23":5.39,"f24":-5.88,"f25":-29.79,"f62":2907315.0,"f115":65.69,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.05,"f3":10.22,"f4":0.19,"f5":3258788,"f6":657251653.18,"f7":9.68,"f8":6.48,"f9":-12.82,"f10":3.95,"f11":0.0,"f12":"000413","f13":0,"f14":"\xe4\xb8\x9c\xe6\x97\xad\xe5\x85\x89\xe7\x94\xb5","f15":2.05,"f16":1.87,"f17":1.87,"f18":1.86,"f20":11547137393,"f21":10310048690,"f22":0.0,"f23":0.52,"f24":17.82,"f25":15.82,"f62":213263692.0,"f115":-8.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.7,"f3":10.2,"f4":0.25,"f5":1107878,"f6":291343381.08,"f7":11.84,"f8":7.94,"f9":-19.65,"f10":2.01,"f11":0.0,"f12":"002256","f13":0,"f14":"\xe5\x85\x86\xe6\x96\xb0\xe8\x82\xa1\xe4\xbb\xbd","f15":2.7,"f16":2.41,"f17":2.44,"f18":2.45,"f20":5082512054,"f21":3769280384,"f22":0.0,"f23":4.31,"f24":11.11,"f25":12.97,"f62":96164236.0,"f115":-99.3,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.92,"f3":10.19,"f4":0.27,"f5":1178068,"f6":333498626.0,"f7":9.06,"f8":7.34,"f9":7.63,"f10":1.4,"f11":0.0,"f12":"600239","f13":1,"f14":"\xe4\xba\x91\xe5\x8d\x97\xe5\x9f\x8e\xe6\x8a\x95","f15":2.92,"f16":2.68,"f17":2.69,"f18":2.65,"f20":4688605774,"f21":4688605774,"f22":0.0,"f23":2.89,"f24":28.07,"f25":51.3,"f62":27795948.0,"f115":-16.59,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.15,"f3":10.14,"f4":0.29,"f5":2973491,"f6":920586623.66,"f7":8.74,"f8":28.9,"f9":-7.07,"f10":4.18,"f11":0.0,"f12":"002630","f13":0,"f14":"\xe5\x8d\x8e\xe8\xa5\xbf\xe8\x83\xbd\xe6\xba\x90","f15":3.15,"f16":2.9,"f17":2.95,"f18":2.86,"f20":3719520000,"f21":3240482440,"f22":0.0,"f23":4.9,"f24":26.51,"f25":7.14,"f62":-18293260.0,"f115":-5.07,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.79,"f3":10.11,"f4":0.44,"f5":1857359,"f6":864538200.0,"f7":10.8,"f8":9.31,"f9":24.64,"f10":9.05,"f11":0.0,"f12":"600577","f13":1,"f14":"\xe7\xb2\xbe\xe8\xbe\xbe\xe8\x82\xa1\xe4\xbb\xbd","f15":4.79,"f16":4.32,"f17":4.35,"f18":4.35,"f20":9959122877,"f21":9559956211,"f22":0.0,"f23":2.07,"f24":14.05,"f25":16.26,"f62":161845983.0,"f115":26.21,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.36,"f3":10.1,"f4":0.4,"f5":617159,"f6":264661451.0,"f7":11.62,"f8":2.74,"f9":122.48,"f10":3.79,"f11":0.0,"f12":"601777","f13":1,"f14":"\xe5\x8a\x9b\xe5\xb8\x86\xe7\xa7\x91\xe6\x8a\x80","f15":4.36,"f16":3.9,"f17":3.95,"f18":3.96,"f20":19931840280,"f21":9811962000,"f22":0.0,"f23":1.94,"f24":24.22,"f25":12.95,"f62":41966291.0,"f115":137.9,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.27,"f3":10.1,"f4":0.3,"f5":290547,"f6":93712867.6,"f7":8.08,"f8":2.28,"f9":1394.52,"f10":1.03,"f11":0.0,"f12":"002175","f13":0,"f14":"\xe4\xb8\x9c\xe6\x96\xb9\xe6\x99\xba\xe9\x80\xa0","f15":3.27,"f16":3.03,"f17":3.04,"f18":2.97,"f20":4175072977,"f21":4175040277,"f22":0.0,"f23":8.53,"f24":13.54,"f25":-8.66,"f62":52561839.0,"f115":41.98,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.51,"f3":10.09,"f4":0.23,"f5":1715205,"f6":423246793.0,"f7":10.96,"f8":5.97,"f9":-4.84,"f10":2.8,"f11":0.0,"f12":"600569","f13":1,"f14":"\xe5\xae\x89\xe9\x98\xb3\xe9\x92\xa2\xe9\x93\x81","f15":2.51,"f16":2.26,"f17":2.26,"f18":2.28,"f20":7209777679,"f21":7209777679,"f22":0.0,"f23":1.02,"f24":17.84,"f25":21.26,"f62":88473646.0,"f115":-2.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2}]}}); # jQuery1124020359136113854692_1688967721474({"rc":0,"rt":6,"svr":182993358,"lt":1,"full":1,"dlmkts":"","data":{"total":5488,"diff":[{"f1":2,"f2":35.37,"f3":130.87,"f4":20.05,"f5":505082,"f6":1561753667.0,"f7":72.85,"f8":73.63,"f9":79.87,"f10":"-","f11":-0.34,"f12":"603119","f13":1,"f14":"N\xe6\xb5\x99\xe8\x8d\xa3","f15":37.54,"f16":26.38,"f17":28.88,"f18":15.32,"f20":9903600000,"f21":2426214099,"f22":-0.03,"f23":6.46,"f24":130.87,"f25":130.87,"f62":503279629.0,"f115":70.77,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":70.7,"f3":26.98,"f4":15.02,"f5":278191,"f6":2015432017.69,"f7":19.83,"f8":73.92,"f9":44.38,"f10":"-","f11":0.41,"f12":"301371","f13":0,"f14":"N\xe6\x95\xb7\xe5\xb0\x94\xe4\xbd\xb3","f15":80.04,"f16":69.0,"f17":80.0,"f18":55.68,"f20":28285656000,"f21":2660599297,"f22":0.11,"f23":5.64,"f24":26.98,"f25":26.98,"f62":476657031.0,"f115":33.47,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.6,"f3":20.0,"f4":4.6,"f5":135775,"f6":348360366.27,"f7":21.04,"f8":33.94,"f9":212.8,"f10":3.1,"f11":0.0,"f12":"301316","f13":0,"f14":"\xe6\x85\xa7\xe5\x8d\x9a\xe4\xba\x91\xe9\x80\x9a","f15":27.6,"f16":22.76,"f17":23.11,"f18":23.0,"f20":11040276000,"f21":1104274261,"f22":0.0,"f23":11.68,"f24":18.1,"f25":44.43,"f62":107348086.0,"f115":124.43,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":43.62,"f3":20.0,"f4":7.27,"f5":75204,"f6":311935188.44,"f7":21.79,"f8":29.67,"f9":56.11,"f10":13.27,"f11":0.0,"f12":"301289","f13":0,"f14":"\xe5\x9b\xbd\xe7\xbc\x86\xe6\xa3\x80\xe6\xb5\x8b","f15":43.62,"f16":35.7,"f17":36.61,"f18":36.35,"f20":3402360000,"f21":1105762682,"f22":0.0,"f23":3.86,"f24":28.26,"f25":35.55,"f62":80534335.0,"f115":47.25,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":40.98,"f3":20.0,"f4":6.83,"f5":118733,"f6":464542197.42,"f7":20.73,"f8":40.73,"f9":56.02,"f10":2.57,"f11":0.0,"f12":"300881","f13":0,"f14":"\xe7\x9b\x9b\xe5\xbe\xb7\xe9\x91\xab\xe6\xb3\xb0","f15":40.98,"f16":33.9,"f17":33.9,"f18":34.15,"f20":4507800000,"f21":1194567000,"f22":0.0,"f23":5.48,"f24":23.81,"f25":42.05,"f62":16802132.0,"f115":56.01,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":21.0,"f3":19.45,"f4":3.42,"f5":50301,"f6":97244231.42,"f7":16.1,"f8":16.87,"f9":46.64,"f10":1.95,"f11":1.35,"f12":"873576","f13":0,"f14":"\xe5\xa4\xa9\xe5\x8a\x9b\xe5\xa4\x8d\xe5\x90\x88","f15":21.0,"f16":18.17,"f17":18.18,"f18":17.58,"f20":2247000000,"f21":626162250,"f22":0.72,"f23":5.16,"f24":50.21,"f25":50.21,"f62":11286257.0,"f115":29.96,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":76.8,"f3":16.21,"f4":10.71,"f5":153518,"f6":1100431330.98,"f7":23.24,"f8":73.58,"f9":190.79,"f10":1.6,"f11":0.27,"f12":"301315","f13":0,"f14":"\xe5\xa8\x81\xe5\xa3\xab\xe9\xa1\xbf","f15":79.31,"f16":63.95,"f17":63.95,"f18":66.09,"f20":6758400000,"f21":1602347750,"f22":0.17,"f23":7.03,"f24":137.84,"f25":137.84,"f62":112419255.0,"f115":102.68,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":72.99,"f3":16.17,"f4":10.16,"f5":106236,"f6":714127513.24,"f7":23.68,"f8":52.41,"f9":123.41,"f10":1.71,"f11":0.4,"f12":"301141","f13":0,"f14":"\xe4\xb8\xad\xe7\xa7\x91\xe7\xa3\x81\xe4\xb8\x9a","f15":74.88,"f16":60.0,"f17":62.85,"f18":62.83,"f20":6466528467,"f21":1479619267,"f22":0.07,"f23":3.14,"f24":96.74,"f25":78.02,"f62":-26422445.0,"f115":87.31,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":27.3,"f3":12.81,"f4":3.1,"f5":171865,"f6":442577004.48,"f7":15.25,"f8":7.3,"f9":-156.2,"f10":0.94,"f11":-0.15,"f12":"300551","f13":0,"f14":"\xe5\x8f\xa4\xe9\xb3\x8c\xe7\xa7\x91\xe6\x8a\x80","f15":27.55,"f16":23.86,"f17":24.2,"f18":24.2,"f20":9439055235,"f21":6427896275,"f22":-0.11,"f23":8.93,"f24":48.37,"f25":133.73,"f62":16013778.0,"f115":-126.12,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":84.3,"f3":12.18,"f4":9.15,"f5":124022,"f6":989104033.4,"f7":17.33,"f8":64.35,"f9":99.53,"f10":1.15,"f11":0.19,"f12":"301398","f13":0,"f14":"\xe6\x98\x9f\xe6\xba\x90\xe5\x8d\x93\xe9\x95\x81","f15":86.5,"f16":73.48,"f17":75.48,"f18":75.15,"f20":6744000000,"f21":1624735481,"f22":-0.04,"f23":6.81,"f24":157.88,"f25":173.35,"f62":-26812467.0,"f115":105.29,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":34.85,"f3":10.95,"f4":3.44,"f5":27626,"f6":95746251.0,"f7":9.87,"f8":7.18,"f9":-37.27,"f10":9.74,"f11":-0.03,"f12":"688622","f13":1,"f14":"\xe7\xa6\xbe\xe4\xbf\xa1\xe4\xbb\xaa\xe5\x99\xa8","f15":36.0,"f16":32.9,"f17":35.0,"f18":31.41,"f20":2439416569,"f21":1341637317,"f22":-0.03,"f23":4.74,"f24":-5.76,"f25":7.23,"f62":18152096.0,"f115":-36.22,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":87.8,"f3":10.66,"f4":8.46,"f5":22037,"f6":184811228.0,"f7":11.33,"f8":6.52,"f9":116.36,"f10":4.84,"f11":1.09,"f12":"688776","f13":1,"f14":"\xe5\x9b\xbd\xe5\x85\x89\xe7\x94\xb5\xe6\xb0\x94","f15":87.99,"f16":79.0,"f17":79.0,"f18":79.34,"f20":9516064188,"f21":2968587801,"f22":-0.22,"f23":5.39,"f24":-5.88,"f25":-29.79,"f62":2907315.0,"f115":65.69,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.05,"f3":10.22,"f4":0.19,"f5":3258788,"f6":657251653.18,"f7":9.68,"f8":6.48,"f9":-12.82,"f10":3.95,"f11":0.0,"f12":"000413","f13":0,"f14":"\xe4\xb8\x9c\xe6\x97\xad\xe5\x85\x89\xe7\x94\xb5","f15":2.05,"f16":1.87,"f17":1.87,"f18":1.86,"f20":11547137393,"f21":10310048690,"f22":0.0,"f23":0.52,"f24":17.82,"f25":15.82,"f62":213263692.0,"f115":-8.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.7,"f3":10.2,"f4":0.25,"f5":1107878,"f6":291343381.08,"f7":11.84,"f8":7.94,"f9":-19.65,"f10":2.01,"f11":0.0,"f12":"002256","f13":0,"f14":"\xe5\x85\x86\xe6\x96\xb0\xe8\x82\xa1\xe4\xbb\xbd","f15":2.7,"f16":2.41,"f17":2.44,"f18":2.45,"f20":5082512054,"f21":3769280384,"f22":0.0,"f23":4.31,"f24":11.11,"f25":12.97,"f62":96164236.0,"f115":-99.3,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.92,"f3":10.19,"f4":0.27,"f5":1178068,"f6":333498626.0,"f7":9.06,"f8":7.34,"f9":7.63,"f10":1.4,"f11":0.0,"f12":"600239","f13":1,"f14":"\xe4\xba\x91\xe5\x8d\x97\xe5\x9f\x8e\xe6\x8a\x95","f15":2.92,"f16":2.68,"f17":2.69,"f18":2.65,"f20":4688605774,"f21":4688605774,"f22":0.0,"f23":2.89,"f24":28.07,"f25":51.3,"f62":27795948.0,"f115":-16.59,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.15,"f3":10.14,"f4":0.29,"f5":2973491,"f6":920586623.66,"f7":8.74,"f8":28.9,"f9":-7.07,"f10":4.18,"f11":0.0,"f12":"002630","f13":0,"f14":"\xe5\x8d\x8e\xe8\xa5\xbf\xe8\x83\xbd\xe6\xba\x90","f15":3.15,"f16":2.9,"f17":2.95,"f18":2.86,"f20":3719520000,"f21":3240482440,"f22":0.0,"f23":4.9,"f24":26.51,"f25":7.14,"f62":-18293260.0,"f115":-5.07,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.79,"f3":10.11,"f4":0.44,"f5":1857359,"f6":864538200.0,"f7":10.8,"f8":9.31,"f9":24.64,"f10":9.05,"f11":0.0,"f12":"600577","f13":1,"f14":"\xe7\xb2\xbe\xe8\xbe\xbe\xe8\x82\xa1\xe4\xbb\xbd","f15":4.79,"f16":4.32,"f17":4.35,"f18":4.35,"f20":9959122877,"f21":9559956211,"f22":0.0,"f23":2.07,"f24":14.05,"f25":16.26,"f62":161845983.0,"f115":26.21,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":4.36,"f3":10.1,"f4":0.4,"f5":617159,"f6":264661451.0,"f7":11.62,"f8":2.74,"f9":122.48,"f10":3.79,"f11":0.0,"f12":"601777","f13":1,"f14":"\xe5\x8a\x9b\xe5\xb8\x86\xe7\xa7\x91\xe6\x8a\x80","f15":4.36,"f16":3.9,"f17":3.95,"f18":3.96,"f20":19931840280,"f21":9811962000,"f22":0.0,"f23":1.94,"f24":24.22,"f25":12.95,"f62":41966291.0,"f115":137.9,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":3.27,"f3":10.1,"f4":0.3,"f5":290547,"f6":93712867.6,"f7":8.08,"f8":2.28,"f9":1394.52,"f10":1.03,"f11":0.0,"f12":"002175","f13":0,"f14":"\xe4\xb8\x9c\xe6\x96\xb9\xe6\x99\xba\xe9\x80\xa0","f15":3.27,"f16":3.03,"f17":3.04,"f18":2.97,"f20":4175072977,"f21":4175040277,"f22":0.0,"f23":8.53,"f24":13.54,"f25":-8.66,"f62":52561839.0,"f115":41.98,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2},{"f1":2,"f2":2.51,"f3":10.09,"f4":0.23,"f5":1715205,"f6":423246793.0,"f7":10.96,"f8":5.97,"f9":-4.84,"f10":2.8,"f11":0.0,"f12":"600569","f13":1,"f14":"\xe5\xae\x89\xe9\x98\xb3\xe9\x92\xa2\xe9\x93\x81","f15":2.51,"f16":2.26,"f17":2.26,"f18":2.28,"f20":7209777679,"f21":7209777679,"f22":0.0,"f23":1.02,"f24":17.84,"f25":21.26,"f62":88473646.0,"f115":-2.55,"f128":"-","f140":"-","f141":"-","f136":"-","f152":2}]}});
content = response.content.decode('utf-8') content = response.content.decode('utf-8')
content = baseCore.getSubStr(content, '{', '}') content = self.baseCore.getSubStr(content, '{', '}')
retJson = json.loads(content) retJson = json.loads(content)
total = retJson['data']['total'] total = retJson['data']['total']
response.close() response.close()
break break
except Exception as e: except Exception as e:
log.info(f"------第{num}次出错---{e}") self.log.info(f"------第{num}次出错---{e}")
continue continue
exception = '链接失败' exception = '链接失败'
state = 0 state = 0
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = self.baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception) self.baseCore.recordLog('', self.taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
return total return total
def getPageDta(pageIndex,pageSize,totalPage): def getPageDta(self,pageIndex,pageSize,totalPage,gpdmList):
gpdmListPage = []
for num in range(3): for num in range(3):
try: try:
start = time.time() start = time.time()
log.info(f"【{pageIndex}/{totalPage}】-----------begin") self.log.info(f"【{pageIndex}/{totalPage}】-----------begin")
url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn={pageIndex}&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={baseCore.getNowTime(3)}"; url = f"http://17.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124020359136113854692_1688967721474&pn={pageIndex}&pz={pageSize}&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152&_={self.baseCore.getNowTime(3)}"
ip = baseCore.get_proxy() ip = self.baseCore.get_proxy()
headers['User-Agent'] = baseCore.getRandomUserAgent() self.headers['User-Agent'] = self.baseCore.getRandomUserAgent()
response = requests.get(url, headers=headers, verify=False, proxies=ip) response = requests.get(url, headers=self.headers, verify=False, proxies=ip)
time.sleep(random.randint(3, 5)) time.sleep(random.randint(3, 5))
content = response.content.decode('utf-8') content = response.content.decode('utf-8')
content = baseCore.getSubStr(content, '{', '}') content = self.baseCore.getSubStr(content, '{', '}')
retJson = json.loads(content) retJson = json.loads(content)
dataList= retJson['data']['diff'] dataList= retJson['data']['diff']
for dataIndex in range(len(dataList)): for dataIndex in range(len(dataList)):
gpdm=dataList[dataIndex]['f12'] gpdm=dataList[dataIndex]['f12']
name=dataList[dataIndex]['f14'] name=dataList[dataIndex]['f14']
gpdmListPage.append(gpdm)
selectSql = f"select count(1) from gpdm where gpdm='{gpdm}' and name='{name}'" selectSql = f"select count(1) from gpdm where gpdm='{gpdm}' and name='{name}'"
cursor.execute(selectSql) self.cursor.execute(selectSql)
count = cursor.fetchone()[0] count = self.cursor.fetchone()[0]
if count>0: if count>0:
log.info(f"{gpdm}-------{name}---已经存在") self.log.info(f"{gpdm}-------{name}---已经存在")
continue continue
else: else:
log.info(f"{gpdm}-------{name}---新增") self.log.info(f"{gpdm}-------{name}---新增")
insertSql= f"insert into gpdm(gpdm,name,state,create_date) values ('{gpdm}','{name}',1,now())" insertSql= f"insert into gpdm(gpdm,name,state,create_date) values ('{gpdm}','{name}',1,now())"
cursor.execute(insertSql) self.cursor.execute(insertSql)
cnx.commit() self.cnx.commit()
response.close() response.close()
log.info(f"【{pageIndex}/{totalPage}】-----------end,耗时{baseCore.getTimeCost(start, time.time())}") self.log.info(f"【{pageIndex}/{totalPage}】-----------end,耗时{self.baseCore.getTimeCost(start, time.time())}")
break # break
return gpdmListPage
except Exception as e: except Exception as e:
log.info(f"------第{num}次出错---{e}") self.log.info(f"------第{num}次出错---{e}")
continue continue
exception = f'第{pageIndex}页链接失败' exception = f'第{pageIndex}页链接失败'
state = 0 state = 0
takeTime = baseCore.getTimeCost(start, time.time()) takeTime = self.baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', exception) self.baseCore.recordLog('', self.taskType, state, takeTime, '', exception)
return gpdmListPage
def doJob(): def doJob(self):
pageSize=20 pageSize=20
start_time = time.time() start_time = time.time()
total=getTotal(pageSize,start_time) total=self.getTotal(pageSize,start_time)
gpdmList = []
if total==0: if total==0:
exception = '股票代码总数为零' exception = '股票代码总数为零'
state = 0 state = 0
takeTime = baseCore.getTimeCost(start_time, time.time()) takeTime = self.baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog('', taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception) self.baseCore.recordLog('', self.taskType, state, takeTime, 'http://quote.eastmoney.com/center/gridlist.html?st=ChangePercent&sortType=C&sortRule=-1#hs_a_board', exception)
log.info(f"股票代码总数-----------{total},请检查") self.log.info(f"股票代码总数-----------{total},请检查")
return return
log.info(f"股票代码总数-----------{total}") self.log.info(f"股票代码总数-----------{total}")
if (total % pageSize == 0): if (total % pageSize == 0):
totalPage = total // pageSize totalPage = total // pageSize
else: else:
totalPage = total // pageSize + 1 totalPage = total // pageSize + 1
#测试:
# totalPage = 2
for pageIndex in range(1, totalPage + 1): for pageIndex in range(1, totalPage + 1):
getPageDta(pageIndex,pageSize,totalPage) gpdmListPage = self.getPageDta(pageIndex,pageSize,totalPage,gpdmList)
if gpdmListPage != []:
pass
else:
continue
[gpdmList.append(gpdm) for gpdm in gpdmListPage]
# print(len(gpdmList))
state = 1 state = 1
takeTime = baseCore.getTimeCost(start_time, time.time()) takeTime = self.baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', '') self.baseCore.recordLog('', self.taskType, state, takeTime, '', '')
# 释放资源 # 释放资源
cursor.close() self.cursor.close()
cnx.close() self.cnx.close()
baseCore.close() self.baseCore.close()
return gpdmList
if __name__ == '__main__': #
doJob() # if __name__ == '__main__':
# doJob()
"""
企业上市信息:只有上市的企业才能如企业库,未上市企业跳过采集步骤。退市企业标注为0
"""
import json
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import urllib3
from base.BaseCore import BaseCore
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from gpdm import Gpdm
baseCore = BaseCore()
chromedriver = "./chromedriver"
browser = webdriver.Chrome(chromedriver)
taskType = '上市信息/东方财富网'
gpdm = Gpdm()
gpdmList = gpdm.doJob()
log = baseCore.getLogger()
error_list = []
list_all_info = []
# 需要提供股票代码、企业信用代码
for com_code1 in gpdmList:
start = time.time()
# 股票代码0、2、3开头的为深圳交易所,6、9开头的为上海交易所,8开头的为北京交易所
if com_code1[0] == '2' or com_code1[0] == '0' or com_code1[0] == '3':
com_code = 'sz' + com_code1
if com_code1[0] == '9' or com_code1[0] == '6':
com_code = 'sh' + com_code1
if com_code1[0] == '8' or com_code1[0] == '4':
com_code = 'bj' + com_code1
if com_code1[0] == 'A':
com_code = ''
log.info(f'======开始采集{com_code}======')
url = f'https://quote.eastmoney.com/{com_code}.html'
url_1 = f'https://emweb.eastmoney.com/PC_HSF10/CompanySurvey/PageAjax?code={com_code}'
url_2 = f'https://emweb.eastmoney.com/PC_HSF10/BusinessAnalysis/PageAjax?code={com_code}'
browser.get(url)
time.sleep(8)
page_source = browser.page_source
soup_t = BeautifulSoup(page_source, 'html.parser')
try:
result = soup_t.find('div',class_='quote_quotenums').text
# print(f'result:{result}')
# if result=='未上市'or result=='已退市':
if result == '未上市' :
continue
if result == '已退市':
tag = 0
else:
tag = 1
except Exception as e:
error_list.append(com_code)
log.info(f'={com_code}===解析上市状态失败=====')
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', f'{com_code}解析上市状态失败--e:{e}')
print('error')
requests.adapters.DEFAULT_RETRIES = 5
json_1 = requests.get(url_1,verify=False).json()
json_2 = requests.get(url_2,verify=False).json()
# SECURITY_TYPE
try:
jys = json_1['jbzl'][0]['TRADE_MARKET']
except Exception as e:
log.info(f'====={com_code}=====解析交易所失败======')
state = 0
takeTime = baseCore.getTimeCost(start, time.time())
baseCore.recordLog('', taskType, state, takeTime, '', f'{com_code}解析交易所失败--e:{e}')
continue
try:
if "上海" in jys:
jys_code = '2'
if "深圳" in jys:
jys_code = '3'
except:
jys = json_1['jbzl'][0]['SECURITY_TYPE']
if "北京" in jys:
jys_code = '1'
short_name = json_1['jbzl'][0]['STR_NAMEA']
zhengquan_type = json_1['jbzl'][0]['SECURITY_TYPE']
# print(zhengquan_type)
if 'A' in zhengquan_type:
# print(zhengquan_type)
category = '1'
if 'B' in zhengquan_type:
category = '2'
if '新三板' in zhengquan_type:
category = '3'
if 'H' in zhengquan_type:
category = '4'
id_code = json_1['jbzl'][0]['REG_NUM']
dongcai = json_1['jbzl'][0]['EM2016']
zhengjian = json_1['jbzl'][0]['INDUSTRYCSRC1']
try:
shangshishijian = json_1['fxxg'][0]['LISTING_DATE'][:10]
except:
shangshishijian = ''
zhuyingfanwei = json_2['zyfw'][0]['BUSINESS_SCOPE']
dic_cwsj = {
"exchange": jys_code,
"category": category, # 股票类型(1-A股;2-B股;3-新三板;4-H股)
'listed':tag,
"listingDate": shangshishijian,
"securitiesCode": com_code[2:],
"securitiesShortName": short_name,
"securitiesType": zhengquan_type,
"socialCreditCode": id_code,
"businessScope": zhuyingfanwei,
"eastIndustry": dongcai,
"csrcIndustry": zhengjian
}
list_all_info.append(dic_cwsj)
log.info(f'======{com_code}====采集成功=====')
# 通过接口将数据保存进数据库
for num in range(0, len(list_all_info),100):
json_updata = json.dumps(list_all_info[num:num+100])
# print(json_updata)
try:
response = requests.post('http://114.115.236.206:8088/sync/enterpriseIpo', data=json_updata, timeout=300,
verify=False)
except Exception as e:
print(e)
print("{}:到:{}".format(num, num + 100))
print(response.text)
...@@ -212,65 +212,18 @@ def get_info(sid,json_search,origin,url_,info_source_code,page): ...@@ -212,65 +212,18 @@ def get_info(sid,json_search,origin,url_,info_source_code,page):
continue continue
return list_all_info,num_caiji return list_all_info,num_caiji
def job(count,key): def RequestUrl(dic_url,token,key):
# 刷新浏览器并获取当前token和cookie
token, cookies = flushAndGetToken(list_b)
log.info('===========获取公众号============')
start_ = time.time() start_ = time.time()
#todo:redis中数据 pop一条
infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
if infoSourceCode == 'None' or infoSourceCode == None:
#当一次采集完之后,重新插入数据并等待插入完成
getFromSql()
time.sleep(20)
log.info(f'========本次公众号已采集完毕,共采集{count}个公众号=========总耗时:{baseCore.getTimeCost(start_,time.time())}')
return count
sql = f"SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = '{infoSourceCode}' "
# '一带一路百人论坛'
# sql = f"-- SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = 'IN-20220609-57436' "
cursor.execute(sql)
row = cursor.fetchone()
dic_url = {
'url_': row[0],
'sid': row[1],
'name': row[2],
'info_source_code': row[3],
'biz': ''
}
log.info('===========获取biz==========')
s.cookies.update(cookies)
s.keep_alive = False
url_ = dic_url['url_'] url_ = dic_url['url_']
origin = dic_url['name'] origin = dic_url['name']
info_source_code = dic_url['info_source_code'] info_source_code = dic_url['info_source_code']
sid = dic_url['sid'] sid = dic_url['sid']
try: biz = dic_url['biz']
biz = url_.split('__biz=')[1].split('==&')[0].split('=')[0]
dic_url['biz'] = biz
except Exception as e:
log.info(f'---公众号--{origin}---biz错误')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
error = [
origin,
url_,
info_source_code,
e,
'biz错误',
time_now
]
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(error))
cnx_.commit()
return count
fakeid = biz + '==' fakeid = biz + '=='
url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1' url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1'
#获取页数 ret = -1
json_search = ''
# 获取页数
try: try:
# ip = baseCore.get_proxy() # ip = baseCore.get_proxy()
json_search = s.get(url_search, headers=headers, json_search = s.get(url_search, headers=headers,
...@@ -281,7 +234,8 @@ def job(count,key): ...@@ -281,7 +234,8 @@ def job(count,key):
log.error(f'===公众号{origin}请求失败!当前时间:{baseCore.getNowTime(1)}======={e}===') log.error(f'===公众号{origin}请求失败!当前时间:{baseCore.getNowTime(1)}======={e}===')
rePutIntoR(info_source_code) rePutIntoR(info_source_code)
time.sleep(20) time.sleep(20)
return count return json_search,ret
ret = json_search['base_resp']['ret'] ret = json_search['base_resp']['ret']
# {"base_resp": {"ret": 200003, "err_msg": "invalid session"}} # {"base_resp": {"ret": 200003, "err_msg": "invalid session"}}
# TODO:需要判断返回值,根据返回值判断是封号还是biz错误 # TODO:需要判断返回值,根据返回值判断是封号还是biz错误
...@@ -304,7 +258,7 @@ def job(count,key): ...@@ -304,7 +258,7 @@ def job(count,key):
# browser_run.refresh() # browser_run.refresh()
r.set(key, 50) r.set(key, 50)
r.expire(key, 5400) r.expire(key, 5400)
return count return json_search,ret
elif ret == 200002: elif ret == 200002:
# 公众号链接错误 保存库里 记录错误信息及错误类型 # 公众号链接错误 保存库里 记录错误信息及错误类型
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
...@@ -320,7 +274,7 @@ def job(count,key): ...@@ -320,7 +274,7 @@ def job(count,key):
cursor_.execute(insertSql, tuple(error)) cursor_.execute(insertSql, tuple(error))
cnx_.commit() cnx_.commit()
log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}') log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}')
return count return json_search,ret
elif ret == 200003: elif ret == 200003:
# 无效的session # 无效的session
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
...@@ -336,7 +290,7 @@ def job(count,key): ...@@ -336,7 +290,7 @@ def job(count,key):
cursor_.execute(insertSql, tuple(error)) cursor_.execute(insertSql, tuple(error))
cnx_.commit() cnx_.commit()
log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}') log.info(f'公众号----{origin}----耗时{baseCore.getTimeCost(start_, time.time())}')
return count return json_search,ret
else: else:
log.info(f'----其他情况-----{json_search}---公众号{origin}------') log.info(f'----其他情况-----{json_search}---公众号{origin}------')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
...@@ -351,7 +305,65 @@ def job(count,key): ...@@ -351,7 +305,65 @@ def job(count,key):
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)" insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(error)) cursor_.execute(insertSql, tuple(error))
cnx_.commit() cnx_.commit()
return json_search,ret
def job(count,key):
# 刷新浏览器并获取当前token和cookie
token, cookies = flushAndGetToken(list_b)
log.info('===========获取公众号============')
start_ = time.time()
#todo:redis中数据 pop一条
infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
if infoSourceCode == 'None' or infoSourceCode == None:
#当一次采集完之后,重新插入数据并等待插入完成
getFromSql()
time.sleep(20)
log.info(f'========本次公众号已采集完毕,共采集{count}个公众号=========总耗时:{baseCore.getTimeCost(start_,time.time())}')
return count
sql = f"SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = '{infoSourceCode}' "
# '一带一路百人论坛'
# sql = f"-- SELECT site_uri,id,site_name,info_source_code from info_source where info_source_code = 'IN-20220609-57436' "
cursor.execute(sql)
row = cursor.fetchone()
dic_url = {
'url_': row[0],
'sid': row[1],
'name': row[2],
'info_source_code': row[3],
'biz': ''
}
log.info('===========获取biz==========')
s.cookies.update(cookies)
s.keep_alive = False
url_ = dic_url['url_']
origin = dic_url['name']
info_source_code = dic_url['info_source_code']
sid = dic_url['sid']
try:
biz = url_.split('__biz=')[1].split('==&')[0].split('=')[0]
dic_url['biz'] = biz
except Exception as e:
log.info(f'---公众号--{origin}---biz错误')
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
error = [
origin,
url_,
info_source_code,
e,
'biz错误',
time_now
]
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(error))
cnx_.commit()
return count return count
json_search,ret = RequestUrl(dic_url,token,key)
if ret == 0:
try: try:
Max_data = int(json_search['app_msg_cnt']) Max_data = int(json_search['app_msg_cnt'])
Max_page = int(int(json_search['app_msg_cnt']) / 5) Max_page = int(int(json_search['app_msg_cnt']) / 5)
...@@ -364,22 +376,13 @@ def job(count,key): ...@@ -364,22 +376,13 @@ def job(count,key):
Max_data = 5 Max_data = 5
log.info(f'开始采集{origin}-----共{Max_page}页---{Max_data}条数据-----') log.info(f'开始采集{origin}-----共{Max_page}页---{Max_data}条数据-----')
for i in range(0, Max_data, 5): for i in range(0, Max_data, 5):
url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin={i}&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1' json_search,ret = RequestUrl(dic_url,token,key)
if ret == 0:
# url_search = f'https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid={fakeid}&type=9&query=&token={token}&lang=zh_CN&f=json&ajax=1' pass
# https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid=MzAwNDA5Njc1Mg==&type=9&query=&token=550883192&lang=zh_CN&f=json&ajax=1 else:
try:
# ip = get_proxy()[random.randint(0, 3)]
json_search = s.get(url_search, headers=headers,
verify=False).json() # , proxies=ip, verify=False
str_t = json.dumps(json_search)
time.sleep(2)
except Exception as e:
log.error(f'===公众号{origin}请求失败!当前时间:{baseCore.getNowTime(1)}======={e}===')
rePutIntoR(info_source_code)
return count return count
if json_search != '':
list_all = json_search['app_msg_list'] # list_all = json_search['app_msg_list']
try: try:
#开始采集每一页文章信息 #开始采集每一页文章信息
page = int(i/5+1) page = int(i/5+1)
...@@ -422,9 +425,12 @@ def job(count,key): ...@@ -422,9 +425,12 @@ def job(count,key):
insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)" insertSql = f"insert into WeixinGZH (site_name,site_url,info_source_code,json_error_info,error_type,create_time) values (%s,%s,%s,%s,%s,%s)"
cursor_.execute(insertSql, tuple(false)) cursor_.execute(insertSql, tuple(false))
cnx_.commit() cnx_.commit()
log.info(f'{fakeid}、公众号:{origin}采集失败!!!!!!耗时{baseCore.getTimeCost(start_, time.time())}') log.info(f'{biz}、公众号:{origin}采集失败!!!!!!耗时{baseCore.getTimeCost(start_, time.time())}')
count += 1 count += 1
log.info(f'{fakeid}、公众号{origin}:采集成功!、已采集{count}个公众号、耗时{baseCore.getTimeCost(start_, time.time())}') log.info(f'{biz}、公众号{origin}:采集成功!、已采集{count}个公众号、耗时{baseCore.getTimeCost(start_, time.time())}')
return count
else:
return count
time.sleep(2) time.sleep(2)
return count return count
......
import json import json
...@@ -91,6 +91,7 @@ def getInfo(name,enname,gpdm, xydm, start): ...@@ -91,6 +91,7 @@ def getInfo(name,enname,gpdm, xydm, start):
state = 1 state = 1
soup = BeautifulSoup(response.content, 'html.parser') soup = BeautifulSoup(response.content, 'html.parser')
page = soup.find('div', {'id': 'Col1-0-Profile-Proxy'}) page = soup.find('div', {'id': 'Col1-0-Profile-Proxy'})
name = page.find('h3',{'class':'Fz(m) Mb(10px)'}).text
try: try:
com_info = page.find('div', {'class': 'Mb(25px)'}) com_info = page.find('div', {'class': 'Mb(25px)'})
except: except:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论