提交 22b065ed 作者: LiuLiYuan

纳斯达克财务数据 10/07

上级 afe226ba
......@@ -11,7 +11,6 @@ from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages import urllib3
from retry import retry
from base import BaseCore
urllib3.disable_warnings()
......@@ -20,6 +19,7 @@ log = baseCore.getLogger()
cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
charset='utf8mb4')
cursor = cnx.cursor()
r = baseCore.r
URL = 'https://www.nasdaq.com/'
session = requests.session()
session.mount('https://', HTTPAdapter(pool_connections=20, pool_maxsize=100))
......@@ -65,6 +65,7 @@ def add_date(com_code, date_list):
# 数据发送端口
def sendData(start_time, social_code, gpdm, dic_info):
data = json.dumps(dic_info)
# print(data)
url_baocun = 'http://114.115.236.206:8088/sync/finance/nsdk'
for nnn in range(0, 3):
try:
......@@ -86,7 +87,7 @@ def getUnit(gpdm):
req.encoding = req.apparent_encoding
soup = BeautifulSoup(req.text, 'lxml')
unit = soup.find('div', class_='financials__note').text.split(' ')[1].lstrip().strip()
unit = f'(千){unit}'
unit = f'{unit}(千)'
req.close()
return unit
......@@ -104,9 +105,11 @@ def getlist(table, tableName):
value = re.sub(r"[^\d+-]", "", value)
else:
value = '-'
date = years[f'value{i}'].split('/')[2] + '-' + years[f'value{i}'].split('/')[0] + '-' + \
years[f'value{i}'].split('/')[1]
list.append({f'{tableName}': name, 'value': value, 'date': date, })
date_ = years[f'value{i}']
if date_ :
date = date_.split('/')[2] + '-' + date_.split('/')[0] + '-' + \
date_.split('/')[1]
list.append({f'{tableName}': name, 'value': value, 'date': date, })
return list
......@@ -162,6 +165,7 @@ def getYear(start_time, session, social_code, gpdm):
# 判断该报告期是否已采过
panduan = check_date(social_code, date + '-year')
if panduan:
log.info(f'{social_code}=={gpdm}=={date}年度数据采集过')
continue
xjll_list_f = reviseData([item for item in final_list if 'xjll' in item], unit, 'xjll')
zcfz_list_f = reviseData([item for item in final_list if 'zcfz' in item], unit, 'zcfz')
......@@ -177,6 +181,7 @@ def getYear(start_time, session, social_code, gpdm):
"ynFirst": ynFirst,
}
sendData(start_time, social_code, gpdm, dic_info)
log.info(f'{social_code}=={gpdm}=={date}年度财务数据采集成功')
date_list.append(date + '-year')
else:
log.error(f'找不到{social_code}=={gpdm}年度财务数据')
......@@ -184,6 +189,7 @@ def getYear(start_time, session, social_code, gpdm):
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===无年度财务数据')
except:
log.error(f'{social_code}===年度财务数据访问失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===年度财务数据访问失败')
......@@ -217,6 +223,7 @@ def getQuarter(start_time, session, social_code, gpdm):
# 判断该报告期是否已采过
panduan = check_date(social_code, date + '-quarter')
if panduan:
log.info(f'{social_code}=={gpdm}=={date}季度数据采集过')
continue
xjll_list_f = reviseData([item for item in final_list if 'xjll' in item], unit, 'xjll')
zcfz_list_f = reviseData([item for item in final_list if 'zcfz' in item], unit, 'zcfz')
......@@ -236,13 +243,15 @@ def getQuarter(start_time, session, social_code, gpdm):
if panduan_flag:
dic_info['dateFlag'] = 'year'
sendData(start_time, social_code, gpdm, dic_info)
log.info(f'{social_code}=={gpdm}=={date}季度财务数据采集成功')
date_list.append(date + '-quarter')
else:
log.error(f'{social_code}=={gpdm}无季度财务数据')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===无季度财务数据')
except:
log.error(f'{social_code}===季度财务数据访问失败')
state = 0
takeTime = baseCore.getTimeCost(start_time, time.time())
baseCore.recordLog(social_code, taskType, state, takeTime, url, f'{social_code}===季度财务数据访问失败')
......@@ -250,36 +259,52 @@ def getQuarter(start_time, session, social_code, gpdm):
return date_list
def FinanceFromNasdaq():
# Seed the work queue: select the xydm column of every mgzqyjwyh_list row with
# state=2 and exchange='Nasdaq', then push each value onto the list
# 'FinanceFromNasdaq:nasdaqfinance_socialCode' through the module-level `r`
# (baseCore.r). Uses the module-level MySQL `cursor`; returns nothing.
sql = "select xydm from mgzqyjwyh_list where state=2 and exchange='Nasdaq';"
cursor.execute(sql)
finance = cursor.fetchall()
# Only one column is selected, so element 0 of each fetched row is the xydm value.
finance_list = [item[0] for item in finance]
for item in finance_list:
r.rpush('FinanceFromNasdaq:nasdaqfinance_socialCode', item)
# NOTE(review): indentation is lost in this view, so it is unclear whether this
# print runs once after the loop or once per pushed item — confirm in the real file.
print('redis放入成功')
def getInfomation(social_code):
    """Fetch the company row for one social code.

    Queries ``mgzqyjwyh_list`` for a row with ``state=2`` whose ``xydm`` column
    equals *social_code*, using the module-level ``cursor``.

    Args:
        social_code: Value matched against the ``xydm`` column.

    Returns:
        The first matching row as returned by ``cursor.fetchone()``; callers
        should expect ``None`` when no row matches.
    """
    # Bind the value as a query parameter instead of interpolating it into the
    # SQL text: the code is pulled from an external queue, so quoting/escaping
    # must be left to the database driver.
    sql = "select * from mgzqyjwyh_list where state=2 and xydm=%s;"
    cursor.execute(sql, (social_code,))
    return cursor.fetchone()
def doJob():
# Collect Nasdaq financial data: for a company (social_code, gpdm) call getYear and
# getQuarter, store the returned report-date lists with add_date, and record/log the
# elapsed time.
# NOTE(review): this span comes from a diff rendering with indentation and +/- markers
# stripped. The hard-coded single-company run (the social_code/gpdm literals down to
# cnx.close()) and the `while True` queue loop look like the previous and the new body
# respectively — confirm against the real file before relying on either.
# while True:
# social_code = baseCore.redicPullData('')
# datas_enterprise = baseCore.getInfomation(social_code)
# Request the site root once with the shared session before any data requests
# (presumably to pick up cookies — TODO confirm).
session.get(URL, headers=headers)
# sql = "select * from mgzqyjwyh_list where state=2 and exchange='Nasdaq';"
# cursor.execute(sql)
# datas_enterprise = cursor.fetchall()
# for data_enterprise in datas_enterprise:
start_time = time.time()
# gpdm = data_enterprise[3]
# social_code = data_enterprise[6]
social_code = 'ZD0CN0012309000172'
gpdm = 'NTES'
# Collect annual data
date_list_year = getYear(start_time, session, social_code, gpdm)
# Save the annual report dates to redis
add_date(social_code, date_list_year)
# Collect quarterly data
date_list_quarter = getQuarter(start_time, session, social_code, gpdm)
# Save the quarterly report dates to redis
add_date(social_code, date_list_quarter)
timeCost = baseCore.getTimeCost(start_time, time.time())
# state is passed to recordLog; the failure paths elsewhere in this file use 0,
# so 1 presumably marks success — confirm against baseCore.recordLog.
state = 1
baseCore.recordLog(social_code, taskType, state, timeCost, '', '')
log.info(f'{social_code}=={gpdm}==耗时{timeCost}')
# break
cursor.close()
cnx.close()
while True:
# Pull the next code from the queue filled by FinanceFromNasdaq().
social_code = baseCore.redicPullData('FinanceFromNasdaq:nasdaqfinance_socialCode')
# Nothing queued: log, wait 600 seconds, then poll again.
if not social_code or social_code == None:
log.info('============已没有数据============等待===============')
time.sleep(600)
continue
# NOTE(review): getInfomation returns cursor.fetchone(); if that is None (no matching
# row) the indexing below raises TypeError — confirm whether that can happen.
data_enterprise = getInfomation(social_code)
start_time = time.time()
# Column 3 of the row is used as the stock code (gpdm), column 6 as the social code.
gpdm = data_enterprise[3]
social_code = data_enterprise[6]
# print(gpdm,social_code)
# Collect annual data
date_list_year = getYear(start_time, session, social_code, gpdm)
# Save the annual report dates to redis
add_date(social_code, date_list_year)
# Collect quarterly data
date_list_quarter = getQuarter(start_time, session, social_code, gpdm)
# Save the quarterly report dates to redis
add_date(social_code, date_list_quarter)
timeCost = baseCore.getTimeCost(start_time, time.time())
state = 1
baseCore.recordLog(social_code, taskType, state, timeCost, '', '')
log.info(f'{social_code}=={gpdm}==耗时{timeCost}')
if __name__ == '__main__':
    # Script entry point: run the financial-data collection job.
    try:
        # Financial data collection
        doJob()
        # Put company stock codes into redis (run this instead to seed the queue)
        # FinanceFromNasdaq()
    finally:
        # Release the MySQL cursor and connection even when doJob() raises or the
        # process is interrupted; placed after doJob() without try/finally, these
        # close calls would be skipped on any exception.
        cursor.close()
        cnx.close()
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论