提交 9c3eea0f 作者: XveLingKun

美国证监会年报

上级 5a10db80
...@@ -423,7 +423,7 @@ def SEC_CIK(): ...@@ -423,7 +423,7 @@ def SEC_CIK():
for item in cik_list: for item in cik_list:
# r.rpush('Sec_cik_US:uscik_baseinfo',item) # r.rpush('Sec_cik_US:uscik_baseinfo',item)
r.rpush('Sec_cik_US:uscik_annualReport', item) r.rpush('Sec_cik_US:uscik_annualReport', item)
closeSql(cnx,cursor) closeSql(cnx, cursor)
#福布斯=====从数据库中读取信息放入redis #福布斯=====从数据库中读取信息放入redis
......
...@@ -99,12 +99,12 @@ def spider(com_name,cik,up_okCount): ...@@ -99,12 +99,12 @@ def spider(com_name,cik,up_okCount):
} }
ip_dic = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'} ip_dic = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
#正式 #正式
# url_json = f'https://data.sec.gov/submissions/CIK{cik}.json' url_json = f'https://data.sec.gov/submissions/CIK{cik}.json'
#测试 #测试
url_json = 'https://data.sec.gov/submissions/CIK0000104169.json' # url_json = 'https://data.sec.gov/submissions/CIK0000104169.json'
#解析页面 #解析页面
for nnn in range(0,4): for nnn in range(0, 4):
try: try:
req = requests.get(url=url_json, headers=header, proxies=ip_dic, verify=False, timeout=30) req = requests.get(url=url_json, headers=header, proxies=ip_dic, verify=False, timeout=30)
# req = requests.get(url=url_json, headers=header, verify=False, timeout=30) # req = requests.get(url=url_json, headers=header, verify=False, timeout=30)
...@@ -115,7 +115,7 @@ def spider(com_name,cik,up_okCount): ...@@ -115,7 +115,7 @@ def spider(com_name,cik,up_okCount):
try: try:
data = req.json() data = req.json()
except: except:
baseCore.rePutIntoR('Sec_cik_US:uscik_annualReport',social_code) baseCore.rePutIntoR('Sec_cik_US:uscik_annualReport', social_code)
return return
req.close() req.close()
info = data['filings']['recent'] info = data['filings']['recent']
...@@ -138,9 +138,9 @@ def spider(com_name,cik,up_okCount): ...@@ -138,9 +138,9 @@ def spider(com_name,cik,up_okCount):
date = datetime.strptime(filingDate, '%Y-%m-%d') # 将日期字符串转换为datetime对象 date = datetime.strptime(filingDate, '%Y-%m-%d') # 将日期字符串转换为datetime对象
month = date.month # 获取月份 month = date.month # 获取月份
if month <= 6: if month < 12:
year = date.year - 1 year = date.year - 1
elif month > 6: else:
year = date.year year = date.year
# year = filingDate[:4] # year = filingDate[:4]
...@@ -246,7 +246,7 @@ def spider(com_name,cik,up_okCount): ...@@ -246,7 +246,7 @@ def spider(com_name,cik,up_okCount):
def getrequest(social_code,url,headers,data): def getrequest(social_code,url,headers,data):
ip_dic = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'} ip_dic = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
#通过请求post接口获取企业的CIK #通过请求post接口获取企业的CIK
response = requests.post(url=url, headers=headers, data=data ,proxies=ip_dic) response = requests.post(url=url, headers=headers, data=data, proxies=ip_dic)
response.encoding = response.apparent_encoding response.encoding = response.apparent_encoding
# 检查响应状态码 # 检查响应状态码
if response.status_code == 200: if response.status_code == 200:
...@@ -326,8 +326,8 @@ if __name__ == '__main__': ...@@ -326,8 +326,8 @@ if __name__ == '__main__':
while True: while True:
start_time = time.time() start_time = time.time()
# 获取企业信息 # 获取企业信息
# cik = baseCore.redicPullData('Sec_cik_US:uscik_annualReport') cik = baseCore.redicPullData('Sec_cik_US:uscik_annualReport')
cik = '789019' # cik = '789019'
data = fromcikgetinfo(cik) data = fromcikgetinfo(cik)
com_name = data[2] com_name = data[2]
com_code = data[3] com_code = data[3]
......
...@@ -113,11 +113,8 @@ def main(page, p, esMethod): ...@@ -113,11 +113,8 @@ def main(page, p, esMethod):
socialCode = mms['_source']['labels'][0]['relationId'] socialCode = mms['_source']['labels'][0]['relationId']
log.info(f'{id}--{title}--{origin}--{sourceAddress}---') log.info(f'{id}--{title}--{origin}--{sourceAddress}---')
if origin == 'SEC美国证券交易委员会': if origin == 'SEC美国证券交易委员会':
redis_conn.lrem('NianbaoUS:id', 0, id+"|"+title+"|"+sourceAddress+"|"+year+"|"+socialCode) # redis_conn.lrem('NianbaoUS:id', 0, id+"|"+title+"|"+sourceAddress+"|"+year+"|"+socialCode)
redis_conn.lpush('NianbaoUS:id', id+"|"+title+"|"+sourceAddress+"|"+year+"|"+socialCode) redis_conn.sadd('NianbaoUS:id', id+"|"+title+"|"+sourceAddress+"|"+year+"|"+socialCode)
else:
redis_conn.lrem(f'NianbaoOT_{origin}:id', 0, id+"|"+title+"|"+sourceAddress+"|"+year+"|"+socialCode)
redis_conn.lpush(f'NianbaoOT_{origin}:id', id+"|"+title+"|"+sourceAddress+"|"+year+"|"+socialCode)
def run_threads(num_threads,esMethod,j): def run_threads(num_threads,esMethod,j):
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论