提交 07decca3 作者: XveLingKun

美国证券交易委员会年报日期修改

上级 78929e4e
...@@ -96,15 +96,15 @@ def spider(com_name,cik,up_okCount): ...@@ -96,15 +96,15 @@ def spider(com_name,cik,up_okCount):
} }
ip_dic = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'} ip_dic = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
#正式 #正式
url_json = f'https://data.sec.gov/submissions/CIK{cik}.json' # url_json = f'https://data.sec.gov/submissions/CIK{cik}.json'
#测试 #测试
# url_json = 'https://data.sec.gov/submissions/CIK0000104169.json' url_json = 'https://data.sec.gov/submissions/CIK0000104169.json'
#解析页面 #解析页面
for nnn in range(0,4): for nnn in range(0,4):
try: try:
# req = requests.get(url=url_json,headers=header,proxies=ip_dic,verify=False,timeout=30) req = requests.get(url=url_json,headers=header,proxies=ip_dic,verify=False,timeout=30)
req = requests.get(url=url_json, headers=header, verify=False, timeout=30) # req = requests.get(url=url_json, headers=header, verify=False, timeout=30)
break break
except Exception as e: except Exception as e:
time.sleep(2) time.sleep(2)
...@@ -118,13 +118,15 @@ def spider(com_name,cik,up_okCount): ...@@ -118,13 +118,15 @@ def spider(com_name,cik,up_okCount):
form_type_list = info['form'] form_type_list = info['form']
accessionNumber_list = info['accessionNumber'] accessionNumber_list = info['accessionNumber']
primaryDocument_list = info['primaryDocument'] primaryDocument_list = info['primaryDocument']
filingDate_list = info['filingDate'] # filingDate_list = info['filingDate']
# todo:修改日期采集
filingDate_list = info['reportDate']
i = 0 i = 0
for form in form_type_list: for form in form_type_list:
i += 1 i += 1
if form == '10-K' or form == '20-F': if form == '10-K' or form == '20-F':
log.info(form,i-1) log.info(form, i-1)
accessionNumber = accessionNumber_list[i-1] accessionNumber = accessionNumber_list[i-1]
#发布日期 #发布日期
filingDate = filingDate_list[i-1] filingDate = filingDate_list[i-1]
...@@ -140,7 +142,7 @@ def spider(com_name,cik,up_okCount): ...@@ -140,7 +142,7 @@ def spider(com_name,cik,up_okCount):
u_1 = cik u_1 = cik
# u_1 = '1395064' # u_1 = '1395064'
u_2 = accessionNumber.replace('-','') u_2 = accessionNumber.replace('-', '')
u_3 = primaryDocument_list[i-1] u_3 = primaryDocument_list[i-1]
news_url = 'https://www.sec.gov/Archives/edgar/data/' + u_1 + '/' + u_2 + '/' + u_3 news_url = 'https://www.sec.gov/Archives/edgar/data/' + u_1 + '/' + u_2 + '/' + u_3
try: try:
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论