提交 19e70d94 作者: 薛凌堃

9/28

上级 af03b018
......@@ -449,6 +449,21 @@ def omeng():
# r.rpush('gnOMEnterprise_socialcode:Notice', item)
closeSql(cnx, cursor)
#单项冠军
def danxiangguanjun():
    """Placeholder for the '单项冠军' (single-item manufacturing champion)
    enterprise crawler. Not yet implemented; implicitly returns None."""
#科改示范
def kegaishifan():
    """Placeholder for the '科改示范' (SOE science-reform demonstration)
    enterprise crawler. Not yet implemented; implicitly returns None."""
#双百企业
def shuangbaiqiye():
    """Placeholder for the '双百企业' ("Double Hundred" enterprises)
    crawler. Not yet implemented; implicitly returns None."""
#专精特新
def zhuangjingtexind():
    """Placeholder for the '专精特新' (specialized/refined/unique/innovative
    SME) crawler. Not yet implemented; implicitly returns None."""
if __name__ == "__main__":
start = time.time()
......
......@@ -46,8 +46,8 @@ def doJob():
if tycid == None or tycid == '':
try:
retData = getTycIdByXYDM(xydm)
if retData:
tycid = retData['id']
if retData['state']:
tycid = retData['tycData']['id']
# todo:写入数据库
updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
cursor_.execute(updateSql)
......
......@@ -101,8 +101,8 @@ def spider(com_name,cik,up_okCount):
#解析页面
for nnn in range(0,4):
try:
req = requests.get(url=url_json,headers=header,proxies=ip_dic,verify=False,timeout=30)
# req = requests.get(url=url_json, headers=header, verify=False, timeout=30)
# req = requests.get(url=url_json,headers=header,proxies=ip_dic,verify=False,timeout=30)
req = requests.get(url=url_json, headers=header, verify=False, timeout=30)
break
except:
time.sleep(2)
......
......@@ -41,6 +41,14 @@ type_map = {
'9605':'公司公告',
'9533':'公司公告',
}
# Disclosure-category name -> downstream type id (string).
# Categories receive consecutive string ids '8' through '13'.
_type_id_categories = ['公司公告', '股转公告', '挂牌审核', '自律监管措施', '问询函', '纪律处分']
type_id_map = dict(zip(_type_id_categories, (str(n) for n in range(8, 14))))
def secrchATT(item_id, name, type_id):
sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s '''
......@@ -157,7 +165,7 @@ def InsterInto(short_name, social_code, pdf_url):
return insert
def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_name,num):
def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_name,num,kfkid):
#上传至文件服务器
retData = baseCore.upLoadToServe(pdf_url,8,social_code)
#附件插入att数据库
......@@ -192,7 +200,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_na
'sourceAddress': pdf_url, # 原文链接
'summary': '',
'title': pdf_name,
'type': 3,
'type': kfkid,
'socialCreditCode': social_code,
'year': year
}
......@@ -241,6 +249,7 @@ def SpiderByZJH(url, dic_info, start_time,num): # dic_info 数据库中获取
pdf_url = 'https://www.neeq.com.cn' + rp['destFilePath']
name_pdf = rp['disclosureTitle']
rp_type = type_map[rp['disclosureType']]
kfkid = type_id_map[rp_type]
publishDate = rp['publishDate']
year = publishDate[:4]
# 数据入库
......@@ -250,7 +259,7 @@ def SpiderByZJH(url, dic_info, start_time,num): # dic_info 数据库中获取
# okCount = okCount + 1
# 解析PDF内容,先获取PDF链接 下载 解析成功,解析失败 ,传输成功,传输失败
log.info(f'======={short_name}===========插入公告库成功')
result = GetContent(pdf_url, name_pdf, social_code, year, publishDate, start_time, com_name, num)
result = GetContent(pdf_url, name_pdf, social_code, year, publishDate, start_time, com_name, num,kfkid)
if result:
# 公告信息列表
......@@ -300,17 +309,18 @@ if __name__ == '__main__':
while True:
start_time = time.time()
# # 获取企业信息
# # social_code = baseCore.redicPullData('NoticeEnterpriseFbs:gnqy_socialCode')
social_code = '9110000071092841XX'
com_code = '430045'
short_name = '超毅网络'
social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
# social_code = '9110000071092841XX'
# com_code = '430045'
# short_name = '超毅网络'
dic_info = {}
# # 判断 如果Redis中已经没有数据,则等待
# if social_code == None:
# time.sleep(20)
# continue
# dic_info = baseCore.getInfomation(social_code)
# count = dic_info[16]
if social_code == None:
time.sleep(20)
continue
data = baseCore.getInfomation(social_code)
com_code = data[3]
short_name = data[4]
url = 'https://www.neeq.com.cn/disclosureInfoController/productInfoResult.do'
#翻页 page 0~ 25 totalPages
......
......@@ -56,7 +56,7 @@ if __name__=="__main__":
url = "https://mp.weixin.qq.com/"
browser.get(url)
# 可改动
time.sleep(60)
time.sleep(20)
s = requests.session()
#获取到token和cookies
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论