提交 6500f5f7 作者: 薛凌堃

10/27

上级 443a395e
......@@ -152,7 +152,8 @@ def BaseInfoEnterprise_task():
#企业核心人员
def CorPerson():
cnx, cursor = connectSql()
gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
# gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=8 AND a.Place=1"
cursor.execute(gn_query)
gn_result = cursor.fetchall()
cnx.commit()
......@@ -533,6 +534,27 @@ def dujs_1020():
# r.rpush('dujs_1020:news_socialcode', item)
# r.rpush('dujs_1020:person_socialcode', item)
def china100():
    """Queue every China-100 company's social code onto the Redis
    'china100:baseinfo' list for the base-info collectors to consume."""
    cnx, cursor = connectSql()
    cursor.execute("SELECT SocialCode FROM China100")
    rows = cursor.fetchall()
    # commit releases the read snapshot on the pooled connection
    cnx.commit()
    for (social_code,) in rows:
        r.rpush('china100:baseinfo', social_code)
def global100():
    """Queue every Global-100 company's social code onto the Redis
    'global100:baseinfo' list for the base-info collectors to consume."""
    cnx, cursor = connectSql()
    cursor.execute("SELECT SocialCode FROM Global100")
    rows = cursor.fetchall()
    # commit releases the read snapshot on the pooled connection
    cnx.commit()
    for (social_code,) in rows:
        r.rpush('global100:baseinfo', social_code)
if __name__ == "__main__":
start = time.time()
......@@ -546,7 +568,10 @@ if __name__ == "__main__":
# AnnualEnterprise()
# BaseInfoEnterpriseAbroad()
# NewsEnterprise_task()
NewsEnterprise()
# NewsEnterprise()
# CorPerson()
# china100()
global100()
# AnnualEnterpriseXueQ()
# dujs_1020()
# dujioashou()
......
......@@ -58,7 +58,7 @@ if __name__ == '__main__':
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
}
query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND source='证监会'"
query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND source='证监会' AND id = '18703822757'"
cursor_.execute(query)
results = cursor_.fetchall()
for result in results:
......
......@@ -29,7 +29,7 @@ type_id = 1
create_by = 'XueLingKun'
taskType = '企业年报'
#付俊雪的需要改为巨潮资讯网1_福布斯2000_PDF_60_付
file_path = 'D:\\年报\\福布斯2000强_年报补充_20231018'
file_path = 'D:\\年报\\失败'
log.info(f'=============当前pid为{baseCore.getPID()}==============')
def sendKafka(dic_news):
......
......@@ -28,6 +28,8 @@ log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=0)
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
taskType = '企业负面新闻'
def sendKafka(dic_news):
start_time = time.time()
......@@ -93,7 +95,7 @@ def dishonesty(headers,com_name,social_code):
pass
else:
log.info(f'该企业{com_name}无严重失信信息')
return list_dishonesty
return url,list_dishonesty
for page in range(1,total_size+1):
param_page = {
'tableName': 'credit_zgf_fr_sxbzxr',
......@@ -172,7 +174,7 @@ def punish(headers,com_name,social_code):
pass
else:
log.info(f'该企业{com_name}无行政处罚信息')
return list_punish
return url,list_punish
for page in range(1,total_size+1):
param_page = {
'tableName': 'credit_xyzx_fr_xzcf_new',
......@@ -250,7 +252,7 @@ def abnormal(headers,com_name,social_code):
pass
else:
log.info(f'该企业{com_name}无经营异常信息')
return list_abhormal
return url,list_abhormal
for page in range(1, total_size+1):
param_page = {
'tableName': 'credit_xyzx_fr_xzcf_new',
......@@ -293,7 +295,7 @@ def abnormal(headers,com_name,social_code):
def dic_data(com_name,listData,type,detailurl):
dic_news = {
'title':com_name + type,
'structuredData':listData,
'structuredData':listData[:1],
'ynStructure':1,
'content': '',
'contentHtml': '',
......@@ -303,6 +305,11 @@ def dic_data(com_name,listData,type,detailurl):
}
return dic_news
def insertinto(dic_):
    """Persist one negative-news record into the zhejiangfmnews table.

    Reads 'title', 'structuredData' and 'source' from *dic_*; the
    structured data is stringified before being stored. Commits on the
    shared module-level connection.
    """
    params = (dic_["title"], str(dic_["structuredData"]), dic_["source"])
    cursor.execute(
        "INSERT INTO zhejiangfmnews (title, structuredData, source) VALUES (%s, %s, %s)",
        params,
    )
    cnx.commit()
if __name__=='__main__':
headers = {
......@@ -315,21 +322,39 @@ if __name__=='__main__':
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
com_name = '石家庄交投集团工程服务有限责任公司'
social_code = '91130100MA7EK14C8L'
url_dishonesty,list_dishonesty = dishonesty(headers,com_name,social_code)
dic_dishonesty = dic_data(com_name,list_dishonesty,'严重违法失信信息',url_dishonesty)
sendKafka(dic_dishonesty)
query = "select * from zhejiang "
cursor.execute(query)
results = cursor.fetchall()
for result in results:
com_name = result[1]
log.info(f'------------正在采集{com_name}----------------')
social_code = result[0]
try:
url_dishonesty,list_dishonesty = dishonesty(headers,com_name,social_code)
except:
list_dishonesty = []
log.info(f'error-------{com_name}')
continue
if list_dishonesty:
dic_dishonesty = dic_data(com_name,list_dishonesty,'严重违法失信信息',url_dishonesty)
# sendKafka(dic_dishonesty)
insertinto(dic_dishonesty)
log.info(f'----{com_name}---新增')
url_punish,list_punish = punish(headers,com_name,social_code)
dic_punish = dic_data(com_name, list_punish, '行政处罚信息', url_punish)
# print(dic_punish)
sendKafka(dic_punish)
url_punish,list_punish = punish(headers,com_name,social_code)
if list_punish:
dic_punish = dic_data(com_name, list_punish, '行政处罚信息', url_punish)
insertinto(dic_punish)
log.info(f'----{com_name}---新增')
# sendKafka(dic_punish)
url_abnormal,list_abnormal = abnormal(headers,com_name,social_code)
dic_abnormal = dic_data(com_name, list_abnormal, '经营异常信息', url_abnormal)
# print(dic_abnormal)
sendKafka(dic_abnormal)
url_abnormal,list_abnormal = abnormal(headers,com_name,social_code)
if list_abnormal:
dic_abnormal = dic_data(com_name, list_abnormal, '经营异常信息', url_abnormal)
insertinto(dic_abnormal)
log.info(f'----{com_name}---新增')
# sendKafka(dic_abnormal)
# 报告链接
# url_report = f'https://public.creditchina.gov.cn/credit-check/pdf/clickDownload?companyName={com_name}&entityType=1&uuid=&tyshxydm={social_code}'
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论