提交 6500f5f7 作者: 薛凌堃

10/27

上级 443a395e
...@@ -152,7 +152,8 @@ def BaseInfoEnterprise_task(): ...@@ -152,7 +152,8 @@ def BaseInfoEnterprise_task():
#企业核心人员 #企业核心人员
def CorPerson(): def CorPerson():
cnx, cursor = connectSql() cnx, cursor = connectSql()
gn_query = "select SocialCode from EnterpriseInfo where Place = '1'" # gn_query = "select SocialCode from EnterpriseInfo where Place = '1'"
gn_query = "SELECT a.SocialCode From EnterpriseInfo a ,EnterpriseType b WHERE a.SocialCode = b.SocialCode AND b.type=8 AND a.Place=1"
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
cnx.commit() cnx.commit()
...@@ -533,6 +534,27 @@ def dujs_1020(): ...@@ -533,6 +534,27 @@ def dujs_1020():
# r.rpush('dujs_1020:news_socialcode', item) # r.rpush('dujs_1020:news_socialcode', item)
# r.rpush('dujs_1020:person_socialcode', item) # r.rpush('dujs_1020:person_socialcode', item)
def china100():
    """Queue every SocialCode found in the China100 table for processing.

    Obtains a connection/cursor pair from ``connectSql()``, selects the
    ``SocialCode`` column of ``China100``, commits, and then appends each
    code to the ``'china100:baseinfo'`` list via ``r.rpush`` (``r`` is a
    module-level object, presumably the Redis client -- confirm against
    the file header).
    """
    conn, cur = connectSql()
    cur.execute("SELECT SocialCode FROM China100")
    rows = cur.fetchall()
    conn.commit()
    # Extract the single selected column from every row before pushing,
    # so a malformed row fails before anything is queued.
    social_codes = [row[0] for row in rows]
    for social_code in social_codes:
        r.rpush('china100:baseinfo', social_code)
# r.rpush('dujs_1020:news_socialcode', item)
# r.rpush('dujs_1020:person_socialcode', item)
def global100():
    """Queue every SocialCode found in the Global100 table for processing.

    Obtains a connection/cursor pair from ``connectSql()``, selects the
    ``SocialCode`` column of ``Global100``, commits, and then appends each
    code to the ``'global100:baseinfo'`` list via ``r.rpush`` (``r`` is a
    module-level object, presumably the Redis client -- confirm against
    the file header).
    """
    conn, cur = connectSql()
    cur.execute("SELECT SocialCode FROM Global100")
    rows = cur.fetchall()
    conn.commit()
    # Extract the single selected column from every row before pushing,
    # so a malformed row fails before anything is queued.
    social_codes = [row[0] for row in rows]
    for social_code in social_codes:
        r.rpush('global100:baseinfo', social_code)
if __name__ == "__main__": if __name__ == "__main__":
start = time.time() start = time.time()
...@@ -546,7 +568,10 @@ if __name__ == "__main__": ...@@ -546,7 +568,10 @@ if __name__ == "__main__":
# AnnualEnterprise() # AnnualEnterprise()
# BaseInfoEnterpriseAbroad() # BaseInfoEnterpriseAbroad()
# NewsEnterprise_task() # NewsEnterprise_task()
NewsEnterprise() # NewsEnterprise()
# CorPerson()
# china100()
global100()
# AnnualEnterpriseXueQ() # AnnualEnterpriseXueQ()
# dujs_1020() # dujs_1020()
# dujioashou() # dujioashou()
......
...@@ -58,7 +58,7 @@ if __name__ == '__main__': ...@@ -58,7 +58,7 @@ if __name__ == '__main__':
'Accept-Encoding': 'gzip, deflate, br', 'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
} }
query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND source='证监会'" query = "SELECT * FROM clb_sys_attachment WHERE type_id=1 AND source='证监会' AND id = '18703822757'"
cursor_.execute(query) cursor_.execute(query)
results = cursor_.fetchall() results = cursor_.fetchall()
for result in results: for result in results:
......
...@@ -29,7 +29,7 @@ type_id = 1 ...@@ -29,7 +29,7 @@ type_id = 1
create_by = 'XueLingKun' create_by = 'XueLingKun'
taskType = '企业年报' taskType = '企业年报'
#付俊雪的需要改为巨潮资讯网1_福布斯2000_PDF_60_付 #付俊雪的需要改为巨潮资讯网1_福布斯2000_PDF_60_付
file_path = 'D:\\年报\\福布斯2000强_年报补充_20231018' file_path = 'D:\\年报\\失败'
log.info(f'=============当前pid为{baseCore.getPID()}==============') log.info(f'=============当前pid为{baseCore.getPID()}==============')
def sendKafka(dic_news): def sendKafka(dic_news):
......
...@@ -28,6 +28,8 @@ log = baseCore.getLogger() ...@@ -28,6 +28,8 @@ log = baseCore.getLogger()
cnx = baseCore.cnx cnx = baseCore.cnx
cursor = baseCore.cursor cursor = baseCore.cursor
r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=0) r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=0)
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
taskType = '企业负面新闻' taskType = '企业负面新闻'
def sendKafka(dic_news): def sendKafka(dic_news):
start_time = time.time() start_time = time.time()
...@@ -93,7 +95,7 @@ def dishonesty(headers,com_name,social_code): ...@@ -93,7 +95,7 @@ def dishonesty(headers,com_name,social_code):
pass pass
else: else:
log.info(f'该企业{com_name}无严重失信信息') log.info(f'该企业{com_name}无严重失信信息')
return list_dishonesty return url,list_dishonesty
for page in range(1,total_size+1): for page in range(1,total_size+1):
param_page = { param_page = {
'tableName': 'credit_zgf_fr_sxbzxr', 'tableName': 'credit_zgf_fr_sxbzxr',
...@@ -172,7 +174,7 @@ def punish(headers,com_name,social_code): ...@@ -172,7 +174,7 @@ def punish(headers,com_name,social_code):
pass pass
else: else:
log.info(f'该企业{com_name}无行政处罚信息') log.info(f'该企业{com_name}无行政处罚信息')
return list_punish return url,list_punish
for page in range(1,total_size+1): for page in range(1,total_size+1):
param_page = { param_page = {
'tableName': 'credit_xyzx_fr_xzcf_new', 'tableName': 'credit_xyzx_fr_xzcf_new',
...@@ -250,7 +252,7 @@ def abnormal(headers,com_name,social_code): ...@@ -250,7 +252,7 @@ def abnormal(headers,com_name,social_code):
pass pass
else: else:
log.info(f'该企业{com_name}无经营异常信息') log.info(f'该企业{com_name}无经营异常信息')
return list_abhormal return url,list_abhormal
for page in range(1, total_size+1): for page in range(1, total_size+1):
param_page = { param_page = {
'tableName': 'credit_xyzx_fr_xzcf_new', 'tableName': 'credit_xyzx_fr_xzcf_new',
...@@ -293,7 +295,7 @@ def abnormal(headers,com_name,social_code): ...@@ -293,7 +295,7 @@ def abnormal(headers,com_name,social_code):
def dic_data(com_name,listData,type,detailurl): def dic_data(com_name,listData,type,detailurl):
dic_news = { dic_news = {
'title':com_name + type, 'title':com_name + type,
'structuredData':listData, 'structuredData':listData[:1],
'ynStructure':1, 'ynStructure':1,
'content': '', 'content': '',
'contentHtml': '', 'contentHtml': '',
...@@ -303,6 +305,11 @@ def dic_data(com_name,listData,type,detailurl): ...@@ -303,6 +305,11 @@ def dic_data(com_name,listData,type,detailurl):
} }
return dic_news return dic_news
def insertinto(dic_):
    """Insert one record into the ``zhejiangfmnews`` table and commit.

    Args:
        dic_: Mapping that provides the ``"title"``, ``"structuredData"``
            and ``"source"`` keys. ``structuredData`` is stored as its
            ``str()`` representation.

    Uses the module-level ``cursor`` and ``cnx`` objects.
    """
    # Build the parameter tuple in the same order as the column list.
    row_values = (
        dic_["title"],
        str(dic_["structuredData"]),
        dic_["source"],
    )
    cursor.execute(
        "INSERT INTO zhejiangfmnews (title, structuredData, source) VALUES (%s, %s, %s)",
        row_values,
    )
    cnx.commit()
if __name__=='__main__': if __name__=='__main__':
headers = { headers = {
...@@ -315,21 +322,39 @@ if __name__=='__main__': ...@@ -315,21 +322,39 @@ if __name__=='__main__':
'sec-ch-ua-mobile': '?0', 'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"' 'sec-ch-ua-platform': '"Windows"'
} }
com_name = '石家庄交投集团工程服务有限责任公司' query = "select * from zhejiang "
social_code = '91130100MA7EK14C8L' cursor.execute(query)
url_dishonesty,list_dishonesty = dishonesty(headers,com_name,social_code) results = cursor.fetchall()
dic_dishonesty = dic_data(com_name,list_dishonesty,'严重违法失信信息',url_dishonesty) for result in results:
sendKafka(dic_dishonesty) com_name = result[1]
log.info(f'------------正在采集{com_name}----------------')
social_code = result[0]
try:
url_dishonesty,list_dishonesty = dishonesty(headers,com_name,social_code)
except:
list_dishonesty = []
log.info(f'error-------{com_name}')
continue
if list_dishonesty:
dic_dishonesty = dic_data(com_name,list_dishonesty,'严重违法失信信息',url_dishonesty)
# sendKafka(dic_dishonesty)
insertinto(dic_dishonesty)
log.info(f'----{com_name}---新增')
url_punish,list_punish = punish(headers,com_name,social_code) url_punish,list_punish = punish(headers,com_name,social_code)
dic_punish = dic_data(com_name, list_punish, '行政处罚信息', url_punish) if list_punish:
# print(dic_punish) dic_punish = dic_data(com_name, list_punish, '行政处罚信息', url_punish)
sendKafka(dic_punish) insertinto(dic_punish)
log.info(f'----{com_name}---新增')
# sendKafka(dic_punish)
url_abnormal,list_abnormal = abnormal(headers,com_name,social_code) url_abnormal,list_abnormal = abnormal(headers,com_name,social_code)
dic_abnormal = dic_data(com_name, list_abnormal, '经营异常信息', url_abnormal) if list_abnormal:
# print(dic_abnormal) dic_abnormal = dic_data(com_name, list_abnormal, '经营异常信息', url_abnormal)
sendKafka(dic_abnormal) insertinto(dic_abnormal)
log.info(f'----{com_name}---新增')
# sendKafka(dic_abnormal)
# 报告链接 # 报告链接
# url_report = f'https://public.creditchina.gov.cn/credit-check/pdf/clickDownload?companyName={com_name}&entityType=1&uuid=&tyshxydm={social_code}' # url_report = f'https://public.creditchina.gov.cn/credit-check/pdf/clickDownload?companyName={com_name}&entityType=1&uuid=&tyshxydm={social_code}'
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论