Commit 21aafa31 Author: XveLingKun

2024-10-11

Parent c20792ca
......@@ -164,7 +164,7 @@ def NoticeDF():
continue
# Fetch all US-listed enterprises
mg_query = "select * from sys_base_enterprise_ipo where category=7 and securities_code is not null and priority =1"
mg_query = "select social_credit_code from sys_base_enterprise_ipo where category=7 and securities_code is not null and priority =1"
cursor_.execute(mg_query)
cnx_.commit()
mg_result = cursor_.fetchall()
......
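A minimal sketch of how the social_credit_code query above is typically consumed, assuming cnx_/cursor_ come from pymysql.connect (the host, credentials and database name below are placeholders):

import pymysql

# Placeholder connection; the real job builds cnx_/cursor_ elsewhere
cnx_ = pymysql.connect(host='127.0.0.1', user='root', password='***', database='example_db', charset='utf8mb4')
cursor_ = cnx_.cursor()
mg_query = ("select social_credit_code from sys_base_enterprise_ipo "
            "where category=7 and securities_code is not null and priority =1")
cursor_.execute(mg_query)
cnx_.commit()
mg_result = cursor_.fetchall()           # one-element tuples, one per US-listed enterprise
codes = [row[0] for row in mg_result]    # flatten to a plain list of social credit codes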
......@@ -63,8 +63,8 @@ def get_html(tycid, driver, headers):
@retry(tries=5, delay=2)
def get_page(url, s, headers):
ip = baseCore.get_proxy()
res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
# res = s.get(url=url, headers=headers, verify=False)
# res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
res = s.get(url=url, headers=headers, verify=False)
if res.status_code != 200:
raise
data_page = res.json()
......@@ -141,9 +141,9 @@ def doJob():
else:
continue
# Use the social credit code pulled from Redis to look up the matching basic info in the database
# item = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
item = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
# If Redis has run out of data, wait
item = '9133000070471161XA'
# item = '9133000070471161XA'
if item == None:
time.sleep(30 * 60)
continue
......@@ -499,10 +499,10 @@ def doJob():
continue
else:
pass
response = requests.post('http://114.115.236.206:8088/sync/executive', data=json_updata, timeout=300,
verify=False)
response_ = requests.post('http://114.116.116.241:9098/userserver/sync/executive', data=json_updata, timeout=300,
verify=False)
response = requests.post('http://1.95.72.34:8088/sync/executive', data=json_updata, timeout=300,
verify=False)
response_ = requests.post('http://114.116.116.241:9098/userserver/sync/executive', data=json_updata,
timeout=300, verify=False)
print(response.text)
print(response_.text)
log.info('=========成功======')
......
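A minimal sketch of the executive sync step shown above, assuming json_updata is a JSON string of executive records; the two endpoint URLs, the timeout and verify=False are taken from the code, while the sample payload fields are purely illustrative:

import json
import requests

records = [{'socialCreditCode': '9133000070471161XA', 'name': 'example'}]  # illustrative payload
json_updata = json.dumps(records, ensure_ascii=False)

# Push the same payload to both sync services
response = requests.post('http://1.95.72.34:8088/sync/executive',
                         data=json_updata, timeout=300, verify=False)
response_ = requests.post('http://114.116.116.241:9098/userserver/sync/executive',
                          data=json_updata, timeout=300, verify=False)
print(response.text)
print(response_.text)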
......@@ -64,8 +64,8 @@ def get_html(tycid, driver, headers):
@retry(tries=5, delay=3)
def get_page(url, s, headers):
ip = baseCore.get_proxy()
res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
# res = s.get(url=url, headers=headers, verify=False)
# res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
res = s.get(url=url, headers=headers, verify=False)
if res.status_code != 200:
raise
data_page = res.json()
......
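get_page in the two hunks above retries a direct (non-proxied) GET and parses the JSON body. A minimal self-contained sketch of that pattern, assuming a requests.Session and a JSON API; raising on a non-200 status is what makes @retry re-attempt the call:

import requests
from retry import retry

@retry(tries=5, delay=2)
def get_page(url, s, headers):
    # Direct request without a proxy; verify=False and the (5, 10) timeout mirror the job code
    res = s.get(url=url, headers=headers, verify=False, timeout=(5, 10))
    if res.status_code != 200:
        raise requests.HTTPError(f'unexpected status {res.status_code}')
    return res.json()

s = requests.Session()
# data_page = get_page('https://example.com/api/company', s, {'User-Agent': 'Mozilla/5.0'})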
......@@ -16,7 +16,7 @@ from base import BaseCore
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').ZZSN[
db_storage = pymongo.MongoClient('mongodb://1.95.69.135:27017', username='admin', password='ZZsn@9988').ZZSN[
'国外智库']
@retry(tries=2, delay=5)
......@@ -105,7 +105,8 @@ def doJob():
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Cookie': 'JSESSIONID=BHezogPwi8NJVECsKXCXqijdQ00-yMJHw_gR8wiC.ip-10-240-5-121; __cf_bm=c2byUypnSjXPS_UFDM7BMRGDxN6AQEkNVUjzw9HuSq8-1707054653-1-AbbI7JWWkfWKVGi8SKI06f0jGEjPdk5kvHAIRRpBHSSSnmxj1IcvGUT8+/O6R0U2RLZJECZdUzZIXAwFuEz5lPo=; _gcl_au=1.1.201344533.1707054655; _gid=GA1.2.557164000.1707054655; cb-enabled=enabled; cf_clearance=6tK6.WKHJbXXoV4NTgbyHRhetRxMdWPZofwlv01F65Y-1707054656-1-AfrYlWnLLZFC1sKxeFVQintPrZnjvjoJSZwRRhAYwqRHGdWbU5IFZQDJZJM21l20Tj6gk4JxNobWT0wGzp1Dgjw=; _ce.irv=new; cebs=1; _ce.clock_event=1; _ce.clock_data=72%2C123.149.3.159%2C1%2C9c1ce27f08b16479d2e17743062b28ed; custom_cookie_AB=1; AWSALB=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; AWSALBCORS=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; _gat_UA-1887794-2=1; _dc_gtm_UA-136634323-1=1; _ga_F5XZ540Q4V=GS1.1.1707054655.1.1.1707055119.7.0.0; _ga=GA1.1.1014316406.1707054655; _ga_F7KSNTXTRX=GS1.1.1707054655.1.1.1707055119.0.0.0; cebsp_=5; _ce.s=v~212f033193b9432855ae8335d6d3969cc1f8b751~lcw~1707055134688~lva~1707054658247~vpv~0~v11.fhb~1707054659602~v11.lhb~1707055126493~v11.cs~325107~v11.s~6d7ba630-c364-11ee-aba8-136dbbf9a447~v11.sla~1707055134688~v11.send~1707055135439~lcw~1707055135439',
# 'Cookie': 'JSESSIONID=BHezogPwi8NJVECsKXCXqijdQ00-yMJHw_gR8wiC.ip-10-240-5-121; __cf_bm=c2byUypnSjXPS_UFDM7BMRGDxN6AQEkNVUjzw9HuSq8-1707054653-1-AbbI7JWWkfWKVGi8SKI06f0jGEjPdk5kvHAIRRpBHSSSnmxj1IcvGUT8+/O6R0U2RLZJECZdUzZIXAwFuEz5lPo=; _gcl_au=1.1.201344533.1707054655; _gid=GA1.2.557164000.1707054655; cb-enabled=enabled; cf_clearance=6tK6.WKHJbXXoV4NTgbyHRhetRxMdWPZofwlv01F65Y-1707054656-1-AfrYlWnLLZFC1sKxeFVQintPrZnjvjoJSZwRRhAYwqRHGdWbU5IFZQDJZJM21l20Tj6gk4JxNobWT0wGzp1Dgjw=; _ce.irv=new; cebs=1; _ce.clock_event=1; _ce.clock_data=72%2C123.149.3.159%2C1%2C9c1ce27f08b16479d2e17743062b28ed; custom_cookie_AB=1; AWSALB=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; AWSALBCORS=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; _gat_UA-1887794-2=1; _dc_gtm_UA-136634323-1=1; _ga_F5XZ540Q4V=GS1.1.1707054655.1.1.1707055119.7.0.0; _ga=GA1.1.1014316406.1707054655; _ga_F7KSNTXTRX=GS1.1.1707054655.1.1.1707055119.0.0.0; cebsp_=5; _ce.s=v~212f033193b9432855ae8335d6d3969cc1f8b751~lcw~1707055134688~lva~1707054658247~vpv~0~v11.fhb~1707054659602~v11.lhb~1707055126493~v11.cs~325107~v11.s~6d7ba630-c364-11ee-aba8-136dbbf9a447~v11.sla~1707055134688~v11.send~1707055135439~lcw~1707055135439',
'Cookie': '__cf_bm=d9hIc2bALTgTBZ64CyxHwuWXuAZmsBuh5CakctSWeP0-1728549696-1.0.1.1-6XP3FXhlXvLTp0Bgcnhh00_7UcjUmV9KlVd6Zr5jbUVcZiwH4qM9suuA_1f181EMaZ2drTFJVLBGwS27V98VGg; JSESSIONID=MiizoFg43W81UlgdhIa0nXXsTwxJoYreIo6ZAgc1.ip-10-240-5-72; _gcl_au=1.1.472576737.1728549714; _ga_F5XZ540Q4V=GS1.1.1728549714.1.0.1728549714.60.0.0; _gid=GA1.2.2100045182.1728549715; _ga_F7KSNTXTRX=GS1.1.1728549714.1.0.1728549714.0.0.0; _ga=GA1.1.507449157.1728549715; cb-enabled=enabled; AWSALB=ycwWtvr4GmtlXD8YwugYRc9pfc68sD/lVUvwOaJNcSBtyoLxAQMeqeEz5IuiDVbsKzd6pKkCIhBEE1UcGoORiD70DwsEPrEAQgDZ3OuZodS0BG0g1OYO1vvhUClf; AWSALBCORS=ycwWtvr4GmtlXD8YwugYRc9pfc68sD/lVUvwOaJNcSBtyoLxAQMeqeEz5IuiDVbsKzd6pKkCIhBEE1UcGoORiD70DwsEPrEAQgDZ3OuZodS0BG0g1OYO1vvhUClf; custom_cookie_AB=2; cf_clearance=q8LbD2iMK8HvCfC8EC0ZxocKplGL_1yI7mPEDZx5FM0-1728549715-1.2.1.1-n7xc_Cop0OGUMyp2wTnL.YPlw71QrDsks_alvUMeYQXTShaMFiSIcJyDLbrGj.Bb_TbpRMaSE999joCsuSDRXsCPmIuEFZGhR3qaJt5a1EKeWRLQbf4IcRKLCwQTj3O3eITWLIPJmisWYQxNp0Rm6gLNDEM6zOvFkhkuwfaJP7Taj9JQ4eeAajhmhiGpijEiE3PCc499D1f_PAP09Y8uMRqj_YZGD99IBxoG1gBlRylFlUa9riPqWs7vaD6IFFwFEyeuc3XzC3Nl0TiYDj9sBiG8Us9jeZgWwXm2J7E.MJQKeQBM1utSKAEsVW3Lhs8emraPJYaDLzSW7MtMZ8C1m93z0dRSOtjEtzL7Cz5IL7wm3qSY2MqAug.Y2f3eXKb23Oxe6ebcO1vJRK5YHYyQdOV1by5J0t5oKQ5iRrVq1XBUiYYMX.e8lWy6rD1WO.qP',
'Referer': 'https://www.oecd-ilibrary.org/economics/oecd-policy-responses-on-the-impacts-of-the-war-in-ukraine_dc825602-en?page=2',
'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'Sec-Ch-Ua-Mobile': '?0',
......
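A minimal sketch of the MongoDB handle used in this file; the URI, credentials and the '国外智库' collection name are copied from the code above, and the commented insert is purely illustrative:

import pymongo

client = pymongo.MongoClient('mongodb://1.95.69.135:27017', username='admin', password='ZZsn@9988')
db_storage = client.ZZSN['国外智库']
# db_storage.insert_one({'title': 'example report', 'origin': 'OECD iLibrary'})  # illustrative write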
import os
import os
......@@ -399,7 +399,7 @@ if __name__ =='__main__':
start_time = time.time()
# Fetch enterprise info
# social_code = baseCore.redicPullData('NoticeEnterprise:mgqy_socialCode_add')
social_code = 'ZZSN22080900000046'
social_code = 'ZD0CN0012309068194'
if not social_code:
time.sleep(20)
continue
......@@ -417,8 +417,8 @@ if __name__ =='__main__':
else:
# log.info(f'数据库中无该企业{social_code}')
sql = f"SELECT * FROM sys_base_enterprise_ipo WHERE social_credit_code = '{social_code}' and category=7 and securities_code is not null and priority=1"
cursor.execute(sql)
data = cursor.fetchall()
cursor_.execute(sql)
data = cursor_.fetchone()
if data:
pass
else:
......@@ -430,17 +430,21 @@ if __name__ =='__main__':
Category = data[6]
Exchange = data[7]
sql_baseinfo = f"SELECT * FROM sys_base_enterprise WHERE social_credit_code = '{social_code}'"
cursor.execute(sql_baseinfo)
data_baseinfo = cursor.fetchone()
cursor_.execute(sql_baseinfo)
data_baseinfo = cursor_.fetchone()
if data_baseinfo:
pass
CompanyName = data_baseinfo[3]
EnglishName = data_baseinfo[34]
countryName = data_baseinfo[39]
if CompanyName:
pass
else:
continue
# Write to the database
insert = "INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName) VALUES (%s, %s)"
cursor_.execute(insert, (CompanyName, social_code, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName))
cnx_.commit()
insert = "INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
cursor.execute(insert, (CompanyName, social_code, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName))
cnx.commit()
com_name = CompanyName
code = SecuritiesCode
......
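The parameterized INSERT above pairs one %s placeholder with each of the nine listed columns. A minimal sketch with an illustrative row, assuming cursor/cnx are the pymysql cursor and connection used elsewhere in the script:

row = ('Example Corp', 'ZD0CN0012309068194', 'EXMP', 'Example', 'Example Corporation',
       'A', 7, 'NASDAQ', 'United States')  # illustrative values only
insert = ("INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, SecuritiesShortName, "
          "EnglishName, SecuritiesType, Category, Exchange, countryName) "
          "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)")
cursor.execute(insert, row)  # nine placeholders bound to nine values by the driver
cnx.commit()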
......@@ -24,7 +24,7 @@ class ClassTool():
def __init__(self):
self.taskType = '政策法规'
self.db_storage =pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').ZZSN[
self.db_storage =pymongo.MongoClient('mongodb://1.95.69.135:27017', username='admin', password='ZZsn@9988').ZZSN[
'国务院_国资委_copy1']
self.driver_path = r'D:\cmd100\chromedriver.exe'
......@@ -93,7 +93,7 @@ class ClassTool():
def sendKafka(self, dic_news):
try: # 114.116.116.241
producer = KafkaProducer(bootstrap_servers=['1.95.3.121:9092'], max_request_size=1024 * 1024 * 20)
producer = KafkaProducer(bootstrap_servers=['1.95.78.131:9092'], max_request_size=1024 * 1024 * 20, api_version=(2, 5, 0))
kafka_result = producer.send("policy",
json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
......
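A minimal sketch of the Kafka producer configured in sendKafka; the broker address, topic and size limit come from the code above, the payload is illustrative, and pinning api_version lets kafka-python skip broker version probing:

import json
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=['1.95.78.131:9092'],
                         max_request_size=1024 * 1024 * 20,
                         api_version=(2, 5, 0))
dic_news = {'title': 'example policy', 'content': '...'}  # illustrative payload
future = producer.send('policy', json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
record_metadata = future.get(timeout=10)  # block until the broker acknowledges the message
producer.close()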
......@@ -30,8 +30,8 @@ def getJson(url, headers, s):
ip = baseCore.get_proxy()
log.info(f'当前使用的ip是{ip}')
# req = requests.get(url, headers=headers, timeout=20)
req = s.get(url, headers=headers, proxies=ip, timeout=(5, 10))
# req = s.get(url, headers=headers, timeout=(5, 10))
# req = s.get(url, headers=headers, proxies=ip, timeout=(5, 10))
req = s.get(url, headers=headers, timeout=(5, 10))
dataJson = req.json()
if dataJson['errorCode'] != 0:
raise
......@@ -116,7 +116,7 @@ def doJob():
dics.append(dic)
log.info(f'{socialCreditCode}==={tycId}===共采集{len(dics)}条记录')
if dics:
req = sendData('http://114.115.236.206:8088/sync/branch', dics)
req = sendData('http://1.95.72.34:8088/sync/branch', dics)
log.info(f'{socialCreditCode}==={req.text}')
takeTime = baseCore.getTimeCost(start, time.time())
log.info(f'{socialCreditCode}==={req.text}===耗时{takeTime}')
......
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
......@@ -12,7 +12,7 @@ from kafka import KafkaProducer
from requests.packages import urllib3
from datetime import datetime, timedelta
urllib3.disable_warnings()
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').ZZSN['人民网-习讲话数据库_copy']
db_storage = pymongo.MongoClient('mongodb://1.95.69.135:27017', username='admin', password='ZZsn@9988').ZZSN['人民网-习讲话数据库_copy']
def newsdata(art_content_dict,art_type_dict,dic_lables):
for key, value in art_content_dict.items():
......@@ -61,7 +61,7 @@ def newsdata(art_content_dict,art_type_dict,dic_lables):
del post_dict['tags']
del post_dict['title_pd']
# Send to Kafka
producer = KafkaProducer(bootstrap_servers=['1.95.3.121:9092'], max_request_size=1024 * 1024 * 20)
producer = KafkaProducer(bootstrap_servers=['1.95.78.131:9092'], max_request_size=1024 * 1024 * 20, api_version=(2, 5, 0))
kafka_result = producer.send("research_center_fourth",
json.dumps(post_dict, ensure_ascii=False).encode('utf8'))
......