Commit 21aafa31  Author: XveLingKun

2024-10-11

Parent c20792ca
......@@ -931,115 +931,219 @@ def test24():
    # make_snapshot(snapshot, bar.render(), pic_echarts_path)
    # bar.render("bar_chart.html")


import os
import asyncio
import aiofiles
from typing import List, Tuple


# Assume get_folder_path is a function that has already been defined
def get_folder_path(knowledge_base_id: str) -> str:
    # Return the save path
    return f"./20240929/{knowledge_base_id}/folder"


async def async_write_file(file_path: str, text: str) -> None:
    async with aiofiles.open(file_path, "w", encoding='utf-8') as f:
        await f.write(text)


async def async_read_file(file_path: str) -> str:
    async with aiofiles.open(file_path, "r", encoding='utf-8') as f:
        return await f.read()


async def content2txt0915(textId: str, text: str, knowledge_base_id: str) -> Tuple[str, str]:
    """
    Save article content to a txt file named after the article id.
    """
    # Name the file after the id: a title-based name would need its special
    # characters stripped first, otherwise saving the file fails
    file_name = f"{textId}.txt"
    saved_path = get_folder_path(knowledge_base_id)
    if not os.path.exists(saved_path):
        os.makedirs(saved_path)
    file_path = os.path.join(saved_path, file_name)
    if os.path.exists(file_path):
        try:
            temp_text = await async_read_file(file_path)
            # Treat an equal length as identical content and skip rewriting
            if len(str(temp_text)) == len(text):
                file_status = f"File {file_name} already exists."
                return file_status, file_name
        except Exception as e:
            return f"Failed to read file {file_name}: {str(e)}", file_name
    try:
        await async_write_file(file_path, text)
        return file_path, file_name
    except (FileNotFoundError, OSError) as e:
        return f"Failed to write file {file_name}: {str(e)}", file_name


# Example usage
async def main():
    tasks = [
        ("123456", "This is a sample text."),
        ("789012", "This is another sample text."),
        # Add more tasks...
    ]
    knowledge_base_id = "example_knowledge_base_id"
    coroutines = [content2txt0915(textId, text, knowledge_base_id) for textId, text in tasks]
    results = await asyncio.gather(*coroutines)
    for result in results:
        if isinstance(result, tuple):
            print(f"File path: {result[0]}, file name: {result[1]}")
        else:
            print(f"Error while processing a file: {result}")


# # Run the async entry point
# if __name__ == "__main__":
#     asyncio.run(main())
def test25():
    import jieba
    # Text to segment
    text = "我来到北京清华大学"
    # Segment it with jieba
    words = jieba.lcut(text)
    # Print the segmentation result
    print(words)
    # Full mode
    full_mode_words = jieba.lcut(text, cut_all=True)
    print("Full mode result:", full_mode_words)
    # Accurate mode (the default)
    precise_mode_words = jieba.lcut(text)
    print("Accurate mode result:", precise_mode_words)
    # Search-engine mode
    search_mode_words = jieba.lcut_for_search(text)
    print("Search-engine mode result:", search_mode_words)
def test26():
    info_date = "2023-04-02"
    year = info_date[:4]
    # Zero-padded ISO dates compare correctly as plain strings
    if info_date < "2023-05-05":
        print("aaaa")
    return
if __name__ == "__main__":
    # import queue
    #
    # # Create a queue with no size limit
    # update_edges = queue.Queue(maxsize=0)
    #
    # # Define the data
    # edge_dict = {
    #     'id': '0_RHtFptO8lzqsr30a',
    #     'source': '0',
    #     'target': 'RHtFptO8lzqsr30a',
    #     'relationshipType': '包含',
    #     'attributeInfo': {'arrow': '', 'width': 1, 'colour': '#A2B1C3', 'id': '0_RHtFptO8lzqsr30a'},
    #     'databaseName': 'industry.20240812115056'
    # }
    #
    # # Put the data on the queue
    # update_edges.put(edge_dict)
    #
    # # Take the data back off the queue
    # retrieved_edge_dict = update_edges.get()
    #
    # # Check whether the id changed
    # print(retrieved_edge_dict['id'])  # Should print '0_RHtFptO8lzqsr30a'

    # # The original dict
    # original_dict = {
    #     'old_key': 'value',
    #     'another_key': 'another_value'
    # }
    #
    # # The old key to replace and its new name
    # old_key = 'old_key'
    # new_key = 'new_key'
    #
    # # Check that the old key exists in the dict
    # if old_key in original_dict:
    #     # Move the old key's value to the new key
    #     original_dict[new_key] = original_dict.pop(old_key)
    #
    # # Print the updated dict
    # print(original_dict)

    # import re
    #
    # text = "以下是一些日期:1.1, 11.1, 2.2, 12.29, 12.31, 3.31"
    # # pattern = r'\d{1,2}[1-12]\.\d{1,2}[1-31]'
    # pattern = r'\d{1,2}(0[1-9]|1[0-2])\.(0[1-9]|[12][0-9]|3[01])'
    # matches = re.findall(pattern, text)
    #
    # print(matches)
    # import re
    #
    # # Corrected regular expression
    # # pattern = r'(0?[1-9]|1[0-2])\.(0?[1-9]|[12][0-9]|3[01])'
    # pattern = r'(0?[1-9]|1[0-2])月(0?[1-9]|[12][0-9]|3[01])日|(0?[1-9]|1[0-2])-(0?[1-9]|[12][0-9]|3[01])|(0?[1-9]|1[0-2])\/(0?[1-9]|[12][0-9]|3[01])|(0?[1-9]|1[0-2])\.(0?[1-9]|[12][0-9]|3[01])|\d{1,2}日'
    #
    # # Revised pattern for month/day dates
    # # pattern = r'\d{1,2}(0?[1-9]|1[0-2])月(0?[1-9]|[12][0-9]|3[01])日'
    #
    # # Test data
    # test_data = [
    #     "01.01",
    #     "2.5",
    #     "13.01",
    #     "00.01",
    #     "02.29",
    #     "02.28",
    #     "04.31",
    #     "01月02日",
    #     "2月5日",
    #     "13月01日",
    #     "00月01日",
    #     "02月29日",
    #     "02月28日",
    #     "04月31日"
    # ]
#
    # # Iterate over the test data and check each entry against the pattern
    # for data in test_data:
    #     match = re.match(pattern, data)
    #     if match:
    #         # print(f"match: {match}")
    #         print(f"{data} is a valid date format.")
    #     else:
    #         print(f"{data} is not a valid date format.")
#
# text = """为全面反映电源绿色转型发展、行业电力消费情况,更好地服务政府宏观调控、行业运行监测,中国电力企业联合会研究编制了全国电力消费和供应系列指数,并于近日正式发布。通过该系列指数,可以量化电力消费、供应系列指标自基期(以2020年为基期)以来的累计变化情况,直观显现指标变化趋势,提高对电力消费和供应大势的整体把握。
#   数据显示,2024年6月10日,2月5日,全行业用电指数为129.4,3.31,全行业用电量比2020年基期增长了29.4%,年均增长6.7%,同比增长5.9%。国民经济运行总体稳定,全行业用电量保持平稳较快增长。
#   6月,制造业用电指数为126.6,比2020年基期增长了26.6%,年均增长6.1%,同比增长6.0%,制造业用电量保持平稳较快增长。其中,四大高载能行业用电指数为120.2,比2020年基期增长了20.2%,年均增长4.7%,同比增长5.2%。高技术及装备制造业用电指数为141.6,比2020年基期增长了41.6%,年均增长9.1%,同比增长9.1%,高技术及装备制造业用电量延续快速增长势头,制造业产业结构转型升级趋势较为明显。
#   6月,服务业用电指数为149.5,比2020年基期增长了49.5%,年均增长10.6%,同比增长7.6%,服务业用电量保持较快增长势头。其中,交通运输、仓储和邮政业用电指数为137.5,比2020年基期增长了37.5%,年均增长8.3%,同比增长5.2%,同比增速比上月回升1.0个百分点。信息传输、软件和信息技术服务业用电指数为135.9,比2020年基期增长了35.9%,年均增长8.0%,同比增长8.0%。在5G、人工智能、大数据、云计算等快速发展带动下,信息传输、软件和信息技术服务业用电延续较快增长势头
# """
    # # match = re.match(pattern, text)
    # # # Iterate over the test data and check each entry against the pattern
    # # for data in test_data:
    # #     match = re.match(pattern, data)
    # #     if match:
    # #         print(match)

    # test02()
    # test03()
    # test04()
    # test05()
    # test24()
    # pass
    # test25()
    test26()
\ No newline at end of file
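Review note: the commented-out experiments above illustrate why pure regexes struggle with dates; patterns like (0?[1-9]|1[0-2])\.(0?[1-9]|[12][0-9]|3[01]) still accept impossible combinations such as 02.30 or 04.31. A hedged sketch of a stricter check that parses instead of pattern-matching (the helper name and format list are illustrative, not part of this commit):

from datetime import datetime

def is_valid_month_day(token: str, year: int = 2024) -> bool:
    # Try each month/day format the big pattern above targets
    for fmt in ('%m月%d日', '%m-%d', '%m/%d', '%m.%d'):
        try:
            datetime.strptime(f'{year}.{token}', f'%Y.{fmt}')
            return True
        except ValueError:
            continue
    return False

print(is_valid_month_day('02.29'))  # True: 2024 is a leap year
print(is_valid_month_day('04.31'))  # False: April has only 30 days
print(is_valid_month_day('2月5日'))  # True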
......@@ -164,7 +164,7 @@ def NoticeDF():
        continue
    # Fetch all US-listed companies
    mg_query = "select * from sys_base_enterprise_ipo where category=7 and securities_code is not null and priority =1"
    mg_query = "select social_credit_code from sys_base_enterprise_ipo where category=7 and securities_code is not null and priority =1"
    cursor_.execute(mg_query)
    cnx_.commit()
    mg_result = cursor_.fetchall()
......
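Review note: with the projection narrowed from select * to select social_credit_code, every row in mg_result is now a 1-tuple, so downstream code must unpack index 0 rather than a full record. A minimal sketch of the assumed shape (the sample codes are illustrative):

# mg_result as returned by cursor_.fetchall() for the single-column query
mg_result = [('91110000XXXXXXXXXX',), ('91310000YYYYYYYYYY',)]  # illustrative values
social_codes = [row[0] for row in mg_result]  # unwrap the 1-tuples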
......@@ -63,8 +63,8 @@ def get_html(tycid, driver, headers):
@retry(tries=5, delay=2)
def get_page(url, s, headers):
    ip = baseCore.get_proxy()
    res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
    # res = s.get(url=url, headers=headers, verify=False)
    # res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
    res = s.get(url=url, headers=headers, verify=False)
    if res.status_code != 200:
        raise
    data_page = res.json()
data_page = res.json()
......@@ -141,9 +141,9 @@ def doJob():
        else:
            continue
        # Use the social credit code pulled from Redis to fetch the matching base info from the database
        # item = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
        item = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
        # If Redis has run out of data, wait
        item = '9133000070471161XA'
        # item = '9133000070471161XA'
        if item == None:
            time.sleep(30 * 60)
            continue
......@@ -499,10 +499,10 @@ def doJob():
            continue
        else:
            pass
        response = requests.post('http://114.115.236.206:8088/sync/executive', data=json_updata, timeout=300,
                                 verify=False)
        response_ = requests.post('http://114.116.116.241:9098/userserver/sync/executive', data=json_updata, timeout=300,
                                  verify=False)
        response = requests.post('http://1.95.72.34:8088/sync/executive', data=json_updata, timeout=300,
                                 verify=False)
        response_ = requests.post('http://114.116.116.241:9098/userserver/sync/executive', data=json_updata,
                                  timeout=300, verify=False)
        print(response.text)
        print(response_.text)
        log.info('=========Success======')
......
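Review note: both executive-sync posts here migrate from 114.115.236.206 to 1.95.72.34 while the userserver host stays unchanged. Hoisting the hosts into module-level constants would make the next migration a one-line edit; a minimal sketch with invented constant and helper names:

import requests

SYNC_HOST = 'http://1.95.72.34:8088'
USERSERVER_HOST = 'http://114.116.116.241:9098'

def push_executives(json_updata):
    # Hypothetical wrapper around the two posts shown above
    response = requests.post(f'{SYNC_HOST}/sync/executive', data=json_updata,
                             timeout=300, verify=False)
    response_ = requests.post(f'{USERSERVER_HOST}/userserver/sync/executive', data=json_updata,
                              timeout=300, verify=False)
    return response, response_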
......@@ -64,8 +64,8 @@ def get_html(tycid, driver, headers):
@retry(tries=5, delay=3)
def get_page(url, s, headers):
    ip = baseCore.get_proxy()
    res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
    # res = s.get(url=url, headers=headers, verify=False)
    # res = s.get(url=url, headers=headers, proxies=ip, timeout=(5, 10))
    res = s.get(url=url, headers=headers, verify=False)
    if res.status_code != 200:
        raise
    data_page = res.json()
......
......@@ -16,7 +16,7 @@ from base import BaseCore
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').ZZSN[
db_storage = pymongo.MongoClient('mongodb://1.95.69.135:27017', username='admin', password='ZZsn@9988').ZZSN[
'国外智库']
@retry(tries=2, delay=5)
......@@ -105,7 +105,8 @@ def doJob():
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Cookie': 'JSESSIONID=BHezogPwi8NJVECsKXCXqijdQ00-yMJHw_gR8wiC.ip-10-240-5-121; __cf_bm=c2byUypnSjXPS_UFDM7BMRGDxN6AQEkNVUjzw9HuSq8-1707054653-1-AbbI7JWWkfWKVGi8SKI06f0jGEjPdk5kvHAIRRpBHSSSnmxj1IcvGUT8+/O6R0U2RLZJECZdUzZIXAwFuEz5lPo=; _gcl_au=1.1.201344533.1707054655; _gid=GA1.2.557164000.1707054655; cb-enabled=enabled; cf_clearance=6tK6.WKHJbXXoV4NTgbyHRhetRxMdWPZofwlv01F65Y-1707054656-1-AfrYlWnLLZFC1sKxeFVQintPrZnjvjoJSZwRRhAYwqRHGdWbU5IFZQDJZJM21l20Tj6gk4JxNobWT0wGzp1Dgjw=; _ce.irv=new; cebs=1; _ce.clock_event=1; _ce.clock_data=72%2C123.149.3.159%2C1%2C9c1ce27f08b16479d2e17743062b28ed; custom_cookie_AB=1; AWSALB=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; AWSALBCORS=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; _gat_UA-1887794-2=1; _dc_gtm_UA-136634323-1=1; _ga_F5XZ540Q4V=GS1.1.1707054655.1.1.1707055119.7.0.0; _ga=GA1.1.1014316406.1707054655; _ga_F7KSNTXTRX=GS1.1.1707054655.1.1.1707055119.0.0.0; cebsp_=5; _ce.s=v~212f033193b9432855ae8335d6d3969cc1f8b751~lcw~1707055134688~lva~1707054658247~vpv~0~v11.fhb~1707054659602~v11.lhb~1707055126493~v11.cs~325107~v11.s~6d7ba630-c364-11ee-aba8-136dbbf9a447~v11.sla~1707055134688~v11.send~1707055135439~lcw~1707055135439',
# 'Cookie': 'JSESSIONID=BHezogPwi8NJVECsKXCXqijdQ00-yMJHw_gR8wiC.ip-10-240-5-121; __cf_bm=c2byUypnSjXPS_UFDM7BMRGDxN6AQEkNVUjzw9HuSq8-1707054653-1-AbbI7JWWkfWKVGi8SKI06f0jGEjPdk5kvHAIRRpBHSSSnmxj1IcvGUT8+/O6R0U2RLZJECZdUzZIXAwFuEz5lPo=; _gcl_au=1.1.201344533.1707054655; _gid=GA1.2.557164000.1707054655; cb-enabled=enabled; cf_clearance=6tK6.WKHJbXXoV4NTgbyHRhetRxMdWPZofwlv01F65Y-1707054656-1-AfrYlWnLLZFC1sKxeFVQintPrZnjvjoJSZwRRhAYwqRHGdWbU5IFZQDJZJM21l20Tj6gk4JxNobWT0wGzp1Dgjw=; _ce.irv=new; cebs=1; _ce.clock_event=1; _ce.clock_data=72%2C123.149.3.159%2C1%2C9c1ce27f08b16479d2e17743062b28ed; custom_cookie_AB=1; AWSALB=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; AWSALBCORS=I/eGQ0glcxuROskD1JKEl/dqsqElpmo/MnwLboJZJB2QthQFFWnLA3gzuJTskEaZxJD7VuWEEsqjhLVvhq4q2Wt0RebuRhukeHpKvgmGMelxpn/RiDmehyvxTOiS; _gat_UA-1887794-2=1; _dc_gtm_UA-136634323-1=1; _ga_F5XZ540Q4V=GS1.1.1707054655.1.1.1707055119.7.0.0; _ga=GA1.1.1014316406.1707054655; _ga_F7KSNTXTRX=GS1.1.1707054655.1.1.1707055119.0.0.0; cebsp_=5; _ce.s=v~212f033193b9432855ae8335d6d3969cc1f8b751~lcw~1707055134688~lva~1707054658247~vpv~0~v11.fhb~1707054659602~v11.lhb~1707055126493~v11.cs~325107~v11.s~6d7ba630-c364-11ee-aba8-136dbbf9a447~v11.sla~1707055134688~v11.send~1707055135439~lcw~1707055135439',
'Cookie': '__cf_bm=d9hIc2bALTgTBZ64CyxHwuWXuAZmsBuh5CakctSWeP0-1728549696-1.0.1.1-6XP3FXhlXvLTp0Bgcnhh00_7UcjUmV9KlVd6Zr5jbUVcZiwH4qM9suuA_1f181EMaZ2drTFJVLBGwS27V98VGg; JSESSIONID=MiizoFg43W81UlgdhIa0nXXsTwxJoYreIo6ZAgc1.ip-10-240-5-72; _gcl_au=1.1.472576737.1728549714; _ga_F5XZ540Q4V=GS1.1.1728549714.1.0.1728549714.60.0.0; _gid=GA1.2.2100045182.1728549715; _ga_F7KSNTXTRX=GS1.1.1728549714.1.0.1728549714.0.0.0; _ga=GA1.1.507449157.1728549715; cb-enabled=enabled; AWSALB=ycwWtvr4GmtlXD8YwugYRc9pfc68sD/lVUvwOaJNcSBtyoLxAQMeqeEz5IuiDVbsKzd6pKkCIhBEE1UcGoORiD70DwsEPrEAQgDZ3OuZodS0BG0g1OYO1vvhUClf; AWSALBCORS=ycwWtvr4GmtlXD8YwugYRc9pfc68sD/lVUvwOaJNcSBtyoLxAQMeqeEz5IuiDVbsKzd6pKkCIhBEE1UcGoORiD70DwsEPrEAQgDZ3OuZodS0BG0g1OYO1vvhUClf; custom_cookie_AB=2; cf_clearance=q8LbD2iMK8HvCfC8EC0ZxocKplGL_1yI7mPEDZx5FM0-1728549715-1.2.1.1-n7xc_Cop0OGUMyp2wTnL.YPlw71QrDsks_alvUMeYQXTShaMFiSIcJyDLbrGj.Bb_TbpRMaSE999joCsuSDRXsCPmIuEFZGhR3qaJt5a1EKeWRLQbf4IcRKLCwQTj3O3eITWLIPJmisWYQxNp0Rm6gLNDEM6zOvFkhkuwfaJP7Taj9JQ4eeAajhmhiGpijEiE3PCc499D1f_PAP09Y8uMRqj_YZGD99IBxoG1gBlRylFlUa9riPqWs7vaD6IFFwFEyeuc3XzC3Nl0TiYDj9sBiG8Us9jeZgWwXm2J7E.MJQKeQBM1utSKAEsVW3Lhs8emraPJYaDLzSW7MtMZ8C1m93z0dRSOtjEtzL7Cz5IL7wm3qSY2MqAug.Y2f3eXKb23Oxe6ebcO1vJRK5YHYyQdOV1by5J0t5oKQ5iRrVq1XBUiYYMX.e8lWy6rD1WO.qP',
'Referer': 'https://www.oecd-ilibrary.org/economics/oecd-policy-responses-on-the-impacts-of-the-war-in-ukraine_dc825602-en?page=2',
'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'Sec-Ch-Ua-Mobile': '?0',
......
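Review note: the replaced Cookie value is a short-lived Cloudflare session, so hard-coding it means edits like this one recur every time it expires. A sketch that pulls it from the environment instead (OECD_COOKIE is an invented variable name):

import os

headers = {
    'Accept-Language': 'zh-CN,zh;q=0.9',
    # Short-lived Cloudflare session cookie, kept out of source control
    'Cookie': os.environ.get('OECD_COOKIE', ''),
    'Referer': 'https://www.oecd-ilibrary.org/economics/oecd-policy-responses-on-the-impacts-of-the-war-in-ukraine_dc825602-en?page=2',
}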
import os
......@@ -399,7 +399,7 @@ if __name__ =='__main__':
        start_time = time.time()
        # Fetch company info
        # social_code = baseCore.redicPullData('NoticeEnterprise:mgqy_socialCode_add')
        social_code = 'ZZSN22080900000046'
        social_code = 'ZD0CN0012309068194'
        if not social_code:
            time.sleep(20)
            continue
......@@ -417,8 +417,8 @@ if __name__ =='__main__':
        else:
            # log.info(f'Company {social_code} not found in the database')
            sql = f"SELECT * FROM sys_base_enterprise_ipo WHERE social_credit_code = '{social_code}' and category=7 and securities_code is not null and priority=1"
            cursor.execute(sql)
            data = cursor.fetchall()
            cursor_.execute(sql)
            data = cursor_.fetchone()
            if data:
                pass
            else:
......@@ -430,17 +430,21 @@ if __name__ =='__main__':
            Category = data[6]
            Exchange = data[7]
            sql_baseinfo = f"SELECT * FROM sys_base_enterprise WHERE social_credit_code = '{social_code}'"
            cursor.execute(sql_baseinfo)
            data_baseinfo = cursor.fetchone()
            cursor_.execute(sql_baseinfo)
            data_baseinfo = cursor_.fetchone()
            if data_baseinfo:
                pass
            CompanyName = data_baseinfo[3]
            EnglishName = data_baseinfo[34]
            countryName = data_baseinfo[39]
            if CompanyName:
                pass
            else:
                continue
            # Write to the database
            insert = "INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName) VALUES (%s, %s)"
            cursor_.execute(insert, (CompanyName, social_code, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName))
            cnx_.commit()
            insert = "INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
            cursor.execute(insert, (CompanyName, social_code, SecuritiesCode, SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, countryName))
            cnx.commit()
            com_name = CompanyName
            code = SecuritiesCode
......
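Review note: the fix above brings the placeholder count in line with the nine inserted columns (the old VALUES (%s, %s) would fail at execute time with a parameter-formatting error) and moves the INSERT onto cursor/cnx. Deriving the placeholders from the value tuple keeps the two from drifting apart again; a sketch reusing the variables from this hunk:

values = (CompanyName, social_code, SecuritiesCode, SecuritiesShortName,
          EnglishName, SecuritiesType, Category, Exchange, countryName)
placeholders = ', '.join(['%s'] * len(values))  # one %s per value, always in sync
insert = ("INSERT INTO EnterpriseInfo(CompanyName, SocialCode, SecuritiesCode, "
          "SecuritiesShortName, EnglishName, SecuritiesType, Category, Exchange, "
          f"countryName) VALUES ({placeholders})")
cursor.execute(insert, values)
cnx.commit()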
......@@ -24,7 +24,7 @@ class ClassTool():
    def __init__(self):
        self.taskType = '政策法规'
        self.db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').ZZSN[
        self.db_storage = pymongo.MongoClient('mongodb://1.95.69.135:27017', username='admin', password='ZZsn@9988').ZZSN[
            '国务院_国资委_copy1']
        self.driver_path = r'D:\cmd100\chromedriver.exe'
......@@ -93,7 +93,7 @@ class ClassTool():
    def sendKafka(self, dic_news):
        try:  # 114.116.116.241
            producer = KafkaProducer(bootstrap_servers=['1.95.3.121:9092'], max_request_size=1024 * 1024 * 20)
            producer = KafkaProducer(bootstrap_servers=['1.95.78.131:9092'], max_request_size=1024 * 1024 * 20, api_version=(2, 5, 0))
            kafka_result = producer.send("policy",
                                         json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
......
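Review note: pinning api_version=(2, 5, 0) skips kafka-python's broker-version probe, which can time out against some broker setups; note the producer is also rebuilt on every sendKafka call. A minimal sketch that builds it once and surfaces send errors (the helper names are invented, not part of this commit):

import json
from kafka import KafkaProducer

_producer = None

def get_producer():
    # Create the shared producer lazily, on first use
    global _producer
    if _producer is None:
        _producer = KafkaProducer(bootstrap_servers=['1.95.78.131:9092'],
                                  max_request_size=1024 * 1024 * 20,
                                  api_version=(2, 5, 0))
    return _producer

def send_policy(dic_news):
    future = get_producer().send('policy', json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
    return future.get(timeout=30)  # block so broker errors are raised here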
......@@ -30,8 +30,8 @@ def getJson(url, headers, s):
    ip = baseCore.get_proxy()
    log.info(f'Currently using proxy {ip}')
    # req = requests.get(url, headers=headers, timeout=20)
    req = s.get(url, headers=headers, proxies=ip, timeout=(5, 10))
    # req = s.get(url, headers=headers, timeout=(5, 10))
    # req = s.get(url, headers=headers, proxies=ip, timeout=(5, 10))
    req = s.get(url, headers=headers, timeout=(5, 10))
    dataJson = req.json()
    if dataJson['errorCode'] != 0:
        raise
......@@ -116,7 +116,7 @@ def doJob():
            dics.append(dic)
        log.info(f'{socialCreditCode}==={tycId}===collected {len(dics)} records in total')
        if dics:
            req = sendData('http://114.115.236.206:8088/sync/branch', dics)
            req = sendData('http://1.95.72.34:8088/sync/branch', dics)
            log.info(f'{socialCreditCode}==={req.text}')
        takeTime = baseCore.getTimeCost(start, time.time())
        log.info(f'{socialCreditCode}==={req.text}===took {takeTime}')
......
# -*- coding: utf-8 -*-
......@@ -12,7 +12,7 @@ from kafka import KafkaProducer
from requests.packages import urllib3
from datetime import datetime, timedelta
urllib3.disable_warnings()
db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='ZZsn@9988').ZZSN['人民网-习讲话数据库_copy']
db_storage = pymongo.MongoClient('mongodb://1.95.69.135:27017', username='admin', password='ZZsn@9988').ZZSN['人民网-习讲话数据库_copy']
def newsdata(art_content_dict,art_type_dict,dic_lables):
    for key, value in art_content_dict.items():
......@@ -61,7 +61,7 @@ def newsdata(art_content_dict,art_type_dict,dic_lables):
        del post_dict['tags']
        del post_dict['title_pd']
        # Send to Kafka
        producer = KafkaProducer(bootstrap_servers=['1.95.3.121:9092'], max_request_size=1024 * 1024 * 20)
        producer = KafkaProducer(bootstrap_servers=['1.95.78.131:9092'], max_request_size=1024 * 1024 * 20, api_version=(2, 5, 0))
        kafka_result = producer.send("research_center_fourth",
                                     json.dumps(post_dict, ensure_ascii=False).encode('utf8'))
......