Commit 5007e9f7 Author: LiuLiYuan

Merge remote-tracking branch 'origin/master'

......@@ -421,6 +421,7 @@ def NQEnterprise():
nq_social_list = [item[0] for item in nq_result]
for item in nq_social_list:
#NEEQ (New Third Board) enterprises: financials, listing info and key personnel already collected; company news and announcements not yet. The announcement script is done; company news must be pushed into redis daily
# r.rpush('NQEnterprise:nq_Ipo', item)
r.rpush('NQEnterprise:nq_finance',item)
# r.rpush('NQEnterprise:nq_notice',item)
......@@ -451,11 +452,26 @@ def omeng():
#Manufacturing single-item champion enterprises
def danxiangguanjun():
cnx, cursor = connectSql()
query = "SELECT CompanyName FROM champion"
cursor.execute(query)
result = cursor.fetchall()
cnx.commit()
com_namelist = [item[0] for item in result]
for item in com_namelist:
r.rpush('champion:baseinfo',item)
#SOE sci-tech reform demonstration enterprises
def kegaishifan():
cnx, cursor = connectSql()
query = "SELECT CompanyName FROM technological"
cursor.execute(query)
result = cursor.fetchall()
cnx.commit()
com_namelist = [item[0] for item in result]
for item in com_namelist:
r.rpush('technological:baseinfo',item)
#"Double hundred" enterprises
def shuangbaiqiye():
......@@ -467,6 +483,8 @@ def zhuangjingtexind():
if __name__ == "__main__":
start = time.time()
# danxiangguanjun()
kegaishifan()
# NoticeEnterprise()
# AnnualEnterpriseIPO()
# AnnualEnterprise()
......@@ -477,7 +495,7 @@ if __name__ == "__main__":
# FBS()
# MengZhi()
# NQEnterprise()
# SEC_CIK()
# omeng()
# AnnualEnterpriseUS()
# NoticeEnterprise_task()
......
"""
Parse the JSON data; two endpoints:
https://data.sec.gov/api/xbrl/companyfacts/CIK0000320193.json  fact values and us-gaap tags
https://www.sec.gov/Archives/edgar/data/320193/000032019322000108/MetaLinks.json  mapping between html fields and us-gaap tags
step1: build the URL
step2:
"""
import json
import time
import requests
from kafka import KafkaProducer
from operator import itemgetter
from itertools import groupby
from base.BaseCore import BaseCore
# import urllib3
# urllib3.disable_warnings()
baseCore = BaseCore()
log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
def fromcikgetinfo(cik):
# Parameterized query (assumes a DB-API cursor, e.g. pymysql) so the cik value cannot inject SQL
query = "select * from mgzqyjwyh_list where cik=%s"
cursor.execute(query, (cik,))
data = cursor.fetchone()
# Columns used by the caller: data[2]=company name, data[3]=ticker, data[4]=exchange
return data
def getRequest(url):
headers = {
'Host': 'data.sec.gov',
'Connection': 'keep-alive',
'Cache-Control': 'max-age=0',
'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-User': '?1',
'Sec-Fetch-Dest': 'document',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cookie': '_ga=GA1.2.784424676.1695174651; _4c_=%7B%22_4c_s_%22%3A%22lZFLT4QwFIX%2FyqRrILS0pbAzmBgXajQ%2BlhNpLwOZcUoKDo4T%2Fru3gMbHym5ov55zcjk9kaGGPcmpzARNuVRcxElAtnDsSH4irjH%2BcyA50awsDTUq1ElShZwZCMuKmbASSQUUKsYoIwF5w6w0ZpmIpeBKqTEgul0yTkRbA5hFs4iqKA6rDh39OxKuYty2zppX3a%2F7Y%2BtlA5SrzmzxwsCh0bAeGtPX3s8m%2BUJraDZ1jzhlE22dl0QC90OzN3b47Vvol0%2BkFGnp7NCB9xa1sy%2BwolQitlgEeZocfloHFTg3yfDUNb0ftAMdbexhAVjezMKZPTaemtV9cYf8%2Bhu5LW6uFtT6jv0YO6ufdz4UnyUgF2frh8tz%2F2%2BKc8ZlKqPPpxKUjHPfCJiksRAZldhnvyO5kjz2a5yTp%2FrpTzVXWfZXPbcQ%2Bulh%2Fx%2FrOH4A%22%7D; _ga_300V1CHKH1=GS1.1.1695174651.1.1.1695174684.0.0.0; ak_bmsc=91C6D28D093861656DB8C1FC1972DAB6~000000000000000000000000000000~YAAQlQ8kF2U6orCKAQAAgyl9uxX8kNk3C77pkMi6N6RxnsUqDbYEmIcNjtLSa8W6kfGL9cQMRHBUaYcbEA1+oXsvUwUF80G8hmH/F4S0ZOEnVCrlcBLx219N24l2qmoSKtVDH+VKe7c1bji9MHc7tO2R56R7juZJv9gceAdtKEuArkPfD8ijx/TyEgIrM+XruGtzCRmLnfq86UoJYP+j+tXcaWkc/qm1zHDReDNf/cHd6h2aRMs4lsES8+uh6YTjE7bfCp8h2DNJ2e07pm0ojcI/kdycUPHmuTqWPdTBEjUybad31E1hRNBAE8PbGjy2lvlPY/piuN3HX3Q5ifsmTqCNJzynN2kjGm6i4SHhmEAijUeIzNQXB11GrVmALJVV6pEjd/uu; bm_sv=FD8981426EA388050697DFB615BAFFE3~YAAQ1wcsF5K72ZSKAQAAsvl/uxUw0do3nknGCkllXH27UZBpM7kQUXm4crBNTAkhek5YSDKIrrm2uFWidfpBfyxbRSr+w7FH7Y0w4cXMAa7BELzcc/B9Uf8T6e2I2W29wjurKkBFtSseslHSqYD3BWx9/GidJMW+dFNrlzNUMd1dONUR9J1TDnYifPhE6A/zSLPHVrCTJl7xzg7VlW/05Ay0i+Bo7TynZdWgotfjET3vg2/ZVixVSGaWeQo4~1'
}
response = None
for m in range(0, 3):
try:
response = requests.get(url=url, headers=headers, verify=False)
break
except Exception as e:
log.error(f"request exception-------{e}")
continue
# Check the status code; response stays None when every retry failed
if response is None:
return False
if response.status_code == 200:
jsonData = response.json()
return jsonData
else:
return False
if __name__=='__main__':
taskType = '财务数据/SEC'
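# The three mappings translate us-gaap tags to internal indicator names:
# zcfzb = balance sheet, lrb = income statement, xjllb = cash flow statement.
# Only one balance-sheet tag is wired up so far; '指标1' ("indicator 1") is a placeholder name.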
zcfzb_mapping = {
'AccountsAndOtherReceivablesNetCurrent':'指标1'
}
lrb_mapping = {
}
xjllb_mapping = {
}
while True:
start_time = time.time()
# todo: fetch the enterprise cik from redis
# cik = baseCore.redicPullData('sec_cik_US:uscik')
cik = '320193'
# Fetch company info from the database by cik
data = fromcikgetinfo(cik)
com_name = data[2]
com_code = data[3]
exchange = data[4]
# The cik in the URL must be zero-padded to ten digits, e.g. '320193' -> '0000320193'
url_cik = cik.zfill(10)
url = f'https://data.sec.gov/api/xbrl/companyfacts/CIK{url_cik}.json'
jsonData = getRequest(url)
if not jsonData:
continue
print(jsonData)
try:
us_gaap = jsonData['facts']['us-gaap']
except:
continue
# Iterate over the mapping keys
Listzcfzb = []
for key in zcfzb_mapping.keys():
# All years and amounts reported for this financial indicator
usd_list = us_gaap[key]['units']['USD']
# Keep annual-report facts only: form 10-K, fiscal period FY
for j in usd_list:
# usd_list is a list of dicts, so read fields off each entry directly
form = j['form']
fp = j['fp']
if form != '10-K' or fp != 'FY':
continue
date = j['end']
# Keep only quarter-end dates
if not date.endswith(('03-31', '06-30', '09-30', '12-31')):
continue
val = j['val']
zcfzb_dic ={
'zbname': key,
'riqi': date,
'jine': val,
'fp': fp,
'form': form
}
# Balance-sheet indicators across all years
Listzcfzb.append(zcfzb_dic)
# groupby only merges adjacent items, so sort by 'riqi' (date) first
Listzcfzb.sort(key=itemgetter('riqi'))
groups = groupby(Listzcfzb, key=itemgetter('riqi'))
# Walk each date group and print the grouped results
for riqi, group in groups:
print(f"riqi: {riqi}")
listbydate = list(group)
print()
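# A possible next step (a sketch, not part of the original flow): collapse each
# date group into one record keyed by indicator name before producing to Kafka, e.g.
# record = {'riqi': riqi, 'values': {d['zbname']: d['jine'] for d in listbydate}}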
"""从html页面中抽取表格"""
import requests
from bs4 import BeautifulSoup
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
def getRequest(url):
headers = {
'Referer': 'https://www.sec.gov/ix?doc=/Archives/edgar/data/356037/000035603723000038/cspi-20230630x10q.htm',
'Sec-Ch-Ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"Windows"',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31',
}
response = None
for m in range(0, 3):
try:
response = requests.get(url=url, headers=headers, verify=False)
break
except Exception as e:
log.error(f"request exception-------{e}")
continue
# Check the status code; response stays None when every retry failed
if response is None:
return False
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
return soup
else:
return False
def getzcfztable(soup):
table_list = soup.find_all('table')
for table in table_list:
aa = table.find_all(text='Current assets:')
if aa:
# print(table)
trlist = table.find_all('tr')
date1 = trlist[1].find_all('td')[1].text.replace('\n', '')
date2 = trlist[1].find_all('td')[-1].text.replace('\n', '')
print(date1, date2)
# todo: drop td cells whose content is empty
for tr in trlist[2:]:
# Calling a Tag, tr(...), is BeautifulSoup shorthand for tr.find_all(...)
filtered_tags = tr(lambda tag: tag.name == 'td' and '$' in tag.text)
for tag in filtered_tags:
tag.extract()
# filtered_tags2 = tr(lambda tag:tag.name=='td' and tag.text==' ')
filtered_tags2 = tr(lambda tag: tag.name == 'td' and tag.text == '')
for tag in filtered_tags2:
tag.extract()
try:
zbtag = tr.find_all('td')[0].text.replace('\n', '')
except:
zbtag = ''
try:
cash1 = tr.find_all('td')[1].text.replace('\n', '')
except:
cash1 = ''
try:
cash2 = tr.find_all('td')[2].text.replace('\n', '')
except:
cash2 = ''
if zbtag != '' and cash1 != '' and cash2 != '':
print(f'field:{zbtag} value1:{cash1} value2:{cash2}')
if __name__=='__main__':
# Apple's FY2021 annual filing, used here as a test document
url = 'https://www.sec.gov/Archives/edgar/data/320193/000032019321000105/aapl-20210925.htm'
soup = getRequest(url)
# Parse the balance-sheet table out of the html
getzcfztable(soup)
# -*- coding: utf-8 -*-
import time
from urllib.parse import quote
import requests
import urllib3
from BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
# headers = {
# 'Host': 'xcx.qcc.com',
# 'Connection': 'keep-alive',
# 'Qcc-Platform': 'mp-weixin',
# 'Qcc-Timestamp': '',
# 'Qcc-Version': '1.0.0',
# 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
# 'content-type': 'application/json',
# 'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
# 'Accept-Encoding': 'gzip, deflate, br,'
# }
headers = {
'Host': 'xcx.qcc.com',
'Connection': 'keep-alive',
'x-request-device-type': 'Android',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8391',
'Content-Type': 'application/json',
'Qcc-Version': '1.0.0',
'authMini': 'Bearer f51dae1a2fcb109fa9ec58bd4a85e5c5',
'xweb_xhr': '1',
'xcx-version': '2023.09.27',
'Qcc-Platform': 'mp-weixin',
'Qcc-CurrentPage': '/company-subpackages/business/index',
'Qcc-Timestamp': '1696661787803',
'Qcc-RefPage': '/company-subpackages/detail/index',
'Accept': '*/*',
'Sec-Fetch-Site': 'cross-site',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://servicewechat.com/wx395200814fcd7599/307/page-frame.html',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh'
}
# Get the Qichacha (QCC) id from a company name or credit code
def find_id_by_name(start,token,name):
urllib3.disable_warnings()
qcc_key = name
t = str(int(time.time()) * 1000)
headers['Qcc-Timestamp'] = t
url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="
resp_dict = None
for lll in range(1, 6):
try:
resp_dict = requests.get(url=url, headers=headers, verify=False).json()
break
except Exception as e:
print(f'{e}-------------retrying')
time.sleep(5)
continue
time.sleep(2)
if resp_dict is None:
return False
# Known error payloads: {'status': 40101, 'message': '无效的sessionToken!'} (invalid session token)
# and {'status': 401, 'message': '您的账号访问超频,请升级小程序版本'} (account rate-limited)
if resp_dict['status'] == 40101:
KeyNo = False
log.info(f'====token expired====elapsed {baseCore.getTimeCost(start, time.time())}')
return KeyNo
if resp_dict['status']==401:
KeyNo = False
log.info(f'=======account rate-limited, upgrade the mini-program version=====elapsed {baseCore.getTimeCost(start, time.time())}')
return KeyNo
try:
if resp_dict['result']['Result']:
result_dict = resp_dict['result']['Result'][0]
KeyNo = result_dict['KeyNo']
Name = result_dict['Name'].replace('<em>', '').replace('</em>', '').strip()
if Name == '':
KeyNo = 'null'
else:
KeyNo = 'null'
except:
KeyNo = False
log.info(f'====token expired====elapsed {baseCore.getTimeCost(start,time.time())}')
return KeyNo
log.info("{},企业代码为:{}".format(qcc_key, KeyNo))
return KeyNo
\ No newline at end of file
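# Example usage (a sketch; the token value is hypothetical):
# KeyNo = find_id_by_name(time.time(), '<session-token>', '<company name or credit code>')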
import json
......@@ -5,7 +5,9 @@ import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer
from base import BaseCore
from obs import ObsClient
import fitz
from urllib.parse import unquote
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
......@@ -16,7 +18,57 @@ cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
taskType = '企业公告/证监会'
obsClient = ObsClient(
access_key_id='VEHN7D0TJ9316H8AHCAV', # your Huawei Cloud AK
secret_access_key='heR353lvSWVPNU8pe2QxDtd8GDsO5L6PGH5eUoQY', # your Huawei Cloud SK
server='https://obs.cn-north-1.myhuaweicloud.com' # your bucket endpoint
)
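# The bucket name 'zzsn' and the 'ZJH/' key prefix used by uptoOBS below live in
# code rather than config; putContent uploads the raw PDF bytes under that key.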
def uptoOBS(pdf_url,pdf_name,type_id,social_code):
headers = {}
retData = {'state': False, 'type_id': type_id, 'item_id': social_code, 'group_name': 'group1', 'path': '',
'full_path': '',
'category': 'pdf', 'file_size': '', 'status': 1, 'create_by': 'XueLingKun',
'create_time': '', 'page_size': '', 'content': ''}
headers['User-Agent'] = baseCore.getRandomUserAgent()
resp_content = None
for i in range(0, 3):
try:
resp_content = requests.get(pdf_url, headers=headers, verify=False, timeout=20).content
break
except:
time.sleep(3)
continue
if resp_content is None:
# Download failed after all retries
return retData
page_size = 0
for i in range(0, 3):
try:
name = pdf_name + '.pdf'
result = obsClient.putContent('zzsn', 'ZJH/'+name, content=resp_content)
with fitz.open(stream=resp_content, filetype='pdf') as doc:
page_size = doc.page_count
for page in doc.pages():
retData['content'] += page.get_text()
break
except:
time.sleep(3)
continue
if page_size < 1:
# pdf parsing failed
# print(f'======pdf parsing failed=====')
return retData
else:
try:
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
retData['state'] = True
retData['path'] = result['body']['objectUrl'].split('/ZJH')[0]
retData['full_path'] = unquote(result['body']['objectUrl'])
retData['file_size'] = result['Uploaded size']
retData['create_time'] = time_now
retData['page_size'] = page_size
except:
return retData
return retData
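# Example call (a sketch with hypothetical values; type_id 8 matches the
# announcement category used by GetContent below):
# retData = uptoOBS('https://example.com/notice.pdf', 'notice_2023', 8, '91110000XXXXXXXXXX')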
def secrchATT(item_id, name, type_id):
sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s '''
......@@ -164,16 +216,20 @@ def getUrl(code, url_parms, Catagory2_parms):
return dic_parms
def ifInstert(short_name, social_code, pdf_url):
ifexist = True
sel_sql = '''select social_credit_code,source_address from brpa_source_article where social_credit_code = %s and source_address = %s and origin='证监会' and type='1' '''
cursor.execute(sel_sql, (social_code, pdf_url))
selects = cursor.fetchone()
# Skip if the record already exists in the database
if selects:
ifexist = False
log.info(f'com_name:{short_name}、{pdf_url} already exists')
return ifexist
else:
return ifexist
def InsterInto(short_name, social_code, pdf_url):
# Insert the record into the database
try:
insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
......@@ -197,8 +253,8 @@ def InsterInto(short_name, social_code, pdf_url):
def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time,com_name,num):
# Upload to the Huawei Cloud OBS server (replaces the old file-server upload)
retData = uptoOBS(pdf_url,pdf_name,8,social_code)
# Insert the attachment into the att database
if retData['state']:
pass
......@@ -323,10 +379,10 @@ def SpiderByZJH(url, payload, dic_info, start_time,num): # dic_info 数据库
year = pub_time[:4]
report_type = td_list[4].text.strip()
# Check whether this announcement is already in the database
ifexist = ifInstert(short_name, social_code, pdf_url)
# ifexist = True means it does not exist yet
if ifexist:
# # announcement list
# okCount = okCount + 1
# Parse the PDF: get the link, download, then parse (success/failure) and transmit (success/failure)
......
import pandas as pd
import glob
# Find all .xlsx report files matching the pattern under the target directory (the variable name csv_files is historical)
csv_files = glob.glob(r"D:\机械项目研报\机械项目研报*.xlsx", recursive=True)
# Create an empty DataFrame to hold the merged data
merged_data = pd.DataFrame()
# Read each Excel file and merge it into the DataFrame
for file in csv_files:
data = pd.read_excel(file, dtype=str)
# Drop the last column
# data = data.iloc[:, :-1]
dad = pd.DataFrame(data, dtype=str)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported API
merged_data = pd.concat([merged_data, dad], ignore_index=True)
sorted_df = merged_data.sort_values('industry')
grouped = merged_data.groupby('industry')
# Save the merged data to a new file
# merged_data.to_csv(r"D:\hg\tmp\11.csv", encoding='gbk', index=False, quoting=1, quotechar='"', escapechar='\\')
# merged_data.to_excel(r"D:\机械项目研报\机械项目研报汇总.xlsx", index=False, engine='openpyxl')
with pd.ExcelWriter(r'D:\机械项目研报\机械项目研报汇总2.xlsx') as writer:
for group_name, group_df in grouped:
# Note: Excel caps sheet names at 31 characters
group_df.to_excel(writer, sheet_name=group_name, index=False)
\ No newline at end of file