提交 b2ebfb55 作者: 薛凌堃

Merge remote-tracking branch 'origin/master'

import time
import pandas as pd
from openpyxl import Workbook
import json
import requests
def postRes(keyno, num, timeout=30):
    """Request one page of the holding-company list for a company.

    Sends a POST to the qichacha ``getHoldingCompany`` endpoint using a fixed
    set of captured request headers and a JSON body whose ``province`` and
    ``cityCode`` filters are both ``"ZJ"``. The response text is printed and
    returned unparsed.

    Args:
        keyno: Company identifier, sent as the ``unique`` field.
        num: Page number, sent as the numeric ``pageIndex`` field.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        str: The raw response body.
    """
    header = {
        'Host':'apph5.qichacha.com',
        'Connection':'keep-alive',
        'Content-Length':'242',
        'sec-ch-ua':'"Chromium";v="107", "Not=A?Brand";v="24"',
        'applet-token':'b7f45e9a64fa048f3bbdf1e575730242',
        'referrer':'https://apph5.qichacha.com/company/basic/holding-enterprises/list3?unique=a59b3c1f33224db1eac88afb1906efbd&name=%E5%9B%BD%E5%AE%B6%E7%94%B5%E7%BD%91%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8&appletLength=4&appletUrl=%2Fcompany-subpackages%2Fholding-enterprise%2Findex&v=2023.09.08&accessToken=b7f45e9a64fa048f3bbdf1e575730242',
        'sec-ch-ua-mobile':'?0',
        'Authorization':'bearer b7f45e9a64fa048f3bbdf1e575730242',
        'applet-platform':'weixin',
        'Content-Type':'application/json',
        'Accept':'application/json, text/plain, */*',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8391',
        'X-Requested-With':'XMLHttpRequest',
        '26c7dfc0c2559db8c5b4':'21d4056872e94612ac007633e30f94b6391357f28908502a5f6c27acb1bf3e416e76e33f4e19d26f6cc767cc4fcea01d99efbf039f0e6d1c597b54613d985893',
        'x-request-id':'389331a8-9ab3-423d-8922-92aaf4fdd5c5',
        'sec-ch-ua-platform':'"Windows"',
        'Origin':'https://apph5.qichacha.com',
        'Sec-Fetch-Site':'same-origin',
        'Sec-Fetch-Mode':'cors',
        'Sec-Fetch-Dest':'empty',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh',
        'Cookie':'acw_tc=b628321e16944875136114392e16ef9b4e7e5a16358c7567306ebc32f4; tid=b7f45e9a64fa048f3bbdf1e575730242'
    }
    payload = {
        'unique': keyno,
        'pageIndex': int(num),
        'searchKey': '',
        'province': 'ZJ',
        'cityCode': 'ZJ',
        'industry': '',
        'fundedRatioLevel': '',
        'fundedRatioMin': '',
        'fundedRatioMax': '',
        'filterKeyNo': '',
        'token': 'b7f45e9a64fa048f3bbdf1e575730242',
    }
    # Serialising with json.dumps escapes quotes/backslashes in keyno, which
    # plain string substitution did not. Compact separators keep the body in
    # the same space-free form as the captured request.
    data = json.dumps(payload, separators=(',', ':'))
    url = 'https://apph5.qichacha.com/api/basic/getHoldingCompany'
    # NOTE(review): verify=False turns off TLS certificate verification.
    res = requests.post(url, data=data, headers=header, verify=False,
                        timeout=timeout)
    print(res.text)
    return res.text
def writerToExcel(detailList):
    """Append rows to the Excel workbook named by the global ``filename2``.

    The existing sheet is read in full, the new rows are placed after it,
    and the combined table is written back to the same file without the
    index column.

    Args:
        detailList: Row data accepted by ``pandas.DataFrame(data=...)``; the
            caller in this file passes a list of dicts, one per row.
    """
    # Load what is already stored so the new rows can be added after it.
    existing_data = pd.read_excel(filename2, engine='openpyxl')
    new_data = pd.DataFrame(data=detailList)
    # DataFrame.append was removed in pandas 2.0; pd.concat is its
    # replacement and also works on older pandas versions.
    combined_data = pd.concat([existing_data, new_data], ignore_index=True)
    combined_data.to_excel(filename2, index=False)
def readfile(filename):
    """Read an Excel sheet and return its first three columns as records.

    Args:
        filename: Path of the workbook, passed to ``pandas.read_excel``.

    Returns:
        list[dict]: One dict per row with the keys ``'rank'``, ``'qiye'``
        and ``'keyno'``, holding the ``str()`` of the first, second and
        third cell of that row.
    """
    sheet = pd.read_excel(filename)
    datas = []
    for _, row in sheet.iterrows():
        # .iloc selects strictly by position. Plain row[0] only worked through
        # the positional fallback of Series.__getitem__, which pandas has
        # deprecated for label-indexed rows.
        datas.append({
            'rank': str(row.iloc[0]),
            'qiye': str(row.iloc[1]),
            'keyno': str(row.iloc[2]),
        })
    return datas
if __name__ == '__main__':
    # Output workbook. writerToExcel() looks this name up as a module global
    # and reads the file before appending, so the workbook must already exist.
    filename2 = 'qiye2hold.xlsx'
    # Input workbook; readfile() returns one dict per row with the keys
    # 'rank', 'qiye' and 'keyno'.
    filename = r'C:\Users\WIN10\DataspellProjects\zzsn_spider\test\qiye2.xlsx'
    rqs = readfile(filename)
    for rd in rqs:
        keyno = rd['keyno']
        print(f'请求企业的ID{keyno}')
        # The first page is requested up front because its paging block
        # carries the total record count used to derive the page count.
        msg = json.loads(postRes(keyno, 1))
        totalRecords = msg['Paging']['TotalRecords']
        # Ceiling division by the page size of 20 this script assumes (TODO
        # confirm against the API). `// 20 + 1` would request one extra page
        # whenever the total is an exact multiple of 20. At least one page is
        # always processed, since page 1 has already been downloaded.
        pagenum = max(1, (totalRecords + 19) // 20)
        for i in range(1, pagenum + 1):
            print(f'请求第{i}页')
            if i > 1:
                # Each later page must be parsed from its own response;
                # otherwise the rows of page 1 would be written once per page.
                msg = json.loads(postRes(keyno, i))
            result = msg['Result']
            yKeyNo = result['KeyNo']
            yCompanyName = result['CompanyName']
            NameCount = result['NameCount']
            dlist = []
            for zzname in result['Names']:
                # One output row per held company; the parent company's
                # fields are repeated on every row.
                dlist.append({
                    'yKeyNo': yKeyNo,
                    'yCompanyName': yCompanyName,
                    'nameCount': NameCount,
                    'zKeyNo': zzname['KeyNo'],
                    'zName': zzname['Name'],
                    'registCapi': zzname['RegistCapi'],
                    'imageUrl': zzname['ImageUrl'],
                    'province': zzname['Province'],
                    'industry': zzname['Industry'],
                    'shortStatus': zzname['ShortStatus'],
                    'percentTotal': zzname['PercentTotal'],
                    'startDateStr': zzname['StartDateStr'],
                    'h5Url': zzname['H5Url'],
                    'district': zzname['District'],
                    'industryDesc': zzname['IndustryDesc'],
                    'area': str(zzname['Area']),
                    'industryItem': str(zzname['IndustryItem']),
                })
            print('写入excel')
            if dlist:
                writerToExcel(dlist)
if __name__ == '__main__':
    # Integers 1 through 99; nothing in this block consumes the range.
    kwList = range(1, 100)
    # Emits a single empty line on stdout.
    print()
\ No newline at end of file
import os
from flask import Flask, request, send_file, render_template
import json
import pymysql
'''
手动捕获请求的接口数据,实现解析
使用fiddler将链接对应的页面数据信息发送到后台,后台对数据进行解析
'''
def connMysql():
    """Open a MySQL connection and create a cursor on it.

    Each connection setting can be supplied through an environment variable
    (``CAIJI_DB_HOST``, ``CAIJI_DB_USER``, ``CAIJI_DB_PASSWORD``,
    ``CAIJI_DB_NAME``). When a variable is unset, the value that used to be
    hard-coded here is used, so existing deployments keep working.

    Returns:
        tuple: ``(connection, cursor)``. The caller is responsible for
        closing both.
    """
    # NOTE(review): the literal fallbacks keep credentials in source control;
    # drop them once every deployment sets the environment variables.
    conx = pymysql.connect(
        host=os.environ.get('CAIJI_DB_HOST', '114.115.159.144'),
        user=os.environ.get('CAIJI_DB_USER', 'caiji'),
        password=os.environ.get('CAIJI_DB_PASSWORD', 'zzsn9988'),
        database=os.environ.get('CAIJI_DB_NAME', 'caiji'),
    )
    cursorM = conx.cursor()
    return conx, cursorM
def closeSql(conx, cursorM):
    """Close a cursor and then the connection it belongs to.

    Args:
        conx: Connection object exposing ``close()``.
        cursorM: Cursor object exposing ``close()``.

    Raises:
        Whatever either ``close()`` call raises. The connection is closed
        even when closing the cursor fails.
    """
    try:
        cursorM.close()
    finally:
        # Runs on the error path too, so a failing cursor close can no
        # longer leave the connection open.
        conx.close()
def itemInsertToTable(item):
    """Insert one holding-company record into ``qccholdmsg`` unless present.

    A row counts as already present when the table holds a record with the
    same ``yKeyNo`` and ``zKeyNo``. The connection is opened per call and is
    always closed before returning, including on the early-return and error
    paths.

    Args:
        item: Mapping with the keys ``yKeyNo``, ``yCompanyName``,
            ``nameCount``, ``zKeyNo``, ``zName``, ``registCapi``,
            ``province``, ``industry``, ``shortStatus``, ``percentTotal``,
            ``startDateStr``, ``h5Url``, ``district``, ``industryDesc``,
            ``area`` and ``industryItem``.

    Raises:
        KeyError: If ``item`` lacks one of the keys above.
        Exception: Any error raised by the INSERT or the commit.
    """
    conx, cursorM = connMysql()
    try:
        zKeyNo = item['zKeyNo']
        yKeyNo = item['yKeyNo']
        try:
            # Placeholders let the driver escape the values; they come from
            # externally captured data and must not be spliced into the SQL.
            select_sql = 'select * from qccholdmsg where yKeyNo=%s and zKeyNo=%s'
            cursorM.execute(select_sql, (yKeyNo, zKeyNo))
            existing_record = cursorM.fetchone()
        except Exception as e:
            # Best effort, as before: a failed lookup is treated as "not
            # found", but the error is now reported instead of being dropped.
            print(e)
            existing_record = None
        if existing_record:
            print(f'数据已存在!{zKeyNo}')
            return
        insert_param = (
            item['yKeyNo'], item['yCompanyName'], item['nameCount'],
            item['zKeyNo'], item['zName'], item['registCapi'],
            item['province'], item['industry'], item['shortStatus'],
            item['percentTotal'], item['startDateStr'], item['h5Url'],
            item['district'], item['industryDesc'], item['area'],
            item['industryItem'],
        )
        insert_sql = "INSERT into qccholdmsg (yKeyNo,yCompanyName,nameCount,zKeyNo,zName,registCapi,province," \
                     "industry,shortStatus,percentTotal,startDateStr,h5Url,district,industryDesc,area,industryItem) VALUES (%s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        cursorM.execute(insert_sql, insert_param)
        conx.commit()
        print('数据插入成功!')
    finally:
        # Release the cursor and connection on every exit path.
        closeSql(conx, cursorM)
# Flask application object; the @app.route decorators below register their
# view functions on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the root URL with a fixed plain-text greeting."""
    greeting = 'Welcome to the website!'
    return greeting
@app.route('/get_hold', methods=['POST'])
def get_news():
    """Accept a POST to ``/get_hold``; the form payload is not processed yet.

    Returns:
        str: An empty body. Flask raises ``TypeError`` for a view that
        returns ``None``, so an explicit response is required.
    """
    # Placeholder: the submitted form is read but nothing is done with it.
    data = request.form
    return ""
@app.route('/e1', methods=['POST'])
def e1():
    """Accept a JSON POST to ``/e1`` and print its ``html`` field.

    Returns:
        str: An empty body. Flask raises ``TypeError`` for a view that
        returns ``None``, so an explicit response is required.
    """
    data = request.get_json()
    # A JSON body that is not an object (null, a list, a number) has no
    # .get(); treat it as carrying no html instead of failing.
    html = data.get('html') if isinstance(data, dict) else None
    print(html)
    return ""
@app.route('/e2', methods=['POST'])
def e2():
    """Parse a posted holding-company response and store each entry.

    The ``html`` form field must hold the JSON text of a response whose
    ``Result`` object carries ``KeyNo``, ``CompanyName``, ``NameCount`` and a
    ``Names`` list. Every element of ``Names`` is passed to
    ``itemInsertToTable``; a failure on one element is printed and does not
    stop the remaining ones.

    Returns:
        An empty body on success, or a ``(message, 400)`` pair when the
        ``html`` field is missing or is not valid JSON.
    """
    html = request.form.get('html')
    print('获取fiddler抓取的数据')
    if html is None:
        # json.loads(None) would raise TypeError and surface as a 500.
        return "missing 'html' form field", 400
    try:
        msg = json.loads(html)
    except ValueError:
        # json.JSONDecodeError is a subclass of ValueError.
        return "'html' form field is not valid JSON", 400

    result = msg['Result']
    yKeyNo = result['KeyNo']
    yCompanyName = result['CompanyName']
    NameCount = result['NameCount']
    for zzname in result['Names']:
        item = {
            'yKeyNo': yKeyNo,
            'yCompanyName': yCompanyName,
            'nameCount': NameCount,
            'zKeyNo': zzname['KeyNo'],
            'zName': zzname['Name'],
            'registCapi': zzname['RegistCapi'],
            'imageUrl': zzname['ImageUrl'],
            'province': zzname['Province'],
            'industry': zzname['Industry'],
            'shortStatus': zzname['ShortStatus'],
            'percentTotal': zzname['PercentTotal'],
            'startDateStr': zzname['StartDateStr'],
            'h5Url': zzname['H5Url'],
            'district': zzname['District'],
            'industryDesc': zzname['IndustryDesc'],
            # Area and IndustryItem are not read from the payload here;
            # empty strings are stored instead.
            'area': '',
            'industryItem': '',
        }
        try:
            print('对数据进行解析入库')
            itemInsertToTable(item)
        except Exception as e:
            # Keep going so one bad record does not abort the whole batch.
            print(e)
    return ""
def installToMysql():
    """Placeholder with no behavior; always returns ``None``."""
    return None
if __name__ == '__main__':
    # Run the Flask application on port 8000 when executed as a script.
    app.run(port=8000)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论