Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
1
合并请求
1
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
丁双波
zzsn_spider
Commits
bbb23d8c
提交
bbb23d8c
authored
9月 25, 2023
作者:
刘伟刚
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test脚本提交
上级
fcfdc422
全部展开
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
279 行增加
和
0 行删除
+279
-0
holdco.py
test/holdco.py
+139
-0
pipeiUrl.py
test/pipeiUrl.py
+0
-0
poolTest.py
test/poolTest.py
+7
-0
qccqyflask.py
test/qccqyflask.py
+133
-0
没有找到文件。
test/holdco.py
0 → 100644
浏览文件 @
bbb23d8c
import
time
import
pandas
as
pd
from
openpyxl
import
Workbook
import
json
import
requests
def postRes(keyno, num):
    """Request one page of holding-company data from the qichacha H5 API.

    Args:
        keyno: Company identifier, sent as the ``unique`` field of the body.
        num: Page number, sent as the ``pageIndex`` field of the body.

    Returns:
        The raw response body as text. The text is also printed to stdout.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # Headers captured from a WeChat mini-program session. The token values
    # are session-bound and will need refreshing once they expire.
    # Content-Length is intentionally not set here: requests derives it from
    # the actual body, so a fixed value would only be wrong or ignored.
    header = {
        'Host': 'apph5.qichacha.com',
        'Connection': 'keep-alive',
        'sec-ch-ua': '"Chromium";v="107", "Not=A?Brand";v="24"',
        'applet-token': 'b7f45e9a64fa048f3bbdf1e575730242',
        'referrer': 'https://apph5.qichacha.com/company/basic/holding-enterprises/list3?unique=a59b3c1f33224db1eac88afb1906efbd&name=%E5%9B%BD%E5%AE%B6%E7%94%B5%E7%BD%91%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8&appletLength=4&appletUrl=%2Fcompany-subpackages%2Fholding-enterprise%2Findex&v=2023.09.08&accessToken=b7f45e9a64fa048f3bbdf1e575730242',
        'sec-ch-ua-mobile': '?0',
        'Authorization': 'bearer b7f45e9a64fa048f3bbdf1e575730242',
        'applet-platform': 'weixin',
        'Content-Type': 'application/json',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8391',
        'X-Requested-With': 'XMLHttpRequest',
        '26c7dfc0c2559db8c5b4': '21d4056872e94612ac007633e30f94b6391357f28908502a5f6c27acb1bf3e416e76e33f4e19d26f6cc767cc4fcea01d99efbf039f0e6d1c597b54613d985893',
        'x-request-id': '389331a8-9ab3-423d-8922-92aaf4fdd5c5',
        'sec-ch-ua-platform': '"Windows"',
        'Origin': 'https://apph5.qichacha.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh',
        'Cookie': 'acw_tc=b628321e16944875136114392e16ef9b4e7e5a16358c7567306ebc32f4; tid=b7f45e9a64fa048f3bbdf1e575730242',
    }
    # Serialise with json.dumps so that quotes or backslashes in keyno are
    # escaped instead of corrupting the body. Compact separators keep the
    # wire format identical to the previous hand-written template.
    payload = {
        'unique': keyno,
        'pageIndex': int(num),
        'searchKey': '',
        'province': 'ZJ',
        'cityCode': 'ZJ',
        'industry': '',
        'fundedRatioLevel': '',
        'fundedRatioMin': '',
        'fundedRatioMax': '',
        'filterKeyNo': '',
        'token': 'b7f45e9a64fa048f3bbdf1e575730242',
    }
    data = json.dumps(payload, separators=(',', ':'))
    url = 'https://apph5.qichacha.com/api/basic/getHoldingCompany'
    # NOTE(review): verify=False disables TLS certificate verification; it is
    # kept because the original relied on it, but it should be revisited.
    # The timeout stops a stalled connection from hanging the crawl forever.
    res = requests.post(url, data=data, headers=header, verify=False, timeout=30)
    print(res.text)
    return res.text
# 将数据追加到excel
def writerToExcel(detailList):
    """Append rows to the workbook named by the module-level ``filename2``.

    The existing sheet is read, the new rows are added after it, and the
    combined table is written back to the same file. If the workbook does not
    exist yet, it is created from the new rows alone.

    Args:
        detailList: Row data, passed to ``pd.DataFrame`` as ``data``.
    """
    new_data = pd.DataFrame(data=detailList)
    try:
        existing_data = pd.read_excel(filename2, engine='openpyxl')
    except FileNotFoundError:
        # First write: there is nothing to merge with.
        combined_data = new_data
    else:
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent.
        combined_data = pd.concat([existing_data, new_data], ignore_index=True)
    combined_data.to_excel(filename2, index=False)
def readfile(filename):
    """Read the first three columns of an Excel sheet into a list of dicts.

    Args:
        filename: Path of the Excel file to read.

    Returns:
        One dict per row with the keys ``'rank'``, ``'qiye'`` and ``'keyno'``,
        holding the first, second and third cell of the row as strings.
    """
    data = pd.read_excel(filename)
    # Use .iloc for positional access: plain row[0] on a label-indexed Series
    # is deprecated in pandas and would be read as a label lookup.
    return [
        {
            'rank': str(row.iloc[0]),
            'qiye': str(row.iloc[1]),
            'keyno': str(row.iloc[2]),
        }
        for _, row in data.iterrows()
    ]
if __name__ == '__main__':
    # Output workbook. writerToExcel reads this module-level name; create an
    # empty workbook at this path beforehand if it does not exist yet.
    filename2 = 'qiye2hold.xlsx'
    # Input workbook whose first three columns are rank, company name and
    # company key number (see readfile).
    filename = r'C:\Users\WIN10\DataspellProjects\zzsn_spider\test\qiye2.xlsx'
    # Page size assumed by the page-count arithmetic below.
    page_size = 20

    for rd in readfile(filename):
        keyno = rd['keyno']
        print(f'请求企业的ID{keyno}')
        # The first page also carries the total record count, so it is fetched
        # once and reused as page 1 below.
        msg = json.loads(postRes(keyno, 1))
        totalRecords = msg['Paging']['TotalRecords']
        # Ceiling division: an exact multiple of page_size must not produce an
        # extra, empty page.
        pagenum = -(-totalRecords // page_size)
        for i in range(1, pagenum + 1):
            print(f'请求第{i}页')
            if i > 1:
                # Parse the response of *this* page. Previously the first
                # page's parsed message was reused for every page.
                msg = json.loads(postRes(keyno, i))
            result = msg['Result']
            dlist = []
            for zzname in result['Names']:
                dlist.append({
                    'yKeyNo': result['KeyNo'],
                    'yCompanyName': result['CompanyName'],
                    'nameCount': result['NameCount'],
                    'zKeyNo': zzname['KeyNo'],
                    'zName': zzname['Name'],
                    'registCapi': zzname['RegistCapi'],
                    'imageUrl': zzname['ImageUrl'],
                    'province': zzname['Province'],
                    'industry': zzname['Industry'],
                    'shortStatus': zzname['ShortStatus'],
                    'percentTotal': zzname['PercentTotal'],
                    'startDateStr': zzname['StartDateStr'],
                    'h5Url': zzname['H5Url'],
                    'district': zzname['District'],
                    'industryDesc': zzname['IndustryDesc'],
                    # These two are stringified before being written out.
                    'area': str(zzname['Area']),
                    'industryItem': str(zzname['IndustryItem']),
                })
            print('写入excel')
            if dlist:
                writerToExcel(dlist)
test/pipeiUrl.py
0 → 100644
浏览文件 @
bbb23d8c
差异被折叠。
点击展开。
test/poolTest.py
0 → 100644
浏览文件 @
bbb23d8c
if __name__ == '__main__':
    # Scratch script: builds the range 1..99 and prints an empty line.
    kwList = range(1, 100)
    print()
\ No newline at end of file
test/qccqyflask.py
0 → 100644
浏览文件 @
bbb23d8c
import
os
from
flask
import
Flask
,
request
,
send_file
,
render_template
import
json
import
pymysql
'''
手动捕获请求的接口数据,实现解析
使用fiddler将链接对应的页面数据信息发送到后台,后台对数据进行解析
'''
def connMysql():
    """Open a MySQL connection and create a cursor on it.

    The connection settings may be overridden with the environment variables
    ``CAIJI_DB_HOST``, ``CAIJI_DB_USER``, ``CAIJI_DB_PASSWORD`` and
    ``CAIJI_DB_NAME``. When a variable is unset, the previous built-in value
    is used, so existing deployments keep working unchanged.

    Returns:
        A ``(connection, cursor)`` tuple. The caller is responsible for
        closing both.
    """
    # NOTE(review): the fallback values keep credentials in source control;
    # prefer supplying them through the environment and rotating the password.
    conx = pymysql.connect(
        host=os.environ.get('CAIJI_DB_HOST', '114.115.159.144'),
        user=os.environ.get('CAIJI_DB_USER', 'caiji'),
        password=os.environ.get('CAIJI_DB_PASSWORD', 'zzsn9988'),
        database=os.environ.get('CAIJI_DB_NAME', 'caiji'),
    )
    cursorM = conx.cursor()
    return conx, cursorM
def closeSql(conx, cursorM):
    """Close the cursor and then the connection.

    The connection is closed even if closing the cursor raises, so a failing
    cursor cannot leak the connection.

    Args:
        conx: Connection object exposing ``close()``.
        cursorM: Cursor object exposing ``close()``.
    """
    try:
        cursorM.close()
    finally:
        conx.close()
# Insert one record into the qccholdmsg table (skipped when it already exists)
def itemInsertToTable(item):
    """Insert one holding-company record into ``qccholdmsg`` if it is new.

    A record counts as existing when a row with the same ``yKeyNo`` and
    ``zKeyNo`` is already present; in that case nothing is inserted.

    Args:
        item: Mapping that provides a value for every inserted column
            (``yKeyNo``, ``yCompanyName``, ``nameCount``, ``zKeyNo``,
            ``zName``, ``registCapi``, ``province``, ``industry``,
            ``shortStatus``, ``percentTotal``, ``startDateStr``, ``h5Url``,
            ``district``, ``industryDesc``, ``area``, ``industryItem``).

    Raises:
        KeyError: if ``item`` lacks one of the columns above.
    """
    columns = (
        'yKeyNo', 'yCompanyName', 'nameCount', 'zKeyNo', 'zName',
        'registCapi', 'province', 'industry', 'shortStatus', 'percentTotal',
        'startDateStr', 'h5Url', 'district', 'industryDesc', 'area',
        'industryItem',
    )
    conx, cursorM = connMysql()
    # try/finally guarantees the connection is released on every path,
    # including the "already exists" early return and any exception.
    try:
        zKeyNo = item['zKeyNo']
        yKeyNo = item['yKeyNo']
        try:
            # Bind the values as parameters instead of formatting them into
            # the SQL text, so their content cannot alter the query.
            select_sql = 'select * from qccholdmsg where yKeyNo=%s and zKeyNo=%s'
            cursorM.execute(select_sql, (yKeyNo, zKeyNo))
            existing_record = cursorM.fetchone()
        except Exception as e:
            # Best effort, as before: a failed duplicate check does not block
            # the insert, but the failure is now reported rather than hidden.
            print(f'duplicate check failed: {e}')
            existing_record = None
        if existing_record:
            print(f'数据已存在!{zKeyNo}')
            return
        insert_param = tuple(item[column] for column in columns)
        # Column list and placeholders both come from `columns`, so their
        # counts cannot drift apart.
        insert_sql = (
            "INSERT into qccholdmsg (" + ",".join(columns) + ") VALUES ("
            + ",".join(["%s"] * len(columns)) + ")"
        )
        cursorM.execute(insert_sql, insert_param)
        conx.commit()
        print('数据插入成功!')
    finally:
        closeSql(conx, cursorM)
# Flask application object that the route decorators in this module attach to.
app = Flask(__name__)


@app.route('/')
def index():
    """Return a fixed greeting for the site root."""
    greeting = 'Welcome to the website!'
    return greeting
@app.route('/get_hold', methods=['POST'])
def get_news():
    """Handle POST /get_hold.

    The submitted form is read but not processed yet.

    Returns:
        An empty body. A Flask view must return a response; the previous
        implicit ``None`` made every request fail with a server error.
    """
    data = request.form  # placeholder: the form is not used yet
    return ""
@app.route('/e1', methods=['POST'])
def e1():
    """Handle POST /e1: print the ``html`` field of the JSON body.

    Returns:
        An empty body. A Flask view must return a response; the previous
        implicit ``None`` made every request fail with a server error.
    """
    data = request.get_json()
    # A JSON body that is not an object (for example null or a list) has no
    # 'html' field; treat it as missing instead of raising AttributeError.
    html = data.get('html') if isinstance(data, dict) else None
    print(html)
    return ""
@app.route('/e2', methods=['POST'])
def e2():
    """Handle POST /e2: parse a captured holding-company response and store it.

    The ``html`` form field is expected to hold the JSON text of a
    holding-company API response. Each entry of ``Result.Names`` is turned
    into a record and passed to ``itemInsertToTable``.

    Returns:
        An empty body on success, or a ``(message, 400)`` pair when the
        ``html`` field is missing or is not the expected JSON structure.
    """
    html = request.form.get('html')
    print('获取fiddler抓取的数据')
    if not html:
        # Previously json.loads(None) raised and produced a server error.
        return "missing 'html' form field", 400
    try:
        msg = json.loads(html)
        result = msg['Result']
        yKeyNo = result['KeyNo']
        yCompanyName = result['CompanyName']
        NameCount = result['NameCount']
        # Treat a null list as "no entries" rather than failing the loop.
        Names = result['Names'] or []
    except (ValueError, KeyError, TypeError) as e:
        print(e)
        return "invalid 'html' payload", 400

    for zzname in Names:
        # Building and inserting share one try block so that a single
        # malformed entry is reported and skipped without aborting the rest.
        try:
            item = {
                'yKeyNo': yKeyNo,
                'yCompanyName': yCompanyName,
                'nameCount': NameCount,
                'zKeyNo': zzname['KeyNo'],
                'zName': zzname['Name'],
                'registCapi': zzname['RegistCapi'],
                'imageUrl': zzname['ImageUrl'],
                'province': zzname['Province'],
                'industry': zzname['Industry'],
                'shortStatus': zzname['ShortStatus'],
                'percentTotal': zzname['PercentTotal'],
                'startDateStr': zzname['StartDateStr'],
                'h5Url': zzname['H5Url'],
                'district': zzname['District'],
                'industryDesc': zzname['IndustryDesc'],
                # Area and IndustryItem are deliberately stored as empty
                # strings on this path.
                'area': '',
                'industryItem': '',
            }
            print('对数据进行解析入库')
            itemInsertToTable(item)
        except Exception as e:
            print(e)
    return ""
def installToMysql():
    """Placeholder with no behavior yet; always returns ``None``."""
    return None
if __name__ == '__main__':
    # Run the Flask app on port 8000 when this file is executed as a script.
    app.run(port=8000)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论