Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
1
合并请求
1
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
丁双波
zzsn_spider
Commits
54dd8a54
提交
54dd8a54
authored
10月 24, 2023
作者:
薛凌堃
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
企业负面信息
上级
5c05e843
显示空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
275 行增加
和
0 行删除
+275
-0
ccpg.py
comData/negative_news/ccpg.py
+105
-0
creditchina.py
comData/negative_news/creditchina.py
+170
-0
没有找到文件。
comData/negative_news/ccpg.py
0 → 100644
浏览文件 @
54dd8a54
"""
中国政府采购网
"""
import
requests
from
bs4
import
BeautifulSoup
from
selenium
import
webdriver
from
selenium.webdriver.chrome.service
import
Service
from
selenium.webdriver.common.by
import
By
from
selenium.webdriver.common.action_chains
import
ActionChains
from
selenium.webdriver.support.ui
import
WebDriverWait
from
selenium.webdriver.support
import
expected_conditions
as
EC
def createDriver(
    chrome_driver=r'D:\cmd100\chromedriver.exe',
    chrome_binary=r'D:\Google\Chrome\Application\chrome.exe',
):
    """Create and return a Chrome WebDriver instance.

    Args:
        chrome_driver: Path to the chromedriver executable. Defaults to the
            location that was previously hard-coded in this function.
        chrome_binary: Path to the Chrome browser executable, assigned to
            ``ChromeOptions.binary_location``.

    Returns:
        The ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` when done.
    """
    service = Service(chrome_driver)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = chrome_binary
    # Proxy support (currently disabled):
    # proxy = "127.0.0.1:8080"  # proxy address and port
    # chrome_options.add_argument('--proxy-server=http://' + proxy)

    # The options object must be passed as ``options=``. The legacy
    # ``chrome_options=`` keyword is rejected by current Selenium 4 releases,
    # which are the ones that accept the ``service=`` argument used here.
    return webdriver.Chrome(service=service, options=chrome_options)
def postRrequest(url, headers, com_name, timeout=30):
    """POST a search for ``com_name`` to ``url`` and return the response text.

    Args:
        url: Endpoint to post to.
        headers: HTTP headers sent with the request.
        com_name: Company name placed in the ``orgName`` field of the payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as text.
    """
    payload = {
        'orgName': com_name,
        'enforceUnit': '',
        'punishTime': '',
        'punishTimeMax': '',
        # 'gp': 1
    }
    # NOTE(review): the payload is sent as a JSON body, while the headers
    # built in this file's __main__ declare
    # 'application/x-www-form-urlencoded' -- confirm which encoding the
    # endpoint actually expects (``data=payload`` would send a form body).
    response = requests.post(url=url, headers=headers, json=payload, timeout=timeout)
    return response.text
if __name__ == "__main__":
    # Drive a real browser and simulate the clicks of a manual search.
    # NOTE(review): these headers (including a captured session cookie) are
    # not used by the browser flow below; they are kept for postRrequest().
    # Consider removing the cookie from version control.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Length': '183',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'BizAc_cookie=bXBL4nKhXAx0l6BxLapa7EUva6ZNeLWOEUIogHMY9uhekQVTNHyI!79161097!1697785213601; HMF_CI=2b97dd74cf1745069afc2b52a639692c1fe863ae5ec8f5a99fb173dd50c56f387b0f9597470f17b4f940d0828a06c511577ebce9b48edcf05bac7fa2d981b15286; Hm_lvt_9f8bda7a6bb3d1d7a9c7196bfed609b5=1697785203; JSESSIONIDGS6Credit=_6NRWtzkuNQaDVVmVy6sFRKigu7hsSHVYC_QElYDNYDeDpfllLDo!79161097; Hm_lpvt_9f8bda7a6bb3d1d7a9c7196bfed609b5=1697877010',
        'Host': 'www.ccgp.gov.cn',
        'Origin': 'https://www.ccgp.gov.cn',
        'Referer': 'https://www.ccgp.gov.cn/cr/list',
        'Sec-Fetch-Dest': 'iframe',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }
    # NOTE(review): com_name is defined but the search below types a
    # different, hard-coded company name -- confirm which one is intended.
    com_name = '新疆塔建三五九建工有限责任公司'
    social_code = ''
    # url = 'https://www.ccgp.gov.cn/cr/list'
    url = 'https://www.ccgp.gov.cn/search/cr/'

    # Keys for the result-table cells, in the order the cells are read.
    field_names = (
        'number',
        'name',
        'code',
        'address',
        'behavior',
        'punish_result',
        'according_file',
        'punish_date',
        'publish_date',
        'units',
    )

    browser = createDriver()
    try:
        browser.get(url)
        # The search form is rendered inside an iframe.
        browser.switch_to.frame(browser.find_element(By.TAG_NAME, 'iframe'))
        wait = WebDriverWait(browser, 30)
        wait.until(EC.presence_of_element_located((By.ID, "orgName")))
        browser.find_element(By.ID, 'orgName').send_keys('忠县威华汽车维修厂')
        browser.find_element(By.ID, 'searchForm').click()
        wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        page_source = browser.page_source
    finally:
        # Always shut the browser down, even on failure, so the Chrome and
        # chromedriver processes are not leaked.
        browser.quit()

    # Records of serious illegal and dishonest conduct.
    soup = BeautifulSoup(page_source, 'html.parser')
    table = soup.find('table', id='tableInfo')
    # find() returns None when the table is absent from the page.
    tr_list = table.find_all('tr') if table is not None else []

    records = []
    # The first row is skipped (presumably the header row).
    for info in tr_list[1:]:
        td_list = info.find_all('td')
        if len(td_list) < len(field_names):
            # Rows without the full set of cells cannot be mapped to fields.
            continue
        dic_info = {field: td.text for field, td in zip(field_names, td_list)}
        records.append(dic_info)
comData/negative_news/creditchina.py
0 → 100644
浏览文件 @
54dd8a54
"""信用中国-严重失信名单
链接地址 :
{
source:
type: 严重失信主体名单
searchState: 1
entityType: 1
scenes: defaultscenario
keyword: 雷州市白金银座演艺文化实业有限公司
tyshxydm: 91440882315032592M
page: 1
pageSize: 10
}
"""
import
time
from
urllib
import
parse
import
requests
from
bs4
import
BeautifulSoup
from
retry
import
retry
from
base.BaseCore
import
BaseCore
baseCore
=
BaseCore
()
log
=
baseCore
.
getLogger
()
cnx
=
baseCore
.
cnx
cursor
=
baseCore
.
cursor
@retry(tries=3, delay=1)
def getRequest(url, headers):
    """Issue a GET request and return the response body decoded as JSON.

    The call is wrapped by ``retry(tries=3, delay=1)``. TLS certificate
    verification is disabled and the request times out after 30 seconds.

    Args:
        url: Address to fetch.
        headers: HTTP headers sent with the request.

    Returns:
        The object produced by ``Response.json()``.
    """
    response = requests.get(url=url, headers=headers, verify=False, timeout=30)
    return response.json()
# Serious dishonesty list
def dishonesty(keyword='雷州市白金银座演艺文化实业有限公司', tyshxydm='91440882315032592M'):
    """Query the ``credit_zgf_fr_sxbzxr`` catalog and collect every record.

    Reads the module-level ``headers`` and calls ``getRequest`` once per
    result page.

    Args:
        keyword: Value of the ``keyword`` query parameter. Defaults to the
            company name that was previously hard-coded.
        tyshxydm: Value of the ``tyshxydm`` query parameter. Defaults to the
            code that was previously hard-coded.

    Returns:
        A list with one dict per item of ``data.list`` across all pages,
        holding the fields read from the item's ``entity`` plus the item's
        ``dataSource``.

    Raises:
        KeyError: If a response lacks one of the expected keys.
    """
    page_size = 10
    # Field comments below are translated from the original source.
    entity_fields = (
        'iname',               # name of the dishonest judgment debtor
        'cardnumber',          # organization code
        'court_name',          # enforcing court
        'area_name',           # province
        'case_code',           # document number of the enforcement basis
        'reg_date',            # case filing date
        'gist_cid',            # case number
        'gist_unit',           # unit that issued the enforcement basis
        'duty',                # obligations set by the effective legal document
        'performance',         # how far the debtor has performed
        'disreput_type_name',  # specific circumstances of the dishonest conduct
        'publish_date',        # publication date
        'performed_part',      # part already performed
        'unperform_part',      # part not yet performed
    )

    def fetch(page):
        # Build the query with proper URL encoding and fetch one result page.
        param = {
            'tableName': 'credit_zgf_fr_sxbzxr',
            'searchState': '1',
            'scenes': 'defaultscenario',
            'keyword': keyword,
            'tyshxydm': tyshxydm,
            'page': f'{page}',
            'pageSize': f'{page_size}',
        }
        query = parse.urlencode(param, quote_via=parse.quote)
        url = f'https://public.creditchina.gov.cn/private-api/catalogSearch?{query}'
        json_data = getRequest(url, headers)
        # NOTE(review): the original compared status to 1 but took no action
        # either way; presumably 1 means success -- confirm and decide how
        # other values should be handled.
        if json_data['status'] != 1:
            log.info(f"catalogSearch returned status {json_data['status']} for page {page}")
        return json_data

    json_data = fetch(1)
    # NOTE(review): totalSize is treated as the total number of records, so
    # it is converted to a page count. The original iterated the value
    # directly, which fails for an int -- confirm the field's meaning.
    total_size = int(json_data['data']['totalSize'])
    total_pages = (total_size + page_size - 1) // page_size

    records = []
    for page in range(1, total_pages + 1):
        if page > 1:
            # Page 1 was already fetched above to learn the total.
            json_data = fetch(page)
        for info in json_data['data']['list']:
            entity = info['entity']
            record = {field: entity[field] for field in entity_fields}
            record['dataSource'] = info['dataSource']  # data source
            records.append(record)
    return records
# Administrative penalties
def punish(keyword='雷州市白金银座演艺文化实业有限公司', tyshxydm='91440882315032592M'):
    """Query the ``credit_xyzx_fr_xzcf_new`` catalog and collect every record.

    Reads the module-level ``headers`` and calls ``getRequest`` once per
    result page.

    Args:
        keyword: Value of the ``keyword`` query parameter. Defaults to the
            company name that was previously hard-coded.
        tyshxydm: Value of the ``tyshxydm`` query parameter. Defaults to the
            code that was previously hard-coded.

    Returns:
        A list with one dict per item of ``data.list`` across all pages;
        empty when the service reports no records.

    Raises:
        KeyError: If a response lacks one of the expected keys.
    """
    page_size = 10
    # Field comments below are translated from the original source.
    entity_fields = (
        'cf_wsh',      # document number of the penalty decision
        'cf_cflb',     # penalty category
        'cf_jdrq',     # date of the penalty decision
        'cf_nr',       # penalty content
        'cf_nr_fk',    # fine amount (10,000 yuan)
        'cf_nr_wfff',  # confiscated illegal gains / property (10,000 yuan)
        'cf_nr_zkdx',  # name and number of the suspended or revoked licence
        'cf_wfxw',     # type of violation
        'cf_sy',       # facts of the violation
        'cf_yj',       # basis for the penalty
        'cf_cfjg',     # penalizing authority
        'cf_cfjgdm',   # unified social credit code of the penalizing authority
        'cf_sjly',     # data source
        'cf_sjlydm',   # unified social credit code of the data source unit
    )

    def fetch(page):
        # Build the query with proper URL encoding and fetch one result page.
        param = {
            'tableName': 'credit_xyzx_fr_xzcf_new',
            'searchState': '1',
            'scenes': 'defaultscenario',
            'keyword': keyword,
            'tyshxydm': tyshxydm,
            'page': f'{page}',
            'pageSize': f'{page_size}',
        }
        query = parse.urlencode(param, quote_via=parse.quote)
        url = f'https://public.creditchina.gov.cn/private-api/catalogSearch?{query}'
        json_data = getRequest(url, headers)
        # NOTE(review): the original compared status to 1 but took no action
        # either way; presumably 1 means success -- confirm and decide how
        # other values should be handled.
        if json_data['status'] != 1:
            log.info(f"catalogSearch returned status {json_data['status']} for page {page}")
        return json_data

    json_data = fetch(1)
    # Total number of records (per the original comment on this field).
    total_size = int(json_data['data']['totalSize'])
    if total_size <= 0:
        # The original called log.info() with no message, which raises
        # TypeError on a standard logger, and then fell through to the loop.
        log.info(f'no administrative penalty records found for {keyword}')
        return []

    # Convert the record count into a page count; the original iterated the
    # integer itself, which raises TypeError.
    total_pages = (total_size + page_size - 1) // page_size

    records = []
    for page in range(1, total_pages + 1):
        if page > 1:
            # Page 1 was already fetched above to learn the total.
            json_data = fetch(page)
        for entity in json_data['data']['list']:
            records.append({field: entity[field] for field in entity_fields})
    return records
if __name__ == '__main__':
    # Request headers shared by every call below; dishonesty() and punish()
    # read this module-level name.
    headers = {
        'Referer': 'https://www.creditchina.gov.cn/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }
    type_list = ['严重失信主体名单', '行政管理']
    com_name = ''
    social_code = ''

    dishonesty()
    punish()

    # Report link: ask the service for a report number, then derive the PDF
    # download address from it.
    url_report = f'https://public.creditchina.gov.cn/credit-check/pdf/clickDownload?companyName={com_name}&entityType=1&uuid=&tyshxydm={social_code}'
    report_json = getRequest(url_report, headers)
    reportNumber = report_json['data']['reportNumber']
    pdf_url = f'https://public.creditchina.gov.cn/credit-check/pdf/clickDownloadOBS?reportNumber={reportNumber}'
    # The PDF itself is not downloaded yet:
    # respon = requests.get(url=pdf_url,headers=headers,verify=False,timeout=30)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论