提交 54dd8a54 作者: 薛凌堃

企业负面信息

上级 5c05e843
"""
中国政府采购网
"""
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def createDriver():
    """Start a Chrome WebDriver using the locally installed driver and browser.

    The chromedriver executable and the Chrome binary are located through the
    hard-coded Windows paths below.

    Returns:
        The ``webdriver.Chrome`` instance that was started.
    """
    chrome_driver = r'D:\cmd100\chromedriver.exe'
    path = Service(chrome_driver)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = r'D:\Google\Chrome\Application\chrome.exe'
    # Proxy configuration (currently disabled):
    # proxy = "127.0.0.1:8080"  # proxy address and port
    # chrome_options.add_argument('--proxy-server=http://' + proxy)
    # Selenium 4 (which introduced ``service=``) expects the options object
    # under ``options=``; the legacy ``chrome_options=`` keyword was deprecated
    # and later removed, so it must not be combined with ``service=``.
    driver = webdriver.Chrome(service=path, options=chrome_options)
    return driver
def postRrequest(url,headers,com_name):
    """POST a search for one organisation and return the raw response body.

    Args:
        url: Endpoint the search is posted to.
        headers: HTTP headers sent with the request.
        com_name: Organisation name placed in the ``orgName`` field.

    Returns:
        The response body as text.
    """
    payload = {
        'orgName': com_name,
        'enforceUnit': '',
        'punishTime': '',
        'punishTimeMax': '',
        # 'gp': 1
    }
    # NOTE(review): the payload is sent as a JSON body, while the headers built
    # in this file's __main__ declare application/x-www-form-urlencoded --
    # confirm which encoding the server expects.
    # A bounded timeout keeps a stalled server from hanging the crawler; the
    # value matches the one used by getRequest in this file.
    response = requests.post(url=url, headers=headers, json=payload, timeout=30)
    return response.text
if __name__=="__main__":
    # Simulate a browser: fill in and trigger the search form through Selenium,
    # then parse the result table out of the rendered page.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Length': '183',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'BizAc_cookie=bXBL4nKhXAx0l6BxLapa7EUva6ZNeLWOEUIogHMY9uhekQVTNHyI!79161097!1697785213601; HMF_CI=2b97dd74cf1745069afc2b52a639692c1fe863ae5ec8f5a99fb173dd50c56f387b0f9597470f17b4f940d0828a06c511577ebce9b48edcf05bac7fa2d981b15286; Hm_lvt_9f8bda7a6bb3d1d7a9c7196bfed609b5=1697785203; JSESSIONIDGS6Credit=_6NRWtzkuNQaDVVmVy6sFRKigu7hsSHVYC_QElYDNYDeDpfllLDo!79161097; Hm_lpvt_9f8bda7a6bb3d1d7a9c7196bfed609b5=1697877010',
        'Host': 'www.ccgp.gov.cn',
        'Origin': 'https://www.ccgp.gov.cn',
        'Referer': 'https://www.ccgp.gov.cn/cr/list',
        'Sec-Fetch-Dest': 'iframe',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }
    com_name = '新疆塔建三五九建工有限责任公司'
    social_code = ''
    # url = 'https://www.ccgp.gov.cn/cr/list'
    url = 'https://www.ccgp.gov.cn/search/cr/'
    browser = createDriver()
    try:
        browser.get(url)
        # The search form lives inside the page's iframe.
        browser.switch_to.frame(browser.find_element(By.TAG_NAME, 'iframe'))
        wait = WebDriverWait(browser, 30)
        wait.until(EC.presence_of_element_located((By.ID, "orgName")))
        # NOTE(review): the searched name is hard-coded here and differs from
        # com_name above -- confirm which one is intended.
        browser.find_element(By.ID, 'orgName').send_keys('忠县威华汽车维修厂')
        # NOTE(review): this clicks the form element itself rather than a
        # submit control -- confirm that this actually triggers the search.
        browser.find_element(By.ID, 'searchForm').click()
        # NOTE(review): <body> is present before the search returns, so this
        # wait may not guarantee the result table has been rendered.
        wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        page_source = browser.page_source
    finally:
        # Always shut the browser and its driver process down, even when a
        # lookup or wait above fails.
        browser.quit()
    # Records of serious illegal and dishonest conduct
    soup = BeautifulSoup(page_source, 'html.parser')
    table = soup.find('table',id='tableInfo')
    # No result table on the page means there is nothing to parse.
    tr_list = table.find_all('tr') if table is not None else []
    field_names = (
        'number',
        'name',
        'code',
        'address',
        'behavior',
        'punish_result',
        'according_file',
        'punish_date',
        'publish_date',
        'units',
    )
    records = []
    # The first row is skipped (presumably the table header).
    for info in tr_list[1:]:
        td_list = info.find_all('td')
        # Rows that do not carry all ten columns cannot be mapped to fields.
        if len(td_list) < len(field_names):
            continue
        dic_info = {key: td.text for key, td in zip(field_names, td_list)}
        records.append(dic_info)
"""信用中国-严重失信名单
链接地址 :
{
source:
type: 严重失信主体名单
searchState: 1
entityType: 1
scenes: defaultscenario
keyword: 雷州市白金银座演艺文化实业有限公司
tyshxydm: 91440882315032592M
page: 1
pageSize: 10
}
"""
import time
from urllib import parse
import requests
from bs4 import BeautifulSoup
from retry import retry
from base.BaseCore import BaseCore
# Project-level helper object; the logger and the cnx/cursor handles below are
# all taken from it.
baseCore = BaseCore()
log = baseCore.getLogger()
# NOTE(review): presumably a database connection and its cursor -- confirm
# against base.BaseCore.
cnx = baseCore.cnx
cursor = baseCore.cursor
@retry(tries=3,delay=1)
def getRequest(url,headers):
    """GET ``url`` with ``headers`` and return the body decoded as JSON.

    Certificate verification is disabled and the request uses a 30 second
    timeout. The function is wrapped by ``retry(tries=3, delay=1)``.
    """
    return requests.get(url=url, headers=headers, verify=False, timeout=30).json()
# Serious dishonesty (dishonest judgment debtors)
def dishonesty():
    """Collect all ``credit_zgf_fr_sxbzxr`` search results for the sample company.

    The first request reads ``data.totalSize``; that count is converted into a
    number of pages (10 records per page) and every page is then read. The
    request headers come from the module-level ``headers`` name.

    Returns:
        A list with one dict per record, holding the fields listed in
        ``entity_fields`` plus ``dataSource``. Empty when nothing matched.

    Raises:
        KeyError: if a response or record lacks one of the expected keys.
    """
    page_size = 10
    base_url = 'https://public.creditchina.gov.cn/private-api/catalogSearch'
    param = {
        'tableName': 'credit_zgf_fr_sxbzxr',
        'searchState': '1',
        'scenes': 'defaultscenario',
        'keyword': '雷州市白金银座演艺文化实业有限公司',
        'tyshxydm': '91440882315032592M',
        'page': '1',
        'pageSize': str(page_size),
    }
    # Fields copied out of each record's 'entity' object.
    entity_fields = (
        'iname',  # name of the dishonest judgment debtor
        'cardnumber',  # organisation code
        'court_name',  # enforcing court
        'area_name',  # province
        'case_code',  # document number of the enforcement basis
        'reg_date',  # case filing date
        'gist_cid',  # case number
        'gist_unit',  # unit that issued the enforcement basis
        'duty',  # obligations set by the effective legal instrument
        'performance',  # how far the debtor has performed
        'disreput_type_name',  # specific circumstances of the dishonest conduct
        'publish_date',  # publication date
        'performed_part',  # part already performed
        'unperform_part',  # part not yet performed
    )
    # urlencode escapes the keyword so the query string stays well-formed.
    json_data = getRequest(base_url + '?' + parse.urlencode(param), headers)
    # NOTE(review): the response also carries a 'status' field; its meaning is
    # not established here, so it is not used to gate processing.
    # totalSize is a record count, not a sequence of pages, so it must be
    # turned into a page range before looping (ceiling division).
    total_size = int(json_data['data']['totalSize'])
    page_count = (total_size + page_size - 1) // page_size
    records = []
    for page in range(1, page_count + 1):
        if page > 1:
            # Page 1 was already fetched by the counting request above.
            param_page = dict(param, page=str(page))
            json_data = getRequest(base_url + '?' + parse.urlencode(param_page), headers)
        for info in json_data['data']['list']:
            entity = info['entity']
            record = {key: entity[key] for key in entity_fields}
            record['dataSource'] = info['dataSource']  # data source
            records.append(record)
    return records
# Administrative penalties
def punish():
    """Collect all ``credit_xyzx_fr_xzcf_new`` search results for the sample company.

    The first request reads ``data.totalSize``; that count is converted into a
    number of pages (10 records per page) and every page is then read. The
    request headers come from the module-level ``headers`` name.

    Returns:
        A list with one dict per record, holding the fields listed in
        ``penalty_fields``. Empty when nothing matched.

    Raises:
        KeyError: if a response or record lacks one of the expected keys.
    """
    page_size = 10
    base_url = 'https://public.creditchina.gov.cn/private-api/catalogSearch'
    param = {
        'tableName': 'credit_xyzx_fr_xzcf_new',
        'searchState': '1',
        'scenes': 'defaultscenario',
        'keyword': '雷州市白金银座演艺文化实业有限公司',
        'tyshxydm': '91440882315032592M',
        'page': '1',
        'pageSize': str(page_size),
    }
    # Fields copied out of each record.
    penalty_fields = (
        'cf_wsh',  # document number of the penalty decision
        'cf_cflb',  # penalty category
        'cf_jdrq',  # date of the penalty decision
        'cf_nr',  # penalty content
        'cf_nr_fk',  # fine amount (10,000 yuan)
        'cf_nr_wfff',  # confiscated illegal gains / property (10,000 yuan)
        'cf_nr_zkdx',  # name and number of the suspended or revoked licence
        'cf_wfxw',  # type of violation
        'cf_sy',  # facts of the violation
        'cf_yj',  # basis for the penalty
        'cf_cfjg',  # penalising authority
        'cf_cfjgdm',  # unified social credit code of the penalising authority
        'cf_sjly',  # data source
        'cf_sjlydm',  # unified social credit code of the data-source unit
    )
    # urlencode escapes the keyword so the query string stays well-formed.
    json_data = getRequest(base_url + '?' + parse.urlencode(param), headers)
    # NOTE(review): the response also carries a 'status' field; its meaning is
    # not established here, so it is not used to gate processing.
    # Total number of records
    total_size = int(json_data['data']['totalSize'])
    if total_size <= 0:
        # Logger.info needs a message; calling it bare raises TypeError.
        log.info('no administrative penalty records found for %s', param['keyword'])
        return []
    # totalSize is a record count, not a sequence of pages, so it must be
    # turned into a page range before looping (ceiling division).
    page_count = (total_size + page_size - 1) // page_size
    records = []
    for page in range(1, page_count + 1):
        if page > 1:
            # Page 1 was already fetched by the counting request above.
            param_page = dict(param, page=str(page))
            json_data = getRequest(base_url + '?' + parse.urlencode(param_page), headers)
        # NOTE(review): fields are read directly from each list item here,
        # whereas dishonesty() reads them from item['entity'] -- confirm the
        # response shape for this table.
        for entity in json_data['data']['list']:
            records.append({key: entity[key] for key in penalty_fields})
    return records
if __name__=='__main__':
    type_list = ['严重失信主体名单','行政管理']
    com_name = social_code = ''
    # Browser-like request headers. dishonesty() and punish() read this
    # module-level name, so it has to exist before they are called.
    headers = {
        'Referer': 'https://www.creditchina.gov.cn/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }
    dishonesty()
    punish()
    # Report link: request a report number first, then derive the PDF URL.
    report_base = 'https://public.creditchina.gov.cn/credit-check/pdf'
    url_report = f'{report_base}/clickDownload?companyName={com_name}&entityType=1&uuid=&tyshxydm={social_code}'
    report_json = getRequest(url_report, headers)
    reportNumber = report_json['data']['reportNumber']
    pdf_url = f'{report_base}/clickDownloadOBS?reportNumber={reportNumber}'
    # The PDF itself is not downloaded yet:
    # respon = requests.get(url=pdf_url,headers=headers,verify=False,timeout=30)
Markdown 格式
0%
您添加了 0 人 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论