import datetime
import json
import re
import time

import ddddocr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from retry import retry
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select

from base import BaseCore

# Shared project helper: supplies the DB connection/cursor, the logger and the
# Selenium driver factory (``buildDriver``) used by the scrapers below.
baseCore = BaseCore.BaseCore()
cnx = baseCore.cnx
cursor = baseCore.cursor
log = baseCore.getLogger()
# Module-level captcha OCR engine, used by getCode().
# NOTE(review): show_ad=False presumably silences ddddocr's startup banner - confirm.
ocr = ddddocr.DdddOcr(show_ad=False)


# Recognise a captcha image
@retry(tries=3, delay=10)
def getCode(code_img):
    """Return the text the module-level OCR engine reads from ``code_img``.

    If the OCR call raises, the ``retry`` decorator re-runs it (3 tries in
    total, 10 seconds apart) before letting the exception propagate.
    """
    return ocr.classification(code_img)


# Fetch a page and parse it into a soup
def getSoup(url, headers, timeout=30):
    """GET ``url`` and return the response body parsed with BeautifulSoup.

    :param url: address to fetch.
    :param headers: HTTP headers sent with the request.
    :param timeout: seconds to wait for the server; without a timeout
        ``requests`` can block indefinitely on an unresponsive host.
    :return: ``BeautifulSoup`` tree built with the ``html.parser`` backend.
    :raises requests.RequestException: on connection errors or timeout.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode using the encoding detected from the body rather than the one
    # declared in the HTTP headers.
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, 'html.parser')


# Close the browser and build a fresh one
def reBuildDriver(driver, url):
    """Dispose of ``driver``, start a new browser and open ``url`` in it.

    :param driver: the Selenium driver to shut down (may already be broken).
    :param url: page the replacement driver should load.
    :return: the newly built driver. Callers must rebind their reference to
        it; rebinding the parameter inside this function does not affect the
        caller's variable, so previously the new browser was unreachable.
    """
    # Shutting down a driver that is already dead must not prevent the rebuild.
    try:
        driver.close()
    except Exception as e:
        log.info(f'driver.close() failed, continuing: {e}')
    try:
        driver.quit()
    except Exception as e:
        log.info(f'driver.quit() failed, continuing: {e}')
    time.sleep(1)
    new_driver = baseCore.buildDriver()
    new_driver.get(url)
    return new_driver


# http://beijing.chinatax.gov.cn/bjsat/office/jsp/zdsswfaj/wwquery.jsp  Beijing       province  company name
def beijing_aj():
    """Query the Beijing 'zdsswfaj' endpoints for one taxpayer ID and print each record.

    The list endpoint is POSTed with the taxpayer ID; every result row carries
    a button whose ``onclick`` contains a numeric record id, which is then
    POSTed to the detail endpoint. Each detail table is printed as a dict of
    ``{first cell text: second cell text}``. Rows that cannot be parsed or
    fetched are logged and skipped.
    """
    url = 'http://beijing.chinatax.gov.cn/bjsat/office/jsp/zdsswfaj/wwquery'
    url_ = 'http://beijing.chinatax.gov.cn/bjsat/office/jsp/zdsswfaj/wwidquery'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'keep-alive',
    }
    # Taxpayer ID being looked up; used by both the list and the detail query.
    nsrsbh = '110112355288218'

    # POST a form body and parse the response into a soup
    def getSoup(url, data_post):
        req = requests.post(url, headers=headers, data=data_post, timeout=30)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'html.parser')

    # Fetch the detail table of one record
    def getData(record_id):
        data = {}
        data_post = f'id={record_id}&dq=null&ajlx=null&ndjd=null&bz=zh&dqy=&ymdx=&nsrmc=&nsrsbh={nsrsbh}&zcdz=&zzjgdm=&fddbrmc=&fddbrsfzhm=&cwfzrmc=&cwfzrsfzhm=&orgCode=11100000000'
        soup = getSoup(url_, data_post)
        for tr_ in soup.find('table').find('table').find_all('tr'):
            td_list = tr_.find_all('td')
            data[f'{td_list[0].text}'] = td_list[1].text
        return data

    # dqy: page number
    data_post = f'orgCode=11100000000&bz=zh&nsrmc=&nsrsbh={nsrsbh}&zcdz=&zzjgdm=&fddbrmc=&fddbrsfzhm=&cwfzrmc=&cwfzrsfzhm='
    soup = getSoup(url, data_post)
    tr_list = soup.select('body > table > tbody > tr > td > table')[1].select('tbody > tr')
    for tr in tr_list:
        try:
            # Rows without a button (e.g. headers) or without a numeric id in
            # the onclick handler fail here and are skipped.
            onclick = tr.select('td')[-1].select('input')[0].get('onclick')
            record_id = re.findall(r'\d+', onclick)[0]
            data = getData(record_id)
        except Exception as e:
            log.info(f'beijing_aj: skipping row: {e!r}')
            continue
        print(data)


def beijing_qs():
    """Walk the Beijing 'qsgg' query result pages for one taxpayer ID and print each record.

    The number of result pages is read from the pager text of the first page;
    every page is then fetched, each row's link is followed and the detail
    table is printed as a ``{label: value}`` dict. Records whose detail page
    cannot be fetched or parsed are logged and skipped.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'beijing.chinatax.gov.cn',
        'Origin': 'http://beijing.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Referer': 'http://beijing.chinatax.gov.cn/bjsat/office/jsp/qsgg/query.jsp',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }
    base = 'http://beijing.chinatax.gov.cn/bjsat/office/jsp/qsgg/'

    # Build the list-page URL for a given page number
    def listUrl(page, nsrsbh, fbdw, nsrlx):
        return f'{base}query.jsp?page_num={page}&dwmc=&fbdw={fbdw}&nsrlx={nsrlx}&nsrsbh={nsrsbh}&fzrxm=&zjhm='

    # Return the rows of the last table inside the "condition" form
    def listRows(page, nsrsbh, fbdw, nsrlx):
        soup = getSoup(listUrl(page, nsrsbh, fbdw, nsrlx), headers)
        form = soup.find('form', attrs={'name': 'condition'})
        return form.select('table')[-1].find_all('tr')

    # Read the total page count from the pager row
    def getTotal(nsrsbh, fbdw, nsrlx):
        pager = listRows(1, nsrsbh, fbdw, nsrlx)[-1]
        return int(re.findall('共(.*?)页', pager.find('td').text)[0])

    # Fetch one detail page
    def getData(url):
        data = {}
        soup = getSoup(url, headers)
        table = soup.select('body > table  > tr > td > table  > tr > td > table')[0]
        for tr in table.find_all('tr'):
            label = tr.find('td', attrs={'align': 'right'})
            content = tr.find('td', attrs={'align': 'left'})
            # Skip layout rows that lack a label/value pair instead of
            # failing the whole record.
            if label is None or content is None:
                continue
            name = label.text.replace('：', '').strip()
            data[f'{name}'] = content.text.strip()
        return data

    nsrsbh = '91110106078500371J'
    # NOTE(review): these look like GBK percent-encodings of the publishing
    # bureau ('北京市税务局') and taxpayer type ('企业或单位') - confirm.
    fbdw = '%B1%B1%BE%A9%CA%D0%CB%B0%CE%F1%BE%D6'
    nsrlx = '%C6%F3%D2%B5%BB%F2%B5%A5%CE%BB'
    total = getTotal(nsrsbh, fbdw, nsrlx)
    for page in range(1, total + 1):
        for tr in listRows(page, nsrsbh, fbdw, nsrlx):
            # Pager and caption rows carry no record link.
            if '首页' in tr.text or '查询结果' in tr.text:
                continue
            a = tr.find('a')
            if a is None or not a.get('href'):
                continue
            href = base + a.get('href')
            try:
                data = getData(href)
            except Exception as e:
                log.info(f'beijing_qs: skipping {href}: {e!r}')
                continue
            print(data)


# http://tianjin.chinatax.gov.cn/  Tianjin   not workable (不可以)


# http://hebei.chinatax.gov.cn/  Hebei
def hebei_aj():
    """Query the Hebei 'taxDishonestyCases' list and print the detail of a record.

    A session is primed with a GET, the total record count is read, and the
    list is paged 50 rows at a time. For each row the detail page is fetched
    by ``uid`` and its ``tab_zfqxbox`` table is printed as a dict.
    """
    page_size = 50
    list_url = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service/law/penalty/findTaxDishonestyCasesList?cid=27&uid='

    # POST one list query and return the decoded JSON
    def query(session, NSRSBH, pageSize, pageNum):
        data_post = {
            'column': 'adPenalty/taxDishonestyCases',
            'queryCity': '113',
            'queryType': '',
            'queryDate': '',
            'NSRMC': '',
            'NSRSBH': NSRSBH,
            'ZCDZ': '',
            'FDDBR': '',
            'FDDBZJH': '',
            'CWFZR': '',
            'CWFZRZJH': '',
            'pageSize': f'{pageSize}',
            'pageNum': f'{pageNum}',
            'orderByColumn': '',
            'isAsc': 'asc'
        }
        req = session.post(list_url, data=data_post, timeout=30)
        req.encoding = req.apparent_encoding
        return req.json()

    def getTotal(session, NSRSBH):
        return int(query(session, NSRSBH, 10, 1)['total'])

    def getDatas(session, NSRSBH, page):
        # The page index goes in pageNum and the fixed page length in pageSize;
        # they were previously swapped, so every call asked for page 1.
        return query(session, NSRSBH, page_size, page)['rows']

    def getData(url, headers):
        data = {}
        soup = getSoup(url, headers)
        table = soup.find('table', class_='tab_zfqxbox')
        for tr in table.find_all('tr'):
            td_list = tr.find_all('td')
            # Rows without a name/value pair carry no data.
            if len(td_list) < 2:
                continue
            data[f'{td_list[0].text}'] = td_list[1].text
        return data

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46',
        'X-Requested-With': 'XMLHttpRequest'
    }
    session = requests.session()
    URL = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/law/adLevy/owingTaxes'
    # Initial GET on the session before the list queries.
    session.get(URL, headers=headers, timeout=30)
    # NSRSBH = '92130321MA082RXX9T'
    NSRSBH = ''
    total = getTotal(session, NSRSBH)
    # Ceiling division: number of 50-row pages needed for `total` rows.
    pageSize = (total + page_size - 1) // page_size
    for page in range(1, pageSize + 1):
        datas = getDatas(session, NSRSBH, page)
        for data in datas:
            uid = data['uid']
            href = f'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service/law/penalty/findtaxDishonestyCasesContent?uid={uid}'
            data_send = getData(href, headers)
            print(data_send)
            # NOTE(review): both breaks stop after the very first record;
            # they look like debugging leftovers - remove to crawl everything.
            break
        break


def hebei_qs():
    """Page through the Hebei 'owingTaxes' list and print every row as a labelled dict.

    A session is primed with a GET, the total record count is read, then the
    list is fetched 50 rows per page with a 2-second pause between pages.
    """
    page_size = 50
    list_url = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service/law/levy/findOwingTaxes?cid=15&uid='

    # POST one list query and return the decoded JSON
    def query(session, NSRSBH, page):
        data_post = {
            'column': 'adLevy/owingTaxes',
            'NSRMC': '',
            'NSRSBH': NSRSBH,
            'FDDBRMC': '',
            'NATURE': 'ztsz',
            'pageSize': f'{page_size}',
            'pageNum': f'{page}',
            'orderByColumn': '',
            'isAsc': 'asc'
        }
        req = session.post(list_url, data=data_post, timeout=30)
        req.encoding = req.apparent_encoding
        return req.json()

    def getTotal(session, NSRSBH):
        return int(query(session, NSRSBH, 1)['total'])

    def getDatas(session, NSRSBH, page):
        return query(session, NSRSBH, page)['rows']

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46',
        'X-Requested-With': 'XMLHttpRequest'
    }
    session = requests.session()
    URL = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/law/adLevy/owingTaxes'
    session.get(URL, headers=headers, timeout=30)
    # NSRSBH = '92130321MA082RXX9T'
    NSRSBH = ''
    total = getTotal(session, NSRSBH)
    # Ceiling division: number of 50-row pages needed for `total` rows.
    pageSize = (total + page_size - 1) // page_size
    for page in range(1, pageSize + 1):
        for data in getDatas(session, NSRSBH, page):
            # A fresh dict per row, so rows never share state if they are
            # later collected instead of printed.
            data_send = {
                '企业或是单位名称': data['NSRMC'],
                # Previously filled from 'JMSFZH', the same field as the
                # ID-card column below. NOTE(review): 'NSRSBH' is assumed to
                # be the taxpayer-ID key in the response (it is the request
                # parameter name); falls back to the old value if absent.
                '统一社会信用代码纳税人识别号': data.get('NSRSBH', data['JMSFZH']),
                '法定代表人或负责人姓名': data['FDDBRMC'],
                '居民身份证或其他有效身份证件号码': data['JMSFZH'],
                '经营地点': data['JYDD'],
                '欠税税种': data['QSSZ'],
                '欠税余额': data['QSYE'],
                '当期新发生的欠税金额': data['DQXFSDQSJE'],
            }
            print(data_send)
        time.sleep(2)
    time.sleep(2)


def hebei_cf():
    """Print the Hebei 'taxPenaltyResult' rows for each taxpayer ID in ``relationIds``.

    For every ID the total row count is read first; IDs with no rows are
    reported and skipped, otherwise the list is fetched 10 rows per page and
    the collected, relabelled rows are printed as one list.
    """
    # (output label, key in the JSON row), in output order
    field_map = (
        ('决定书文号', 'XZCFJDSWH'),
        ('案件名称', 'AJMC'),
        ('处罚类别', 'CFLB'),
        ('处罚事由', 'CFSY'),
        ('处罚依据', 'CFYJ'),
        ('行政相对人名称', 'NSRMC'),
        ('行政相对人代码', 'NSRSBH'),
        ('法定代表人姓名', 'FDDBRXM'),
        ('处罚结果', 'CFJG'),
        ('处罚机关', 'CFSWJG'),
        ('处罚生效期', 'ZCGCFRQ'),
    )

    # Form body for one list request
    def buildPayload(relationId, page_num):
        return {
            'column': 'adPenalty/taxPenaltyResult',
            'AJMC': '',
            'NSRMC': '',
            'NSRSBH': relationId,
            'CFSWJG': '',
            'pageSize': '10',
            'pageNum': page_num,
            'orderByColumn': '',
            'isAsc': 'asc',
        }

    def getTotal(relationId):
        resp = session.post(url, data=buildPayload(relationId, '1'))
        resp.encoding = resp.apparent_encoding
        return int(resp.json()['total'])

    def getDataJson(relationId, page):
        resp = session.post(url, data=buildPayload(relationId, f'{page}'))
        resp.encoding = resp.apparent_encoding
        return resp.json()['rows']

    headers = {
        'Accept': 'text/javascript, application/javascript, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }
    url = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service/law/penalty/findTaxPenaltyResultList?cid=26&uid='

    URL = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/law/adLevy/owingTaxes'
    session = requests.session()
    # Initial GET on the session before the list queries.
    session.get(URL, headers=headers)
    relationIds = ['91220802316729682A']
    for relationId in relationIds:
        data_list = []
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        # Number of 10-row pages, rounding up for a partial last page.
        full_pages, remainder = divmod(total, 10)
        pageSize = full_pages + 1 if remainder else full_pages
        page = 1
        while page <= pageSize:
            for row in getDataJson(relationId, page):
                data_list.append({label: row[key] for label, key in field_map})
            page += 1
        print(data_list)


# http://shanxi.chinatax.gov.cn/  Shanxi
def shanxi_aj():
    """Drive the Shanxi 'zdsswf' page in a browser and print each record's detail table.

    The taxpayer ID is typed into the search form, then for every result page
    each row's last-column link is clicked and the table shown in the
    ``xxgbl_box_show`` element is printed as ``{th text: td text}``. Paging
    stops when the "next" control cannot be clicked. The browser is always
    shut down, even on errors.
    """

    def inputId(driver, relationId):
        driver.find_element(By.ID, 'NSRSBH').send_keys(relationId)
        driver.find_element(By.CLASS_NAME, 'xxgbl_query_btn').find_elements(By.TAG_NAME, 'a')[-1].click()
        time.sleep(2)

    def sendData(driver):
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'list_table'))
        )
        # Drop the first row (treated as the header); slicing is safe on an
        # empty table, unlike `del tr_list[0]`.
        rows = driver.find_element(By.ID, 'list_table').find_elements(By.TAG_NAME, 'tr')[1:]
        if not rows:
            return
        for tr in rows:
            data = {}
            tr.find_elements(By.TAG_NAME, 'td')[-1].find_element(By.TAG_NAME, 'a').click()
            div_table = driver.find_element(By.CLASS_NAME, 'xxgbl_box_show').find_element(By.TAG_NAME, 'table')
            for tr_ in div_table.find_elements(By.TAG_NAME, 'tr'):
                name = tr_.find_element(By.TAG_NAME, 'th').text
                value = tr_.find_element(By.TAG_NAME, 'td').text
                data[f'{name}'] = value
            # The record was previously built and then discarded; emit it the
            # same way the other scrapers in this file do.
            print(data)
            time.sleep(1)
        # NOTE(review): the detail box is only closed once per page, after all
        # rows - confirm the box does not need closing after every row.
        driver.find_element(By.CLASS_NAME, 'xxgbl_box_show_btn').find_elements(By.TAG_NAME, 'a')[-1].click()

    driver = baseCore.buildDriver()
    try:
        url = 'http://shanxi.chinatax.gov.cn/topic/zdsswf/sx-11400'
        driver.get(url)
        time.sleep(1)
        relationId = '91140100MA0GX1W91F'
        inputId(driver, relationId)
        while True:
            sendData(driver)
            try:
                driver.find_element(By.ID, 'page').find_element(By.ID, 'next').click()
                time.sleep(2)
            except Exception:
                # No clickable "next" control: treat as the last page.
                break
    finally:
        # close() may fail if the window is already gone; quit() ends the
        # session and the driver process.
        try:
            driver.close()
        except Exception:
            pass
        driver.quit()


# http://neimenggu.chinatax.gov.cn/  Inner Mongolia
def neimenggu_aj():
    """Search the Inner Mongolia 'zdaj' index for one taxpayer ID and print each hit's detail table.

    The search form is POSTed, every result row's link (``td.fsnd > a``) is
    followed, and the table inside ``div.cc`` on the detail page is printed
    as a ``{name: value}`` dict. Failing detail pages are logged and skipped.
    """

    def getData(url, headers):
        soup = getSoup(url, headers)
        table = soup.find('div', class_='cc').find('table')
        data = {}
        for tr in table.find_all('tr'):
            td_list = tr.find_all('td')
            if len(td_list) < 2:
                continue
            name = td_list[0].text
            # Skip rows with an empty label. The old check compared the Tag
            # object itself with '' and therefore never matched.
            if name.strip() == '':
                continue
            data[f'{name}'] = td_list[1].text
        return data

    def getSoup_Post(relationId, headers):
        url = 'http://neimenggu.chinatax.gov.cn/was5/search'
        data_post = {
            'channelid': '214090',
            'sortfield': '-DOCRELTIME',
            'searchword': '',
            'title': '',
            'zdaj_nsrsbh': relationId,
            'zdaj_zcdz': '',
            'zdaj_zzjgdm': '',
            'zdaj_fddb_xm': '',
            'zdaj_fddb_sfzh': '',
            'zdaj_cwfzr_xm': '',
            'zdaj_cwfzr_sfzh': '',
        }
        req = requests.post(url, headers=headers, data=data_post, timeout=30)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'html.parser')

    # No hard-coded Content-Length: these headers are reused for the detail
    # GET requests, which have no body, and the real length of the POST body
    # depends on the taxpayer ID. requests computes the header itself.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'neimenggu.chinatax.gov.cn',
        'Origin': 'http://neimenggu.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Referer': 'http://neimenggu.chinatax.gov.cn/nsfw/sscx/zdaj/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }
    relationId = '91150622MA0N63T2X9'
    soup = getSoup_Post(relationId, headers)
    container = soup.find('div', attrs={'id': 'jjuuu'})
    table = container.find('table') if container is not None else None
    if table is None:
        # Result container missing: nothing to scrape for this ID.
        log.info(f'{relationId}===无数据')
        return
    for tr in table.find_all('tr'):
        cell = tr.find('td', class_='fsnd')
        a = cell.find('a') if cell is not None else None
        if a is None or not a.get('href'):
            continue
        href = a.get('href')
        try:
            data = getData(href, headers)
        except Exception as e:
            log.info(f'neimenggu_aj: skipping {href}: {e!r}')
            continue
        print(data)


# http://liaoning.chinatax.gov.cn/  Liaoning
def liaoning_aj():
    """Search the Liaoning column page for each taxpayer ID and print the detail records found.

    The search itself runs in a browser (the results live in the
    ``contentList`` iframe); the result pages and detail pages are then
    fetched with plain HTTP requests. One list of ``{name: value}`` dicts is
    printed per taxpayer ID. The browser is always shut down.
    """

    def inputId(driver, relationId):
        box = driver.find_element(By.ID, 'FormSearchTest').find_element(By.NAME, 'b')
        # Clear first so successive IDs do not accumulate in the input.
        box.clear()
        box.send_keys(relationId)
        driver.find_element(By.ID, 'fgk-select').click()
        time.sleep(2)
        driver.switch_to.frame('contentList')

    def getDataList(url):
        data_list = []
        soup = getSoup(url, headers)
        table = soup.find_all('table')[-1]
        for a in table.find_all('a'):
            href = a.get('href')
            if not href:
                continue
            if 'http' not in href:
                href = href.replace('../../', 'http://liaoning.chinatax.gov.cn/')
            data_list.append(getData(href))
        return data_list

    def getData(url):
        data = {}
        soup = getSoup(url, headers)
        table_ = soup.find('div', class_='bt-article-02').find('table')
        for tr_ in table_.find_all('tr'):
            td_list = tr_.find_all('td')
            # Rows without a name/value pair carry no data.
            if len(td_list) < 2:
                continue
            name = td_list[0].text.strip()
            data[f'{name}'] = td_list[1].text
        return data

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }

    url = 'http://liaoning.chinatax.gov.cn/col/col5883/index.html?LMCL=VHE7P5'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'FormSearchTest'))
        )
        relationIds = ['91210300MA0U0N6N3E']
        for relationId in relationIds:
            inputId(driver, relationId)
            try:
                tips = driver.find_elements(By.CLASS_NAME, 'tipIntState')
                if any('没有找到相关数据' in tip.text for tip in tips):
                    log.info(f'{relationId}===无数据')
                    # Previously execution fell through and tried to parse a
                    # page count from an empty result.
                    continue
                form = driver.find_element(By.TAG_NAME, 'form')
                action = form.get_attribute('action')
                pager_text = form.find_elements(By.TAG_NAME, 'span')[-1].text
                total = int(re.findall(r'\d+', pager_text)[0])
                data_list = []
                for page in range(1, total + 1):
                    data_list += getDataList(f'{action}&currpage={page}')
                print(data_list)
            finally:
                # Always leave the iframe so the next ID can find the form.
                driver.switch_to.parent_frame()
                time.sleep(2)
    finally:
        # close() may fail if the window is already gone; quit() ends the
        # session and the driver process.
        try:
            driver.close()
        except Exception:
            pass
        driver.quit()


# http://jilin.chinatax.gov.cn/  Jilin
def jilin_aj():
    """Search the Jilin column page for each taxpayer ID and print the detail records found.

    The search runs in a browser (results live in the ``contentList``
    iframe); result pages and detail pages are then fetched with plain HTTP
    requests. One list of ``{name: value}`` dicts is printed per taxpayer ID,
    or a "no data" line when the page count is 0. The browser is always shut
    down.
    """

    def inputId(driver, relationId):
        box = driver.find_element(By.ID, 'FormSearchTest').find_element(By.NAME, 'field_1114')
        # Clear first so successive IDs do not accumulate in the input.
        box.clear()
        box.send_keys(relationId)
        driver.find_element(By.ID, 'fgk-select').click()
        time.sleep(2)
        driver.switch_to.frame('contentList')

    def getDataList(url):
        data_list = []
        soup = getSoup(url, headers)
        table = soup.find_all('table')[-1]
        for a in table.find_all('a'):
            href = a.get('href')
            if not href:
                continue
            if 'http' not in href:
                href = href.replace('../../', 'http://jilin.chinatax.gov.cn/')
            data_list.append(getData(href))
        return data_list

    def getData(url):
        data = {}
        soup = getSoup(url, headers)
        table = soup.find('div', class_='section').find('table')
        for tr in table.find_all('tr'):
            td_list = tr.find_all('td')
            if len(td_list) < 2:
                continue
            name = td_list[0].text.strip()
            # Strip embedded scripts from the value cell when present. Cells
            # without a script used to raise here and the row was silently
            # dropped by a bare except.
            for script in td_list[1].find_all('script'):
                script.decompose()
            data[f'{name}'] = td_list[1].text
        return data

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }

    url = 'http://jilin.chinatax.gov.cn/col/col19972/index.html'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'FormSearchTest'))
        )
        relationIds = ['91220802316729682A']
        for relationId in relationIds:
            inputId(driver, relationId)
            try:
                form = driver.find_element(By.TAG_NAME, 'form')
                action = form.get_attribute('action')
                pager_text = form.find_elements(By.TAG_NAME, 'span')[-1].text
                total = int(re.findall(r'\d+', pager_text)[0])
                if total == 0:
                    print(f'{relationId}===无数据')
                    continue
                data_list = []
                for page in range(1, total + 1):
                    data_list += getDataList(f'{action}&currpage={page}')
                print(data_list)
            finally:
                # Always leave the iframe, including on the "no data" path,
                # so the next ID can find the search form again.
                driver.switch_to.parent_frame()
                time.sleep(2)
    finally:
        # close() may fail if the window is already gone; quit() ends the
        # session and the driver process.
        try:
            driver.close()
        except Exception:
            pass
        driver.quit()


def jilin_qs():
    """Query the Jilin e-tax 'query_qsxx' endpoint for each taxpayer ID and print the rows.

    For every ID the total row count is read first; IDs with no rows are
    reported and skipped, otherwise the result is fetched 20 rows per page
    (zero-based ``pageIndex``) and printed as one list of labelled dicts.
    """
    per_page = 20

    # POST one query and return the decoded JSON
    def query(relationId, page_index):
        data_post = {
            'nsrlx': '1',
            'shxydm': f'{relationId}',
            'nsrmc': '',
            'swjg': '12200000000',
            'type': 'gs',
            'jc': '0',
            'pageSize': f'{per_page}',
            'pageIndex': f'{page_index}',
        }
        req = requests.post(url, headers=headers, data=data_post, timeout=30)
        req.encoding = req.apparent_encoding
        return req.json()

    def getTotal(relationId):
        return int(query(relationId, 0)['total'])

    def getDataList(relationId, page):
        data_list = []
        for data_json in query(relationId, page)['data']:
            data_list.append({
                '企业或单位的名称': data_json['NSRMC'],
                '统一社会信用代码（纳税人识别号）': data_json['TYSHXYDM'],
                '法定代表人或负责人姓名': data_json['FDDBRXM'],
                '居民身份证或其他有效身份证件号码': data_json['SFZJHM'],
                '经营地点': data_json['JYDD'],
                '欠税税种': data_json['QSSZ'],
                '欠税余额': data_json['QSYE'],
                '当期新发生的欠税金额': data_json['DQXQ'],
            })
        return data_list

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'etax.jilin.chinatax.gov.cn:10853',
        'Origin': 'https://etax.jilin.chinatax.gov.cn:10853',
        'Pragma': 'no-cache',
        'Referer': 'https://etax.jilin.chinatax.gov.cn:10853/zfgspt/extranet/qsxx',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    url = 'https://etax.jilin.chinatax.gov.cn:10853/zfgspt/extranet/shgs/query_qsxx.spring'
    relationIds = ['9122010166011250XC', '9122010166011250XA']
    for relationId in relationIds:
        data_list = []
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        # Integer ceiling division. `total / 20` produced a float, which made
        # range() raise TypeError whenever total was a multiple of 20.
        pageSize = (total + per_page - 1) // per_page
        for page in range(0, pageSize):
            data_list += getDataList(relationId, page)
        print(data_list)


# http://heilongjiang.chinatax.gov.cn/  Heilongjiang
def heilongjiang_aj():
    """Search the Heilongjiang bulletin endpoint for each taxpayer ID and print the detail records.

    The AJAX endpoint answers with a script whose ``dataStore`` array holds
    an HTML fragment; the links in that fragment are followed and the table
    inside ``div.artcontent`` is turned into a ``{name: value}`` dict. One
    list is printed per ID, or a "no data" line when nothing was found.
    """

    def getAList(relationId):
        a_list = []
        url = 'http://heilongjiang.chinatax.gov.cn/module/jslib/bulletin/ajaxfors.jsp?startrecord=1&endrecord=1&perpage=11'
        data_post = {
            'tablename': 'jcms_141',
            'nsrmc': '',
            'nsrsbh': f'{relationId}',
            'zcdz': '',
            'zzjgdm': '',
            'fddbrmc': '',
            'fddbrsfzhm': '',
            'cwfzrmc': '',
            'cwfzrsfzhm': '',
        }
        req = requests.post(url, headers=headers, data=data_post, timeout=30)
        req.encoding = req.apparent_encoding
        # An empty dataStore means the search returned nothing.
        if 'dataStore = [];' not in req.text:
            # Raw string: the old literal relied on invalid escape sequences
            # (\[, \], \;), which newer Python versions warn about.
            fragments = re.findall(r'\[ "(.*)"\];', req.text)
            if fragments:
                a_list = BeautifulSoup(fragments[0], 'html.parser').select('a')
        return a_list

    def getData(a):
        data = {}
        href = a.get('href')
        soup_ = getSoup(href, headers)
        table = soup_.find('div', class_='artcontent').find('table')
        for tr in table.find_all('tr'):
            td_list = tr.find_all('td')
            # Rows without a name/value pair carry no data.
            if len(td_list) < 2:
                continue
            data[f'{td_list[0].text}'] = td_list[1].text
        return data

    headers = {
        'Accept': 'text/javascript, application/javascript, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'heilongjiang.chinatax.gov.cn',
        'Origin': 'http://heilongjiang.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
        'X-Requested-With': 'XMLHttpRequest',
    }

    relationIds = ['91230125MA1B8PTF8F', '91230125MA1B8PTF8A']
    for relationId in relationIds:
        data_list = [getData(a) for a in getAList(relationId) if a.get('href')]
        if data_list:
            print(data_list)
        else:
            print(f'{relationId}===无数据')


def heilongjiang_qs():
    """Scrape the Heilongjiang e-tax 'qsgg_list' pages and insert every row into ``QS_Notice``.

    For each ``bz`` value ('qy', 'gt') the page count is read from the pager
    text of page 1, then every page's ``layui-table`` rows are inserted, one
    commit per row, stamped with today's date as ``publishDate``.
    """

    # POST the list form for one page and parse the response
    def getSoup(bz, page):
        url = f'https://etax.heilongjiang.chinatax.gov.cn/nologin/xxcx/qsgg_list.jsp?rq=&bz={bz}'
        data_post = {
            'rq': '',
            'bz': f'{bz}',
            't_jump': '1',
            'pc': '20',
            'cc': f'{page}',
            'tc': '400',
        }
        req = requests.post(url, headers=headers, data=data_post, timeout=30)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'html.parser')

    # Read the total page count from the pager text of page 1
    def getTotal(bz):
        soup = getSoup(bz, 1)
        return int(re.findall(r'共(\d+)页', soup.find('td', class_='TR_ODD').text)[0])

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'etax.heilongjiang.chinatax.gov.cn',
        'Origin': 'https://etax.heilongjiang.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Parameterized statement: the values are scraped text and must never be
    # spliced into the SQL string (injection, and breakage on quotes).
    # NOTE(review): assumes the driver behind baseCore.cursor uses the '%s'
    # paramstyle (e.g. pymysql / MySQLdb) - confirm.
    sql = (
        "INSERT INTO QS_Notice(nsrName,nsrsbh,fdName,IDNumber,address,qsje,xzqs,province,publishDate) "
        "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )

    bzs = ['qy', 'gt']
    num = 0
    pub_time = str(datetime.date.today())
    for bz in bzs:
        total = getTotal(bz)
        for page in range(1, total + 1):
            soup = getSoup(bz, page)
            table = soup.find('table', class_='layui-table')
            # Skip the first row (treated as the header).
            for tr in table.find_all("tr")[1:]:
                cells = [td.text.strip() for td in tr.find_all('td')]
                # Columns 1..7 are read below; shorter rows carry no record.
                if len(cells) < 8:
                    continue
                shxydm, nsrmc, fddbrw, fddbrwsfzh, scjydz, qsje, xzqs = cells[1:8]
                cursor.execute(
                    sql,
                    (nsrmc, shxydm, fddbrw, fddbrwsfzh, scjydz, qsje, xzqs, '黑龙江省', pub_time),
                )
                cnx.commit()
                log.info('新增一条数据')
                num += 1
    log.info(f'共获取{num}条数据')


# http://shanghai.chinatax.gov.cn/ 上海
def shanghai_aj():
    """Open the Shanghai tax bureau query page and type in the OCR'd captcha.

    The flow only loads ``url``, screenshots the captcha image, recognises it
    and fills the captcha input; no query is submitted and nothing is
    returned. The original author noted that captcha recognition is not
    accurate.
    """
    # Function-local OCR engine; it shadows the module-level ``ocr``.
    ocr = ddddocr.DdddOcr(show_ad=False)

    @retry(tries=3, delay=10)
    def getCode(code_img):
        """Return the recognised captcha text, rejecting results under 4 chars."""
        code = ocr.classification(code_img)
        if len(code) < 4:
            # A bare ``raise`` with no active exception only "works" by
            # accident (RuntimeError); raise an explicit error so that the
            # retry decorator and the traceback say what went wrong.
            raise ValueError(f'captcha recognition too short: {code!r}')
        return code

    url = 'http://shanghai.chinatax.gov.cn/newxbwz/tycx/TYCXzdsswfajgblCtrl-init.pfv#'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        time.sleep(1)
        code_img = driver.find_element(By.ID, 'yzm1').screenshot_as_png
        code = getCode(code_img)
        driver.find_element(By.ID, 'yzm').send_keys(code)
        time.sleep(5)
    finally:
        # Always release the browser, even when a step above raised.
        driver.quit()


def shanghai_qs():
    """Query the Shanghai tax-arrears list for each taxpayer id and print the rows.

    For every id in ``relationIds`` the result pages are fetched one by one
    with a form POST, each table row is turned into a dict keyed by the
    column captions, and the collected list is printed. Ids whose result has
    no pages print a "no data" line instead.
    """
    # Column captions in the order of the <td> cells of a result row.
    columns = (
        '纳税人名称',
        '税号或统一码',
        '负责人姓名',
        '证件名称',
        '证件号码',
        '欠税税种',
        '合计',
        '以前年度陈欠余额',
        '本年度新欠余额',
        '税款所属税务机关',
    )

    def getResultTable(page, relationId):
        """POST the query for one page and return the last ``visible-xs`` table."""
        data_post = {
            'curPage': f'{page}',
            'type': 'QY',
            'nsrmc': '',
            'swdjh': f'{relationId}',
            'fzrxm': '',
        }
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'html.parser')
        return soup.find_all('table', class_='visible-xs')[-1]

    def getTotal(relationId):
        """Return the page count read from the table's last row, or 0 if absent."""
        pager_row = getResultTable(1, relationId).find_all('tr')[-1]
        for span in pager_row.find_all('span'):
            if '总页数' in span.text:
                return int(re.findall(r'\d+', span.text)[0])
        # No page-count span found: report "no pages" instead of returning
        # None, which would break the ``range(1, total + 1)`` in the caller.
        return 0

    def getDataList(page, relationId):
        """Return one dict per data row of the given result page."""
        # The first two rows (presumably headers) and the last row (the pager
        # that getTotal reads) are not data rows.
        tr_list = getResultTable(page, relationId).find_all('tr')[2:-1]
        data_list = []
        for tr in tr_list:
            td_list = tr.find_all('td')
            # Build a fresh dict per row; sharing one dict across iterations
            # made every list entry alias the last row.
            data_list.append({name: td_list[i].text for i, name in enumerate(columns)})
        return data_list

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'shanghai.chinatax.gov.cn',
        'Origin': 'http://shanghai.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Referer': 'http://shanghai.chinatax.gov.cn/newxbwz/tycx/TYCXqjsknsrmdCtrl-getQjsknsrmd.pfv',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }
    url = 'http://shanghai.chinatax.gov.cn/newxbwz/tycx/TYCXqjsknsrmdCtrl-getQjsknsrmd.pfv'
    relationIds = ['310101577425652']
    for relationId in relationIds:
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        data_list = []
        for page in range(1, total + 1):
            data_list += getDataList(page, relationId)
        print(data_list)


# http://jiangsu.chinatax.gov.cn/ 江苏
def jiangsu_aj():
    """Look up Jiangsu tax-case records for each taxpayer id and print them.

    The search endpoint is POSTed page by page; every hit carries a ``url``
    pointing at a detail page whose two-column table is scraped into a
    ``{caption: value}`` dict. The dicts collected for one id are printed.
    """

    def postQuery(page, relationId):
        """POST the search form for one page and return the decoded JSON body."""
        data_post = {
            'pageSize': '11',
            'pageNo': f'{page}',
            'name': '',
            'nsrsbh': f'{relationId}',
            'zcdz': '',
            'swjg': '',
            'lparea': '',
            'year': '',
            'month': '',
            'fdname': '',
            'fdcard': '',
            'count': '1',
        }
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        return req.json()

    def getTotal(relationId):
        """Return the number of result pages reported for ``relationId``."""
        return int(postQuery(1, relationId)['totalPage'])

    def getDataJson(page, relationId):
        """Return the list of hits on the given result page."""
        return postQuery(page, relationId)['data']

    def getData(detail_url):
        """Scrape the name/value table of one detail page into a dict."""
        data = {}
        soup = getSoup(detail_url, headers)
        table = soup.find('div', class_='ssfgk-result-list').find('table')
        for tr in table.find_all('tr'):
            td_list = tr.find_all('td')
            if len(td_list) < 2:
                # Rows without a caption/value pair carry no data.
                continue
            name = td_list[0].text.strip()
            value = td_list[1].text.strip()
            data[name] = value
        return data

    headers = {
        'Accept': 'application/json, text/javascript, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }
    # Search endpoint read by postQuery() through the closure; it must never
    # be rebound while the loops below are running.
    url = 'https://jiangsu.chinatax.gov.cn/module/jslib/bulletin2/lpajaxfors.jsp'
    relationIds = ['91320585796507877Y']
    for relationId in relationIds:
        data_list = []
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        for page in range(1, total + 1):
            data_json = getDataJson(page, relationId)
            for data_ in data_json:
                # Use a separate name: assigning the detail link to ``url``
                # redirected every later search POST to the detail page.
                detail_url = data_['url']
                data_list.append(getData(detail_url))
        print(data_list)


def jiangsu_qs():
    """Fetch the 2023 Jiangsu tax-arrears notices and insert them into QS_Notice.

    Both taxpayer kinds in ``yhlxs`` are queried. The endpoint is paged with
    20 rows per page; every returned row is inserted and committed
    individually. Rows that fail to insert are logged and skipped. The number
    of inserted rows is logged at the end.
    """
    page_rows = 20
    # JSON keys of a result row, in the column order of the INSERT below.
    fields = ('nsrmc', 'nsrsbh', 'fzrxm', 'sfzjhm', 'jydz', 'qssz', 'qs_ye', 'dqxfsqs_je')
    # Parameterised statement so scraped text (quotes etc.) cannot break or
    # alter the SQL. NOTE(review): assumes the shared cursor uses the
    # ``%s`` (format) paramstyle, as the MySQL drivers do - confirm in BaseCore.
    insert_sql = (
        "INSERT INTO QS_Notice(nsrName,nsrsbh,fdName,IDNumber,address,qssz,qsje,xzqs,province,publishDate) "
        "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )

    def postJson(url, page):
        """POST the paging form to ``url`` and return the decoded JSON body."""
        data_post = {
            'page': f'{page}',
            'rows': f'{page_rows}',
        }
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        return req.json()

    def getTotal(url):
        """Return the total number of rows reported by the endpoint."""
        return int(postJson(url, 1)['total'])

    def getDataJson(url, page):
        """Return the rows of one result page."""
        return postJson(url, page)['rows']

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }
    pub_time = datetime.date.today()
    yhlxs = ['qy', 'gt']
    num = 0
    for yhlx in yhlxs:
        url = f'https://etax.jiangsu.chinatax.gov.cn/portal/queryapi/queryGgcxQsxx.do?type=tzgg&yhlx={yhlx}&nd=2023'
        total = getTotal(url)
        # Ceiling division: number of pages needed for ``total`` rows.
        pageSize = -(-total // page_rows)
        for page in range(1, pageSize + 1):
            for data_ in getDataJson(url, page):
                try:
                    # str() keeps the stored text identical to what the old
                    # f-string interpolation produced.
                    values = [str(data_[key]) for key in fields]
                    cursor.execute(insert_sql, (*values, '江苏省', str(pub_time)))
                    cnx.commit()
                    num += 1
                    print('新增一条')
                except Exception:
                    # .get() so that a row with a missing key cannot make the
                    # error report itself raise KeyError.
                    shown = "','".join(str(data_.get(key)) for key in fields)
                    log.error(f"'{shown}'===插入失败")
                    continue
    log.info(f'共新增{num}条')


# http://zhejiang.chinatax.gov.cn/ 浙江
def zhejiang_aj():
    """Search the Zhejiang tax-case page for every id in the Excel sheet.

    Ids come from column ``relationId`` of ``./浙江省企业.xlsx``. For each id
    the search form (frame ``top``) is submitted, the result list (frame
    ``right``) is read, and each hit's detail page is scraped and logged.
    When processing an id fails, the browser is rebuilt and the id is retried
    once; a second failure is logged and the loop moves on.
    """

    def inputId(driver, relationId):
        """Type ``relationId`` into the search form in frame ``top`` and submit."""
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'top'))
        )
        driver.switch_to.frame('top')
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'nsrsbh'))
        )
        id_box = driver.find_element(By.ID, 'nsrsbh')
        id_box.clear()
        id_box.send_keys(relationId)
        time.sleep(1)
        driver.find_element(By.ID, 'submit').click()

    def getData(href):
        """Scrape the name/value rows of one detail page into a dict."""
        data = {}
        soup = getSoup(href, headers)
        for tr in soup.find_all('tr', class_='rlbbox'):
            rlbname = tr.find('div', class_='rlbname').text
            rlbvalue = tr.find('div', class_='rlbvalue').text
            data[rlbname] = rlbvalue
        return data

    def getHref(driver, relationId):
        """Read the result list in frame ``right`` and log every detail record."""
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'right'))
        )
        driver.switch_to.frame('right')
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'jpage'))
        )
        flg = driver.find_element(By.ID, 'jpage').text
        if '很遗憾，没有检索到任何记录' in flg:
            log.info(f'{relationId}===无数据')
            return
        table = driver.find_element(By.CLASS_NAME, 'rightlistbox')
        for tr in table.find_elements(By.CLASS_NAME, 'rlbbox'):
            td = tr.find_elements(By.TAG_NAME, 'td')[-1]
            # The detail link is embedded in the span's onclick handler.
            onclick = td.find_element(By.TAG_NAME, 'span').get_attribute('onclick')
            href = re.findall(pattern, str(onclick))[0]
            log.info(getData(href))

    def process(driver, relationId):
        """Run one search and leave the driver back on the top-level document."""
        inputId(driver, relationId)
        driver.switch_to.parent_frame()
        getHref(driver, relationId)
        driver.switch_to.parent_frame()

    df = pd.read_excel('./浙江省企业.xlsx', sheet_name='Sheet1')
    relationId_list = df['relationId']
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.57',
        'X-Requested-With': 'XMLHttpRequest',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
    }
    pattern = r'window.open\(\'(.*?)\'\,'
    url = 'http://zhejiang.chinatax.gov.cn/col/col24411/index.html'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        for relationId in relationId_list:
            try:
                process(driver, relationId)
                time.sleep(2)
            except Exception:
                # Rebuild the browser here rather than via reBuildDriver():
                # that helper does not return the driver it creates, so the
                # retry used to run against the already-quit session while
                # the fresh browser leaked.
                try:
                    driver.quit()
                except Exception:
                    pass
                time.sleep(1)
                driver = baseCore.buildDriver()
                driver.get(url)
                try:
                    process(driver, relationId)
                except Exception as e:
                    # One bad id must not abort the remaining ids.
                    log.error(f'{relationId}===查询失败: {e}')
    finally:
        # Release the browser on every exit path.
        try:
            driver.close()
        except Exception:
            pass
        driver.quit()


def zhejiang_qs():
    """Import Zhejiang tax-arrears notices published after '2023' into QS_Notice.

    The notice list is fetched with a form POST. For every listed notice
    whose publish date string compares greater than ``'2023'``, the notice
    page is opened, the first link inside ``.info-cont`` (an Excel workbook)
    is downloaded, cleaned up with pandas, and each row is inserted and
    committed. The number of inserted rows is logged at the end.
    """
    import io  # local import: only used to wrap the downloaded workbook bytes

    def getSoupP(url):
        """POST the list-search form to ``url`` and return the parsed page."""
        # Same effective payload as before; the literal used to repeat the
        # keys 'infotypeId', 'vc_title' and 'vc_number', of which Python
        # keeps only the last value.
        data_post = {
            'infotypeId': 'Z0705',
            'jdid': '15',
            'divid': 'div11889',
            'vc_title': '',
            'vc_number': '',
            'currpage': '',
            'vc_filenumber': '',
            'vc_all': '',
            'texttype': '',
            'fbtime': '',
            'area': '11330000002484088Y',
        }
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'lxml')

    headers = {
        'Accept': 'text/javascript, application/javascript, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }
    # Parameterised statement so cell text containing quotes cannot break or
    # alter the SQL. NOTE(review): assumes the shared cursor uses the ``%s``
    # (format) paramstyle, as the MySQL drivers do - confirm in BaseCore.
    insert_sql = (
        "INSERT INTO QS_Notice(nsrName,nsrsbh,fdName,IDNumber,address,qssz,qsje,xzqs,province,publishDate) "
        "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    url = 'http://zhejiang.chinatax.gov.cn/module/xxgk/search.jsp?infotypeId=Z0705&vc_title=&vc_number=&area=11330000002484088Y'
    soup = getSoupP(url)
    # The first row of the list table is not a notice and is skipped.
    tr_list = soup.select('body > table > tr > td > table > tr')[1:]
    num = 0
    for tr in tr_list:
        td_list = tr.find_all('td')
        pub_time = td_list[1].text.strip()
        # Plain string comparison against '2023', as in the original filter.
        if not pub_time > '2023':
            continue
        href = td_list[0].find('a').get('href')
        soup_ = getSoup(href, headers)
        href_ = soup_.select('.info-cont')[0].select('a')[0].get('href')
        if 'http' not in href_:
            href_ = 'http://zhejiang.chinatax.gov.cn' + href_
        req_ = requests.get(href_, headers=headers)
        # Wrap the bytes: passing raw bytes to read_excel is deprecated.
        df = pd.read_excel(io.BytesIO(req_.content))
        # The first sheet row holds the real column captions.
        df = df.rename(columns=df.iloc[0])
        df = df.drop(df.index[0])
        # Drop a trailing explanatory row ("说明：...") if there is one.
        # Iterating the row directly avoids Series.iteritems(), which was
        # removed in pandas 2.0.
        if not df.empty and any('说明：' in str(value) for value in df.iloc[-1]):
            df = df.drop(df.index[-1])
        df.dropna(how='all', inplace=True)
        # Forward-fill blank cells; fillna(method='ffill') is deprecated.
        df = df.ffill()
        nsrmc_list = df['纳税人名称'].to_list()
        shxydm_list = df['社会信用代码（纳税人识别号）'].to_list()
        try:
            fddbrw_list = df['法定代表人姓名'].to_list()
        except KeyError:
            # Some workbooks use the shorter caption.
            fddbrw_list = df['法定代表人'].to_list()
        fddbrwsfzh_list = df['法定代表人身份证号码'].to_list()
        scjydz_list = df['生产经营地址'].to_list()
        qssz_list = df['欠税税种'].to_list()
        qsje_list = df['欠税金额（元）'].to_list()
        xzqs_list = df['当期新增欠税（元）'].to_list()
        rows = zip(nsrmc_list, shxydm_list, fddbrw_list, fddbrwsfzh_list,
                   scjydz_list, qssz_list, qsje_list, xzqs_list)
        for row in rows:
            # str() keeps the stored text identical to what the old f-string
            # interpolation produced (including 'nan' for empty cells).
            values = [str(value) for value in row]
            cursor.execute(insert_sql, (*values, '浙江省', pub_time))
            cnx.commit()
            log.info('新增一条数据')
            num += 1
    log.info(f'共新增{num}条数据')


def zhejiang_qs_select():
    """Log the QS_Notice rows matching each company in the Excel sheet.

    Companies come from columns ``relationId`` and ``relationName`` of
    ``./浙江省企业.xlsx``. Each company is looked up by taxpayer id first and,
    when that finds nothing, by name. Matching rows are logged one by one;
    companies without any match get a "no data" log line.
    """
    df = pd.read_excel('./浙江省企业.xlsx', sheet_name='Sheet1')
    # Parameterised queries: a quote inside a company name or id no longer
    # breaks the statement. NOTE(review): assumes the shared cursor uses the
    # ``%s`` (format) paramstyle, as the MySQL drivers do - confirm in BaseCore.
    by_id_sql = "SELECT * FROM QS_Notice WHERE nsrsbh = %s"
    by_name_sql = "SELECT * FROM QS_Notice WHERE nsrName = %s"
    for relationId, relationName in zip(df['relationId'], df['relationName']):
        # str() mirrors the text the old f-string interpolation produced.
        cursor.execute(by_id_sql, (str(relationId),))
        datas = cursor.fetchall()
        if not datas:
            # Fall back to a lookup by taxpayer name.
            cursor.execute(by_name_sql, (str(relationName),))
            datas = cursor.fetchall()
        if not datas:
            log.info(f'{relationId}=={relationName}==无数据')
            continue
        for data in datas:
            log.info(data)


# http://anhui.chinatax.gov.cn/ 安徽
def anhui_aj():
    """Look up Anhui tax-case records for each taxpayer id and print them.

    The search endpoint answers with a script that fills a ``dataStore``
    array with an HTML fragment; the links in that fragment lead to detail
    pages, each of which is scraped into a ``{caption: value}`` dict.
    """

    def getAList(relationId):
        """Return the <a> tags of the search hits, or an empty list if none."""
        data_post = {
            'tablename': 'jcms_17',
            'title': f'{relationId},',
            'key': '4,',
        }
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        if 'dataStore = [];' in req.text:
            # Empty dataStore: the search found nothing.
            return []
        # Raw string: the old literal relied on invalid escape sequences
        # ('\[', '\]', '\;'), which newer Python versions warn about.
        fragments = re.findall(r'\[ "(.*)"\];', req.text)
        if not fragments:
            # Unexpected response shape; treat it as "no hits" instead of
            # failing with IndexError.
            return []
        return BeautifulSoup(fragments[0], 'html.parser').select('a')

    def getData(a):
        """Scrape the name/value rows of the page linked by ``a`` into a dict."""
        data = {}
        soup_ = getSoup(a.get('href'), headers)
        for tr in soup_.find_all('tr', class_='rlbbox'):
            name = tr.find('div', class_='rlbname').text
            value_div = tr.find('div', class_='rlbvalue')
            # Remove inline scripts so their source does not end up in the text.
            for script in value_div.find_all('script'):
                script.decompose()
            data[name] = value_div.text
        return data

    headers = {
        'Accept': 'text/javascript, application/javascript, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
    }

    url = 'http://anhui.chinatax.gov.cn/module/jslib/bulletin/ajaxfors.jsp?startrecord=1&endrecord=2&perpage=11'
    relationIds = ['91341282MA2UQ6UG8L']
    for relationId in relationIds:
        data_list = [getData(a) for a in getAList(relationId)]
        if data_list:
            print(data_list)
        else:
            print(f'{relationId}===无数据')


def anhui_qs():
    """Query the Anhui tax-arrears page for each taxpayer id and print the rows.

    The page is driven through a browser: the captcha image is OCR'd with the
    module-level ``getCode``, the id and captcha are submitted, and every
    result page of table ``qsqk`` is read into a list of dicts, which is then
    printed.
    """
    # Column captions in the order of the <td> cells of a result row.
    columns = (
        '公告企业或单位名称',
        '纳税人识别号',
        '法定代表人名称',
        '身份证件号码',
        '经营地点',
        '欠税税种',
        '欠税余额',
        '当期发生的欠税金额',
    )

    def inputId(driver, relationId):
        """Fill in taxpayer id and captcha, then click the first footer button."""
        code_im = driver.find_element(By.ID, 'imgYzm').screenshot_as_png
        code = getCode(code_im)
        driver.find_element(By.ID, 'nsrsbh').clear()
        driver.find_element(By.ID, 'yzm').clear()
        time.sleep(3)
        driver.find_element(By.ID, 'nsrsbh').send_keys(relationId)
        driver.find_element(By.ID, 'yzm').send_keys(code)
        driver.find_element(By.CLASS_NAME, 'panel-footer').find_elements(By.TAG_NAME, 'button')[0].click()
        time.sleep(3)

    def getDataList(driver):
        """Return one dict per data row of the currently shown result page."""
        table = driver.find_element(By.ID, 'qsqk')
        # The first row is skipped (presumably the header row).
        tr_list = table.find_elements(By.TAG_NAME, 'tr')[1:]
        data_list = []
        for tr in tr_list:
            td_list = tr.find_elements(By.TAG_NAME, 'td')
            data_list.append(
                {name: td_list[i].text.strip() for i, name in enumerate(columns)}
            )
        return data_list

    # The request-header dict that used to live here was never used (and was
    # copied from another province), so it has been dropped.
    url = 'https://etax.anhui.chinatax.gov.cn/qjskggcx'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'imgYzm'))
        )
        relationIds = ['9134052205702437X2', '340522748941919']
        for relationId in relationIds:
            inputId(driver, relationId)
            total_text = driver.find_element(By.CLASS_NAME, 'bootTotalPage').text
            total = int(re.findall(r'\d+', total_text)[0])
            data_list = []
            for i in range(total):
                data_list += getDataList(driver)
                if i < total - 1:
                    # Only page forward while another page exists.
                    driver.find_element(By.CLASS_NAME, 'page-next').click()
                    time.sleep(0.5)
            print(data_list)
    finally:
        # Always release the browser, even when a step above raised.
        driver.quit()


# http://fujian.chinatax.gov.cn/ 福建
def fujian_aj():
    """Look up Fujian tax-case records for each taxpayer id and print them.

    The search endpoint returns JSON with a hit count and, per page of eight,
    a ``docs`` list whose entries carry a detail ``url``. Every detail page
    is scraped into a ``{caption: value}`` dict; the dicts gathered for one
    id are printed as a list.
    """
    site = 'http://fujian.chinatax.gov.cn'

    def searchUrl(relationId, page):
        """Build the search URL for one taxpayer id and result page."""
        return f'http://fujian.chinatax.gov.cn/was5/web/search?channelid=291316&templet=zdaj.jsp&sortfield=-datefor&classsql=S2%3D{relationId}&r=0.41724026127570246&prepage=8&page={page}'

    def fetchJson(url):
        """GET ``url`` and return the decoded JSON body."""
        resp = requests.get(url, headers=headers)
        resp.encoding = resp.apparent_encoding
        return resp.json()

    def getTotal(url):
        """Return the hit count reported by the search endpoint."""
        return int(fetchJson(url)['count'])

    def getHref(url):
        """Return the detail links of one result page, without the last entry."""
        links = [doc['url'] for doc in fetchJson(url)['docs']]
        # The final entry of ``docs`` is always discarded.
        links.pop()
        return links

    def getData(href):
        """Scrape the two-cell rows of a detail page into a dict."""
        record = {}
        page_soup = getSoup(href, headers)
        rows = page_soup.select('body > div')[5].select('tr')[0].select('tr')
        for row in rows:
            cells = row.find_all('td')
            if len(cells) != 2:
                continue
            caption = cells[0].text.strip()
            record[caption] = cells[1].text.strip()
        return record

    headers = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Host': 'fujian.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.69',
        'X-Requested-With': 'XMLHttpRequest',
    }
    relationIds = ['913505213157340414', '91350582MA2YNJQX4T']
    for relationId in relationIds:
        total = getTotal(searchUrl(relationId, 1))
        # Eight hits per page; a partial last page still needs a request.
        full_pages, leftover = divmod(total, 8)
        pageSize = full_pages + 1 if leftover else full_pages
        data_list = []
        for page in range(1, pageSize + 1):
            for href in getHref(searchUrl(relationId, page)):
                # Relative links are resolved against the site root.
                target = href if 'http' in href else site + href
                data_list.append(getData(target))
        print(data_list)


# Requires login (not implemented yet)
def fujian_qs():
    """Placeholder for the Fujian tax-arrears query; does nothing."""
    return None


# http://jiangxi.chinatax.gov.cn/ 江西
def jiangxi_qs():
    """Query the Jiangxi tax-arrears page for each taxpayer id and print the rows.

    The page is driven through a browser inside frame ``iframeRight``: the
    captcha image is OCR'd with the module-level ``getCode``, id and captcha
    are submitted, and the rows under element ``tbody`` are read into dicts.
    Ids without rows print a "no data" line.
    """
    # Column captions for cells 1..9 of a result row (cell 0 is not used).
    columns = (
        '纳税人识别号',
        '纳税人名称',
        '法定代表人',
        '登记注册类型',
        '认定日期',
        '欠税税种',
        '欠税金额',
        '主管税务机关',
        '生产经营地址',
    )

    def inputID(driver, relationId):
        """Fill in taxpayer id and captcha, then click the query button."""
        code_img = driver.find_element(By.ID, 'imgCode').screenshot_as_png
        code = getCode(code_img)
        driver.find_element(By.ID, 'nsrsbm').clear()
        driver.find_element(By.ID, 'yzm').clear()
        driver.find_element(By.ID, 'nsrsbm').send_keys(relationId)
        driver.find_element(By.ID, 'yzm').send_keys(code)
        driver.find_element(By.CLASS_NAME, 'btn-blue-big').click()

    def getDataList(driver):
        """Return one dict of cell texts per result row; empty when no data."""
        tbody = driver.find_element(By.ID, 'tbody')
        if '无数据' in tbody.text:
            return []
        data_list = []
        for tr in tbody.find_elements(By.TAG_NAME, 'tr'):
            # Cells are <td> elements, so locate them by tag name (as the
            # sibling scrapers do), not by a CSS class called "td".
            td_list = tr.find_elements(By.TAG_NAME, 'td')
            # Store the cell text, not the WebElement: elements cannot be
            # used once the browser has been closed.
            data_list.append(
                {name: td_list[i].text.strip() for i, name in enumerate(columns, start=1)}
            )
        return data_list

    relationIds = ['913505213157340414', '91350582MA2YNJQX4T']
    url = 'https://etax.jiangxi.chinatax.gov.cn/etax/jsp/portal/web_iframe.jsp?menucode=15030500'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        time.sleep(1)
        driver.switch_to.frame('iframeRight')
        # Click the captcha image once before the first read.
        driver.find_element(By.ID, 'imgCode').click()
        time.sleep(3)
        for relationId in relationIds:
            inputID(driver, relationId)
            time.sleep(1)
            data_list = getDataList(driver)
            if data_list:
                print(data_list)
            else:
                print(f'{relationId}===无数据')
            time.sleep(3)
    finally:
        # Release the browser on every exit path.
        try:
            driver.close()
        except Exception:
            pass
        driver.quit()


# http://shandong.chinatax.gov.cn/ 山东
def shandong_aj():
    """Look up Shandong tax-case records for each taxpayer id and print them.

    The list query returns an HTML table; the last cell of every row embeds
    a numeric record id, which is used to request the detail record. The
    detail response is read tag by tag into a dict with the captions used
    elsewhere in this file. Ids without records print a "no data" line.
    """

    def getTr(relationId):
        """Return the result rows (without the first row) for ``relationId``."""
        url = f'https://etax.shandong.chinatax.gov.cn/DZSWJ/DZSWJ_SSWFSXAJ_CX_NAVIGATE?method=query&nsrmc=&nsrsbh={relationId}%20%20&zcdz=&zzjgdm=&fddbrxm=&fddbrsfzhm=&cwfzrxm=&cwfzrsfzhm='
        data_post = {
            'nsrmc': '',
            # Send the id actually being queried; this used to be a
            # hard-coded sample id regardless of ``relationId``.
            'nsrsbh': relationId,
            'zcdz': '',
            'zzjgdm': '',
            'fddbrxm': '',
            'fddbrsfzhm': '',
            'cwfzrxm': '',
            'cwfzrsfzhm': '',
        }
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        # Slice instead of ``del tr_list[0]`` so an empty table yields an
        # empty list rather than IndexError.
        return soup.select('#form1 > div.table-container > table > tr')[1:]

    def getData(record_id):
        """Fetch the detail record ``record_id`` and return it as a dict."""
        href = f'https://etax.shandong.chinatax.gov.cn/DZSWJ/DZSWJ_SSWFSXAJ_CX_NAVIGATE?method=queryMx&nsrmc=&nsrsbh={record_id}&zcdz=&zzjgdm=&fddbrxm=&fddbrsfzhm=&cwfzrxm=&cwfzrsfzhm=&cxnd=&cxdq=&ajxz='
        req = requests.post(href, headers=headers, data={'undefined': ''})
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')

        def text_of(tag):
            """Return the text of the first ``tag`` element in the response."""
            return soup.find(tag).text

        rep_name = text_of('fddbrhfzrxm')
        rep_sex = text_of('fddbrhfzrxb')
        rep_cert = text_of('fdrzjhm')
        fin_name = text_of('fyzjzrdcwfzrxm')
        fin_sex = text_of('fyzjzrdcwfzrxb')
        fin_cert = text_of('fyzzjhm')

        data = {}
        data['纳税人名称'] = text_of('nsrmc')
        data['纳税人识别号或社会信用代码'] = text_of('nsrsbh')
        data['组织机构代码'] = text_of('zzjgdm')
        data['注册地址'] = text_of('zcdz')
        data['法定代表人或者负责人姓名、性别及身份证号码（或其他证件号码）'] = (
            f"法定代表人或者负责人姓名：{rep_name}；性别：{rep_sex}；证件号码：{rep_cert}"
        )
        # NOTE(review): this entry is filled from the same tags as the one
        # above - confirm whether the response has separate tags for the
        # representative during the violation period.
        data['违法期间法人代表或者负责人姓名、性别及身份证号码（或其他证件号码）'] = (
            f"违法期间法人代表或者负责人姓名：{rep_name}；性别：{rep_sex}；证件号码：{rep_cert}"
        )
        if fin_name == '' and fin_sex == '' and fin_cert == '':
            data['负有直接责任的财务人员姓名、性别及身份证号码（或其他证件号码）'] = ''
        else:
            data['负有直接责任的财务人员姓名、性别及身份证号码（或其他证件号码）'] = (
                f"负有直接责任的财务人员姓名：{fin_name}；性别：{fin_sex}；证件号码：{fin_cert}"
            )
        data['实际负责人姓名、性别及身份证号码（或其他证件号码）'] = ''
        data['负有直接责任的中介机构信息'] = text_of('fyzjzrdzjjgxx')
        data['案件性质'] = text_of('ajxz')
        data['主要违法事实 相关法律依据及税务处理处罚情况'] = text_of('zywfss') + text_of('xgflyjjswclcfqk')
        return data

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://etax.shandong.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # The HTTP header is spelled "Referer"; the former "Refer" is not a
        # header any server looks at.
        'Referer': 'https://etax.shandong.chinatax.gov.cn/dzswj/zdsswfsxajgbl.jsp',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'iframe',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
    }
    # Matches the numeric id inside ...('<digits>')" in the last cell's markup.
    id_pattern = r'''\('(\d+)'\)"'''
    relationIds = ['913701055755547177']
    for relationId in relationIds:
        data_list = []
        for tr in getTr(relationId):
            td_list = tr.select('td')
            found = re.findall(id_pattern, str(td_list[-1])) if td_list else []
            if not found:
                # Row without a detail id: nothing to fetch.
                continue
            data_list.append(getData(found[0]))
        # Output the result like the sibling *_aj functions do; the list
        # used to be built and then silently discarded.
        if data_list:
            print(data_list)
        else:
            print(f'{relationId}===无数据')


def shandong_qs():
    """Query the Shandong tax-arrears page for each taxpayer id and print the rows.

    The page is driven through a browser: the captcha is refreshed and OCR'd
    with the module-level ``getCode``, id and captcha are submitted, and the
    result table (class ``tablenew`` inside frame ``rightFrame``) is read
    into dicts. Ids without rows print a "no data" line.
    """
    # Column captions for cells 1..21 of a result row (cell 0 is not used).
    columns = (
        '纳税人识别号',
        '纳税人名称',
        '生产经营地址',
        '登记注册类型',
        '法定代表人',
        '身份证件号码',
        '合计',
        '增值税',
        '消费税',
        '营业税',
        '企业所得税',
        '个人所得税',
        '资源税',
        '城建税',
        '房产税',
        '印花税',
        '土地使用税',
        '土地增值税',
        '车船税',
        '耕地占用税',
        '其中：本期新增',
    )

    def inputID(driver, relationId):
        """Refresh and OCR the captcha, fill in the form and click query."""
        # Clicking the image before the screenshot requests a new captcha.
        driver.find_element(By.ID, 'verifyCodeImg').click()
        time.sleep(1)
        code_img = driver.find_element(By.ID, 'verifyCodeImg').screenshot_as_png
        code = getCode(code_img)
        driver.find_element(By.ID, 'nsrsbh').clear()
        driver.find_element(By.ID, 'yzm').clear()
        driver.find_element(By.ID, 'nsrsbh').send_keys(relationId)
        driver.find_element(By.ID, 'yzm').send_keys(code)
        driver.find_element(By.ID, 'cx').click()
        time.sleep(3)

    def getDataList(driver):
        """Return one dict of cell texts per result row; empty when no data.

        Switches into frame ``rightFrame`` and leaves the driver there; the
        caller switches back to the parent frame.
        """
        driver.switch_to.frame('rightFrame')
        time.sleep(1)
        table = driver.find_element(By.CLASS_NAME, 'tablenew')
        if '未查询到有效的数据' in table.text:
            return []
        data_list = []
        # The first two rows are skipped (presumably header rows).
        for tr in table.find_elements(By.TAG_NAME, 'tr')[2:]:
            td_list = tr.find_elements(By.TAG_NAME, 'td')
            # Store the cell text, not the WebElement: elements cannot be
            # used once the browser has been closed.
            data_list.append(
                {name: td_list[i].text.strip() for i, name in enumerate(columns, start=1)}
            )
        return data_list

    url = 'https://etax.shandong.chinatax.gov.cn/GgcxQscxAction.do?method=init'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        time.sleep(1)
        relationIds = ['913701055755547177', '913701055755547177']
        for relationId in relationIds:
            inputID(driver, relationId)
            data_list = getDataList(driver)
            if data_list:
                # Found rows used to be dropped silently; print them like the
                # sibling *_qs functions do.
                print(data_list)
            else:
                print(f"{relationId}===无数据")
            driver.switch_to.parent_frame()
            time.sleep(2)
    finally:
        # Always release the browser, even when a step above raised.
        driver.quit()


# https://henan.chinatax.gov.cn/ 河南
def henan_aj():
    """Search henan.chinatax.gov.cn for each hard-coded relationId and print the detail records.

    For every ID the search page is POSTed, each result link is opened, and the
    rows of the detail table are collected into a dict. The list of dicts for
    one ID is printed; nothing is returned.
    """

    def getSoupPost(url, relationId):
        """POST the search form (filtered by ``relationId``) through a proxy and return the parsed HTML."""
        ip = baseCore.get_proxy()
        data_post = {
            'filter_LIKE_EXT_STR6': '',
            'filter_LIKE_EXT_STR13': '',
            'filter_LIKE_EXT_STR15': '',
            'filter_LIKE_main.TITLE': '',
            'filter_LIKE_EXT_STR2': relationId,
            'filter_LIKE_EXT_STR4': '',
            'filter_LIKE_EXT_STR3': '',
            'filter_LIKE_EXT_STR8': '',
            'filter_LIKE_EXT_STR19': '',
        }
        req = requests.post(url, headers=headers, data=data_post, proxies=ip)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'html.parser')

    def getData(url):
        """Fetch a detail page and return its 'zhongdatable' rows as {th text: td text}."""
        soup = getSoup(url, headers)
        data = {}
        table = soup.find('table', class_='zhongdatable')
        for tr in table.find_all('tr'):
            name = tr.find('th').text.strip()
            value = tr.find('td').text.strip()
            data[name] = value
        return data

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'henan.chinatax.gov.cn',
        'Origin': 'https://henan.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Refer': 'https://henan.chinatax.gov.cn/eportal/ui?pageId=bdfef9dfa679454c86d68f2203a69e84&currentPage=1&moduleId=143e1aeaa3b6405ea0fe04142c021d5b',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1'
    }
    relationIds = ['91410222MA9GF2EA6U', '91410222MA9FBJ8504']
    for relationId in relationIds:
        data_list = []
        # Restart paging for every ID; previously the page counter and URL
        # leaked from one ID to the next.
        page = 1
        while True:
            url = f'https://henan.chinatax.gov.cn/eportal/ui?pageId=bdfef9dfa679454c86d68f2203a69e84&currentPage={page}&moduleId=143e1aeaa3b6405ea0fe04142c021d5b'
            soup = getSoupPost(url, relationId)
            li_list = soup.find('ul', class_='zhongdaList').find_all('li')
            # The first <li> is skipped (presumably a header row - TODO confirm).
            for li in li_list[1:]:
                href = li.find('a').get('href')
                if 'http' not in href:
                    href = 'https://henan.chinatax.gov.cn' + href
                data_list.append(getData(href))
            try:
                flg = soup.find('div', class_='listFenYe').find('span').find_all('a')[-1].text
            except (AttributeError, IndexError):
                # No pager on the page: nothing more to fetch.
                break
            # NOTE(review): this is the original *string* comparison between
            # the current page number and the text of the last pager link.
            # It stops paging when the page number sorts before that text;
            # confirm what the link text is and whether a numeric
            # "page >= last page" test was intended.
            if str(page) < flg:
                break
            page += 1
        print(data_list)


# Not implemented: could not get past the site's anti-scraping protection.
def henan_qs():
    """Placeholder that does nothing and returns None."""
    return None


# http://hubei.chinatax.gov.cn/ 湖北
def hubei_aj():
    """Query the etax.hubei.chinatax.gov.cn JSON endpoint for each hard-coded relationId.

    The record count is requested first, then every page of 15 records is
    fetched and each record is mapped to a dict with Chinese column titles.
    The list collected for one ID is printed; nothing is returned.
    """

    def buildUrl(relationId, page):
        """Return the query URL for one ID and one page (15 records per page)."""
        return f'https://etax.hubei.chinatax.gov.cn/webroot/gzcxAction.do?method=zdsswfajcx&page={page}&limit=15&nsrsbh={relationId}&zzjgdm=&nsrmc=&fddbrmc=&fddbrzjh=&cwfzrmc=&cwfzrzjh=&nsrlx=&ds=%2525E5%25258D%252581%2525E5%2525A0%2525B0&zcdz=&ajxz=&ssnd='

    def fetchJson(url):
        """GET ``url`` with the shared headers and return the decoded JSON body."""
        resp = requests.get(url, headers=headers)
        resp.encoding = resp.apparent_encoding
        return resp.json()

    def getTotal(relationId):
        """Return the 'count' field of the first result page as an int."""
        return int(fetchJson(buildUrl(relationId, 1))['count'])

    def getDataJson(url):
        """Return the 'data' list of the JSON document at ``url``."""
        return fetchJson(url)['data']

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'etax.hubei.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Refer': 'https://etax.hubei.chinatax.gov.cn/webroot/nsfw/fwdt/gzcx/zdsswfajcx.jsp',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'X-Requested-With': 'XMLHttpRequest'
    }
    # (output column title, key in the JSON record), in output order.
    field_map = (
        ('纳税人名称', 'NSRMC'),
        ('纳税人识别号或社会信用代码', 'NSRSBH'),
        ('组织机构代码', 'ZZJG'),
        ('注册地址', 'ZCDZ'),
        ('法定代表人或者负责人姓名、性别及身份证号码（或其他证件号码）', 'FDDBR'),
        ('违法期间法人代表或者负责人姓名、性别及身份在号码（或其他证件号码）', 'WFQJ_FDDBR'),
        ('负有直接责任的财务人员姓名、性别及身份证号码（或其他证件号码）', 'CWFZR'),
        ('实际负责人姓名、性别及身份证号码（或其他证件号码）', 'JJZRR'),
        ('负有直接责任的中介机构信息', 'ZZJGRY'),
        ('案件性质', 'AJXZ'),
        ('主要违法事实相关法律依据及税务处理处罚情况', 'WFSS'),
    )
    relationIds = ['91420302MA48JD4Y4Y', '91420325MA48YKGJ9Q']
    for relationId in relationIds:
        total = getTotal(relationId)
        # Number of pages needed for `total` records at 15 per page.
        pageSize = int(total / 15) + (0 if total % 15 == 0 else 1)
        data_list = []
        for page in range(1, pageSize + 1):
            for record in getDataJson(buildUrl(relationId, page)):
                data_list.append({title: record[key] for title, key in field_map})
        print(data_list)


def hubei_qs():
    """Drive the etax.hubei.chinatax.gov.cn query page in a browser for each hard-coded relationId.

    For every ID the captcha is read with OCR, the form is submitted and the
    result table is paged through. The collected rows (or a "no data" line)
    are printed; nothing is returned. The browser is always shut down, even
    when an error occurs.
    """
    row_xpath = '/html/body/div[1]/div/div[1]/div[2]/table/tbody/tr'

    def inputID(driver, relationId):
        """Refresh and OCR the captcha, fill in captcha and ID, then submit the search."""
        time.sleep(1)
        # Clicking the captcha image first, as the original code does
        # (presumably to request a fresh captcha - TODO confirm).
        driver.find_element(By.ID, 'yzmimg').click()
        code_img = driver.find_element(By.ID, 'yzmimg').screenshot_as_png
        code = getCode(code_img)
        driver.find_element(By.CLASS_NAME, 'inlineinput').clear()
        driver.find_element(By.ID, 'nsrsbh').clear()
        driver.find_element(By.CLASS_NAME, 'inlineinput').send_keys(code)
        driver.find_element(By.ID, 'nsrsbh').send_keys(relationId)
        driver.find_element(By.ID, 'search').click()

    def getTotal(driver):
        """Return the first number shown in the pager's 'layui-laypage-count' element."""
        total_span = driver.find_element(By.CLASS_NAME, 'layui-table-page').find_element(
            By.CLASS_NAME, 'layui-laypage-count').text
        return int(re.findall(r'\d+', total_span)[0])

    def getDataList(driver):
        """Return all result rows as dicts; empty when no row appears within 20 seconds."""
        data_list = []
        try:
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.XPATH, row_xpath))
            )
        except Exception:
            # No row showed up in time: treated as "no data".
            return data_list
        total = getTotal(driver)

        # `total` is only an upper bound for the number of pager clicks; the
        # loop normally ends as soon as the "next" control is disabled.
        for _ in range(total):
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.XPATH, row_xpath))
            )
            for tr in driver.find_elements(By.XPATH, row_xpath):
                td_list = tr.find_elements(By.TAG_NAME, 'td')
                data_list.append({
                    '纳税人名称': td_list[1].text.strip(),
                    '纳税人识别号': td_list[2].text.strip(),
                    '省份': td_list[3].text.strip(),
                    '法定代表人或负责人姓名': td_list[4].text.strip(),
                    '居民身份证或其他有效身份证件号码': td_list[5].text.strip(),
                    '经营地址': td_list[6].text.strip(),
                    '欠税税种': td_list[7].text.strip(),
                    '欠税金额': td_list[8].text.strip(),
                    '缴款期限': td_list[9].text.strip(),
                    '公告时间': td_list[10].text.strip(),
                    '主管税务机关': td_list[11].text.strip(),
                })
            next_btn = driver.find_element(By.CLASS_NAME, 'layui-table-page').find_element(
                By.CLASS_NAME, 'layui-laypage-next')
            # Stop on the last page instead of clicking a dead "next" link and
            # re-reading the same rows. Assumes layui marks the disabled link
            # with the 'layui-disabled' class - TODO confirm against the page.
            if 'layui-disabled' in (next_btn.get_attribute('class') or ''):
                break
            next_btn.click()
        return data_list

    relationIds = ['91420105717936682B', '91420105572048376B']
    driver = baseCore.buildDriver()
    try:
        url = 'https://etax.hubei.chinatax.gov.cn/portal/iframe.c?menu=GZFW_170000303&title=%E6%AC%A0%E7%A8%8E%E5%85%AC%E5%91%8A&goUrl=/webroot/nsfw/fwdt/gzcx/qsgg.jsp'
        driver.get(url)
        time.sleep(2)
        driver.switch_to.frame('ifm')
        for relationId in relationIds:
            inputID(driver, relationId)
            data_list = getDataList(driver)
            if data_list:
                print(data_list)
            else:
                print(f'{relationId}===无数据')
            time.sleep(2)
    finally:
        # quit() ends the whole browser session, so nothing is left running
        # when a lookup fails half-way.
        driver.quit()


# http://hunan.chinatax.gov.cn/ 湖南
def hunan_aj():
    """POST the hunan.chinatax.gov.cn 'hardcasegetdatanew' endpoint for each hard-coded relationId.

    The page count is read first; every page is then fetched and each JSON
    record is turned into a dict with Chinese column titles. The list for one
    ID (or a "no data" line) is printed; nothing is returned.
    """

    def buildForm(relationId, page):
        """Return the form fields for one ID and one page (10 records per page)."""
        return {
            'case_type': '1',
            'page': f'{page}',
            'limit': '10',
            'is_search': '1',
            'taxpayerName': '',
            'taxpayerNumber': relationId,
            'organizationalCode': '',
            'place': '',
            'legalName': '',
            'legalIdCard': '',
            'financeName': '',
            'financeIdCard': '',
            'personName': '',
            'personIdCard': '',
            '_csrf': 'b26e94d2-0d17-4232-99de-4cc1287405d9',
        }

    def postJson(form):
        """POST ``form`` to the endpoint and return the decoded JSON body."""
        resp = requests.post(url, headers=headers, data=form)
        resp.encoding = resp.apparent_encoding
        return resp.json()

    def getTotal(relationId):
        """Return hardCasePage.totalPages of the first result page as an int."""
        return int(postJson(buildForm(relationId, 1))['hardCasePage']['totalPages'])

    def blankIfEmpty(value):
        """Map '' and None to ''; return every other value unchanged."""
        if value == '' or value is None:
            return ''
        return value

    def describePerson(record, nameKey, sexKey, idKey):
        """Return 'name,gender,id' for a person, or '' when the name is empty/None.

        The gender is rendered as '男' when the sex field equals 1, else '女'.
        """
        name = record[nameKey]
        if name == '' or name is None:
            return ''
        gender = '男' if record[sexKey] == 1 else '女'
        return f"{name},{gender},{record[idKey]}"

    def getDatalist(relationId, page):
        """Return the records of one result page as a list of dicts."""
        records = postJson(buildForm(relationId, page))['data']
        data_list = []
        for record in records:
            data_list.append({
                '纳税人名称': blankIfEmpty(record['taxpayerName']),
                '纳税人识别号或社会信用代码': blankIfEmpty(record['taxpayerNumber']),
                '组织机构代码': blankIfEmpty(record['organizationalCode']),
                '注册地址': blankIfEmpty(record['place']),
                '法定代表人或负责人姓名、性别及身份证号码（或其他证件号码）': describePerson(
                    record, 'legalName', 'legalSex', 'legalIdCard'),
                '违法期间法人代表或负责人姓名、性别及身份证号码（或其他证件号码）': describePerson(
                    record, 'legalNameDuring', 'legalSexDuring', 'legalIdCardDuring'),
                '负有直接责任的财务人员姓名、性别及身份证号码（或其他证件号码）': describePerson(
                    record, 'financeName', 'financeSex', 'financeIdCard'),
                '实际负责人姓名、性别及身份证号码（或其他证件号码）': describePerson(
                    record, 'principalName', 'principalSex', 'principalIdCard'),
                '负有直接责任的中介机构信息': blankIfEmpty(record['agency']),
                '案件性质': blankIfEmpty(record['hardCaseType']['typeName']),
                '主要违法事实相关法律依据及税务处理处罚情况': blankIfEmpty(record['content']),
            })
        return data_list

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'hunan.chinatax.gov.cn',
        'Origin': 'http://hunan.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Refer': 'http://hunan.chinatax.gov.cn/hardcase/20190408002097',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'X-Requested-With': 'XMLHttpRequest'
    }
    url = 'http://hunan.chinatax.gov.cn/hardcasegetdatanew'
    relationIds = ['91430481096047531M', '91430400MA4RW6AN1J', '91430481096047531A']
    for relationId in relationIds:
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        data_list = []
        for page in range(1, total + 1):
            data_list.extend(getDatalist(relationId, page))
        print(data_list)


def hunan_qs():
    """Drive the hunan.chinatax.gov.cn 'arrears' page in a browser for each hard-coded relationId.

    For every ID the captcha is read with OCR, the search is submitted and the
    result table is read page by page. Each row is printed as a dict; a
    "no data" line is printed when no record count can be read. Nothing is
    returned. The browser is always shut down, even when an error occurs.
    """

    @retry(tries=5, delay=5)
    def inputID(driver, relationId):
        """Fill in captcha and ID and submit; raise RuntimeError (so ``retry`` repeats) when an alert pops up."""
        driver.find_element(By.ID, 'codeImgGm').click()
        time.sleep(1)
        code_img = driver.find_element(By.ID, 'codeImgGm').screenshot_as_png
        code = getCode(code_img)
        driver.find_element(By.CLASS_NAME, 'verifycode').clear()
        driver.find_element(By.ID, 'search_value').clear()
        driver.find_element(By.CLASS_NAME, 'verifycode').send_keys(code)
        driver.find_element(By.ID, 'search_value').send_keys(relationId)
        driver.find_element(By.CLASS_NAME, 'searchitems').find_element(By.TAG_NAME, 'button').click()
        # Wait first, then probe: an alert raised once the server has answered
        # would be missed by a check made immediately after the click.
        time.sleep(2)
        if EC.alert_is_present()(driver):
            driver.switch_to.alert.accept()
            # Explicit exception instead of a bare `raise` with no active
            # exception; the type (RuntimeError) is the same as before.
            # Presumably the alert reports a rejected captcha - TODO confirm.
            raise RuntimeError(f'alert shown after submitting the query for {relationId}')

    def getDataList(driver, pageSize):
        """Read up to ``pageSize`` result pages and return the rows as a list of dicts."""
        data_list = []
        for page in range(pageSize):
            tr_list = driver.find_element(By.ID, 'dataList').find_elements(By.TAG_NAME, 'tr')
            for tr in tr_list:
                td_list = tr.find_elements(By.TAG_NAME, 'td')
                data = {
                    '纳税人名称': td_list[0].text.strip(),
                    '纳税人识别号': td_list[1].text.strip(),
                    '法定代表人(负责人)姓名': td_list[2].text.strip(),
                    '法定代表人(负责人)证件类型': td_list[3].text.strip(),
                    '法定代表人(负责人)证件号码': td_list[4].text.strip(),
                    '经营地点': td_list[5].text.strip(),
                    '欠税税种': td_list[6].text.strip(),
                    '欠税金额(元)': td_list[7].text.strip(),
                    '当期新发生的欠税金额(元)': td_list[8].text.strip(),
                    '时间': td_list[9].text.strip(),
                }
                print(data)
                data_list.append(data)
            if page == pageSize - 1:
                break
            # Move to the next page; without this the same page was read
            # `pageSize` times. Assumes a layui pager (the record count is read
            # from 'layui-laypage-count') whose next control has the class
            # 'layui-laypage-next' and gets 'layui-disabled' on the last page
            # - TODO confirm against the page.
            next_buttons = driver.find_elements(By.CLASS_NAME, 'layui-laypage-next')
            if not next_buttons or 'layui-disabled' in (next_buttons[0].get_attribute('class') or ''):
                break
            next_buttons[0].click()
            time.sleep(1)
        return data_list

    url = 'http://hunan.chinatax.gov.cn/arrears/20190717003534'
    relationIds = ['92431281MACB3BPG5U', '92431281MA4RKRQ31A', '92431281MA4RKRQ31B']
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        time.sleep(2)
        Select(driver.find_element(By.ID, 'search_type')).select_by_value('taxpayer_number')
        time.sleep(1)
        for relationId in relationIds:
            inputID(driver, relationId)
            try:
                total = driver.find_element(By.CLASS_NAME, 'layui-laypage-count').text
                total = int(re.findall(r'\d+', total)[0])
            except Exception:
                # No pager / no number in it: treated as "no data".
                print(f'{relationId}===无数据')
                continue
            # Pages needed for `total` records at 10 per page (ceiling division).
            pageSize = (total + 9) // 10
            data_list = getDataList(driver, pageSize)
    finally:
        # quit() ends the whole browser session, so nothing is left running
        # when a lookup fails half-way.
        driver.quit()


# http://guangdong.chinatax.gov.cn/ 广东
def guangdong_aj():
    """Query guangdong.chinatax.gov.cn (zdsswfaj) for each hard-coded relationId and print the detail records.

    The number of result pages is read first; for every page the record IDs
    are extracted and each record's detail table is fetched and printed as a
    dict. A "no data" line is printed when there are no pages. Nothing is
    returned.
    """
    query_url = 'http://guangdong.chinatax.gov.cn/siteapps/webpage/gdtax/zdsswfaj/query.jsp'

    def postQuery(relationId, page):
        """POST the search form for one ID and one page; return the parsed HTML."""
        data_post = {
            'pageSize': '10',
            'pageNo': f'{page}',
            'nsr_mc': '',
            'nsr_sbh': relationId,
            'fddbr_xm': '',
            'searchway': '02',
            'zcdz': '',
            'zzjgdm': '',
            'fddbrzjhm': '',
            'cwfzrxm': '',
            'cwfzrzjhm': '',
        }
        req = requests.post(query_url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'lxml')

    def getTotal(relationId):
        """Return the page count parsed from the second-to-last <span> of the 'pagesdiv' pager."""
        soup = postQuery(relationId, 1)
        pager_text = soup.find('div', class_='pagesdiv').find_all('span')[-2].text
        return int(re.findall(r'共\s+(\d+)\s+页', pager_text)[0])

    def getIDList(relationId, page):
        """Return the record IDs found in the onclick handlers of one result page."""
        soup = postQuery(relationId, page)
        tr_list = soup.find('table', attrs={'id': 'zdss_tb'}).find_all('tr')
        id_list = []
        # The first <tr> is skipped (presumably the header row - TODO confirm).
        for tr in tr_list[1:]:
            # The ID is the quoted argument of the onclick call: ...('<id>')
            record_id = re.findall(r"\('(.*?)'\)", tr.find('a').get('onclick'))[0]
            id_list.append(record_id)
        return id_list

    def getData(id):
        """Fetch the detail table of one record and return it as {first cell: second cell}."""
        data = {}
        url = 'http://guangdong.chinatax.gov.cn/siteapps/webpage/gdtax/zdsswfaj/service.jsp'
        req = requests.post(url, headers=headers, data={'manuscriptId': id})
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        # The last <tr> is dropped, as in the original code.
        for tr in soup.find_all('tr')[:-1]:
            td_list = tr.find_all('td')
            name = td_list[0].text.strip()
            value = td_list[1].text.strip()
            data[name] = value
        return data

    headers = {
        'Accept': 'text/html, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'guangdong.chinatax.gov.cn',
        'Origin': 'http://guangdong.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Refer': 'http://guangdong.chinatax.gov.cn/siteapps/webpage/gdtax/zdsswfaj/index.jsp',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'X-Requested-With': 'XMLHttpRequest'
    }
    relationIds = ['91440400MA4X1MB69A', '91440400MA4X1MB69C']
    for relationId in relationIds:
        data_list = []
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        for page in range(1, total + 1):
            for record_id in getIDList(relationId, page):
                data = getData(record_id)
                data_list.append(data)
                print(data)


def guangdong_qs():
    """Drive the guangdong.chinatax.gov.cn 'qsgg_search' page in a browser for each hard-coded relationId.

    The page has several tabs (the <li> elements under class 'hd'), each
    backed by its own iframe. For every ID every tab is searched, all result
    pages are read, and each result link is fetched and parsed into a dict.
    The collected list (or a "no data" line) is printed per ID; nothing is
    returned. The browser is always shut down, even when an error occurs.
    """

    def inputID(driver, relationId):
        """Copy the on-page verification code into its input, enter the ID and submit."""
        code_div = driver.find_element(By.CLASS_NAME, 'verify-code')
        # The code is rendered as text in <font> elements; concatenate them.
        code = ''.join(
            code_font.text.strip() for code_font in code_div.find_elements(By.TAG_NAME, 'font')
        )
        driver.find_element(By.CLASS_NAME, 'varify-input-code').clear()
        driver.find_element(By.ID, 'nsr_sbh').clear()
        driver.find_element(By.CLASS_NAME, 'varify-input-code').send_keys(code)
        driver.find_element(By.ID, 'nsr_sbh').send_keys(relationId)
        driver.find_element(By.ID, 'cx').click()

    def getTotal(driver):
        """Return the first number in the second-to-last <span> of the 'pagediv' pager."""
        total = driver.find_element(By.CLASS_NAME, 'pagediv').find_elements(By.TAG_NAME, 'span')[
            -2].text.strip()
        return int(re.findall(r'\d+', total)[0])

    def getDataList(driver):
        """Open every result link of the current page and return the parsed detail dicts."""
        data_list = []
        tr_list = driver.find_element(By.CLASS_NAME, 'sscx_list').find_elements(By.TAG_NAME, 'tr')
        # The first <tr> is skipped (presumably the header row - TODO confirm).
        for tr in tr_list[1:]:
            href = tr.find_element(By.TAG_NAME, 'a').get_attribute('href')
            soup = getSoup(href, headers=headers)
            data = {}
            # The first row of the detail table is skipped as well.
            for tr_ in soup.find('table', class_='nsrcxTable').find_all('tr')[1:]:
                name = tr_.find('th').text.strip()
                value = tr_.find('td').text.strip()
                data[name] = value
            data_list.append(data)
        return data_list

    headers = {
        'Accept': 'text/html, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'guangdong.chinatax.gov.cn',
        'Origin': 'http://guangdong.chinatax.gov.cn',
        'Pragma': 'no-cache',
        'Refer': 'http://guangdong.chinatax.gov.cn/siteapps/webpage/gdtax/zdsswfaj/index.jsp',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'X-Requested-With': 'XMLHttpRequest'
    }
    relationIds = ['914400001903589775', '91440000729196992A']
    url = 'http://guangdong.chinatax.gov.cn/siteapps/webpage/gdtax/qsgg/qsgg_search.jsp?SITECODE=001100'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        li_list = driver.find_element(By.CLASS_NAME, 'hd').find_elements(By.TAG_NAME, 'li')
        for relationId in relationIds:
            data_list = []
            # flg is the 1-based position of the tab and of its iframe's <div>.
            for flg, li in enumerate(li_list, start=1):
                li.click()
                frame = driver.find_element(By.XPATH, f'/html/body/div/div[2]/div[{flg}]/iframe')
                driver.switch_to.frame(frame)
                time.sleep(1)
                # Search for the ID of the current iteration (the original
                # always searched for relationIds[0]).
                inputID(driver, relationId)
                total = getTotal(driver)
                # With total == 0 the loop body is simply not entered, so the
                # driver still leaves the iframe below before the next tab is
                # clicked (the original `continue` stayed inside the frame).
                for _ in range(total):
                    data_list += getDataList(driver)
                    driver.find_element(By.ID, 'nextPage').click()
                    time.sleep(1)
                driver.switch_to.parent_frame()
                time.sleep(1)
            if len(data_list) == 0:
                print(f'{relationId}===无数据')
            else:
                print(data_list)
    finally:
        # quit() ends the whole browser session, so nothing is left running
        # when a lookup fails half-way.
        driver.quit()


# http://guangxi.chinatax.gov.cn/ 广西
def guangxi_aj():
    """Query the guangxi.chinatax.gov.cn 'restSearch' endpoint for each hard-coded relationId.

    The page count is read first; for every page the document URLs are
    collected and each document's table is parsed into a dict. The list for
    one ID (or a "no data" line) is printed; nothing is returned.
    """

    def getTotal(relationId):
        """Return pager.pageCount of the first search page as an int (request goes through a proxy)."""
        proxy = baseCore.get_proxy()
        url = f'https://guangxi.chinatax.gov.cn/restSearch?channelid=290909&searchword=(NSR_ID%3D{relationId})&orderby=RELEVANCE&page=1&pageSize=10'
        resp = requests.get(url, headers=headers, proxies=proxy)
        resp.encoding = resp.apparent_encoding
        return int(resp.json()['pager']['pageCount'])

    def getHrefList(relationId, page):
        """Return the DOCPUBURL of every entry on one search page (request goes through a proxy)."""
        proxy = baseCore.get_proxy()
        url = f'https://guangxi.chinatax.gov.cn/restSearch?channelid=290909&searchword=(NSR_ID%3D{relationId})&orderby=RELEVANCE&page={page}&pageSize=10'
        resp = requests.get(url, headers=headers, proxies=proxy)
        resp.encoding = resp.apparent_encoding
        hrefs = [entry['DOCPUBURL'] for entry in resp.json()['datas']]
        resp.close()
        return hrefs

    def cellText(cell):
        """Return the stripped cell text with non-breaking spaces turned into plain spaces."""
        return cell.text.strip().replace('\xa0', ' ')

    def getData(url):
        """Parse the 'article-body' table of a document into {tab_left text: tab_right text}."""
        soup = getSoup(url, headers)
        record = {}
        for row in soup.find('div', class_='article-body').find_all('tr'):
            name = cellText(row.find('td', class_='tab_left'))
            value = cellText(row.find('td', class_='tab_right'))
            record[name] = value
        return record

    # NOTE(review): 'Host' and 'Origin' name etax.guangxi.chinatax.gov.cn:9723
    # while the requests above go to guangxi.chinatax.gov.cn - confirm this is
    # intended.
    headers = {
        'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'etax.guangxi.chinatax.gov.cn:9723',
        'Origin': 'https://etax.guangxi.chinatax.gov.cn:9723',
        'Pragma': 'no-cache',
        'Refer': 'https://etax.guangxi.chinatax.gov.cn:9723/web/dzswj/taxclient/ggfw/qsgg.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'X-Requested-With': 'XMLHttpRequest'
    }
    relationIds = ['91450100MA5MUKH374', '91450100MA5MWH1C19', '91450100MA5MUKH388']
    for relationId in relationIds:
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        data_list = []
        for page in range(1, total + 1):
            for href in getHrefList(relationId, page):
                data_list.append(getData(href))
        print(data_list)


def guangxi_qs():
    """Download the records of etax.guangxi.chinatax.gov.cn's 'selectDM_ZJ_GG' endpoint into table QS_Notice.

    For each of the three taxpayer types the total record count is read, the
    records are fetched in windows of 10, and every record that is not yet in
    QS_Notice (same publish date, name, ID and tax type, province '广西省') is
    inserted and committed. The number of inserted rows is printed; nothing
    is returned.
    """

    def postQuery(taxpayerType, start, end):
        """POST one query window through a proxy and return the decoded JSONP payload."""
        ip = baseCore.get_proxy()
        url = f'https://etax.guangxi.chinatax.gov.cn:9723/web/selectDM_ZJ_GG.do?callback=jQuery111107930089136844747_{time.time()}'
        data_post = {
            'QYLX': taxpayerType,
            'GGSQ': '',
            'START': f'{start}',
            'END': f'{end}',
            'CUR_USERID': '-1',
        }
        req = requests.post(url, headers=headers, data=data_post, proxies=ip)
        req.encoding = req.apparent_encoding
        try:
            # The response is JSONP: callback( {...} ). Take the JSON between
            # the outermost parentheses.
            return json.loads(re.findall(r'\((.*)\)', req.text)[0])
        finally:
            req.close()

    def getTotal(type):
        """Return the 'TOTAL' value of the first record for this taxpayer type, or 0 when no record is returned."""
        rows = postQuery(type, 1, 10)['data']
        if not rows:
            return 0
        return int(rows[0]['TOTAL'])

    def getDataList(type, start, end):
        """Return the records in the window START..END for this taxpayer type."""
        return postQuery(type, start, end)['data']

    def select(publishDate, nsrName, nsrsbh, qssz):
        """Return the QS_Notice rows that already match this notice (empty when there are none)."""
        # Parameterized query: the values are scraped text and must not be
        # spliced into the SQL. Assumes the cursor uses the '%s' placeholder
        # style (pymysql / MySQLdb / mysql-connector) - TODO confirm against
        # BaseCore.
        sql = ("select * from QS_Notice where publishDate=%s and province='广西省' "
               "and nsrName=%s and nsrsbh=%s and qssz=%s")
        cursor.execute(sql, (publishDate, nsrName, nsrsbh, qssz))
        # Fetch the rows; the original called execute() a second time without
        # a statement, which raises instead of returning a result.
        return cursor.fetchall()

    headers = {
        'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'etax.guangxi.chinatax.gov.cn:9723',
        'Origin': 'https://etax.guangxi.chinatax.gov.cn:9723',
        'Pragma': 'no-cache',
        'Refer': 'https://etax.guangxi.chinatax.gov.cn:9723/web/dzswj/taxclient/ggfw/qsgg.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'X-Requested-With': 'XMLHttpRequest'
    }
    insert_sql = (
        "INSERT INTO QS_Notice(nsrName,nsrsbh,fdName,IDNumber,address,qssz,qsje,xzqs,zgswjg,province,publishDate) "
        "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,'广西省',%s)"
    )

    num = 0
    types = ['企业', '个体', '个人']
    for taxpayerType in types:
        total = getTotal(taxpayerType)
        for start in range(1, total + 1, 10):
            # Windows are inclusive and 10 records wide, clipped to the total.
            end = min(start + 9, total)
            datas_json = getDataList(taxpayerType, start, end)
            for data_json in datas_json:
                pub_time = data_json['GGRQ']
                nsrmc = data_json['NSRMC']
                nsrsbh = data_json['NSRSBH']
                fddbrw = data_json['FDDBR_MC']
                fddbrwsfzh = data_json['SFZJHM']
                scjydz = data_json['JYDD']
                qssz = data_json['QSSZ']
                qsje = data_json['WNCQ_QSYE']
                xzqs = data_json['BNXQ_QSJE']
                zgswjg = data_json['SWJG_MC']
                # Skip notices that are already stored.
                if select(pub_time, nsrmc, nsrsbh, qssz):
                    continue
                cursor.execute(
                    insert_sql,
                    (nsrmc, nsrsbh, fddbrw, fddbrwsfzh, scjydz, qssz, qsje, xzqs, zgswjg, pub_time),
                )
                cnx.commit()
                time.sleep(0.5)
                print('新增一条')
                num += 1
    print(f'共{num}条')


# http://hainan.chinatax.gov.cn/ 海南
def hainan_aj():
    """Collect major tax-violation case details from the Hainan tax bureau site.

    For each hard-coded taxpayer ID the search form is posted, every result
    page is walked to pick up case IDs, and each case's detail table is parsed
    into a dict.  The dicts are accumulated in a per-ID ``data_list``; this
    function neither returns nor persists them.
    """

    def buildForm(relationId):
        # Search form shared by getTotal and getIDList; only the taxpayer
        # identifier field is filled in.
        return {
            'area': '',
            'ajinformation': '',
            'startDate': '',
            'month': '',
            'nsrname': '',
            'nsridentify': relationId,
            'regaddress': '',
            'organization': '',
            'legal': '',
            'legalId': '',
            'finance': '',
            'financeId': '',
        }

    def getTotal(relationId):
        """Return the pager number for ``relationId`` (0 when the page reports no data).

        The number is the text of the second ``<em>`` inside the pager div; the
        caller uses it as the count of result pages.
        """
        ip = baseCore.get_proxy()
        url = 'https://hainan.chinatax.gov.cn/weifaCase/weifa_case_list.htm?pageNo=1'
        req = requests.post(url, headers=headers, data=buildForm(relationId), proxies=ip)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        if '暂时没有数据' in soup.text:
            return 0
        total = soup.find('div', class_='zdsswfgbl-lm2-r-lm3-div2').find_all('em')[1].text
        return int(total)

    def getIDList(relationId, page):
        """Return the case IDs listed on result page ``page`` for ``relationId``."""
        ip = baseCore.get_proxy()
        id_list = []
        url = f'https://hainan.chinatax.gov.cn/weifaCase/weifa_case_list.htm?pageNo={page}'
        req = requests.post(url, headers=headers, data=buildForm(relationId), proxies=ip)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        table = soup.find_all('table')[1]
        for tr in table.find_all('tr'):
            # The case ID is the first run of digits in the onclick handler of
            # the <input> found in the row's last cell.
            onclick = tr.find_all('td')[-1].find('input').get('onclick')
            case_id = re.findall(r'\d+', onclick)[0]
            id_list.append(case_id)
        return id_list

    def getData(case_id):
        """Fetch one case and return its table as ``{th text: td text}``."""
        data = {}
        url = 'https://hainan.chinatax.gov.cn/weifaCase/weifa_case_list.htm'
        data_post = {
            'id': case_id,
        }
        # NOTE(review): unlike the list requests, this one is sent without a
        # proxy - confirm that is intended.
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        tr_list = soup.find('table').find_all('tr')
        # The last row of the table is not parsed.
        del tr_list[-1]
        for tr in tr_list:
            name = tr.find('th').text.strip()
            value = tr.find('td').text.strip()
            data[name] = value
        return data

    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'hainan.chinatax.gov.cn',
        'Origin': 'https://hainan.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
        # header - confirm against the live site before renaming.
        'Refer': 'https://hainan.chinatax.gov.cn/bsfw_5_8/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'X-Requested-With': 'XMLHttpRequest'
    }
    relationIds = ['91460300399794702A', '91460300399794702M']
    for relationId in relationIds:
        data_list = []
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        for page in range(1, total + 1):
            for case_id in getIDList(relationId, page):
                data_list.append(getData(case_id))


def hainan_qs():
    """Look up hard-coded taxpayer IDs on the Hainan query page with Selenium.

    For each ID the captcha is OCR'd, the form is submitted and, when results
    are shown, every row of the result table is turned into a dict keyed by the
    header texts.  The dicts are gathered in a per-ID ``data_list``; nothing is
    returned or stored.  The browser is always shut down, even on errors.
    """

    @retry(tries=5, delay=2)
    def inputID(driver, relationId):
        """Submit the query for ``relationId``.

        Returns True when no "no result" dialog appeared, False when the dialog
        says nothing was found.  Raises RuntimeError when the dialog mentions
        the captcha, which makes ``@retry`` run the whole submission again.
        """
        flg = 0
        # Click the captcha image before taking its screenshot.
        driver.find_element(By.ID, 'ck1_img').click()
        code_img = driver.find_element(By.ID, 'ck1_img').screenshot_as_png
        code = getCode(code_img)
        driver.find_elements(By.CLASS_NAME, 'wingsInput')[2].clear()
        driver.find_element(By.ID, 'ck1_input').clear()
        driver.find_elements(By.CLASS_NAME, 'wingsInput')[2].send_keys(relationId)
        driver.find_element(By.ID, 'ck1_input').send_keys(code)
        driver.find_element(By.CLASS_NAME, 'cearch_btn').click()
        time.sleep(1)
        try:
            text_flg = driver.find_element(By.CLASS_NAME, 'modal-content').text
            driver.find_element(By.CLASS_NAME, 'btn-primary').click()
            if '验证码' in text_flg:
                flg = 1
            if '未查询到结果' in text_flg:
                flg = 2
        except Exception:
            # Best effort: a missing or unclickable dialog is treated as
            # "results are shown".  Unlike a bare ``except`` this no longer
            # swallows KeyboardInterrupt/SystemExit.
            pass
        if flg == 1:
            # The original bare ``raise`` had no active exception and only
            # worked by accidentally raising RuntimeError; raise it explicitly.
            raise RuntimeError('captcha rejected, retrying')
        return flg != 2

    relationIds = ['92460107MA5T7DRT50', '92460107MA5T7DRT5']
    driver = baseCore.buildDriver()
    try:
        url = 'https://zzs.hainan.chinatax.gov.cn:2030/sword?ctrl=XzzfgsptXzcfCtrl_initSwxzybcxcfjg&swjgDm=14600000000,14600600000&sxq=hainansheng'
        driver.get(url)
        for relationId in relationIds:
            data_list = []
            if not inputID(driver, relationId):
                print(f'{relationId}===无数据')
                continue
            table = driver.find_element(By.CLASS_NAME, 'table-bordered')
            tr_list = table.find_elements(By.TAG_NAME, 'tr')
            # First row holds the column headers; the rest are data rows.
            name_list = [th.text.strip() for th in tr_list[0].find_elements(By.TAG_NAME, 'th')]
            for tr in tr_list[1:]:
                data = {}
                # zip() stops at the shorter side, so a row with more cells
                # than headers no longer raises IndexError.
                for name, td in zip(name_list, tr.find_elements(By.TAG_NAME, 'td')):
                    value = td.get_attribute('title')
                    if value == '':
                        # Fall back to 'realvalue' when 'title' is empty.
                        value = td.get_attribute('realvalue')
                    data[name] = value
                data_list.append(data)
            time.sleep(2)
    finally:
        # quit() ends the WebDriver session as well as the window, and runs
        # even when scraping fails part-way.
        driver.quit()


# http://chongqing.chinatax.gov.cn/ 重庆
# http://sichuan.chinatax.gov.cn/ 四川
def sichuan_aj():
    """Collect major tax-violation case details from the Sichuan tax bureau site.

    For each hard-coded taxpayer ID the search endpoint is queried, every
    result page is walked for detail links, and each detail page's table is
    parsed into a dict.  The dicts are gathered in a per-ID ``data_list``;
    nothing is returned or stored.
    """

    def buildUrl(relationId, page):
        # Search URL shared by getTotal (page 1) and getHrefList.
        return f'https://sichuan.chinatax.gov.cn/module/search/index.jsp?vc_name=&field_439={relationId}&field_440=&field_441=&field_442=&field_442=&field_443=&field_443=&strSelectID=390%2C439%2C440%2C441%2C442%2C443&i_columnid=14253&field=vc_name%3A1%3A0%2Cfield_439%3A1%3A0%2Cfield_440%3A1%3A0%2Cfield_441%3A1%3A0%2Cfield_442%3A1%3A0%2Cfield_443%3A1%3A0&initKind=FieldForm&type=0%2C0%2C0%2C0%2C0%2C0&currentplace=&splitflag=&fullpath=0&download=%E6%9F%A5%E8%AF%A2&currpage={page}'

    def getTotal(relationId):
        """Return the page count read from the pager text of the first result page."""
        req = requests.get(buildUrl(relationId, 1), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        total = soup.find('div', class_='digg').text
        total = re.findall(r'共\s+(\d+)\s+页', total)[0]
        return int(total)

    def getHrefList(relationId, page):
        """Return the detail-page URLs found on result page ``page``."""
        href_list = []
        req = requests.get(buildUrl(relationId, page), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        table = soup.find_all('table')[1]
        for tr in table.find_all('tr'):
            # Only rows that wrap a nested table carry a result; skip the
            # others (previously done with a bare ``except``).
            table_ = tr.find('table')
            if table_ is None:
                continue
            tr_ = table_.find('tr')
            if tr_ is None:
                continue
            href = tr_.find_all('td')[-1].find('a').get('href')
            if 'http' not in href:
                href = href.replace('../../', 'https://sichuan.chinatax.gov.cn/')
            # Append every link; previously links that were already absolute
            # were silently dropped because the append sat inside the ``if``.
            href_list.append(href)
        return href_list

    def getData(url):
        """Parse the detail page table into ``{first cell text: second cell text}``."""
        data = {}
        soup = getSoup(url, headers)
        table = soup.find('div', class_='common-list-items').find('table')
        for tr in table.find_all('tr'):
            td_list = tr.find_all('td')
            name = td_list[0].text.strip()
            value = td_list[1].text.strip()
            data[name] = value
        return data

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Host': 'sichuan.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
        # header - confirm against the live site before renaming.
        'Refer': 'https://sichuan.chinatax.gov.cn/col/col14253/index.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'iframe',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
    }
    relationIds = ['91510100092838766K', '92460107MA5T7DRT5']
    for relationId in relationIds:
        # Query the ID of the current iteration; the original always queried
        # relationIds[0], so every later ID repeated the first one's search.
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        data_list = []
        for page in range(1, total + 1):
            for href in getHrefList(relationId, page):
                data_list.append(getData(href))


def sichuan_qs():
    """Look up hard-coded taxpayer IDs on the Sichuan e-tax query page with Selenium.

    For each ID the captcha is OCR'd, the form is submitted and the result
    table is read into dicts.  The dicts are gathered in a per-ID
    ``data_list``; nothing is returned or stored.  The browser is always shut
    down, even on errors.
    """

    def inputID(driver, relationId):
        """Fill in the taxpayer ID and the OCR'd captcha, then submit the form."""
        # driver.find_element(By.XPATH,'/html/body/div/div/div/div[2]/form/div[2]/div[5]/div/div/img').click()
        # time.sleep(1)
        id_xpath = '/html/body/div/div/div/div[2]/form/div[2]/div/div[2]/div/div/span/input'
        code_xpath = '/html/body/div/div/div/div[2]/form/div[2]/div[4]/div/div/span/input'
        code_img = driver.find_element(By.CLASS_NAME, 'yzm').screenshot_as_png
        code = getCode(code_img)
        driver.find_element(By.XPATH, id_xpath).clear()
        driver.find_element(By.XPATH, code_xpath).clear()
        driver.find_element(By.XPATH, id_xpath).send_keys(relationId)
        driver.find_element(By.XPATH, code_xpath).send_keys(code)
        driver.find_element(By.XPATH, '/html/body/div/div/div/div[2]/form/div[2]/button').click()
        time.sleep(2)

    def getDataList(driver):
        """Return one ``{header text: cell title}`` dict per row of the result table."""
        data_list = []
        th_list = driver.find_elements(By.XPATH, '/html/body/div[1]/div/div/div[2]/div/div/div/div/table/thead//th')
        # The first header cell and the first cell of each row are dropped.
        del th_list[0]
        name_list = [th.find_element(By.TAG_NAME, 'span').text.strip() for th in th_list]
        tr_list = driver.find_elements(By.XPATH, '/html/body/div[1]/div/div/div[2]/div/div/div/div/table/tbody/tr')
        for tr in tr_list:
            td_list = tr.find_elements(By.TAG_NAME, 'td')
            del td_list[0]
            # One dict per row.  The original iterated ``range(td_list)``,
            # which raises TypeError, and created a new dict for every cell.
            data = {}
            for name, td in zip(name_list, td_list):
                value = td.find_element(By.TAG_NAME, 'span').get_attribute('title').strip()
                data[name] = value
            data_list.append(data)
        return data_list

    def getTotal(driver):
        """Return the number shown in the second-to-last pager item."""
        li = driver.find_element(By.CLASS_NAME, 'ant-table-pagination').find_elements(By.TAG_NAME, 'li')[-2]
        total = li.find_element(By.TAG_NAME, 'a').text.strip()
        return int(total)

    relationIds = ['915111122071504877', '915104113378239854']
    driver = baseCore.buildDriver()
    try:
        url = 'https://etax.sichuan.chinatax.gov.cn/yhs-web/cxzx/index.html?&id=122935&_=1571908297594#/qscx'
        driver.get(url)
        time.sleep(5)
        for relationId in relationIds:
            data_list = []
            inputID(driver, relationId)
            total = getTotal(driver)
            # NOTE(review): nothing in this loop advances the pager, so the
            # currently displayed page is read ``total`` times - a "next page"
            # click is probably missing; confirm the selector on the live page.
            for _ in range(total):
                data_list += getDataList(driver)

        time.sleep(5)
    finally:
        # quit() ends the WebDriver session as well as the window, and runs
        # even when scraping fails part-way.
        driver.quit()


# http://guizhou.chinatax.gov.cn/ 贵州
def guizhou_aj():
    """Collect major tax-violation case details from the Guizhou tax bureau site.

    Selenium drives the search page and its pager; each detail link is then
    fetched with ``requests`` and its table parsed into a dict.  The collected
    dicts are printed per taxpayer ID.  The browser is always shut down, even
    on errors.
    """

    def getPageSize(driver):
        """Return the number of result pages, assuming 10 records per page."""
        total = driver.find_element(By.CLASS_NAME, 'layui-laypage-count').text
        total = int(re.findall(r'\d+', total)[0])
        # Integer ceiling division of the record count by 10.
        return (total + 9) // 10

    def getHrefList(driver):
        """Return the detail links on the current page ([] when the page says no results)."""
        href_list = []
        container = driver.find_element(By.ID, 'idData')
        if '没有找到您搜索的结果，请重新输入关键字检索。' in container.text:
            return href_list
        for tr in container.find_elements(By.TAG_NAME, 'tr'):
            href = tr.find_element(By.TAG_NAME, 'a').get_attribute('href')
            href_list.append(href)
        return href_list

    def getData(url):
        """Parse the detail page table into ``{first cell text: second cell text}``."""
        data = {}
        soup = getSoup(url, headers)
        tr_list = soup.select('.nr > .main-inner > table > tbody > tr > td > table > tbody > tr')
        # The last row of the table is not parsed.
        del tr_list[-1]
        for tr in tr_list:
            td_list = tr.find_all('td')
            name = td_list[0].text.strip()
            value = td_list[1].text.strip()
            data[name] = value
        return data

    def inputID(driver, relationId):
        """Type the taxpayer ID into the search box and run the search."""
        driver.find_element(By.ID, 'nsrsbh').clear()
        driver.find_element(By.ID, 'nsrsbh').send_keys(relationId)
        driver.find_element(By.ID, 'search_cx').click()
        time.sleep(2)

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'guizhou.chinatax.gov.cn',
        'Origin': 'https://guizhou.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
        # header - confirm against the live site before renaming.
        'Refer': 'https://guizhou.chinatax.gov.cn/zdsswfsxajxxgbl/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'X-Requested-With': 'XMLHttpRequest'
    }
    relationIds = ['91520322MA6DNAW9X7', '91520900MA6E42GA1X']
    url = 'https://guizhou.chinatax.gov.cn/zdsswfsxajxxgbl/'
    driver = baseCore.buildDriver()
    try:
        driver.get(url)
        time.sleep(1)
        for relationId in relationIds:
            inputID(driver, relationId)
            data_list = []
            pageSize = getPageSize(driver)
            for i in range(pageSize):
                href_list = getHrefList(driver)
                if not href_list:
                    print(f'{relationId}===无数据')
                    # The pager is not advanced on an empty page, so re-reading
                    # it would only repeat this message; stop paging instead.
                    break
                for href in href_list:
                    data_list.append(getData(href))
                # Only advance when another page remains; there is nothing to
                # click to after the last one.
                if i < pageSize - 1:
                    driver.find_element(By.CLASS_NAME, 'layui-laypage-next').click()
                    time.sleep(2)
            print(data_list)
    finally:
        # quit() ends the WebDriver session as well as the window, and runs
        # even when scraping fails part-way.
        driver.quit()


# Not working yet: the site's anti-scraping protection blocks this crawler.
def guizhou_qs():
    """Drive the Guizhou e-tax query page with Selenium for hard-coded taxpayer IDs.

    Each ID is submitted together with an OCR'd captcha.  When the result table
    does not contain the "no data" text, the (still unfinished) table reader is
    invoked.  Nothing is returned or stored.
    """

    def getData(driver):
        # Collect the text of the first <span> in every header row.
        header_rows = driver.find_element(By.CLASS_NAME, 'layui-table-header').find_elements(By.TAG_NAME, 'tr')
        name_list = [row.find_element(By.TAG_NAME, 'span').text.strip() for row in header_rows]
        body_rows = driver.find_element(By.CLASS_NAME, 'layui-table-body ').find_elements(By.TAG_NAME, 'tr')
        for _row in body_rows:
            # Row parsing is not implemented.
            pass

    def inputID(driver, relationId):
        # Click the captcha image, then screenshot it for OCR.
        driver.find_element(By.ID, 'yzmImg').click()
        time.sleep(1)
        captcha_png = driver.find_element(By.ID, 'yzmImg').screenshot_as_png
        captcha_text = getCode(captcha_png)
        fields = (('captcha', captcha_text), ('nsrsbh', relationId))
        # Clear both inputs first, then type into both, in the same order.
        for field_id, _unused in fields:
            driver.find_element(By.ID, field_id).clear()
        for field_id, text in fields:
            driver.find_element(By.ID, field_id).send_keys(text)
        driver.find_element(By.ID, 'queryBtn').click()
        time.sleep(1)

    relationIds = ['91520322MA6DNAW9X7', '91520900MA6E42GA1X']
    url = 'https://etax.guizhou.chinatax.gov.cn/tycx-cjpt-web/view/sscx/gzcx/qsqycx/qsqycx.jsp'
    driver = baseCore.buildDriver()
    driver.get(url)
    time.sleep(1)
    for relationId in relationIds:
        inputID(driver, relationId)
        body_text = driver.find_element(By.CLASS_NAME, 'layui-table-body').text
        if '查无数据' not in body_text:
            getData(driver)
            time.sleep(2)
        else:
            print(f'{relationId}===无数据')

    driver.close()


# http://yunnan.chinatax.gov.cn/ 云南
def yunnan_aj():
    """Collect major tax-violation case details from the Yunnan tax bureau site.

    For each hard-coded taxpayer ID the bulletin search endpoint is posted, the
    links embedded in its JavaScript response are extracted, and each linked
    page's table is parsed into a dict.  The dicts are printed per ID.
    """

    def getaList(relationId):
        """Return the ``<a>`` tags embedded in the search response ([] when it is empty)."""
        url = 'https://yunnan.chinatax.gov.cn/bulletin/ajaxfors.jsp'
        data_post = {
            'tablename': 'jcms_41',
            'title': f'{relationId},',
            'key': '4,',
        }
        req = requests.post(url, headers=headers, data=data_post)
        req.encoding = req.apparent_encoding
        if 'dataStore = [];' in req.text:
            print(f'{relationId}===无数据')
            return []
        # The response carries an HTML fragment inside a quoted array literal;
        # take the first such fragment and parse it.
        data_json = re.findall(r'\[ "(.*?)"\];', req.text)[0]
        soup = BeautifulSoup(data_json, 'lxml')
        return soup.find_all('a')

    def getData(href):
        """Parse the detail page table into ``{first cell text: second cell text}``."""
        data = {}
        soup = getSoup(href, headers)
        # Remove <script> elements so their content does not end up in cell text.
        for script in soup.find_all('script'):
            script.decompose()
        tr_list = soup.find('table', class_='rightlistbox').find_all('tr')
        for tr in tr_list:
            td_list = tr.find_all('td')
            name = td_list[0].text.strip()
            value = td_list[1].text.strip()
            data[name] = value
        return data

    headers = {
        'Accept': 'text/javascript, application/javascript, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded',
        # Was 'guizhou.chinatax.gov.cn' (copy-paste leftover); the Host header
        # must name the server the requests are actually sent to.
        'Host': 'yunnan.chinatax.gov.cn',
        'Origin': 'https://yunnan.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
        # header, and its value still points at the Guizhou site - confirm the
        # right page before turning it into a real Referer.
        'Refer': 'https://guizhou.chinatax.gov.cn/zdsswfsxajxxgbl/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'X-Requested-With': 'XMLHttpRequest'
    }
    relationIds = ['915301006836995340', '91530427557789213F', '915304275577892132']
    for relationId in relationIds:
        data_list = []
        a_list = getaList(relationId)
        if not a_list:
            continue
        for a in a_list:
            data_list.append(getData(a.get('href')))
        print(data_list)


def yunnan_qs():
    """Probe the Yunnan e-tax query page with Selenium for hard-coded taxpayer IDs.

    IDs are submitted one after another (each with an OCR'd captcha) until one
    of them does not produce the "no information found" alert; the loop then
    stops.  No result data is read, returned or stored.
    """

    def inputID(driver, relationId):
        # Click the captcha image, then screenshot it for OCR.
        driver.find_element(By.ID, 'yzmImg').click()
        time.sleep(1)
        captcha_png = driver.find_element(By.ID, 'yzmImg').screenshot_as_png
        captcha_text = getCode(captcha_png)
        fields = (('vcode', captcha_text), ('nsrxx', relationId))
        # Clear both inputs first, then type into both, in the same order.
        for field_id, _unused in fields:
            driver.find_element(By.ID, field_id).clear()
        for field_id, text in fields:
            driver.find_element(By.ID, field_id).send_keys(text)
        form_table = driver.find_element(By.CLASS_NAME, 'table-noborder')
        submit_btn = form_table.find_element(By.ID, 'check')
        submit_btn.click()

    driver = baseCore.buildDriver()
    url = 'https://etax.yunnan.chinatax.gov.cn/zjgfdacx/sscx/qsggxxcx/qsggxxcx.html'
    driver.get(url)
    time.sleep(1)
    relationIds = ['915301006836995340', '91530427557789213F', '915304275577892132']
    for relationId in relationIds:
        inputID(driver, relationId)
        time.sleep(2)
        no_data = False
        try:
            alert_text = driver.find_element(By.CLASS_NAME, 'zj-alert').text
            driver.find_element(By.CLASS_NAME, 'confirm').click()
            if '未查询到相关信息！' in alert_text:
                print(f'{relationId}===无数据')
                time.sleep(1)
                no_data = True
        except BaseException:
            # Deliberately as broad as a bare ``except``: any failure while
            # handling the alert is ignored.
            pass
        if no_data:
            continue
        time.sleep(1)
        # Stop at the first ID that did not report "no information found".
        break
    time.sleep(10)
    driver.close()


# https://xizang.chinatax.gov.cn/ 西藏
def xizang_aj():
    """Collect major tax-violation case details from the Tibet (Xizang) tax bureau site.

    For each hard-coded taxpayer ID the search endpoint is queried, every
    result page is walked for detail links, and each detail page is parsed into
    a dict.  The dicts are printed per ID.
    """

    def buildUrl(relationId, page):
        # Search URL shared by getTotal (page 1) and getHrefList.
        return f'https://xizang.chinatax.gov.cn/module/search/index.jsp?field=vc_name:1,field_406:1,field_407:1,field_408:1&i_columnid=style_63&vc_name=&field_406={relationId}&field_407=&field_408=&currpage={page}'

    def getTotal(relationId):
        """Return the page count read from the last ``<span>`` of the pager."""
        req = requests.get(buildUrl(relationId, 1), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        total = soup.find('div', class_='digg').find_all('span')[-1].text
        return int(re.findall(r'共\s+(\d+)\s+页', total)[0])

    def getHrefList(relationId, page):
        """Return the detail-page URLs found on result page ``page``."""
        href_list = []
        req = requests.get(buildUrl(relationId, page), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        for tr in soup.find_all('tr', class_='form-list'):
            href = tr.find('a').get('href')
            if 'http' not in href:
                href = href.replace('../../', 'https://xizang.chinatax.gov.cn/')
            href_list.append(href)
        return href_list

    def getData(url):
        """Parse a detail page into a dict.

        Most fields sit between ``<!--<$[label]>begin-->`` and
        ``<!--<$[label]>end-->`` comment markers inside the ``#zoom`` div; the
        last field is the joined text of all ``<p>`` elements in that div.
        """
        data = {}
        soup = getSoup(url, headers)
        div = soup.find('div', attrs={'id': 'zoom'})
        # Serialise once instead of once per field.
        html = str(div)

        def marked(label):
            # First value enclosed by the begin/end comment markers of ``label``.
            return re.findall(rf'<!--<\$\[{label}]>begin-->(.*?)<!--<\$\[{label}]>end-->', html)[0]

        data['纳税人识别号'] = marked('纳税人识别号')
        data['法人信息'] = marked('法定代表人姓名性别及身份证号码')
        data['财务代表人信息'] = marked('财务负责人身份证号')
        data['组织机构代码'] = marked('组织机构代码')
        data['违法案件性质'] = re.findall(r'违法案件性质：(.*?)<!--违法案件性质-->', html)[0]
        data['注册地址'] = marked('注册地址')
        # Non-empty paragraph texts, one per line (same result as the former
        # repeated concatenate-and-strip loop).
        paragraphs = (p.text.strip() for p in div.find_all('p'))
        data['主要违法事实和相关法律依据'] = '\n'.join(text for text in paragraphs if text)
        return data

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Host': 'xizang.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
        # header - confirm against the live site before renaming.
        'Refer': 'https://xizang.chinatax.gov.cn/col/col2371/index.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'iframe',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
    }
    relationIds = ['91540100MA6T10PD5D', '915403233213258937', '915403233213258939']
    for relationId in relationIds:
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        data_list = []
        for page in range(1, total + 1):
            for href in getHrefList(relationId, page):
                data_list.append(getData(href))
        print(data_list)


def xizang_qs():
    """Query two search listings on the Tibet (Xizang) tax bureau site for hard-coded taxpayer IDs.

    Each entry of ``types`` is a ``[url template, placeholder]`` pair; the
    placeholder in the template is replaced by the taxpayer ID.  Every result
    page is parsed into row dicts, which are printed per ID and listing.
    """

    def buildUrl(source, relationId, page):
        # Substitute the taxpayer ID for the placeholder value and select the page.
        template, placeholder = source
        return template.replace(f'={placeholder}', f'={relationId}').replace('currpage=1', f'currpage={page}')

    def getTotal(source, relationId):
        """Return the page count read from the last ``<span>`` of the pager."""
        req = requests.get(buildUrl(source, relationId, 1), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        total = soup.find('div', class_='digg').find_all('span')[-1].text
        return int(re.findall(r'共\s+(\d+)\s+页', total)[0])

    def getDataList(source, relationId, page):
        """Return one ``{column name: cell text}`` dict per non-empty row on ``page``."""
        data_list = []
        req = requests.get(buildUrl(source, relationId, page), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        # Column names come from the first row of the 'result-form' table.
        tr_name = soup.select('body > div > table.result-form > tr')[0]
        name_list = [td.text.strip() for td in tr_name.find_all('td')]
        for tr in soup.select('body > div > table:nth-of-type(2) > tr'):
            td_list = tr.select('td > table > tr > td')
            # zip() stops at the shorter side, so a row with more cells than
            # column names no longer raises IndexError.
            data = {name: td.text.strip() for name, td in zip(name_list, td_list)}
            if data:
                data_list.append(data)
        return data_list

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Host': 'xizang.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
        # header - confirm against the live site before renaming.
        'Refer': 'https://xizang.chinatax.gov.cn/col/col2371/index.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Sec-Ch-Ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'iframe',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
    }
    relationIds = ['91540200MA6TFC9R7Q', '915403233213258937', '915403233213258939']
    types = [
        [
            'https://xizang.chinatax.gov.cn/module/search/index.jsp?field=field_2678:1,field_2679:1,field_2680:1&i_columnid=style_119&field_2678=&field_2679=field_2679&field_2680=&currpage=1',
            'field_2679'],
        [
            'https://xizang.chinatax.gov.cn/module/search/index.jsp?field=field_1764:1,field_1766:1,field_1767:1&i_columnid=style_102&field_1764=&field_1766=field_1766&field_1767=&currpage=1',
            'field_1766']]
    for relationId in relationIds:
        # ``source`` instead of ``type`` so the builtin is not shadowed.
        for source in types:
            total = getTotal(source, relationId)
            data_list = []
            for page in range(1, total + 1):
                data_list += getDataList(source, relationId, page)
            print(len(data_list))
            if not data_list:
                print(f'{relationId}===无数据')
                continue
            print(data_list)


# https://shaanxi.chinatax.gov.cn/ 陕西
# Not working yet: the content of the detail page cannot be retrieved.
def shan_xi_aj():
    """Query the Shaanxi tax bureau's case search for hard-coded taxpayer IDs.

    Only the first ID is processed: the loop ends with an unconditional
    ``break``.  Detail pages are fetched and their raw text is printed; no
    fields are extracted yet, so ``getData`` always returns an empty dict.
    """

    def buildUrl(relationId, page):
        # Search URL shared by getTotal (page 1) and getHrefList.
        return f'http://shaanxi.chinatax.gov.cn/module/search/index.jsp?field=field_2166:1:0,field_2213:1:0,field_1656:1:0,field_2391:1:0,field_2410:12:0,field_1651:12:0,field_1652:1:0,field_2390:1:0,field_1672:12:0,field_1670:12:0,field_1653:1:0,field_1663:1:0&i_columnid=style_3&field_2166=&field_2213=&field_1656={relationId}&field_2391=&field_1652=&field_2390=&field_1653=&field_1663=&field_2410=&field_1651=&field_1672=&field_1670=&currpage={page}'

    def getTotal(relationId):
        """Return the page count read from the last ``<span>`` of the pager."""
        req = requests.get(buildUrl(relationId, 1), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        total = soup.find('div', class_='digg').find_all('span')[-1].text
        return int(re.findall(r'共\s+(\d+)\s+页', total)[0])

    def getHrefList(relationId, page):
        """Return the detail-page URLs found on result page ``page``."""
        href_list = []
        req = requests.get(buildUrl(relationId, page), headers=headers)
        req.encoding = req.apparent_encoding
        soup = BeautifulSoup(req.text, 'lxml')
        tr_list = soup.find('ul', class_='chaxun_list').find('table').find_all('tr')
        # The last row of the table is not a result row.
        del tr_list[-1]
        for tr in tr_list:
            href = tr.find('a').get('href')
            if 'http' not in href:
                href = href.replace('../../', 'http://shaanxi.chinatax.gov.cn/')
            href_list.append(href)
        return href_list

    def getData(url):
        """Fetch a detail page and print its text; returns an empty dict for now."""
        data = {}
        # NOTE(review): this request is sent without ``headers``, unlike the
        # search requests - confirm whether that is intended.
        req = requests.get(url)
        req.encoding = req.apparent_encoding
        print(req.text)
        # tr_list = soup.select('#barrierfree_container > .content > .contentB > table > tr ')
        # print(tr_list)
        # for tr in tr_list:
        #     print(tr.text)
        return data

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Host': 'shaanxi.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
        # header - confirm against the live site before renaming.
        'Refer': 'http://shaanxi.chinatax.gov.cn/col/col15616/index.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Upgrade-Insecure-Requests': '1',
    }
    relationIds = ['91610602MA7EG3DW2W', '915403233213258937', '915403233213258939']
    for relationId in relationIds:
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
        for page in range(1, total + 1):
            for href in getHrefList(relationId, page):
                getData(href)
        # Only the first ID is processed while detail parsing is unfinished.
        break


# http://gansu.chinatax.gov.cn/ 甘肃
def gansu_aj():
    """Query the Gansu tax site's search endpoint and print each detail record.

    For every id in the hard-coded ``relationIds`` list the function reads the
    number of result pages, walks each page, follows every detail link and
    prints the parsed key/value record. Records are also collected in a
    per-id ``data_list``, which is not used further. Takes no arguments and
    returns ``None``.
    """
    site_root = 'http://gansu.chinatax.gov.cn/'
    # Single template shared by the page-count and listing requests so the two
    # cannot drift apart.
    search_url = (
        'http://gansu.chinatax.gov.cn/module/search/index.jsp?field=field_849:1:1,field_850:1:1,field_851:1:1,field_852:1:1,field_857:1:1,field_860:1:1,field_867:1:1,field_868:1:1,field_855:1:1,field_866:1:1,field_865:1:1,field_856:1:1'
        '&i_columnid=8350&field_849=&field_850={relationId}&field_851=&field_852=&field_857=&field_860=&field_867=&field_868=&field_855=&field_866=&field_865=&field_856='
        '&currpage={page}'
    )
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Host': 'gansu.chinatax.gov.cn',
        'Pragma': 'no-cache',
        # Was spelled 'Refer', which is not an HTTP header, so no referrer
        # was actually being sent.
        'Referer': 'http://gansu.chinatax.gov.cn/col/col8350/index.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76',
        'Upgrade-Insecure-Requests': '1',
    }

    def getSearchSoup(relationId, page):
        """Fetch one search-result page and return it parsed with lxml."""
        req = requests.get(search_url.format(relationId=relationId, page=page), headers=headers)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'lxml')

    def getTotal(relationId):
        """Return the page count shown in the pager, or 0 when it is absent."""
        pager = getSearchSoup(relationId, 1).find('div', class_='digg')
        if pager is None:
            return 0
        spans = pager.find_all('span')
        if not spans:
            return 0
        # The last pager span carries the "共 N 页" (N pages in total) text.
        matched = re.search(r'共\s+(\d+)\s+页', spans[-1].text)
        return int(matched.group(1)) if matched else 0

    def getHrefList(relationId, page):
        """Return the absolute detail-page links found on one result page."""
        href_list = []
        soup = getSearchSoup(relationId, page)
        for tr in soup.select('body > table:nth-of-type(2) > tr:nth-of-type(1) > td > table > tr'):
            link = tr.find('a')
            href = link.get('href') if link is not None else None
            if not href:
                # Rows without a link carry no detail page to follow.
                continue
            if 'http' not in href:
                href = href.replace('../../', site_root)
            href_list.append(href)
        return href_list

    def getData(url):
        """Parse a detail page's two-column table into a ``{name: value}`` dict."""
        data = {}
        soup = getSoup(url, headers)
        container = soup.find('div', class_='main')
        table = container.find('table') if container is not None else None
        if table is None:
            return data
        for tr in table.find_all('tr'):
            td_list = tr.find_all('td')
            if len(td_list) < 2:
                # Not a name/value row.
                continue
            data[td_list[0].text.strip()] = td_list[1].text.strip()
        return data

    relationIds = ['91620500MA72Q7U46X', '915403233213258937']
    for relationId in relationIds:
        data_list = []
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        for page in range(1, total + 1):
            for href in getHrefList(relationId, page):
                data = getData(href)
                data_list.append(data)
                print(data)


# http://qinghai.chinatax.gov.cn/ 青海
# http://ningxia.chinatax.gov.cn/ 宁夏
# https://xinjiang.chinatax.gov.cn/ 新疆
# http://dalian.chinatax.gov.cn/ 大连
# http://ningbo.chinatax.gov.cn/ 宁波
# http://xiamen.chinatax.gov.cn/ 厦门
# http://qingdao.chinatax.gov.cn/ 青岛

def qingdao_aj():
    """Query the Qingdao tax site's search endpoint and print each detail record.

    For every id in the hard-coded ``relationIds`` list the function reads a
    page count from the result pager, fetches the result list once per page,
    follows every detail link and prints the parsed key/value record. Records
    are also collected in a per-id ``data_list``, which is not used further.
    Takes no arguments and returns ``None``.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Host': 'qingdao.chinatax.gov.cn',
        'Referer': 'http://qingdao.chinatax.gov.cn/resourcedata/sswf2019/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36'
    }

    def getSearchSoup(relationId):
        """Fetch the search-result page for one id and return it parsed."""
        url = f'http://qingdao.chinatax.gov.cn/search/was5/web/search?channelid=223090&searchword=(nsrsbh={relationId})'
        req = requests.get(url, headers=headers)
        req.encoding = req.apparent_encoding
        return BeautifulSoup(req.text, 'lxml')

    def getTotal(relationId):
        """Return the first number in the pager text, or 0 when there is none."""
        pager = getSearchSoup(relationId).find('div', class_='waspage')
        if pager is None:
            return 0
        matches = re.findall(r'\d+', pager.text)
        return int(matches[0]) if matches else 0

    def getHrefList(relationId, page):
        """Return the detail-page links listed in the search result.

        NOTE(review): ``page`` is accepted but never sent to the server, so
        every iteration of the caller's page loop receives the same list and
        multi-page results are duplicated rather than paged. The endpoint's
        paging parameter is not visible here - confirm it and add it to the
        URL.
        """
        href_list = []
        tbody = getSearchSoup(relationId).find('tbody')
        if tbody is None:
            return href_list
        for tr in tbody.find_all('tr'):
            link = tr.find('a')
            detail_href = link.get('href') if link is not None else None
            if detail_href:
                href_list.append(detail_href)
        return href_list

    def getData(href):
        """Parse a detail page's two-column table into a ``{name: value}`` dict."""
        data = {}
        tbody = getSoup(href, headers).find('tbody')
        if tbody is None:
            return data
        for tr in tbody.find_all('tr'):
            cells = tr.find_all('td')
            if len(cells) < 2:
                # Not a name/value row.
                continue
            data[cells[0].text.strip()] = cells[1].text.strip()
        return data

    relationIds = ['91370203MA3CK88L7K']
    for relationId in relationIds:
        data_list = []
        total = getTotal(relationId)
        if total == 0:
            print(f'{relationId}===无数据')
            continue
        for page in range(1, total + 1):
            for href in getHrefList(relationId, page):
                data = getData(href)
                data_list.append(data)
                print(data)

import ddddocr
from PIL import Image


def use_ocr(img):
    """Run ddddocr on an image file and return the recognised text.

    :param img: path of the image file to read (opened in binary mode).
    :return: whatever ``DdddOcr.classification`` returns for the file's bytes;
        the value is also printed.
    """
    # Build the recognizer once and keep it on the function object instead of
    # constructing a fresh DdddOcr for every image.
    recognizer = getattr(use_ocr, '_recognizer', None)
    if recognizer is None:
        recognizer = ddddocr.DdddOcr()
        use_ocr._recognizer = recognizer
    with open(img, 'rb') as f:
        image = f.read()
    res = recognizer.classification(image)
    print(res)
    return res

def screenshot_by_element(driver, ele, screen_img_path, out_img_path):
    """Save a page screenshot, then crop it to one element's bounding box.

    :param driver: browser driver; its ``get_screenshot_as_file`` writes the
        full screenshot to ``screen_img_path``.
    :param ele: element whose ``location`` and ``size`` define the crop box.
    :param screen_img_path: where the full screenshot is written.
    :param out_img_path: where the cropped image is written.
    """
    driver.get_screenshot_as_file(screen_img_path)

    position = ele.location
    left, top = position['x'], position['y']
    dimensions = ele.size
    # PIL's crop box is (left, upper, right, lower), hence edges, not sizes.
    crop_box = (left, top, left + dimensions['width'], top + dimensions['height'])

    cropped = Image.open(screen_img_path).crop(crop_box)
    print(*crop_box)
    cropped.save(out_img_path)

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def createDriver(chrome_driver=r'D:\cmd100\chromedriver.exe',
                 chrome_binary=r'D:\Google\Chrome\Application\chrome.exe'):
    """Start a Chrome WebDriver session and return it.

    :param chrome_driver: path to the chromedriver executable.
    :param chrome_binary: path to the Chrome browser executable.
    :return: the new ``webdriver.Chrome`` instance; the caller is responsible
        for calling ``quit()`` on it.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = chrome_binary
    # To route traffic through a proxy, add for example:
    # chrome_options.add_argument('--proxy-server=http://127.0.0.1:8080')

    # `options=` replaces the deprecated `chrome_options=` keyword, which
    # newer Selenium releases no longer accept alongside `service=`.
    return webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)

def qingdao_qs():
    """Drive the Qingdao tax-arrears announcement form in Chrome and print results.

    For each id in the hard-coded ``relationIDS`` list the function fills in
    the id and a fixed date range, reads the captcha with ``use_ocr``, submits
    the form and prints one dict per ``table.pc_jzxx`` found inside
    ``div#htmlStr``. Takes no arguments and returns ``None``. Both browser
    sessions it opens are quit before it returns.
    """
    requests.DEFAULT_RETRIES = 5
    time_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    log.info(f'开始时间为：{time_start}')

    url = "https://info.qingdao.chinatax.gov.cn/webPortals/page/qsgg.html"
    screen_img_path = "D:/screen/aa.png"
    out_img_path = "D:/out/aa.png"
    relationIDS = ['370282F68040630']

    browser = createDriver()
    try:
        try:
            browser.get(url)
            time.sleep(2)
        except Exception as e:
            log.info(e)

        for relationID in relationIDS:
            browser.find_element(By.ID, 'ipt_nsrsbh').send_keys(relationID)

            browser.find_element(By.ID, 'qyhdwDate').click()
            browser.find_element(By.ID, 'qyhdwDate').send_keys('2023-07-01 - 2023-10-01')
            # Click the captcha input (ipt_xm) before reading the page source.
            browser.find_element(By.ID, 'ipt_xm').click()
            soup = BeautifulSoup(browser.page_source, 'html.parser')
            img_url = soup.find('img', class_='yzm-img')['src']
            print(img_url)

            # NOTE(review): the captcha is re-opened in a second browser
            # session; if the server ties the captcha to the session, the
            # image read here may differ from the one shown in the form -
            # confirm.
            browser_img = createDriver()
            try:
                browser_img.get(img_url)
                ele = browser_img.find_element(By.TAG_NAME, 'img')
                screenshot_by_element(browser_img, ele, screen_img_path, out_img_path)
            finally:
                # Previously one Chrome instance was leaked per id.
                browser_img.quit()

            code = use_ocr(out_img_path)
            print(code)
            browser.find_element(By.ID, 'ipt_xm').send_keys(code)

            # Re-open the date picker and press its confirm button so the
            # typed range is accepted.
            browser.find_element(By.ID, 'qyhdwDate').click()
            date_picker = browser.find_element(By.ID, 'layui-laydate1')
            date_picker.find_element(By.CLASS_NAME, "laydate-btns-confirm").click()

            browser.find_element(By.CLASS_NAME, 'btnstyle').click()

            # Result page: https://info.qingdao.chinatax.gov.cn/webPortals/page/qyHtml.html
            detail_soup = BeautifulSoup(browser.page_source, 'html.parser')
            print(detail_soup)
            # Read the result container from the page captured AFTER the
            # submit; the earlier `soup` predates the query.
            detail_data = detail_soup.find('div', id='htmlStr')
            if detail_data is None:
                log.info(f'======{relationID}, result container #htmlStr not found')
                continue
            if detail_data.text.strip() == '根据输入内容查询，无欠税公告信息。':
                log.info(f'======{relationID}，无欠税公告信息。')
                continue

            for table in detail_data.find_all('table', class_='pc_jzxx'):
                dic = {}
                for row in table.find_all('tr'):
                    cells = row.find_all('td')
                    if len(cells) == 2:
                        dic[cells[0].text.strip()] = cells[1].text.strip()

                print(dic)
    finally:
        browser.quit()


# https://shenzhen.chinatax.gov.cn/ 深圳



def shenzhen_aj():
    """Open the Shenzhen tax site's query page in Chrome (work in progress).

    Logs the start time, starts a browser via ``createDriver`` and loads the
    page; a load failure is logged rather than raised. ``headers`` and
    ``relationIds`` are prepared but not used yet. Returns ``None``.
    """
    # Translated original note: use a simulated browser; "opens 404"
    # (it is unclear from the note which access path returns the 404).
    started_at = time.strftime("%Y-%m-%d %H:%M:%S")
    requests.DEFAULT_RETRIES = 5
    log.info(f'开始时间为：{started_at}')

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': ' gzip, deflate, br',
        'Accept-Language': ' zh-CN,zh;q=0.9',
        'Connection': ' keep-alive',
        'Host': ' shenzhen.chinatax.gov.cn',
        'Referer': ' https://shenzhen.chinatax.gov.cn/sztaxapp/zdsswfaj/index',
        'Sec-Fetch-Dest': ' iframe',
        'Sec-Fetch-Mode': ' navigate',
        'Sec-Fetch-Site': ' same-origin',
        'Sec-Fetch-User': ' ?1',
        'Upgrade-Insecure-Requests': ' 1',
        'User-Agent': ' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'sec-ch-ua': ' "Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        'sec-ch-ua-mobile': ' ?0',
        'sec-ch-ua-platform': ' "Windows"'
    }
    relationIds = ['91440300MA5FTCG883']

    browser = createDriver()
    try:
        browser.get("https://shenzhen.chinatax.gov.cn/sztaxapp/zdsswfaj/")
        time.sleep(2)
    except Exception as exc:
        log.info(exc)
    # TODO: the per-id query loop (getTotal / data_list) is not implemented yet.



if __name__ == "__main__":
    # Other entry points, kept here for manual switching:
    # beijing_aj()
    # beijing_qs()
    # hebei_aj()
    # hebei_qs()
    # liaoning_aj()
    # jilin_qs()
    # zhejiang_aj()
    # qingdao_aj()
    try:
        qingdao_qs()
    finally:
        # Always call baseCore.close(), even when the scraper raises.
        baseCore.close()
