新三板企业企查查id获取

1a93992c · 薛凌堃 · ea2b7efb · 1a93992c · 1a93992c · 1a93992c
--- a/comData/dfcfwGpdm/NQenterprise/NQgetid.py
+++ b/comData/dfcfwGpdm/NQenterprise/NQgetid.py
+# -*- coding: utf-8 -*-
+import pandas as pd
+
+import time
+
+import requests
+import json
+
+from kafka import KafkaProducer
+from base.BaseCore import BaseCore
+from getQccId import find_id_by_name
+
+# Shared project helper instance; the DB connection, cursor and logger used in
+# this script are all taken from it.
+baseCore = BaseCore()
+# Connection handle: commit() is called on it after the QCCID update below.
+cnx_ = baseCore.cnx
+# Cursor used to execute the QCCID update statement.
+cursor_ = baseCore.cursor
+log = baseCore.getLogger()
+
+# Fetch a company's basic information from Qichacha by its Qichacha id.
+def _dig(data, *path, default=''):
+    """Follow *path* through nested dicts/lists and return the value found.
+
+    Returns *default* when any step is missing, when an intermediate value
+    cannot be indexed, or when the final value is None.
+    """
+    current = data
+    for key in path:
+        try:
+            current = current[key]
+        except (KeyError, IndexError, TypeError):
+            return default
+    return default if current is None else current
+
+
+def info_by_id(com_id,com_name,gpdm):
+    """Collect basic company information for one Qichacha id.
+
+    Two endpoints are queried (``v1/ent/detail`` and ``v6/base/getEntDetail``)
+    and their fields are merged into a single record. The module-level
+    ``headers`` and ``token`` globals are read, and ``headers['Qcc-Timestamp']``
+    is overwritten before each request.
+
+    Args:
+        com_id: Qichacha company id, sent as the ``unique`` query parameter.
+        com_name: Company name, used only in the failure log message.
+        gpdm: Stock code that is pushed back to redis when the first
+            response carries no company payload.
+
+    Returns:
+        A list holding one dict of company fields, or an empty list when the
+        first response has no usable ``result.Company`` entry.
+
+    Raises:
+        KeyError: if the company payload has no ``Name`` field.
+        Exceptions raised by ``requests`` or by JSON decoding are not caught.
+    """
+    aa_dict_list = []
+
+    t = str(int(time.time()) * 1000)
+    headers['Qcc-Timestamp'] = t
+
+    url = "https://xcx.qcc.com/mp-weixin/forwardApp/v1/ent/detail?token={}&t={}&unique={}".format(token, t, com_id)
+    resp_dict = requests.get(url=url, headers=headers, verify=False).json()
+    time.sleep(2)
+
+    com_jc_name = ''
+    result_dict = _dig(resp_dict, 'result', 'Company', default=None)
+    if not result_dict or not isinstance(result_dict, dict):
+        log.info(com_name + ":获取失败===========重新放入redis")
+        baseCore.rePutIntoR('EnterpriseIpo:nq_gpdm',gpdm)
+        return aa_dict_list
+
+    # ---- fields taken from the first (v1) response ----
+    company_name = baseCore.hant_2_hans(result_dict['Name'])
+    CreditCode = _dig(result_dict, 'CreditCode')
+    OperName = _dig(result_dict, 'Oper', 'Name')
+    if baseCore.str_have_num(OperName):
+        # A legal-representative name containing digits is discarded.
+        OperName = ''
+    Status = _dig(result_dict, 'ShortStatus')
+    StartDate = _dig(result_dict, 'StartDate')
+    RegistCapi = _dig(result_dict, 'RegistCapi')
+    EconKind = _dig(result_dict, 'EconKind')
+    Province = _dig(result_dict, 'Area', 'Province')
+    City = _dig(result_dict, 'Area', 'City')
+    County = _dig(result_dict, 'Area', 'County')
+    try:
+        region = Province + City + County
+    except TypeError:
+        region = ''
+
+    # Former names are joined with single spaces; any malformed entry drops
+    # the whole field. (The original called strip() but discarded its result,
+    # leaving a trailing space.)
+    try:
+        OriginalName = ' '.join(item['Name'] for item in result_dict['OriginalName'])
+    except (KeyError, TypeError):
+        OriginalName = ''
+
+    Address = _dig(result_dict, 'Address')
+    PhoneNumber = _dig(result_dict, 'companyExtendInfo', 'Tel')
+    # Prefer the extended-info website; fall back to the first contact URL.
+    WebSite = _dig(result_dict, 'companyExtendInfo', 'WebSite', default=None)
+    if WebSite is None:
+        WebSite = _dig(result_dict, 'ContactInfo', 'WebSite', 0, 'Url')
+    Email = _dig(result_dict, 'companyExtendInfo', 'Email')
+    Desc = _dig(result_dict, 'companyExtendInfo', 'Desc')
+    Info = _dig(result_dict, 'companyExtendInfo', 'Info')
+
+    # ---- fields taken from the second (v6) response ----
+    t = str(int(time.time()) * 1000)
+    headers['Qcc-Timestamp'] = t
+    url = "https://xcx.qcc.com/mp-weixin/forwardApp/v6/base/getEntDetail?token={}&t={}&unique={}".format(token, t,
+                                                                                                         com_id)
+    resp_dict2 = requests.get(url=url, headers=headers, verify=False).json()
+    time.sleep(1)
+
+    # A missing payload becomes an empty dict so every lookup below yields ''.
+    com2 = _dig(resp_dict2, 'result', 'Company', default={})
+    Scope = _dig(com2, 'Scope')
+    CheckDate = _dig(com2, 'CheckDate')
+    TaxpayerType = _dig(com2, 'TaxpayerType')  # taxpayer qualification
+    No = _dig(com2, 'No')
+    IxCode = _dig(com2, 'IxCode')
+    OrgNo = _dig(com2, 'OrgNo')
+    TermStart = _dig(com2, 'TermStart')
+    TeamEnd = _dig(com2, 'TeamEnd')
+    RecCap = _dig(com2, 'RecCap')
+    SubIndustry = _dig(com2, 'IndustryArray', -1)
+    BelongOrg = _dig(com2, 'BelongOrg')
+    EnglishName = _dig(com2, 'EnglishName')
+
+    # Insured headcount is the CommonList entry whose KeyDesc is '参保人数'.
+    can_bao = ''
+    common_list = _dig(com2, 'CommonList', default=[])
+    if isinstance(common_list, (list, tuple)):
+        for item in common_list:
+            if _dig(item, 'KeyDesc') == '参保人数':
+                can_bao = _dig(item, 'Value', default=can_bao)
+    # Treat a reported '0' as "no data". The original performed this check
+    # before the value was read, so it could never take effect.
+    if can_bao == '0':
+        can_bao = ''
+
+    aa_dict = {
+        'qccId': com_id,  # Qichacha company id
+        'name': company_name,  # company name
+        'shortName': com_jc_name,  # short name (always empty here)
+        'socialCreditCode': CreditCode,  # unified social credit code
+        'legalPerson': OperName,  # legal representative
+        'officialPhone': PhoneNumber,  # phone
+        'officialUrl': WebSite,  # official website
+        'officialEmail': Email,  # email
+        'briefInfo': Desc,  # brief introduction
+        'registerStatus': Status,  # registration status
+        'incorporationDate': StartDate,  # date of incorporation
+        'capital': RegistCapi,  # registered capital
+        'paidCapital': RecCap,  # paid-in capital
+        'approvalDate': CheckDate,  # approval date
+        'organizationCode': OrgNo,  # organization code
+        'registerNo': No,  # business registration number
+        'taxpayerNo': CreditCode,  # taxpayer id (the credit code is reused)
+        'type': EconKind,  # enterprise type
+        'businessStartDate': TermStart,  # business term start
+        'businessEndDate': TeamEnd,  # business term end
+        'taxpayerQualification': TaxpayerType,  # taxpayer qualification
+        'industry': SubIndustry,  # industry
+        'region': region,
+        'province': Province,  # province
+        'city': City,  # city
+        'county': County,  # county
+        'registerDepartment': BelongOrg,  # registration authority
+        'scale': Info,  # staff size
+        'insured': can_bao,  # insured headcount
+        'beforeName': OriginalName,  # former names
+        'englishName': EnglishName,  # English name
+        'importExportEnterpriseCode': IxCode,  # import/export enterprise code
+        'address': Address,  # address
+        'businessRange': Scope,  # business scope
+        'status': 0,  # status
+    }
+
+    aa_dict_list.append(aa_dict)
+    print(company_name + "：爬取完成")
+    return aa_dict_list
+
+
+if __name__ == '__main__':
+    taskType = '基本信息/企查查'
+    # Request headers for xcx.qcc.com. This name (and `token` below) is a
+    # module global that info_by_id reads when it is called.
+    headers = {
+        'Host': 'xcx.qcc.com',
+        'Connection': 'keep-alive',
+        'Qcc-Platform': 'mp-weixin',
+        'Qcc-Timestamp': '',
+        'Qcc-Version': '1.0.0',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
+        'content-type': 'application/json',
+        'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
+        'Accept-Encoding': 'gzip, deflate, br,'
+    }
+
+    # Pull stock codes from redis one at a time and resolve each to a Qichacha id.
+    while True:
+        # TODO: the token has to be re-captured roughly every two hours; it is
+        # read from the database.
+        token = baseCore.GetToken()
+        start_time = time.time()
+
+        gpdm = baseCore.redicPullData('EnterpriseIpoqccid:nq_gpdm')
+        if not gpdm:
+            # assumes redicPullData returns a falsy value when the queue is
+            # empty — TODO confirm. Previously this crashed on `None + '.NQ'`.
+            log.info('redis已经没有数据了，采集结束')
+            break
+        # The search key is the stock code with the '.NQ' suffix appended.
+        com_code = gpdm + '.NQ'
+
+        company_id = find_id_by_name(start_time,token,com_code)
+
+        if not company_id:
+            log.info(com_code + "：企业ID获取失败===重新放入redis")
+            # Re-queue the raw code, not the suffixed one: the suffix is added
+            # again on the next pull, which used to produce 'xxx.NQ.NQ'.
+            baseCore.rePutIntoR('EnterpriseIpoqccid:nq_gpdm',gpdm)
+            log.info('-----已重新放入redis-----')
+            time.sleep(20)
+            continue
+
+        log.info(f'====={com_code}===={company_id}=====获取企业id成功=====')
+        # Store the Qichacha id in the gpdm table. Values are bound as
+        # parameters instead of being interpolated into the SQL text
+        # (assumes the DB driver uses the %s paramstyle — TODO confirm).
+        updateSql = "update gpdm set QCCID = %s where gpdm = %s"
+        cursor_.execute(updateSql, (company_id, com_code))
+        cnx_.commit()
+
+        # NOTE: the follow-up pipeline (info_by_id lookup, Kafka push to the
+        # "regionInfo" topic, and Excel export of name comparisons / failures)
+        # is currently disabled; only the id lookup above is performed.
+
+
+
+
+
+
--- a/comData/dfcfwGpdm/NQenterprise/getQccId.py
+++ b/comData/dfcfwGpdm/NQenterprise/getQccId.py
+
+# -*- coding: utf-8 -*-
+
+import time
+from urllib.parse import quote
+import requests
+import urllib3
+from base.BaseCore import BaseCore
+
+# Project helper instance; this module uses its logger and getTimeCost.
+baseCore = BaseCore()
+log = baseCore.getLogger()
+# Request headers sent with every search call to xcx.qcc.com. 'Qcc-Timestamp'
+# is left empty here and overwritten by find_id_by_name before each request.
+headers = {
+        'Host': 'xcx.qcc.com',
+        'Connection': 'keep-alive',
+        'Qcc-Platform': 'mp-weixin',
+        'Qcc-Timestamp': '',
+        'Qcc-Version': '1.0.0',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
+        'content-type': 'application/json',
+        'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
+        'Accept-Encoding': 'gzip, deflate, br,'
+    }
+# Look up a Qichacha id by company name, credit code or any other search key.
+def find_id_by_name(start,token,name):
+    """Search Qichacha for *name* and return the id of the first hit.
+
+    Args:
+        start: Start timestamp, used only to report elapsed time in logs.
+        token: Session token sent as the ``token`` query parameter.
+        name: Search key; it is URL-quoted before being sent.
+
+    Returns:
+        The ``KeyNo`` of the first search result; ``''`` when the search
+        returns no result or the first result has an empty name; ``False``
+        when the request keeps failing, the API reports status 40101 / 401,
+        or the response cannot be parsed.
+    """
+    urllib3.disable_warnings()
+
+    qcc_key = name
+    t = str(int(time.time()) * 1000)
+    headers['Qcc-Timestamp'] = t
+    url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="
+
+    # Up to five attempts; resp_dict stays None if none of them succeeds.
+    resp_dict = None
+    for _attempt in range(5):
+        try:
+            resp_dict = requests.get(url=url, headers=headers, verify=False).json()
+            break
+        except (requests.RequestException, ValueError):
+            print('重试')
+            time.sleep(5)
+    if not isinstance(resp_dict, dict):
+        # Previously this path fell through and raised UnboundLocalError.
+        log.info(f'====请求失败，重试次数用尽====时间{baseCore.getTimeCost(start, time.time())}')
+        return False
+    time.sleep(2)
+
+    # Known error payloads: status 40101 means the session token is invalid,
+    # status 401 means the account is being rate-limited.
+    status = resp_dict.get('status')
+    if status == 40101:
+        log.info(f'====token失效====时间{baseCore.getTimeCost(start, time.time())}')
+        return False
+    if status == 401:
+        log.info(f'=======您的账号访问超频，请升级小程序版本=====时间{baseCore.getTimeCost(start, time.time())}')
+        return False
+
+    try:
+        results = resp_dict['result']['Result']
+        if results:
+            first_hit = results[0]
+            KeyNo = first_hit['KeyNo']
+            # The API wraps matched text in <em> tags; strip them before
+            # checking whether the hit has a name at all.
+            Name = first_hit['Name'].replace('<em>', '').replace('</em>', '').strip()
+            if Name == '':
+                KeyNo = ''
+        else:
+            KeyNo = ''
+    except (KeyError, IndexError, TypeError, AttributeError):
+        # An unexpected response shape is reported the same way as before.
+        log.info(f'====token失效====时间{baseCore.getTimeCost(start,time.time())}')
+        return False
+
+    print("{}，企业代码为:{}".format(qcc_key, KeyNo))
+    return KeyNo
\ No newline at end of file
--- a/comData/weixin_solo/wxList.py
+++ b/comData/weixin_solo/wxList.py
@@ -262,7 +262,8 @@ if __name__=="__main__":
            log.info("redis已经没有数据了，重新放置数据")
            getFromSql()
            time.sleep(60)
-            infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
+            continue
+            # infoSourceCode = baseCore.redicPullData('WeiXinGZH:infoSourceCode')
        getWxList(infoSourceCode)

    # infoSourceCode = 'IN-20220917-0159'