获取企查查id

903604f5 · 薛凌堃 · c1e02a33 · 903604f5
--- a/comData/BaseInfo_qcc/base_info_1101.py
+++ b/comData/BaseInfo_qcc/base_info_1101.py
+# -*- coding: utf-8 -*-
+import time
+from urllib.parse import quote
+import requests
+import json
+import urllib3
+from kafka import KafkaProducer
+from base.BaseCore import BaseCore
+# Shared project helper object; this script uses it for logging, the database
+# handles below, and the Redis/token/run-count helpers called in the main loop.
+baseCore = BaseCore()
+log = baseCore.getLogger()
+# Database connection and cursor exposed by BaseCore; used to write the QCC id
+# back to the EnterpriseInfo table (cursor_.execute / cnx_.commit).
+cnx_ = baseCore.cnx
+cursor_ = baseCore.cursor
+# Look up a company's Qichacha (QCC) id by company name or social credit code.
+def find_id_by_name(start,token,social_code):
+    """Search the QCC mini-program API and return the first hit's ``KeyNo``.
+
+    Relies on the module-level ``headers`` dict, which is only created when
+    the file runs as a script; its ``Qcc-Timestamp`` entry is updated here.
+
+    Args:
+        start: Start timestamp, used only to log the elapsed time through
+            ``baseCore.getTimeCost``.
+        token: Session token sent as the ``token`` query parameter.
+        social_code: Search key; callers pass either a social credit code or
+            a company name.
+
+    Returns:
+        The ``KeyNo`` of the first search result; the string ``'null'`` when
+        the search returns no result or a result with an empty name; ``False``
+        when the request keeps failing, the API reports an invalid or
+        throttled session, or the payload has an unexpected shape.
+    """
+    urllib3.disable_warnings()
+    qcc_key = social_code
+    t = str(int(time.time()) * 1000)
+    headers['Qcc-Timestamp'] = t
+    url = f"https://xcx.qcc.com/mp-weixin/forwardApp/v3/base/advancedSearch?token={token}&t={t}&pageIndex=1&needGroup=yes&insuredCntStart=&insuredCntEnd=&startDateBegin=&startDateEnd=&registCapiBegin=&registCapiEnd=&countyCode=&province=&sortField=&isSortAsc=&searchKey={quote(qcc_key)}&searchIndex=default&industryV3="
+    resp_dict = None
+    for _ in range(5):
+        try:
+            resp_dict = requests.get(url=url, headers=headers, verify=False).json()
+            break
+        except Exception:
+            # Best-effort retry on any request/JSON error, but no longer
+            # swallows KeyboardInterrupt/SystemExit like a bare except did.
+            print('重试')
+            time.sleep(5)
+    if not isinstance(resp_dict, dict):
+        # Every attempt failed (or the body was not a JSON object). Previously
+        # this fell through to an unbound ``resp_dict`` and raised NameError.
+        log.info(f'====请求失败，未获取到有效响应====时间{baseCore.getTimeCost(start, time.time())}')
+        return False
+    time.sleep(2)
+    # Known failure payloads, e.g.
+    # {'status': 40101, 'message': '无效的sessionToken!'}
+    # {'status': 401, 'message': '您的账号访问超频，请升级小程序版本'}
+    failure_messages = {
+        40101: '====token失效====',
+        401: '=======您的账号访问超频，请升级小程序版本=====',
+        40102: '=======无效的session=====',
+    }
+    status = resp_dict.get('status')
+    if status in failure_messages:
+        log.info(f'{failure_messages[status]}时间{baseCore.getTimeCost(start, time.time())}')
+        return False
+    try:
+        results = resp_dict['result']['Result']
+        if results:
+            first_hit = results[0]
+            KeyNo = first_hit['KeyNo']
+            # The API wraps the matched text in <em> tags; strip them before
+            # checking whether the hit actually carries a name.
+            Name = first_hit['Name'].replace('<em>', '').replace('</em>', '').strip()
+            if Name == '':
+                KeyNo = 'null'
+        else:
+            KeyNo = 'null'
+    except (KeyError, IndexError, TypeError, AttributeError):
+        # Unexpected payload shape is reported as a token failure, as before.
+        log.info(f'====token失效====时间{baseCore.getTimeCost(start, time.time())}')
+        return False
+    print("{}，企业代码为:{}".format(qcc_key, KeyNo))
+    return KeyNo
+# Check whether a string contains at least one digit character.
+def str_have_num(str_num):
+    """Return True if any character of ``str_num`` is a digit, else False."""
+    return any(ch.isdigit() for ch in str_num)
+# Fetch a company's official website from QCC by its QCC id.
+def info_by_id(com_id,com_name):
+    """Return the website recorded for the company with QCC id ``com_id``.
+
+    Relies on the module-level ``headers`` and ``token`` names, which are only
+    assigned when the file runs as a script.
+
+    Args:
+        com_id: QCC id, sent as the ``unique`` query parameter.
+        com_name: Company name, used only in the progress messages.
+
+    Returns:
+        ``Company.companyExtendInfo.WebSite`` when that value is present and
+        not None; otherwise the ``Url`` of the first
+        ``Company.ContactInfo.WebSite`` entry; otherwise ``''``.
+    """
+    t = str(int(time.time()) * 1000)
+    headers['Qcc-Timestamp'] = t
+    url = "https://xcx.qcc.com/mp-weixin/forwardApp/v1/ent/detail?token={}&t={}&unique={}".format(token, t, com_id)
+    resp_dict = requests.get(url=url, headers=headers, verify=False).json()
+    time.sleep(2)
+    lookup_errors = (KeyError, IndexError, TypeError)
+    try:
+        result_dict = resp_dict['result']['Company']
+    except lookup_errors:
+        # Bind explicitly so the lookups below fail with a caught TypeError
+        # rather than a NameError hidden by a bare except.
+        result_dict = None
+        print(com_name + ":获取失败")
+    try:
+        WebSite = result_dict['companyExtendInfo']['WebSite']
+    except lookup_errors:
+        WebSite = None
+    if WebSite is None:
+        # Fall back to the first website listed in the contact info.
+        try:
+            WebSite = result_dict['ContactInfo']['WebSite'][0]['Url']
+        except lookup_errors:
+            WebSite = ''
+    print(com_name + "：爬取完成")
+    return WebSite
+# Script entry point: resolve and store the QCC id for companies queued in Redis.
+if __name__ == '__main__':
+    taskType = '基本信息/企查查/id'
+    # Request headers shared (as a module global) with find_id_by_name and
+    # info_by_id, which overwrite 'Qcc-Timestamp' before every request.
+    headers = {
+        'Host': 'xcx.qcc.com',
+        'Connection': 'keep-alive',
+        'Qcc-Platform': 'mp-weixin',
+        'Qcc-Timestamp': '',
+        'Qcc-Version': '1.0.0',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat',
+        'content-type': 'application/json',
+        'Referer': 'https://servicewechat.com/wx395200814fcd7599/166/page-frame.html',
+        'Accept-Encoding': 'gzip, deflate, br,'
+    }
+    name_list = []
+    # Worker loop: each iteration pulls one company key from Redis.
+    while True:
+        # TODO: tokens have to be re-captured roughly every two hours; the
+        # current token is read from the database through BaseCore.
+        token = baseCore.GetToken()
+        if not token:
+            log.info('==========已无token==========')
+            time.sleep(30)
+            continue
+        start_time = time.time()
+        # Pull the next company key from the Redis queue.
+        social_code = baseCore.redicPullData('BaseInfoEnterprise:gnqy_socialCode')
+        if social_code == '' or social_code is None:
+            time.sleep(20)
+            continue
+        # Entries re-queued with the "not found" marker are skipped.
+        if '搜索不到' in social_code:
+            continue
+        dic_info = baseCore.getInfomation(social_code)
+        log.info(f'----当前企业{social_code}--开始处理---')
+        count = dic_info[14]
+        com_name = dic_info[1]
+        social_code = dic_info[2]
+        # QCC id already stored for this company, if any.
+        company_id = dic_info[12]
+        if company_id == '' or company_id is None:
+            # Search by social credit code when there is one, else by name.
+            search_key = social_code if social_code else com_name
+            company_id = find_id_by_name(start_time,token,search_key)
+            if company_id == 'null':
+                log.info('=====搜索不到该企业====')
+                # TODO: companies that cannot be found may have no credit code
+                # and cannot be passed on; generate a credit code for them.
+                # ``or ''`` keeps a missing (None) social code from raising
+                # TypeError in the concatenation.
+                baseCore.rePutIntoR('BaseInfoEnterprise:gnqy_socialCode', (social_code or '') + '：搜索不到')
+                continue
+            if not company_id:
+                # False means the lookup failed: re-queue the company and drop
+                # the token that was used for the failed request.
+                log.info(com_name + "：企业ID获取失败===重新放入redis")
+                baseCore.rePutIntoR('BaseInfoEnterprise:gnqy_socialCode',social_code)
+                baseCore.delete_token(token)
+                log.info('=====已重新放入redis,失效token已删除======')
+                time.sleep(20)
+                continue
+            log.info(f'====={social_code}===={company_id}=====获取企业id成功=====')
+            # Persist the resolved id. company_id comes from a remote API
+            # response, so it is bound as a parameter rather than interpolated.
+            # NOTE(review): assumes a DB-API driver with the %s paramstyle
+            # (MySQL-style) - confirm against BaseCore.
+            updateSql = "update EnterpriseInfo set QCCID = %s where SocialCode = %s"
+            cursor_.execute(updateSql, (company_id, social_code))
+            cnx_.commit()
+            log.info('----企查查id已更新-----')
+        # After processing, bump this company's collection run count.
+        runType = 'BaseInfoRunCount'
+        count += 1
+        baseCore.updateRun(social_code, runType, count)