丁双波 / zzsn_spider · Commits

Commit 930da4ff
Authored September 21, 2023 by LiJunMing
新三板财务数据脚本维护 (maintenance of the NEEQ financial data scripts)
Parent: e3ee9068
Showing 3 changed files with 9 additions and 253 deletions.
comData/annualReport_ZJH/CenterPerson/核心人员.py (+1, -251)
comData/dfcfwGpdm/NQenterprise/新三板--公告.py (+3, -2)
fdfs_client/__init__.py (+5, -0)
comData/annualReport_ZJH/CenterPerson/核心人员.py
deleted 100644 → 0
++ /dev/null
import json
import requests, time, re, random, pymysql
import pandas as pd
from bs4 import BeautifulSoup
import urllib3
from base.BaseCore import BaseCore

baseCore = BaseCore()
log = baseCore.getLogger()
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
cnx = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='clb_project', charset='utf8mb4')
cursor = cnx.cursor()
cnx_ = baseCore.cnx
cursor_ = baseCore.cursor


def get_proxy():
    sql = "select proxy from clb_proxy"
    cursor.execute(sql)
    proxy_lists = cursor.fetchall()
    ip_list = []
    for proxy_ in proxy_lists:
        ip_list.append(str(proxy_).replace("('", '').replace("',)", ''))
    proxy_list = []
    for str_ip in ip_list:
        str_ip_list = str_ip.split('-')
        proxyMeta = "http://%(host)s:%(port)s" % {
            "host": str_ip_list[0],
            "port": str_ip_list[1],
        }
        proxy = {
            "HTTP": proxyMeta,
            "HTTPS": proxyMeta
        }
        proxy_list.append(proxy)
    return proxy_list


headers = {
    'Cookie': 'TYCID=82cbe530204b11ed9f23298cecec1c60; ssuid=3927938144; _ga=GA1.2.1842488970.1670638075; jsid=SEO-BAIDU-ALL-SY-000001; tyc-user-info={%22state%22:%220%22%2C%22vipManager%22:%220%22%2C%22mobile%22:%2215565837784%22}; tyc-user-info-save-time=1678953978429; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNTU2NTgzNzc4NCIsImlhdCI6MTY3ODk1Mzk3OCwiZXhwIjoxNjgxNTQ1OTc4fQ.wsNxLWMkZVrtOEvo_CCDPD38R7F23c5yk7dFAdHkwFPkZhEEvmiv0nlt7UD0ZWfo3t8aYxc4qvu4ueEgMubJ5g; tyc-user-phone=%255B%252215565837784%2522%255D; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22284710084%22%2C%22first_id%22%3A%22182b9ca585ead-089598c1d7f7928-26021d51-1327104-182b9ca585f7f1%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfbG9naW5faWQiOiIyODQ3MTAwODQiLCIkaWRlbnRpdHlfY29va2llX2lkIjoiMTgyYjljYTU4NWVhZC0wODk1OThjMWQ3Zjc5MjgtMjYwMjFkNTEtMTMyNzEwNC0xODJiOWNhNTg1ZjdmMSJ9%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22284710084%22%7D%2C%22%24device_id%22%3A%22182b9ca585ead-089598c1d7f7928-26021d51-1327104-182b9ca585f7f1%22%7D; HWWAFSESID=fa776898fa88a6520ea; HWWAFSESTIME=1679899464128; csrfToken=m3cB6mHsznwIuppkT-S8oYc6; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1679016180,1679471093,1679732923,1679899468; bdHomeCount=28; bannerFlag=true; show_activity_id_92=92; searchSessionId=1679899783.48494979; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1679899783',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
}

list_code = []
while True:
    list_weicha = []
    list_all_info = []
    name_list = []
    start_time = time.time()
    # 获取企业信息
    query = "SELECT * FROM Tfbs where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and state2 is null limit 1 "
    # 兴业银行
    # query = "SELECT * FROM Tfbs where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and col5='兴业银行'"
    cursor_.execute(query)
    row = cursor_.fetchone()
    if row:
        pass
    else:
        print('没有数据了,结束脚本')
        break
    com_name = row[6]
    social_code = row[4]
    code = row[7]
    time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    updateBeginSql = f"update Tfbs set state1=0,date2='{time_now}' where col3='{social_code}' "
    # print(updateBeginSql)
    cursor_.execute(updateBeginSql)
    cnx_.commit()

    t = time.time()
    ip = get_proxy()[random.randint(0, 3)]
    url_t = f'https://www.tianyancha.com/search?key={social_code}&sessionNo={t}'
    res_t = requests.get(url_t, headers=headers, proxies=ip, verify=False)  # , proxies=ip,verify=False
    time.sleep(10)
    soup_t = BeautifulSoup(res_t.content, 'html.parser')
    try:
        com_id = soup_t.find('div', {'class': 'index_header__x2QZ3'}).find('a').get('href').split('/')[-1]
        print(f"{com_name}:{com_id}")
    except:
        com_id = '--'
        print(f'{com_name}:没有查询到该企业')
    # colext1获取天眼查id
    updateBeginSql = f"update Tfbs set state2=0,colext1='{com_id}',date2='{time_now}' where col3='{social_code}' "
    cursor_.execute(updateBeginSql)
    cnx_.commit()
    log.info(f'{com_name}===天眼查id更新入库===== ')
    if com_id == '--':
        continue

    list_one_info = []
    list_all_1 = []
    list_all_2 = []
    # 采集天眼查企业核心人员并通过接口入库
    log.info('=====开始采集企业核心人员=======')
    print(f'{social_code}:{com_id}')
    num = 1
    for page in range(1, 2):
        t = int(time.time() * 1000)
        url = f'https://capi.tianyancha.com/cloud-listed-company/listed/noRepeatSeniorExecutive?_={t}&gid={com_id}&pageSize=20&pageNum={page}'
        ip = get_proxy()[random.randint(0, 3)]
        res = requests.get(url, headers=headers, proxies=ip)  # ,verify=False
        time.sleep(10)
        list_all = res.json()['data']['dataList']
        if list_all:
            for one_info in list_all:
                name = one_info['name']
                sex = one_info['sex']
                education = one_info['education']
                position = one_info['position']
                Salary = one_info['salary']
                try:
                    birthYear = 2023 - int(one_info['age'])
                except:
                    birthYear = ''
                StockKeepings = one_info['numberOfShares']
                currentTerm = one_info['term']
                personInfo = one_info['resume']
                try:
                    person_img = one_info['logo']
                except:
                    person_img = '--'
                dic_json = {
                    "socialCreditCode": social_code,
                    "name": name,
                    "sex": sex,
                    "education": education,
                    "position": position,
                    "salary": Salary,
                    "birthYear": birthYear,
                    "shareNum": StockKeepings,
                    "shareRatio": '',
                    "benefitShare": '',
                    "currentTerm": currentTerm,
                    "personInfo": personInfo,
                    "sort": str(num)
                }
                dic_json_img = {
                    "socialCreditCode": social_code,
                    "name": name,
                    "sex": sex,
                    "education": education,
                    "position": position,
                    "salary": Salary,
                    "birthYear": birthYear,
                    "shareNum": StockKeepings,
                    "shareRatio": '',
                    "benefitShare": '',
                    "currentTerm": currentTerm,
                    "personInfo": personInfo,
                    "头像": person_img,
                    "sort": str(num)
                }
                num = num + 1
                list_one_info.append(dic_json)
                list_all_2.append(dic_json_img)
        else:
            t = int(time.time() * 1000)
            url = f'https://capi.tianyancha.com/cloud-company-background/company/dim/staff?_={t}&gid={com_id}&pageSize=20&pageNum={page}'
            ip = get_proxy()[random.randint(0, 3)]
            res = requests.get(url, headers=headers, proxies=ip)  # ,verify=False
            list_all = res.json()['data']['result']
            for one_info in list_all:
                name = one_info['name']
                sex = ''
                education = ''
                position = one_info['typeSore']
                Salary = ''
                birthYear = ''
                shareRatio = one_info['percent']
                try:
                    benefitShare = one_info['finalBenefitShares']
                except:
                    benefitShare = ''
                person_id = one_info['id']
                person_url = f'https://www.tianyancha.com/human/{person_id}-c{com_id}'
                person_res = requests.get(person_url, headers=headers, proxies=ip)
                person_soup = BeautifulSoup(person_res.content, 'html.parser')
                try:
                    personInfo = person_soup.find('span', {'class': '_56d0a'}).text.strip()
                except:
                    personInfo = ''
                try:
                    person_img = one_info['logo']
                except:
                    person_img = '--'
                dic_json = {
                    "socialCreditCode": social_code,
                    "name": name,
                    "sex": sex,
                    "education": education,
                    "position": position,
                    "salary": Salary,
                    "birthYear": birthYear,
                    "shareNum": '',
                    "shareRatio": shareRatio,
                    "benefitShare": benefitShare,
                    "currentTerm": '',
                    "personInfo": personInfo,
                    "sort": str(num)
                }
                dic_json_img = {
                    "socialCreditCode": social_code,
                    "name": name,
                    "sex": sex,
                    "education": education,
                    "position": position,
                    "salary": Salary,
                    "birthYear": birthYear,
                    "shareNum": '',
                    "shareRatio": shareRatio,
                    "benefitShare": benefitShare,
                    "currentTerm": '',
                    "personInfo": personInfo,
                    "头像": person_img,
                    "sort": str(num)
                }
                num = num + 1
                list_one_info.append(dic_json)
                list_all_2.append(dic_json_img)
    log.info(f'{com_name}===该企业采集完成====')
    df_info = pd.DataFrame(list_one_info)
    df_info.to_excel('主要人员.xlsx', index=False)
    json_updata = json.dumps(list_one_info)
    if json_updata == '[]':
        continue
    else:
        pass
    response = requests.post('http://114.115.236.206:8088/sync/executive', data=json_updata, timeout=300, verify=False)
    print(response.text)

cnx.close()
cursor.close()
baseCore.close()
# df_img = pd.DataFrame(list_all_2)
# df_img.to_excel('企业主要人员-头像-23年500强新榜.xlsx',index=False)
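For context on the deleted script above: get_proxy() assumes each clb_proxy row stores one proxy as a single "host-port" string, and the replace() calls only strip the tuple wrapper that pymysql's fetchall() adds around each value. A minimal, self-contained sketch of that parsing, with a hypothetical row value rather than real table contents:

# Hypothetical fetchall()-style row; real clb_proxy contents are not part of this commit.
row = ("1.2.3.4-8080",)
str_ip = str(row).replace("('", "").replace("',)", "")  # -> "1.2.3.4-8080"
host, port = str_ip.split("-")
proxyMeta = "http://%(host)s:%(port)s" % {"host": host, "port": port}
proxy = {"HTTP": proxyMeta, "HTTPS": proxyMeta}
print(proxy)  # {'HTTP': 'http://1.2.3.4:8080', 'HTTPS': 'http://1.2.3.4:8080'}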
comData/dfcfwGpdm/NQenterprise/新三板--公告.py
...
@@ -24,8 +24,9 @@ cursor_ = baseCore.cursor_
 # tracker_conf = get_tracker_conf('./client.conf')
 # client = Fdfs_client(tracker_conf)
-taskType = '企业公告/证监会'
+taskType = '企业公告/证监会/新三板'
+#todo:股转公告和挂牌审核包含在公司公告中,没有单独的id
 type_map = {
     'zljgcs': '自律监管措施',
     'wxh': '问询函',
...
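The type_map in this hunk appears to translate the source site's short announcement-type codes into their Chinese display labels. A small sketch of such a lookup, assuming a fallback to the raw code for unmapped types (that fallback is an illustrative assumption, not taken from this diff):

type_map = {
    'zljgcs': '自律监管措施',
    'wxh': '问询函',
}

def announcement_type(code: str) -> str:
    # Return the mapped label, or the raw code when it is not in the map (assumed fallback).
    return type_map.get(code, code)

print(announcement_type('wxh'))   # 问询函
print(announcement_type('ggzq'))  # ggzq (unmapped, returned as-is)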
fdfs_client/__init__.py
0 → 100644
# __init__.py
__version__ = '2.2.0'
VERSION = tuple(map(int, __version__.split('.')))
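The new fdfs_client/__init__.py keeps the version both as a string and as a tuple of ints; the tuple form supports numeric, element-wise comparison, which a plain string comparison would get wrong (as strings, '2.10.0' sorts before '2.2.0'). A brief usage sketch with a made-up minimum version:

from fdfs_client import VERSION, __version__

# Tuple comparison is numeric and element-wise, so (2, 10, 0) > (2, 2, 0) holds.
if VERSION >= (2, 2, 0):
    print(f"fdfs_client {__version__} satisfies the assumed minimum version")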