Commit a97764ed  Author: 薛凌堃

9/14

Parent 495275b6
...
@@ -339,7 +339,7 @@ if __name__ == '__main__':
             continue
         dic_info = baseCore.getInfomation(social_code)
         log.info(f'----当前企业{social_code}--开始处理---')
-        count = dic_info[13]
+        count = dic_info[14]
         com_name = dic_info[1]
         social_code = dic_info[2]
         # Qichacha id
...
@@ -66,8 +66,8 @@ def getTycIdByXYDM(xydm):
 def updateTycInfo():
     while True:
         # Use the social credit code pulled from Redis to fetch the matching basic info from the database
-        # social_code = baseCore.redicPullData('NewsEnterprise:gnqy_socialCode')
-        social_code = '9111000066990444XF'
+        social_code = baseCore.redicPullData('NewsEnterprise:gnqy_socialCode')
+        # social_code = '9111000066990444XF'
         # If Redis has run out of data, wait
         if social_code == None:
             time.sleep(20)
@@ -88,7 +88,7 @@ def updateTycInfo():
         try:
             retData = getTycIdByXYDM(xydm)
             if retData['tycData'] and retData['reput']:
-                tycid = retData['tycData']['id']
+                tycid = retData['id']
                 # todo: write to the database
                 updateSql = f"update EnterpriseInfo set TYCID = '{tycid}' where SocialCode = '{xydm}'"
                 cursor_.execute(updateSql)
...
import json
import redis
from bs4 import BeautifulSoup
import langid
from base.BaseCore import BaseCore
baseCore = BaseCore()
import pymysql
# print(baseCore.detect_language("是对jhjjhjhhjjhjhjh的浮点数"))
# cnx_ = baseCore.cnx
# cursor_ = baseCore.cursor
cnx_ = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
                       charset='utf8mb4')
cursor_ = cnx_.cursor()
# updateBeginSql = f"update Tfbs set state3=%s where col3=%s "
# # print(updateBeginSql)
# cursor_.execute(updateBeginSql, (200, '91350000158142711F'))
# cnx_.commit()
import time
# from getTycId import getTycIdByXYDM
# social_code = '91440101231247350J'
# data = baseCore.getInfomation(social_code)
# tycid = data[11]
# if tycid == None:
#     print(data)
#     retData = getTycIdByXYDM(social_code)
#     tycid = retData['tycData']['id']
#     print(tycid)
# time_struct = time.localtime(int(1692762780000 / 1000))  # First convert the timestamp to struct_time
# time_format = time.strftime("%Y-%m-%d %H-%M-%S", time_struct)  # Then format struct_time as a string
# print(time_format)
# r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=6)
# # source key
# key1 = 'CorPersonEnterpriseFbs:gnqy_socialCode'
# # target key
# key2 = 'NewsEnterpriseFbs:gnqy_socialCode'
# values = r.lrange(key1, 0, -1)
# for value in values:
#     r.rpush(key2, value)
#
# # Close the Redis connection
# r.close()
list_all = []
if list_all:
    print(len(list_all))
else:
    print('---')
"""
Xueqiu financial data, fetched via the API
"""
import json
import time
import redis
import requests
from bs4 import BeautifulSoup
import datetime
from selenium import webdriver
from base.BaseCore import BaseCore

baseCore = BaseCore()
log = baseCore.getLogger()

def getFormatedate(timestamp):
    date = datetime.datetime.fromtimestamp(timestamp)
    formatted_date = date.strftime('%Y-%m-%d')
    return formatted_date
def check_code(com_code):
    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    res = r.exists('com_xqcwsj_code::' + com_code)
    if res:
        return False
    else:
        return True

def check_date(com_code, info_date):
    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    res = r.sismember('com_xqcwsj_code::' + com_code, info_date)  # note: stored as a set
    if res:
        return True
    else:
        return False

# Save the collected report periods for each stock code into Redis
def add_date(com_code, report_date):
    r = redis.Redis(host="114.115.236.206", port=6379, password='clbzzsn', db=3)
    res = r.sadd('com_xqcwsj_code::' + com_code, report_date)
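# A minimal usage sketch (not part of the original flow) of the three Redis helpers above:
# check_code() asks whether a stock has ever been collected (key existence),
# check_date() asks whether one report period is already in that stock's set,
# add_date() records a period once it has been collected.
#
# if check_date('NQ873286', '2023-06-30'):      # hypothetical code and period
#     pass                                      # already collected, skip
# else:
#     # ... collect the three statements for this period ...
#     add_date('NQ873286', '2023-06-30')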
def getrequests(url):
    req = requests.get(url=url, headers=headers)
    data = req.json()
    a_infoData = data['data']['list']
    return a_infoData
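# The Xueqiu finance endpoints are assumed (not verified here) to answer with JSON shaped like
#   {"data": {"list": [{"report_date": <ms timestamp>, "total_revenue": [value, yoy], ...}, ...]}}
# which is why getdetail() below reads reportInfodata[<en_name>][0] for the raw value.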
def getdriver(url_name):
    # Open the page with the browser driver (`browser` is created in __main__ below)
    browser.get(url_name)
    time.sleep(3)
    page_source = browser.page_source
    soup = BeautifulSoup(page_source, 'html.parser')
    return soup

def getdetail(reportInfodata, name_map, listinfo, url_name):
    # # report period
    # report_date = reportInfodata['report_date']
    # report_date = getFormatedate(int(report_date / 1000))
    # Request the page via the simulated browser
    soup = getdriver(url_name)
    time.sleep(2)
    # Statement table (the same parser serves the income, balance and cash-flow pages)
    table = soup.find('div', class_='tab-table-responsive')
    list_tr = table.find_all('tr')
    for tr in list_tr[1:]:
        info_name = tr.find('td', colspan='2').text
        # e.g. 营业总收入 (total operating revenue)
        try:
            info_enname = name_map[info_name]
            info_data = reportInfodata[info_enname][0]
            if info_data is None:
                info_data = '--'
        except:
            info_enname = '--'
            info_data = '--'
        dic_info = {
            "name": info_name,
            'enName': info_enname,
            "value": info_data
        }
        listinfo.append(dic_info)
    return listinfo
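# Each entry appended to listinfo above ends up shaped like (field names per the maps below):
#   {"name": "营业总收入", "enName": "total_revenue", "value": 123456789.0}
# with enName and value both forced to '--' when the row has no English mapping or no data.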
def getinfo(com_code):
    dic_info = {}
    for nnn in range(0, 3):
        try:
            ynFirst = check_code(com_code)
            break
        except:
            time.sleep(1)
    # 'https://stock.xueqiu.com/v5/stock/finance/cn/balance.json?symbol=NQ873286&type=all&is_detail=true&count=5&timestamp=1694508688637'
    url_lrb = f'https://stock.xueqiu.com/v5/stock/finance/cn/income.json?symbol={com_code}&type=all&is_detail=true&count=5&timestamp=1694414063178'
    url_zcfzb = f'https://stock.xueqiu.com/v5/stock/finance/cn/balance.json?symbol={com_code}&type=all&is_detail=true&count=5&timestamp=1694508688637'
    url_xjllb = f'https://stock.xueqiu.com/v5/stock/finance/cn/cash_flow.json?symbol={com_code}&type=all&is_detail=true&count=5&timestamp=1694512695956'
    lrb_name = f'https://xueqiu.com/snowman/S/{com_code}/detail#/GSLRB'
    zcfzb_name = f'https://xueqiu.com/snowman/S/{com_code}/detail#/ZCFZB'
    xjllb_name = f'https://xueqiu.com/snowman/S/{com_code}/detail#/XJLLB'
    a_infoData = getrequests(url_lrb)
    b_infoData = getrequests(url_zcfzb)
    c_infoData = getrequests(url_xjllb)
    # Loop over the report periods
    for i in range(0, 5):
        listLrb = []
        listZcfzb = []
        listXjllb = []
        reportLrbdata = a_infoData[i]
        report_date = a_infoData[i]['report_date']
        # Convert the millisecond timestamp to a date
        report_date = getFormatedate(int(report_date / 1000))
        # Check whether this report period already exists
        for nnn in range(0, 3):
            try:
                panduan = check_date(com_code, report_date)
                if panduan:
                    return dic_info
                else:
                    pass
                break
            except:
                time.sleep(1)
        log.info(f'======正在采集:{report_date}=======')
        # Income statement
        list_Lrb = getdetail(reportLrbdata, lrb_name_map, listLrb, lrb_name)
        print(list_Lrb)
        # Balance sheet
        reportZcfzbdata = b_infoData[i]
        list_Zcfzb = getdetail(reportZcfzbdata, zcfzb_name_map, listZcfzb, zcfzb_name)
        # Cash flow statement
        reportXjllbdata = c_infoData[i]
        list_Xjllb = getdetail(reportXjllbdata, xjllb_name_map, listXjllb, xjllb_name)
        dic_info = {
            "socialCreditCode": social_code,
            "securitiesCode": com_code[2:],
            "date": report_date,
            "debt": list_Zcfzb,
            "profit": list_Lrb,
            "cash": list_Xjllb,
            "ynFirst": ynFirst,
        }
        print(dic_info)
        # End of one report period
        for nnn in range(0, 3):
            try:
                add_date(com_code, report_date)
                break
            except:
                time.sleep(1)
        log.info(f'----{com_code}--{report_date}----结束')
        # if dic_info:
        #     # Store the data via Kaige's interface
        #     data = json.dumps(dic_info)
        #     # print(data)
        #     url_baocun = 'http://114.115.236.206:8088/sync/finance/df'
        #     for nnn in range(0, 3):
        #         try:
        #             res_baocun = requests.post(url_baocun, data=data)
        #             break
        #         except:
        #             time.sleep(1)
        #     print(res_baocun.text)
    return dic_info
if __name__ == '__main__':
    info_date_list = []
    try:
        chromedriver = "D:/chrome/chromedriver.exe"
        browser = webdriver.Chrome(chromedriver)
    except Exception as e:
        print(e)
    social_code = ''
    # Chinese-to-English field name mapping
    lrb_name_map = {
lrb_name_map = {
'营业总收入':'total_revenue',
'其中:营业收入':'revenue',
'营业总成本':'operating_costs',
'其中:营业成本':'operating_cost',
'营业税金及附加':'operating_taxes_and_surcharge',
'销售费用':'sales_fee',
'管理费用':'manage_fee',
'研发费用':'rad_cost',
'财务费用':'financing_expenses',
'其中:利息费用':'finance_cost_interest_fee',
'利息收入':'finance_cost_interest_income',
'资产减值损失':'asset_impairment_loss',
'信用减值损失':'credit_impairment_loss',
'加:公允价值变动收益':'',
'投资收益':'invest_income',
'其中:对联营企业和合营企业的投资收益':'',
'资产处置收益':'asset_disposal_income',
'其他收益':'other_income',
'营业利润':'op',
'加:营业外收入':'non_operating_income',
'其中:非流动资产处置利得':'',
'减:营业外支出':'non_operating_payout',
'其中:非流动资产处置损失':'',
'利润总额':'profit_total_amt',
'减:所得税费用':'income_tax_expenses',
'净利润差额(合计平衡项目)':'',
'净利润':'net_profit',
'(一)持续经营净利润':'continous_operating_np',
'归属于母公司股东的净利润':'net_profit_atsopc',
'少数股东损益':'minority_gal',
'扣除非经常性损益后的净利润':'net_profit_after_nrgal_atsolc',
'基本每股收益':'basic_eps',
'稀释每股收益':'dlt_earnings_per_share',
'其他综合收益':'othr_compre_income',
'归属母公司所有者的其他综合收益':'',
'综合收益总额':'total_compre_income',
'归属于母公司股东的综合收益总额':'net_profit_atsopc',
'归属于少数股东的综合收益总额':'total_compre_income_atms'
    }
    zcfzb_name_map = {
'货币资金':'currency_funds',
'交易性金融资产':'',
'应收票据及应收账款':'ar_and_br',
'其中:应收票据':'bills_receivable',
'应收账款':'account_receivable',
'预付款项':'pre_payment',
'应收利息':'',
'应收股利':'',
'其他应收款':'othr_receivables',
'存货':'inventory',
'合同资产':'',
'划分为持有待售的资产':'',
'一年内到期的非流动资产':'nca_due_within_one_year',
'其他流动资产':'intangible_assets',
'流动资产合计':'total_current_assets',
'可供出售金融资产':'',
'持有至到期投资':'',
'长期应收款':'',
'长期股权投资':'',
'其他权益工具投资':'',
'其他非流动金融资产':'',
'投资性房地产':'',
'固定资产合计':'fixed_asset_sum',
'其中:固定资产':'fixed_asset',
'固定资产清理':'',
'在建工程合计':'construction_in_process_sum',
'其中:在建工程':'construction_in_process',
'工程物资':'',
'生产性生物资产':'',
'油气资产':'',
'无形资产':'intangible_assets',
'开发支出':'dev_expenditure',
'商誉':'',
'长期待摊费用':'lt_deferred_expense',
'递延所得税资产':'dt_assets',
'其他非流动资产':'othr_noncurrent_assets',
'非流动资产合计':'total_noncurrent_assets',
'资产合计':'total_assets',
'短期借款':'st_loan',
'交易性金融负债':'',
'衍生金融负债':'',
'应付票据及应付账款':'accounts_payable',
'应付票据':'',
'应付账款':'',
'预收款项':'',
'合同负债':'contract_liabilities',
'应付职工薪酬':'payroll_payable',
'应交税费':'tax_payable',
'应付利息':'',
'应付股利':'',
'其他应付款':'othr_payables',
'划分为持有待售的负债':'',
'一年内到期的非流动负债':'noncurrent_liab_due_in1y',
'其他流动负债':'othr_current_liab',
'流动负债合计':'total_current_liab',
'长期借款':'lt_loan',
'应付债券':'',
'长期应付款合计':'lt_payable_sum',
'长期应付款':'lt_payable',
'专项应付款':'',
'预计负债':'',
'递延所得税负债':'dt_liab',
'递延收益-非流动负债':'',
'其他非流动负债':'',
'非流动负债合计':'total_noncurrent_liab',
'负债合计':'total_liab',
'实收资本(或股本)':'shares',
'其他权益工具':'',
'永续债':'',
'资本公积':'capital_reserve',
'减:库存股':'',
'其他综合收益':'',
'专项储备':'special_reserve',
'盈余公积':'earned_surplus',
'未分配利润':'undstrbtd_profit',
'一般风险准备':'',
'外币报表折算差额':'',
'归属于母公司股东权益合计':'total_quity_atsopc',
'少数股东权益':'minority_equity',
'股东权益合计':'total_holders_equity',
'负债和股东权益总计':'total_assets'
    }
    xjllb_name_map = {
'销售商品、提供劳务收到的现金':'cash_received_of_sales_service',
'收到的税费返还':'refund_of_tax_and_levies',
'收到其他与经营活动有关的现金':'cash_received_of_othr_oa',
'经营活动现金流入小计':'sub_total_of_ci_from_oa',
'购买商品、接受劳务支付的现金':'goods_buy_and_service_cash_pay',
'支付给职工以及为职工支付的现金':'cash_paid_to_employee_etc',
'支付的各项税费':'payments_of_all_taxes',
'支付其他与经营活动有关的现金':'othrcash_paid_relating_to_oa',
'经营活动现金流出小计':'sub_total_of_cos_from_oa',
'经营活动产生的现金流量净额':'ncf_from_oa',
'收回投资收到的现金':'cash_received_of_dspsl_invest',
'取得投资收益收到的现金':"othrcash_paid_relating_to_fa",
'处置固定资产、无形资产和其他长期资产收回的现金净额':'net_cash_of_disposal_assets',
'处置子公司及其他营业单位收到的现金净额':'',
'收到其他与投资活动有关的现金':'',
'投资活动现金流入小计':'sub_total_of_ci_from_ia',
'购建固定资产、无形资产和其他长期资产支付的现金':'cash_paid_for_assets',
'投资支付的现金':'invest_paid_cash',
'取得子公司及其他营业单位支付的现金净额':'',
'支付其他与投资活动有关的现金':'',
'投资活动现金流出小计':'sub_total_of_cos_from_ia',
'投资活动产生的现金流量净额':'ncf_from_ia',
'筹资活动产生的现金流量':'',
'吸收投资收到的现金':'cash_received_of_absorb_invest',
'其中:子公司吸收少数股东投资收到的现金':'',
'取得借款收到的现金':'cash_received_of_borrowing',
'发行债券收到的现金':'',
'收到其他与筹资活动有关的现金':'cash_received_of_othr_fa',
'筹资活动现金流入小计':'sub_total_of_ci_from_fa',
'偿还债务支付的现金':'cash_pay_for_debt',
'分配股利、利润或偿付利息支付的现金':'cash_paid_of_distribution',
'其中:子公司支付给少数股东的股利':'',
'支付其他与筹资活动有关的现金':'othrcash_paid_relating_to_fa',
'筹资活动现金流出小计':'sub_total_of_cos_from_fa',
'筹资活动产生的现金流量净额':'ncf_from_fa',
'汇率变动对现金及现金等价物的影响':'',
'现金及现金等价物净增加额':'net_increase_in_cce',
'加:期初现金及现金等价物余额':'final_balance_of_cce',
'期末现金及现金等价物余额':'final_balance_of_cce'
    }
    # Note: map entries whose value is '' have no counterpart key in the API response;
    # getdetail() falls into its except branch for those rows and records '--'.
    table_type = ['income', 'balance']
    com_code = 'NQ' + '873286'
    headers = {
'authority': 'stock.xueqiu.com',
'method': 'GET',
'path': '/v5/stock/finance/cn/income.json?symbol=NQ873286&type=all&is_detail=true&count=5&timestamp=1694414063178',
'scheme': 'https',
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Cookie': 'device_id=84ced64554d8060750b1528dc22a3696; s=bt110kz0n8; cookiesu=661693188384462; u=661693188384462; Hm_lvt_1db88642e346389874251b5a1eded6e3=1693188388; xq_a_token=29bdb37dee2432c294425cc9e8f45710a62643a5; xqat=29bdb37dee2432c294425cc9e8f45710a62643a5; xq_r_token=3a35db27fcf5471898becda7aa5dab6afeafe471; xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOi0xLCJpc3MiOiJ1YyIsImV4cCI6MTY5NjgxMTc5NCwiY3RtIjoxNjk0NDEzNTQ2ODU4LCJjaWQiOiJkOWQwbjRBWnVwIn0.Xxu329nQq4bMtwKFJWlScnUUSWrky4T5SWkMum46c2G8la2z4g0d4nyvsO08WP-7moMffId6P3bGWuELULkbv6EHvIZgqge9-fAD4-hmLOjeRh96NsoGfyTAQK7tbnt9LhKz1fDg6SUi8loMqYgM7l-4g-ZM4B6zrZ5hKWdQJFLy0-V8Wzx7HTFYZSX3FNSsbgGqHlW4vykIpsRaNeOOX1M6LYdt6BhbAi1Iv4TflB08LIdu6F1n4dTRbmPq1KCndb2LsLR2HrJZmqmHJB9WMzwlVcIGdz778_CutNrwuWgJbWtb-s3dSESzO0WWw1uIIGZUvRl1D0KSl0P_GQLw9w; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1694414056',
'Origin': 'https://xueqiu.com',
'Pragma': 'no-cache',
'Referer': 'https://xueqiu.com/snowman/S/NQ873286/detail',
'Sec-Ch-Ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"Windows"',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
}
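    # NOTE: the Cookie and the timestamp query parameters above are session-bound values
    # captured from one browser session; they presumably expire and need refreshing.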
    dic_info = getinfo(com_code)
"""
Company listing info: only listed companies go into the enterprise library; unlisted companies skip the collection step. Delisted companies are flagged 0.
"""
import json
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import urllib3
from base.BaseCore import BaseCore
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# from gpdm import Gpdm
baseCore = BaseCore()
# chromedriver = r"E:\kkwork\zzsn_spider\comData\ipoInfo\chromedriver.exe"
# browser = webdriver.Chrome(chromedriver)
taskType = '上市信息/东方财富网/新三板'
# gpdm = Gpdm()
# gpdmList = gpdm.doJob()
log = baseCore.getLogger()
error_list = []
list_all_info = []
# Requires the stock code and the enterprise credit code
while True:
    # Read a company from the table
    # com_code1 = baseCore.redicPullData('EnterpriseIpo:nq_gpdm')
    com_code = '838616'
    short_name = ''
    social_code = ''
    start = time.time()
    log.info(f'======开始采集{com_code}======')
    # NOTE: the URL is hardcoded to company 833658 and does not use com_code
    url = f'https://xinsanban.eastmoney.com/F10/CompanyInfo/Introduction/833658.html'
    headers = {
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Cookie': 'qgqp_b_id=28edcf226f056ee077983f40f115eacf; st_si=15067486119520; emshistory=%5B%22%E4%BA%A7%E4%B8%9A%E9%93%BE%22%2C%22sz007sz%22%5D; websitepoptg_show_time=1694403032729; HAList=ty-0-002342-%u5DE8%u529B%u7D22%u5177%2Cty-0-301192-%u6CF0%u7965%u80A1%u4EFD%2Cty-1-688382-%u76CA%u65B9%u751F%u7269-U%2Cty-1-600895-%u5F20%u6C5F%u9AD8%u79D1%2Cty-1-600669-*ST%u978D%u6210%2Cty-116-00691-%u5C71%u6C34%u6C34%u6CE5%2Cty-0-300865-%u5927%u5B8F%u7ACB%2Cty-0-000656-%u91D1%u79D1%u80A1%u4EFD%2Cty-1-600257-%u5927%u6E56%u80A1%u4EFD%2Cty-1-688981-%u4E2D%u82AF%u56FD%u9645; xsb_history=833658%7C%u94C1%u8840%u79D1%u6280%2C838616%7C%u5317%u9CD0%u98DF%u54C1; st_asi=delete; st_pvi=44810095342512; st_sp=2023-07-18%2013%3A55%3A09; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=337; st_psi=20230914142347564-119112305908-4534169252',
'Host': 'xinsanban.eastmoney.com',
'Pragma': 'no-cache',
'Referer': 'https://xinsanban.eastmoney.com/F10/CompanyInfo/Introduction/833658.html',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
'sec-ch-ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
    req = requests.get(url=url, headers=headers)
    reslut = BeautifulSoup(req.content, 'html.parser')
    # print(reslut)
    li_list = reslut.find('div', id='company_info').find('ul', class_='company-page-left').find_all('li')
    security = reslut.find('div', id='security_info').find('ul', class_='company-page-right').find_all('li')
    listingDate = security[1].find('span', class_='company-page-item-right').text
    businessScope = li_list[7].find('span', class_='company-page-item-right').text
    industry = li_list[8].find('span', class_='company-page-item-right').text
    secutities_type = '新三板'
    category = '3'
    exchange = '1'
    dic_cwsj = {
        "exchange": exchange,
        "category": category,  # stock type (1 = A-share; 2 = B-share; 3 = NEEQ; 4 = H-share)
        'listed': '1',
        "listingDate": listingDate,
        "securitiesCode": com_code,
        "securitiesShortName": short_name,
        "securitiesType": secutities_type,
        "socialCreditCode": social_code,
        "businessScope": businessScope,
        "eastIndustry": industry,
        "csrcIndustry": ''
    }
    print(dic_cwsj)
    break
    # dic_cwsj = {
    #     "exchange": jys_code,
    #     "category": '1',  # stock type (1 = A-share; 2 = B-share; 3 = NEEQ; 4 = H-share)
    #     'listed': '1',
    #     "listingDate": shangshishijian,
    #     "securitiesCode": com_code[2:],
    #     "securitiesShortName": short_name,
    #     "securitiesType": '新三板',
    #     "socialCreditCode": id_code,
    #     "businessScope": zhuyingfanwei,
    #     "eastIndustry": dongcai,
    #     "csrcIndustry": zhengjian
    # }
    #
    # list_all_info.append(dic_cwsj)
    # log.info(f'======{com_code}====采集成功=====')
    #
    # # Save the data to the database through the API, 100 records per batch
    # for num in range(0, len(list_all_info), 100):
    #
    #     json_updata = json.dumps(list_all_info[num:num+100])
    #     # print(json_updata)
    #     try:
    #         response = requests.post('http://114.115.236.206:8088/sync/enterpriseIpo', data=json_updata, timeout=300,
    #                                  verify=False)
    #     except Exception as e:
    #         print(e)
    #     print("{}:到:{}".format(num, num + 100))
    #     print(response.text)
"""
"""
证监会公告采集,只能按照搜索企业来采,从上市库里拿企业数据,sys_enterprise_ipo_copy1
craw_state:已采集过表示为True,未采集表示为0,拿取数据表示为ing,解析失败表示为400
update_state:为1 表示需要更新,用来增量循环
如何统计出来该报告采到了没有,dt_error库统计失败的信息
"""
import json
import random
import re
import time
import fitz
import pymysql
import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer
from datetime import datetime
from base import BaseCore
# from fdfs_client.client import get_tracker_conf, Fdfs_client
baseCore = BaseCore.BaseCore()
log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
cnx_ = baseCore.cnx_
cursor_ = baseCore.cursor_
# tracker_conf = get_tracker_conf('./client.conf')
# client = Fdfs_client(tracker_conf)
taskType = '企业公告/证监会'
type_map = {
'zljgcs':'自律监管措施',
'wxh':'问询函',
'jlcf':'纪律处分',
'9506':'公司公告',
'9509':'公司公告',
'9503':'公司公告',
'9504':'公司公告',
'9505':'公司公告',
'9510':'公司公告',
'9520':'公司公告',
'9605':'公司公告',
'9533':'公司公告',
}
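# disclosureType values returned by the NEEQ API are mapped to Chinese category labels here;
# a code missing from type_map would raise a KeyError in SpiderByZJH (no fallback is defined).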
def secrchATT(item_id, name, type_id):
    sel_sql = '''select id from clb_sys_attachment where item_id = %s and name = %s and type_id=%s '''
    cursor_.execute(sel_sql, (item_id, name, type_id))
    selects = cursor_.fetchone()
    return selects
# Insert into the att table and return the attachment id
def tableUpdate(retData, com_name, year, pdf_name, num):
    item_id = retData['item_id']
    type_id = retData['type_id']
    group_name = retData['group_name']
    path = retData['path']
    full_path = retData['full_path']
    category = retData['category']
    file_size = retData['file_size']
    status = retData['status']
    create_by = retData['create_by']
    page_size = retData['page_size']
    create_time = retData['create_time']
    order_by = num
    selects = secrchATT(item_id, pdf_name, type_id)
    if selects:
        log.info(f'com_name:{com_name}已存在')
        id = selects[0]
        return id
    else:
        Upsql = '''insert into clb_sys_attachment(year,name,type_id,item_id,group_name,path,full_path,category,file_size,order_by,status,create_by,create_time,page_size) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
        values = (
            year, pdf_name, type_id, item_id, group_name, path, full_path, category, file_size, order_by,
            status, create_by,
            create_time, page_size)
        cursor_.execute(Upsql, values)  # insert
        cnx_.commit()  # commit
        log.info("更新完成:{}".format(Upsql))
        selects = secrchATT(item_id, pdf_name, type_id)
        id = selects[0]
        return id
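# A sketch of the retData dict tableUpdate() consumes — the keys unpacked above are
# assumed to be what baseCore.upLoadToServe() returns (not verified here):
# retData = {'item_id': ..., 'type_id': ..., 'group_name': ..., 'path': ..., 'full_path': ...,
#            'category': ..., 'file_size': ..., 'status': ..., 'create_by': ...,
#            'page_size': ..., 'create_time': ..., 'state': ..., 'content': ...}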
def RequestUrl(url, payload, social_code, start_time):
    # ip = get_proxy()[random.randint(0, 3)]
    pattern = r"\(\[(.*?)\]\)"
    retJsonData = ''   # default, so the function never returns an unbound name
    response = None
    for m in range(0, 3):
        try:
            response = requests.post(url=url, headers=headers, data=payload)  # ,proxies=ip)
            response.encoding = response.apparent_encoding
            break
        except Exception as e:
            log.error(f"request请求异常----{m}-----{e}")
            pass
    # Check the response status code
    if response is not None and response.status_code == 200:
        # Request succeeded; process the response data
        # print(response.text)
        soup = BeautifulSoup(response.text, 'html.parser')
        match = re.search(pattern, str(soup))
        if match:
            retJsonData = match.group(1)
            retJsonData = json.loads(retJsonData)
            # retJsonData = response.json()
            pass
    else:
        # Request failed; log the error
        log.error(f'请求失败:{url}')
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, url, '请求失败')
        retJsonData = ''
    return retJsonData
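# The NEEQ endpoint answers in a JSONP-like wrapper; the regex above is assumed to
# unwrap a body shaped roughly like
#   callbackName([{"listInfo": {"totalPages": ..., "content": [...]}}])
# match.group(1) is the object between '([' and '])', which json.loads() then parses.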
def getPages(url, com_code):
    payload = f"startTime=&page=1&companyCd={com_code}&keyword=&disclosureType%5B%5D=9506&disclosureType%5B%5D=9509&disclosureType%5B%5D=9503&disclosureType%5B%5D=9504&disclosureType%5B%5D=9505&disclosureType%5B%5D=9510&disclosureType%5B%5D=9520&disclosureType%5B%5D=9605&disclosureType%5B%5D=9533&wxhType=wxh&zljgcsType=zljgcs&jlcfType=jlcf&newThreeArray%5B%5D=0&newThreeArray%5B%5D=1&newThreeArray%5B%5D=2&siteId=1&sortfield=publishDate&sorttype=desc&keyword1="
    retJsonData = RequestUrl(url, payload, social_code, start_time)
    # The first request just gets the page count
    # print(retJsonData)
    totalPages = retJsonData['listInfo']['totalPages']
    print(totalPages)
    return totalPages
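# Decoded, the form payload above carries (assumed meanings, inferred from the NEEQ query page):
#   companyCd        - stock code to search for
#   page             - page index
#   disclosureType[] - announcement categories (9506/9509/... map to '公司公告' etc. in type_map)
#   wxhType / zljgcsType / jlcfType - inquiry letters / self-regulatory measures / disciplinary actions
#   sortfield=publishDate&sorttype=desc - newest first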
def InsterInto(short_name, social_code, pdf_url):
    insert = False  # was `inster`; initialized once so every return path is bound
    sel_sql = '''select social_credit_code,source_address from brpa_source_article where social_credit_code = %s and source_address = %s and origin='全国中小企业股份转让系统' and type='1' '''
    cursor.execute(sel_sql, (social_code, pdf_url))
    selects = cursor.fetchone()
    if selects:
        print(f'com_name:{short_name}、{pdf_url}已存在')
        return insert
    # Insert the record into the database
    try:
        insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,type,create_time) values(%s,%s,%s,%s,now())'''
        list_info = [
            social_code,
            pdf_url,
            '全国中小企业股份转让系统',
            '1',
        ]
        # the 144 database
        cursor.execute(insert_sql, tuple(list_info))
        cnx.commit()
        insert = True
        return insert
    except:
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, '数据库传输失败')
        return insert
def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time, com_name, num):
    # Upload to the file server
    retData = baseCore.upLoadToServe(pdf_url, 8, social_code)
    if not retData['state']:   # the duplicated state check has been collapsed into one
        log.info(f'====pdf解析失败====')
        return False
    # Insert the attachment into the att database
    num = num + 1
    att_id = tableUpdate(retData, com_name, year, pdf_name, num)
    content = retData['content']
    time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    dic_news = {
        'attachmentIds': att_id,
        'author': '',
        'content': content,
        'contentWithTag': '',
        'createDate': time_now,
        'deleteFlag': '0',
        'id': '',
        'keyWords': '',
        'lang': 'zh',
        'origin': '全国中小企业股份转让系统',
        'publishDate': pub_time,
        'sid': '1684032033495392257',
        'sourceAddress': pdf_url,  # link to the original
        'summary': '',
        'title': pdf_name,
        'type': 3,
        'socialCreditCode': social_code,
        'year': year
    }
    # print(dic_news)
    # Send the fields to Kafka for storage
    try:
        producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
        kafka_result = producer.send("researchReportTopic", json.dumps(dic_news, ensure_ascii=False).encode('utf8'))
        print(kafka_result.get(timeout=10))
        dic_result = {
            'success': 'true',
            'message': '操作成功',
            'code': '200',
        }
        print(dic_result)
        return True
    except Exception as e:
        dic_result = {
            'success': 'false',
            'message': '操作失败',
            'code': '204',
            'e': e
        }
        state = 0
        takeTime = baseCore.getTimeCost(start_time, time.time())
        baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, 'Kafka操作失败')
        print(dic_result)
        return False
# Collect the announcements
def SpiderByZJH(url, dic_info, start_time, num):  # dic_info: basic info fetched from the database
    okCount = 0
    errorCount = 0
    social_code = dic_info[2]
    short_name = dic_info[4]
    com_name = dic_info[1]
    totalPages = getPages(url, com_code)
    for i in range(0, int(totalPages)):
        payload = f"startTime=&page={i}&companyCd={com_code}&keyword=&disclosureType%5B%5D=9506&disclosureType%5B%5D=9509&disclosureType%5B%5D=9503&disclosureType%5B%5D=9504&disclosureType%5B%5D=9505&disclosureType%5B%5D=9510&disclosureType%5B%5D=9520&disclosureType%5B%5D=9605&disclosureType%5B%5D=9533&wxhType=wxh&zljgcsType=zljgcs&jlcfType=jlcf&newThreeArray%5B%5D=0&newThreeArray%5B%5D=1&newThreeArray%5B%5D=2&siteId=1&sortfield=publishDate&sorttype=desc&keyword1="
        retjson = RequestUrl(url, payload, social_code, start_time)
        content_list = retjson['listInfo']['content']
        for rp in content_list:
            pdf_url = 'https://www.neeq.com.cn' + rp['destFilePath']
            name_pdf = rp['disclosureTitle']
            rp_type = type_map[rp['disclosureType']]
            publishDate = rp['publishDate']
            year = publishDate[:4]
            # Insert the record into the database
            insert = InsterInto(short_name, social_code, name_pdf)
            if insert:
                # # announcement list
                # okCount = okCount + 1
                # Parse the PDF: fetch the link, download, then parse and transfer, tracking success and failure
                log.info(f'======={short_name}===========插入公告库成功')
                result = GetContent(pdf_url, name_pdf, social_code, year, publishDate, start_time, com_name, num)
                if result:
                    # announcement list
                    okCount = okCount + 1
                    log.info(f'{short_name}==============解析传输操作成功')
                    state = 1
                    takeTime = baseCore.getTimeCost(start_time, time.time())
                    baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, '')
                    pass
                else:
                    errorCount += 1
                    # time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    log.error(f'{short_name}=============解析或传输操作失败')
                    continue
            else:
                log.info(f'======={short_name}===========已存在')
                continue
if __name__ == '__main__':
    num = 0
    headers = {
'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Length': '442',
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Cookie': 'HOY_TR=FCEXZOPIKBGTYHDL,945C236781ABDFE0,xfslaodpytzTmieq; Hm_lvt_b58fe8237d8d72ce286e1dbd2fc8308c=1694480321; Hm_lpvt_b58fe8237d8d72ce286e1dbd2fc8308c=1694597182',
'Host': 'www.neeq.com.cn',
'Origin': 'https://www.neeq.com.cn',
'Pragma': 'no-cache',
'Referer': 'https://www.neeq.com.cn/products/neeq_listed_companies/related_announcement.html?companyCode=430054',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
'X-Requested-With': 'XMLHttpRequest',
'sec-ch-ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
    }
    dic_parms = {}
    # Read the stock code, short name and social credit code from the database
    while True:
        start_time = time.time()
        # # Get enterprise info
        # # social_code = baseCore.redicPullData('NoticeEnterpriseFbs:gnqy_socialCode')
        social_code = '9110000071092841XX'
        com_code = '430045'
        short_name = '超毅网络'
        dic_info = {}
        # # If Redis has run out of data, wait
        # if social_code == None:
        #     time.sleep(20)
        #     continue
        # dic_info = baseCore.getInfomation(social_code)
        # count = dic_info[16]
        # NOTE: dic_info stays an empty dict on this test path even though SpiderByZJH indexes it;
        # this assumes baseCore.getInfomation() above gets re-enabled for real runs.
        url = 'https://www.neeq.com.cn/disclosureInfoController/productInfoResult.do'
        # Pagination: page 0 ~ totalPages (about 25)
        SpiderByZJH(url, dic_info, start_time, num)
    cursor.close()
    cnx.close()
    # cursor_.close()
    # cnx_.close()
    # Release resources
    baseCore.close()
# connect timeout in seconds
# default value is 30s
connect_timeout=300
# network timeout in seconds
# default value is 30s
network_timeout=600
# the base path to store log files
#base_path=/home/tarena/django-project/cc_shop1/cc_shop1/logs
# tracker_server can ocur more than once, and tracker_server format is
# "host:port", host can be hostname or ip address
tracker_server=114.115.215.96:22122
#standard log level as syslog, case insensitive, value list:
### emerg for emergency
### alert
### crit for critical
### error
### warn for warning
### notice
### info
### debug
log_level=info
# if use connection pool
# default value is false
# since V4.05
use_connection_pool = false
# connections whose the idle time exceeds this time will be closed
# unit: second
# default value is 3600
# since V4.05
connection_pool_max_idle_time = 3600
# if load FastDFS parameters from tracker server
# since V4.05
# default value is false
load_fdfs_parameters_from_tracker=false
# if use storage ID instead of IP address
# same as tracker.conf
# valid only when load_fdfs_parameters_from_tracker is false
# default value is false
# since V4.05
use_storage_id = false
# specify storage ids filename, can use relative or absolute path
# same as tracker.conf
# valid only when load_fdfs_parameters_from_tracker is false
# since V4.05
storage_ids_filename = storage_ids.conf
#HTTP settings
http.tracker_server_port=80
#use "#include" directive to include HTTP other settiongs
##include http.conf
\ No newline at end of file
...
@@ -19,10 +19,7 @@ from fdfs_client.client import get_tracker_conf, Fdfs_client
 baseCore = BaseCore.BaseCore()
 log = baseCore.getLogger()
-# cnx = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='clb_project', charset='utf8mb4')
-cnx_ = pymysql.connect(host='114.116.44.11', user='root', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
-# cnx_ip = pymysql.connect(host='114.115.159.144',user='root', password='zzsn9988', db='clb_project', charset='utf8mb4')
-# cursor = cnx.cursor()
+cnx_ = pymysql.connect(host='114.116.44.11', user='caiji', password='f7s0&7qqtK', db='dbScore', charset='utf8mb4')
 cursor_ = cnx_.cursor()
 cnx = baseCore.cnx
@@ -32,7 +29,7 @@ cursor = baseCore.cursor
 tracker_conf = get_tracker_conf('./client.conf')
 client = Fdfs_client(tracker_conf)
-taskType = '企业公告/证监会'
+taskType = '企业公告/证监会/福布斯'
 def RequestUrl(url, payload, social_code, start_time):
     # ip = get_proxy()[random.randint(0, 3)]
@@ -142,30 +139,25 @@ def InsterInto(short_name, social_code, name_pdf, pub_time, pdf_url, report_type
     inster = False
     sel_sql = '''select social_credit_code,source_address from brpa_source_article where social_credit_code = %s and source_address = %s'''
-    cursor_.execute(sel_sql, (social_code, pdf_url))
-    selects = cursor_.fetchone()
+    cursor.execute(sel_sql, (social_code, pdf_url))
+    selects = cursor.fetchone()
     if selects:
         print(f'com_name:{short_name}、{pdf_url}已存在')
         return inster
-    # Insert the record into the database
+    # todo: insert the record into the database — switch databases
     try:
-        insert_sql = '''insert into brpa_source_article(social_credit_code,title,summary,content,publish_date,source_address,origin,author,type,lang) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
+        insert_sql = '''insert into brpa_source_article(social_credit_code,source_address,origin,author,type) values(%s,%s,%s,%s,%s)'''
         list_info = [
             social_code,
-            name_pdf,
-            '',  # summary
-            '',  # body text
-            pub_time,  # publish date
             pdf_url,
             '证监会',
             report_type,
-            '1',
-            'zh'
+            '1'
         ]
-        cursor_.execute(insert_sql, tuple(list_info))
-        cnx_.commit()
+        cursor.execute(insert_sql, tuple(list_info))
+        cnx.commit()
         insert = True
         return insert
     except:
@@ -201,6 +193,7 @@ def GetContent(pdf_url, pdf_name, social_code, year, pub_time, start_time):
         baseCore.recordLog(social_code, taskType, state, takeTime, pdf_url, dic_result['message'])
         return False
     time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+    # todo: the attachment id needs to be written to the att database
     dic_news = {
         'attachmentIds': id,
         'author': '',
@@ -372,33 +365,29 @@ if __name__ == '__main__':
     while True:
         start_time = time.time()
         # Get enterprise info
-        # social_code = baseCore.redicPullData('NoticeEnterprise:gnqy_socialCode')
+        social_code = baseCore.redicPullData('NoticeEnterpriseFbs:gnqy_socialCode')
         # If Redis has run out of data, wait
-        # if social_code == None:
-        #     time.sleep(20)
-        #     continue
+        if social_code == None:
+            time.sleep(20)
+            continue
         # Get enterprise info
-        # query = "SELECT * FROM Tfbs_bak where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and state2 is Null limit 1 "
+        # query = "SELECT * FROM Tfbs_bak where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and state2 ='1' limit 1 "
         # 兴业银行 (Industrial Bank)
-        query = "SELECT * FROM Tfbs_bak where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and col5='兴业银行'"
-        cursor.execute(query)
-        row = cursor.fetchone()
-        if row:
-            pass
-        else:
-            print('没有数据了,结束脚本')
-            break
+        # query = "SELECT * FROM Tfbs_bak where col3 is not null and length(col3)>3 and col3 not like 'ZZSN%' and col5='兴业银行'"
+        # cursor.execute(query)
+        # row = cursor.fetchone()
+        # if row:
+        #     pass
+        # else:
+        #     print('没有数据了,结束脚本')
+        #     break
         # tycid = row[14]
-        com_name = row[6]
-        social_code = row[4]
-        code = row[7]
-        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-        # 1 marks the record as taken
-        updateBeginSql = f"update Tfbs_bak set state2='1',date1='{time_now}' where col3='{social_code}' "
-        cursor.execute(updateBeginSql)
-        cnx.commit()
         dic_info = baseCore.getInfomation(social_code)
+        com_name = dic_info[1]
+        social_code = dic_info[2]
+        code = dic_info[3]
+        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         count = dic_info[16]
         # Shanghai http://eid.csrc.gov.cn/101111/index.html  Shenzhen http://eid.csrc.gov.cn/101811/index.html  BSE http://eid.csrc.gov.cn/102611/index.html
        # url varies with pagination and column, e.g. http://eid.csrc.gov.cn/101811/index_3_f.html
@@ -412,7 +401,6 @@ if __name__ == '__main__':
         # Pick the link from the stock code
         # Codes starting with 0/2/3 are Shenzhen, 6/9 Shanghai, 4/8 Beijing
-        code = dic_info[3]
         short_name = dic_info[4]
         dic_parms = getUrl(code, url_parms, Catagory2_parms)
         dic_parms_ls = getUrl(code, url_parms_ls, Catagory2_parms_ls)
...
 # _*_ coding:utf-8 _*_
 """Run over the full dataset once, with no dedup logic"""
+import datetime
 import json
 import re
 import time
@@ -8,7 +9,6 @@ import time
 import fitz
 import pymongo
 import requests
-from bs4 import BeautifulSoup
 from kafka import KafkaProducer
 from pyquery import PyQuery as pq
 from requests.packages import urllib3
@@ -23,7 +23,8 @@ from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
 from lxml import etree
 from random import choice
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
 log = baseCore.getLogger()
 taskType = '政策法规'
@@ -37,14 +38,13 @@ taskType = '政策法规'
 db_storage = pymongo.MongoClient('mongodb://114.115.221.202:27017', username='admin', password='zzsn@9988').caiji[
     '国务院_国资委_copy1']
-driver_path = r'D:\cmd100\chromedriver.exe'
-chromr_bin = r'D:\Google\Chrome\Application\chrome.exe'
+driver_path = r'D:\fbs_spider\cmd100\chromedriver.exe'
+chromr_bin = r'D:\fbs_spider\Google\Chrome\Application\chrome.exe'
 headers = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
 }
-
 # Convert relative URLs in the html into absolute URLs
 def paserUrl(html, listurl):
     # Grab all <a> and <img> tags
@@ -60,7 +60,6 @@ def paserUrl(html, listurl):
             link['src'] = urljoin(listurl, link['src'])
     return html
-
 def getDriver():
     service = Service(driver_path)
     chrome_options = webdriver.ChromeOptions()
@@ -85,7 +84,6 @@ def getDriver():
     # })
     return bro
-
 def save_data(dic_news):
     aaa_dic = {
@@ -97,7 +95,6 @@ def save_data(dic_news):
     }
     db_storage.insert_one(aaa_dic)
-
 def sendKafka(dic_news):
     start_time = time.time()
     try:  # 114.116.116.241
@@ -132,12 +129,10 @@ def sendKafka(dic_news):
         takeTime = baseCore.getTimeCost(start_time, time.time())
         return False
-
 def redefid(idList):
     id_ = ','.join(map(str, idList))
     return id_
-
 def remove_dup():
     pass
@@ -310,7 +305,6 @@ def get_content1():
     end_time = time.time()
     log.info(f'共抓取国务院文件{num}条数据,共耗时{start_time - end_time}')
-
 # State Council departmental documents
 def get_content2():
     def getTotalpage(bmfl,headers,session):
@@ -460,7 +454,6 @@ def get_content2():
     end_time = time.time()
     log.info(f'共抓取国务院部门文件{num}条数据,耗时{end_time - start_time}')
-
 # SASAC of the State Council - policy releases
 def get_content3():
     def getPage():
@@ -612,12 +605,8 @@ def get_content3():
     log.info(f'共抓取国资委文件{num}条数据,耗时{end_time - start_time}')
     partOne()
-    # Needs commenting out for incremental runs
-    partTwo()
-
-from bs4 import BeautifulSoup
-from urllib.parse import urljoin
+    # Needs commenting out for incremental runs: partTwo()
+    # partTwo()
 # Beijing
 def bei_jing():
@@ -771,7 +760,6 @@ def bei_jing():
         log.info(e)
         pass
-
 # Inner Mongolia
 def nei_meng_gu():
     start = time.time()
@@ -891,7 +879,6 @@ def nei_meng_gu():
     end = time.time()
     print('共', num, '条', '...........', '共耗时', end - start, '秒')
-
 # Jilin
 def ji_lin():
     start = time.time()
@@ -1082,9 +1069,8 @@ def ji_lin():
     end = time.time()
     print('共', num, '条', '...........', '共耗时', end - start, '秒')
-
 # Shanghai
-# todo: the list page is not fetched completely — done
 def shang_hai():
     start = time.time()
     num = 0
@@ -1216,7 +1202,6 @@ def shang_hai():
     end = time.time()
     print('共', num, '条', '...........', '共耗时', end - start, '秒')
-
 # Zhejiang
 def zhe_jiang():
     start = time.time()
@@ -1337,7 +1322,6 @@ def zhe_jiang():
     end = time.time()
     print('共', num, '条', '...........', '共耗时', end - start, '秒')
-
 # Fujian
 def fu_jian():
     error_tag = str(404)
@@ -1385,12 +1369,11 @@ def fu_jian():
                 i_html = href_text.text
                 i_soup = BeautifulSoup(i_html, 'html.parser')
                 real_href = href
-                real_href = 'http://gzw.fujian.gov.cn/zwgk/xxgkzl/xxgkml/gfxwj/202211/t20221129_6064610.htm'
+                # real_href = 'http://gzw.fujian.gov.cn/zwgk/xxgkzl/xxgkml/gfxwj/202211/t20221129_6064610.htm'
                 # print(real_href)
-                # is_href = db_storage.find_one({'网址': real_href})
-                # if is_href:
-                #     continue
+                is_href = db_storage.find_one({'网址': real_href})
+                if is_href:
+                    continue
                 try:
                     # The article is a remote pdf
                     # Download the file straight to the server and parse out the body text
@@ -1496,9 +1479,7 @@ def fu_jian():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Shandong
-# todo: effective date, issuing authority — done
 def shan_dong():
     headers = {
         'Cookie': 'COLLCK=2502513302; COLLCK=2493627587',
@@ -1609,7 +1590,6 @@ def shan_dong():
     end = time.time()
     print('共', num, '条', '...........', '共耗时', end - start, '秒')
-
 # Guangdong
 def guang_dong():
     start = time.time()
@@ -1710,7 +1690,6 @@ def guang_dong():
     end = time.time()
     print('共', num, '条', '...........', '共耗时', end - start, '秒')
-
 # Hainan
 def hai_nan():
     def hai_nan1():
@@ -2348,7 +2327,6 @@ def hai_nan():
     hai_nan1()
     hai_nan2()
-
 # Sichuan
 def si_chuan():
     num = 0
@@ -2442,7 +2420,6 @@ def si_chuan():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Guangxi
 def guang_xi():
     num = 0
@@ -2567,7 +2544,6 @@ def guang_xi():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Guizhou
 def gui_zhou():
     """
@@ -2677,7 +2653,6 @@ def gui_zhou():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Yunnan
 def yun_nan():
     def yun_nan1():
@@ -2908,7 +2883,6 @@ def yun_nan():
     yun_nan1()
     yun_nan2()
-
 # Chongqing
 def chong_qing():
     """
@@ -3035,7 +3009,6 @@ def chong_qing():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Tianjin
 def tian_jin():
     def tian_jin1():
@@ -3413,7 +3386,6 @@ def tian_jin():
     tian_jin2()
     tian_jin3()
-
 # Xinjiang
 def xin_jiang():
     def xin_jiang1():
@@ -3620,7 +3592,6 @@ def xin_jiang():
     xin_jiang1()
     xin_jiang_jsbt()
-
 # Shanxi
 def shan_xi():
     num = 0
@@ -3734,7 +3705,6 @@ def shan_xi():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Liaoning
 def liao_ning():
     num = 0
@@ -3844,7 +3814,6 @@ def liao_ning():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Heilongjiang
 def hei_long_jiang():
     num = 0
@@ -3940,7 +3909,6 @@ def hei_long_jiang():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Jiangsu
 def jiang_su():
     num = 0
@@ -4052,7 +4020,6 @@ def jiang_su():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Anhui
 def an_hui():
     def an_hui1():
@@ -4246,9 +4213,9 @@ def an_hui():
         print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
     an_hui1()
-    log.info('------test----')
     an_hui2()
-
 # Jiangxi
 def jiang_xi():
     """
@@ -4377,7 +4344,6 @@ def jiang_xi():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Henan
 def he_nan():
     num = 0
@@ -4470,7 +4436,6 @@ def he_nan():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Hunan
 def hu_nan():
     num = 0
@@ -4572,7 +4537,6 @@ def hu_nan():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Gansu
 def gan_su():
     def gan_su1():
@@ -4992,7 +4956,6 @@ def gan_su():
     gan_su2()
     gan_su3()
-
 # Ningxia
 def ning_xia():
     num = 0
@@ -5087,7 +5050,6 @@ def ning_xia():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Shaanxi
 def shanxi():
     num = 0
@@ -5188,7 +5150,6 @@ def shanxi():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Tibet
 def xi_zang():
     start_time = time.time()
@@ -5280,7 +5241,6 @@ def xi_zang():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Qinghai
 def qing_hai():
     def qing_hai1():
@@ -5500,7 +5460,6 @@ def qing_hai():
     qing_hai1()
     qing_hai2()
-
 # Hebei
 def he_bei():
     num = 0
@@ -5591,7 +5550,6 @@ def he_bei():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
-
 # Hubei
 def hu_bei():
     num = 0
@@ -5701,39 +5659,42 @@ def hu_bei():
     end_time = time.time()
     print(f'共抓取{num}条数据,共耗时{end_time - start_time}')
 if __name__ == '__main__':
     get_content1()
     get_content2()
     get_content3()
-    # bei_jing()
-    # nei_meng_gu()
-    # ji_lin()
-    # shang_hai()
-    # zhe_jiang()
-    # fu_jian()
-    # shan_dong()
-    # guang_dong()
-    # hai_nan()
-    # si_chuan()
-    # guang_xi()
-    # gui_zhou()
-    # yun_nan()
-    # chong_qing()
-    # tian_jin()
-    # xin_jiang()
-    # shan_xi()
-    # liao_ning()
-    # hei_long_jiang()
-    # jiang_su()
-    # an_hui()
-    # jiang_xi()
-    # he_nan()
-    # hu_nan()
-    # gan_su()
-    # ning_xia()
-    # xi_zang()
-    # shanxi()
-    # qing_hai()
-    # he_bei()
-    # qing_hai()
+    bei_jing()
+    nei_meng_gu()
+    ji_lin()
+    shang_hai()
+    zhe_jiang()
+    fu_jian()
+    shan_dong()
+    guang_dong()
+    hai_nan()
+    si_chuan()
+    guang_xi()
+    gui_zhou()
+    yun_nan()
+    chong_qing()
+    tian_jin()
+    xin_jiang()
+    shan_xi()
+    liao_ning()
+    hei_long_jiang()
+    jiang_su()
+    an_hui()
+    jiang_xi()
+    he_nan()
+    hu_nan()
+    gan_su()
+    ning_xia()
+    xi_zang()
+    shanxi()
+    qing_hai()
+    he_bei()
+    qing_hai()
+    current_time = datetime.datetime.now()
+    midnight_time = current_time.replace(hour=0, minute=0, second=0, microsecond=0) + datetime.timedelta(days=1)
+    sleep_seconds = (midnight_time - current_time).total_seconds()
+    time.sleep(sleep_seconds)
import policy
import tingtype
import BaseCore
from apscheduler.schedulers.blocking import BlockingScheduler

basecore = BaseCore.BaseCore()
log = basecore.getLogger()

def policylaw_task():
    # Instantiate a scheduler
    scheduler = BlockingScheduler()
    # Run once a day at midnight
    # NOTE: add_job expects a callable; `policy` and `tingtype` are imported modules here,
    # so this assumes they expose a callable entry point (left as in the source).
    scheduler.add_job(policy, 'cron', hour=0, minute=0)
    scheduler.add_job(tingtype, 'cron', hour=0, minute=0)
    try:
        scheduler.start()
    except Exception as e:
        log.info(f'定时采集异常 {e}')
        pass

policylaw_task()
\ No newline at end of file
...
+import datetime
 import json
 import random
 import time
@@ -277,6 +278,10 @@ if __name__=='__main__':
             job()
         except Exception as e:
             print(e)
+        current_time = datetime.datetime.now()
+        midnight_time = current_time.replace(hour=0, minute=0, second=0, microsecond=0) + datetime.timedelta(days=1)
+        sleep_seconds = (midnight_time - current_time).total_seconds()
+        time.sleep(sleep_seconds)
 # Create an ExcelWriter object
 # writer = pd.ExcelWriter('国务院厅局.xlsx')
...
import json
from bs4 import BeautifulSoup
import langid
from base.BaseCore import BaseCore
baseCore = BaseCore()
import pymysql
# print(baseCore.detect_language("是对jhjjhjhhjjhjhjh的浮点数"))
# cnx_ = baseCore.cnx
# cursor_ = baseCore.cursor
cnx_ = pymysql.connect(host='114.115.159.144', user='root', password='zzsn9988', db='caiji',
                       charset='utf8mb4')
cursor_ = cnx_.cursor()

updateBeginSql = f"update Tfbs set state3=%s where col3=%s "
# print(updateBeginSql)
cursor_.execute(updateBeginSql, (200, '91350000158142711F'))
cnx_.commit()