新三板财务数据脚本维护

e3ee9068 · LiJunMing · 6809e271 · e3ee9068 · e3ee9068
--- a/comData/dfcfwGpdm/NQenterprise/finance_xq.py
+++ b/comData/dfcfwGpdm/NQenterprise/finance_xq.py
@@ -84,8 +84,19 @@ def getdetail(reportInfodata,name_map,listinfo,url_name):
        listinfo.append(dic_info)
    return listinfo

-def getinfo(com_code,social_code):
-    dic_info = {}
+def getinfo(info_date,com_code,social_code):
+
+    panduan = False  # if every check_date attempt raises, treat the date as not yet collected
+    for nnn in range(0, 3):  # up to 3 attempts at the "already collected?" lookup
+        try:
+            panduan = check_date(com_code, info_date)
+            break  # got an answer; do not repeat the lookup
+        except Exception:
+            time.sleep(1)  # brief pause before retrying
+    if panduan:
+        log.info(f'{info_date}----已采集过')
+        return
+
    for nnn in range(0, 3):
        try:
            ynFirst = check_code(com_code)
@@ -107,80 +118,114 @@ def getinfo(com_code,social_code):
    a_infoData = getrequests(url_lrb)
    b_infoData = getrequests(url_zcfzb)
    c_infoData = getrequests(url_xjllb)
-    #对报告期做循环
-    for i in range(0,5):
-        listLrb = []
-        listZcfzb = []
-        listXjllb = []
-        reportLrbdata = a_infoData[i]
-        report_date = a_infoData[i]['report_date']
-        #时间戳转化为日期
-        report_date = getFormatedate(int(report_date / 1000))
-        # 检查报告期是否已经存在
-        for nnn in range(0, 3):
-            try:
-                panduan = check_date(com_code, report_date)
-            except:
-                time.sleep(1)
-        if panduan:
-            log.info(f'{report_date}----已采集过')
+
+    listLrb = []
+    listZcfzb = []
+    listXjllb = []
+    for i in range(len(a_infoData)):
+        report_date_a = a_infoData[i]['report_date']
+        report_date_a = getFormatedate(int(report_date_a / 1000))
+        if info_date == report_date_a:
+            log.info(f'======正在采集利润表：{com_code}---{info_date}=======')
+            # 利润表
+            reportLrbdata = a_infoData[i]
+            listLrb = getdetail(reportLrbdata, lrb_name_map, listLrb, lrb_name)
+            log.info(f'利润表数据：{len(listLrb)}个')
+            break
+        else:
+            continue
+
+    for j in range(len(b_infoData)):
+        report_date_b = b_infoData[j]['report_date']
+        report_date_b = getFormatedate(int(report_date_b / 1000))
+        if info_date == report_date_b:
+            log.info(f'======正在采集资产负债表：{com_code}---{info_date}=======')
+            reportZcfzbdata = b_infoData[j]
+            listZcfzb = getdetail(reportZcfzbdata, zcfzb_name_map, listZcfzb, zcfzb_name)
+            log.info(f'资产负债表数据：{len(listZcfzb)}个')
+            break
+        else:
            continue
+    for k in range(len(c_infoData)):
+        report_date_c = c_infoData[k]['report_date']
+        report_date_c = getFormatedate(int(report_date_c / 1000))
+        if info_date == report_date_c:
+            log.info(f'======正在采集现金流量表：{com_code}---{info_date}=======')
+            reportXjllbdata = c_infoData[k]
+            listXjllb = getdetail(reportXjllbdata, xjllb_name_map, listXjllb, xjllb_name)
+            log.info(f'现金流量表数据：{len(listXjllb)}个')
+            break
        else:
-            pass
-        log.info(f'======正在采集：{com_code}---{report_date}=======')
-        #利润表
-        list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name)
-        log.info(f'利润表数据：{len(list_Lrb)}个')
-        # print(list_Lrb)
-        #资产负债表
-        reportZcfzbdata = b_infoData[i]
-        list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
-        #现金流量表
-        reportXjllbdata = c_infoData[i]
-        list_Xjllb = getdetail(reportXjllbdata,xjllb_name_map,listXjllb,xjllb_name)
+            continue
+    # reportLrbname = a_infoData[i]['report_name']
+    # reporZCFZbname = b_infoData[i]['report_name']
+    # reportXJLLBname = c_infoData[i]['report_name']

+    #时间戳转化为日期
+    # report_date = getFormatedate(int(report_date / 1000))
+    # 检查报告期是否已经存在

-        dic_info = {
-            "socialCreditCode": social_code,
-            "securitiesCode": com_code[2:],
-            "date": report_date,
-            "debt": list_Zcfzb,
-            "profit": list_Lrb,
-            "cash": list_Xjllb,
-            "ynFirst": ynFirst,
-        }
-        # print(dic_info)
-        #一个报告期结束
-        log.info(f'----{com_code}--{report_date}----结束')
-        if dic_info:
-            # 调凯歌接口存储数据
-            data = json.dumps(dic_info)
-            # print(data)
-            url_baocun = 'http://114.115.236.206:8088/sync/finance/xq'
-            for nnn in range(0, 3):
-                try:
-                    res_baocun = requests.post(url_baocun, data=data)
-                    break
-                except:
-                    time.sleep(1)
-            log.info(f'----{com_code}--{report_date}--------数据发送接口完毕------------')
-            for nnn in range(0, 3):
-                try:
-                    add_date(com_code, report_date)
-                    break
-                except:
-                    time.sleep(1)
-        else:
-            log.error(f'---{com_code}--{report_date}--')
+    # log.info(f'======正在采集：{com_code}---{info_date}=======')
+    # #利润表
+    # reportLrbdata = a_infoData[i]
+    # list_Lrb = getdetail(reportLrbdata,lrb_name_map,listLrb,lrb_name)
+    # log.info(f'利润表数据：{len(list_Lrb)}个')
+    #
+    # # print(list_Lrb)
+    # #资产负债表
+    # try:
+    #     reportZcfzbdata = b_infoData[j]
+    #     list_Zcfzb = getdetail(reportZcfzbdata,zcfzb_name_map,listZcfzb,zcfzb_name)
+    #
+    # except:
+    #     list_Zcfzb = []
+    # #现金流量表
+    # reportXjllbdata = c_infoData[k]
+    # list_Xjllb = getdetail(reportXjllbdata,xjllb_name_map,listXjllb,xjllb_name)
+
+    dic_info = {
+        "socialCreditCode": social_code,
+        "securitiesCode": com_code[2:],
+        "date": info_date,
+        "debt": listZcfzb,
+        "profit": listLrb,
+        "cash": listXjllb,
+        "ynFirst": ynFirst,
+    }
+    # print(dic_info)
+    #一个报告期结束
+    log.info(f'----{com_code}--{info_date}----结束')
+    if dic_info:
+        # 调凯歌接口存储数据
+        data = json.dumps(dic_info)
+        # print(data)
+        url_baocun = 'http://114.115.236.206:8088/sync/finance/xq'
+        for nnn in range(0, 3):
+            try:
+                res_baocun = requests.post(url_baocun, data=data)
+                break
+            except:
+                time.sleep(1)
+        log.info(f'----{com_code}--{info_date}--------数据发送接口完毕------------')
+        for nnn in range(0, 3):
+            try:
+                add_date(com_code, info_date)
+                break
+            except:
+                time.sleep(1)
+    else:
+        log.error(f'---{com_code}--{info_date}--')


 if __name__ == '__main__':
    info_date_list = []
-    # try:
-    #     chromedriver = "D:/chrome/chromedriver.exe"
-    #     browser = webdriver.Chrome(chromedriver)
-    # except Exception as e:
-    #     print(e)
+    list_date = ['2023-06-30']
+    list_month = ['-12-31', '-06-30']
+    for year in range(2022, 2020, -1):
+        for month in list_month:
+            date = str(year) + month
+            list_date.append(date)
+
    opt = webdriver.ChromeOptions()
    opt.add_argument(
        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')
@@ -191,7 +236,6 @@ if __name__ == '__main__':
    opt.add_experimental_option('excludeSwitches', ['enable-logging'])
    opt.add_experimental_option('useAutomationExtension', False)
    opt.binary_location = r'D:/Google/Chrome/Application/chrome.exe'
-    # chromedriver = r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
    chromedriver = r'D:/cmd100/chromedriver.exe'
    browser = webdriver.Chrome(chrome_options=opt, executable_path=chromedriver)
    headers = {
@@ -381,10 +425,10 @@ if __name__ == '__main__':
        '加：期初现金及现金等价物余额':'final_balance_of_cce',
        '期末现金及现金等价物余额':'final_balance_of_cce'
    }
-    table_type = ['income','balance']
+
    while True:
-        social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
-        # social_code = baseCore.redicPullData('NQEnterprise:nq_finance_test')
+        social_code = baseCore.redicPullData('NQEnterprise:nq_finance')
+        # social_code = '9144030067312725XJ'  # debug override: a fixed code never yields None, so the loop would re-run one company forever
        if social_code is None:
                log.info('已没有数据----------等待')
                time.sleep(20)
@@ -395,8 +439,9 @@ if __name__ == '__main__':
        com_code = data[3]
        start = time.time()
        com_code = 'NQ' + com_code
-        dic_info = getinfo(com_code,social_code)
-        break
+        for info_date in list_date:
+            dic_info = getinfo(info_date,com_code,social_code)
+




--- a/test.py
+++ b/test.py
@@ -16,4 +16,12 @@ element.getparent() #获取给定元素的父元素
 # data = '"1234","456\r7","897"'
 # print(data)
 # aa = pd.read_csv(StringIO(data),escapechar='\r')
-# print(aa)
\ No newline at end of file
+# print(aa)
+
+import pandas as pd
+
+# Read the txt file
+data = pd.read_csv('D:\\美国证券交易委员会\\2023q2\\pre.txt', delimiter='\t')  # choose the delimiter that matches the actual file
+
+# Save the data as a csv file
+data.to_csv('D:\\美国证券交易委员会\\2023q2\\pre.csv', index=False)  # index=False: do not write the row index