Commit e13aa13e  Author: 刘伟刚

Code change 4

Parent 07b5b32c
......@@ -16,9 +16,9 @@ from openpyxl import Workbook
import langid
#创建连接池
import pymysql
from pymysql import connections
from DBUtils.PooledDB import PooledDB
import pymysql
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源
......
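The hunk above pulls in DBUtils' PooledDB next to pymysql, but the pool itself is not shown in this diff. Below is a minimal sketch of how such a pool could back the direct pymysql.Connect calls that follow; the pool sizes are illustrative assumptions, and the host/credentials simply mirror the values that appear elsewhere in this commit.

import pymysql
from DBUtils.PooledDB import PooledDB

# Illustrative pool; sizes are assumptions, credentials mirror conn11() below.
pool = PooledDB(
    creator=pymysql,        # pymysql creates the underlying connections
    maxconnections=5,       # upper bound on simultaneously open connections
    mincached=1,            # idle connections kept ready at startup
    blocking=True,          # wait for a free connection instead of raising
    host='114.116.44.11', port=3306,
    user='caiji', passwd='f7s0&7qqtK', db='clb_project', charset='utf8'
)

conn = pool.connection()    # borrow a connection from the pool
cursor = conn.cursor()
# ... run queries ...
cursor.close()
conn.close()                # returns the connection to the pool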
......@@ -9,7 +9,7 @@ r = basecore.r
def conn11():
conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
conn = pymysql.Connect(host='114.116.44.11', port=3306, user='caiji', passwd='f7s0&7qqtK', db='clb_project',
charset='utf8')
cursor = conn.cursor()
return conn,cursor
......@@ -25,6 +25,7 @@ def yahooCodeFromSql():
print('=======')
for item in gn_social_list:
r.rpush('NoticeEnterprise:securities_code', item)
print('将股票代码放入redis结束')
except Exception as e:
log.info("数据查询异常")
finally:
......
# -*- coding: utf-8 -*-
# @Author: MENG
# @Time : 2022-4-9
import datetime
import xlrd
from selenium.webdriver.support.wait import WebDriverWait
from tqdm import tqdm
......@@ -42,6 +44,7 @@ create_time
"""
class YahooCaiwu(object):
def __init__(self):
self.config = configparser.ConfigParser()
# 读取配置文件
......@@ -125,6 +128,7 @@ class YahooCaiwu(object):
all_dict['内容'] = content_dict
return all_dict
def get_webdriver(self):
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--disable-gpu')
......@@ -173,7 +177,7 @@ class YahooCaiwu(object):
stock2=stock2[1:]
url = f'https://finance.yahoo.com/quote/{stock2}/financials?p={stock2}'
try:
self.logger.info(f'正在采集:{url}')
print(f'正在采集:{url}')
self.driver.get(url)
# 等待页面加载完成
wait = WebDriverWait(self.driver, 300)
......@@ -293,13 +297,13 @@ class YahooCaiwu(object):
try:
resp = requests.get(get_url)
print(resp.text)
self.logger.info('调用接口成功!!')
print('调用接口成功!!')
except:
with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f:
f.write(stock + '\n')
except Exception as e:
print(e)
self.logger.info(f'采集:{url}失败')
print(f'采集:{url}失败')
self.driver.quit()
time.sleep(10)
self.driver=self.get_webdriver()
......@@ -307,15 +311,16 @@ class YahooCaiwu(object):
# time.sleep(60 * 10)
self.driver.quit()
time.sleep(10)
driver=self.get_webdriver()
self.logger.info('出错,重试中!')
self.driver=self.get_webdriver()
print('出错,重试中!')
continue
# driver.close()
# self.driver.close()
def dataToSql(self,conn,cursor,ipo_data):
try:
social_credit_code=ipo_data['social_credit_code']
stock=str(ipo_data['stock'])
stock=ipo_data['stock']
securities_short_name=ipo_data['securities_short_name']
content=ipo_data['content']
level_relation=ipo_data['level_relation']
......@@ -331,23 +336,27 @@ class YahooCaiwu(object):
select_sql=f"SELECT * FROM config_finance_data_sync WHERE stock_code='{stock}'"
cursor.execute(select_sql)
existing_record = cursor.fetchone()
# 获取当前时间
current_time = datetime.datetime.now()
# 将时间转换为字符串
currentdate = current_time.strftime("%Y-%m-%d %H:%M:%S")
if existing_record:
# 记录已存在,执行更新操作
update_param=(social_credit_code,content,level_relation,origin_type,stock)
update_sql=f"UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s WHERE stock_code=%s "
update_param=(social_credit_code,content,level_relation,origin_type,currentdate,stock)
update_sql=f"UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s ,create_time=%s WHERE stock_code=%s "
cursor.execute(update_sql,update_param)
print('更新成功')
else:
insert_param=(social_credit_code,content,level_relation,unit,stock,origin_type)
insert_sql=f"INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,unit,stock_code,origin_type) VALUES ( %s, %s, %s, %s, %s, %s)"
insert_param=(social_credit_code,content,level_relation,unit,stock,origin_type,currentdate)
insert_sql=f"INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,unit,stock_code,origin_type,create_time) VALUES ( %s,%s, %s, %s, %s, %s, %s)"
# 记录不存在,执行插入操作
cursor.execute(insert_sql,insert_param)
print('插入成功')
# 提交事务
conn.commit()
        except Exception as e:
            return False
        finally:
            cursor.close()
            conn.close()
        return True  # moved out of finally so a failed insert/update is not reported as success
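The updated dataToSql() keeps the select-then-update-or-insert round trip and now stamps create_time. If config_finance_data_sync has (or could be given) a UNIQUE key on stock_code — an assumption, not something this diff shows — the same logic could be collapsed into a single upsert, sketched here:

# Hypothetical single-statement upsert; assumes a UNIQUE index on stock_code.
upsert_sql = (
    "INSERT INTO config_finance_data_sync "
    "(social_credit_code, content, level_relation, unit, stock_code, origin_type, create_time) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s) "
    "ON DUPLICATE KEY UPDATE "
    "social_credit_code=VALUES(social_credit_code), content=VALUES(content), "
    "level_relation=VALUES(level_relation), origin_type=VALUES(origin_type), "
    "create_time=VALUES(create_time)"
)
cursor.execute(upsert_sql, (social_credit_code, content, level_relation, unit, stock, origin_type, currentdate))
conn.commit()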
def get_unit(self,doc_resp):
......@@ -366,9 +375,13 @@ if __name__ == '__main__':
#get_content1()
yahoo=YahooCaiwu()
while True:
securitiescode=''
try:
securitiescode=yahoo.getCodeFromRedis()
yahoo.get_content2(securitiescode)
except Exception as e:
yahoo.r.rpush('NoticeEnterprise:securities_code',securitiescode)
if securitiescode:
yahoo.r.rpush('NoticeEnterprise:securities_code',securitiescode)
else:
time.sleep(300)
......@@ -5,7 +5,7 @@ pass=clbzzsn
[mysql]
host=114.115.159.144
username=root
username=caiji
password=zzsn9988
database=caiji
url=jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
......
# -*- coding: utf-8 -*-
import datetime
from selenium.webdriver.support.wait import WebDriverWait
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import configparser
import redis
class CurrencyRate(object):
def __init__(self):
self.config = configparser.ConfigParser()
# 读取配置文件
self.config.read('config.ini')
self.r = redis.Redis(host=self.config.get('redis', 'host'),
port=self.config.get('redis', 'port'),
password=self.config.get('redis', 'pass'), db=6)
self.driver=self.get_webdriver()
def get_webdriver(self):
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument('--headless')
chrome_options.binary_location = self.config.get('selenium', 'binary_location')
executable_path =self.config.get('selenium', 'chrome_driver')
driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
return driver
def getRate(self):
rateList=[]
for result1 in result_list1:
currency_name = result1[0]
currency = result1[1]
to_USD = ''
to_CNY = ''
for i in range(len(result_list2)):
result2 = result_list2[i]
# https://qq.ip138.com/hl.asp?from=CNY&to=USD&q=1
url = f'''https://qq.ip138.com/hl.asp?from={currency}&to={result2}&q=1'''
# 等待页面加载完成
try:
self.driver.get(url)
wait = WebDriverWait(self.driver, 300)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
time.sleep(1)
doc_resp = pq(self.driver.page_source)
money = doc_resp('table tr:nth-child(3) td:nth-child(3)').text()
if money == '1':
money_result = money
else:
try:
money_result = round(float(money), 4)
except:
continue
if i == 0:
to_USD = money_result
else:
to_CNY = money_result
except Exception as e:
try:
self.driver.close()
self.driver.quit()
except Exception as e:
print(e)
self.driver=self.get_webdriver()
now = datetime.datetime.now()
now_time = now.strftime('%Y-%m-%d')
if to_USD == '' or to_CNY == '':
continue
result_dict = {
'币种': currency_name,
'币简称': currency,
'对美元': to_USD,
'对人民币': to_CNY,
'更新时间': now_time }
print(result_dict)
rate={
"currencyName": currency_name,
"currencyCode": currency,
"rateToUSD": to_USD,
"rateToCNY": to_CNY,
"createDate": now_time
}
rateList.append(rate)
# market_url = f'http://192.168.1.39:8088/sync/currencyRate'
market_url = f'http://114.115.236.206:8088/sync/currencyRate'
try:
resp = requests.post(market_url,json=rateList)
# 检查响应状态码
if resp.status_code == 200:
print("请求成功")
# 打印响应内容
print(resp.content)
else:
print("请求失败")
except Exception as e:
print(e)
if __name__ == '__main__':
    result_list1 = [
        ['人民币', 'CNY'],
        ['美元', 'USD'],
        ['欧元', 'EUR'],
        ['瑞士法郎', 'CHF'],
        ['加元', 'CAD'],
        ['波兰兹罗提', 'PLN'],
        ['英镑', 'GBP'],
        ['澳元', 'AUD'],
        ['泰铢', 'THB'],
        ['沙特里亚尔', 'SAR'],
        ['巴西里亚伊', 'BRL'],
        ['新土耳其新里拉', 'TRY'],
        ['新台币', 'TWD'],
        ['印度卢比', 'INR'],
        ['墨西哥比索', 'MXN'],
        ['日元', 'JPY'],
        ['瑞典克朗', 'SEK'],
        ['韩元', 'KRW'],
        ['俄罗斯卢布', 'RUB'],
        ['新加坡元', 'SGD'],
        ['港币', 'HKD']]
    result_list2 = ['USD', 'CNY']
currenRate=CurrencyRate()
currenRate.getRate()
currenRate.driver.quit()
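get_webdriver() here (and in the Yahoo scripts in this commit) passes executable_path straight to webdriver.Chrome, which only works on Selenium 3.x; Selenium 4 removed that keyword. Purely as a sketch for an eventual upgrade — an assumption about the environment, not a change this commit makes — the equivalent construction with the same config.ini keys would look like this:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

def build_driver(config):
    # Same options as get_webdriver(), but the driver path goes through Service.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.binary_location = config.get('selenium', 'binary_location')
    service = Service(executable_path=config.get('selenium', 'chrome_driver'))
    return webdriver.Chrome(service=service, options=chrome_options)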
# -*- coding: utf-8 -*-
import datetime
import xlrd
from selenium.webdriver.support.wait import WebDriverWait
from tqdm import tqdm
import pymongo
import pymysql
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
import json
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import re
from BaseCore import BaseCore
import configparser
import redis
class Shizhi(object):
def __init__(self):
self.config = configparser.ConfigParser()
# 读取配置文件
self.config.read('config.ini')
baseCore=BaseCore()
self.logger=baseCore.getLogger()
self.r = redis.Redis(host=self.config.get('redis', 'host'),
port=self.config.get('redis', 'port'),
password=self.config.get('redis', 'pass'), db=6)
self.driver=self.get_webdriver()
def get_webdriver(self):
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument('--headless')
chrome_options.binary_location = self.config.get('selenium', 'binary_location')
executable_path =self.config.get('selenium', 'chrome_driver')
driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
return driver
def conn11(self):
conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
charset='utf8')
cursor = conn.cursor()
return conn,cursor
def getmarketCap(self):
conn,cursor=self.conn11()
try:
sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where category in ('4','5','6') """ # and stock_code = "SYNH"
cursor.execute(sql1)
result_data = cursor.fetchall()
except Exception as e:
self.logger.info("数据查询异常!")
return
for data in result_data:
try:
data_list = list(data)
print(data_list)
social_credit_code = data_list[0]
stock = data_list[1]
securities_short_name = data_list[2] if data_list[2] is not None else ""
# content_sql = ''
stock2=str(stock)
if stock2.upper().endswith("HK") and stock2.upper().startswith("0") :
stock2=stock2[1:]
# https://finance.yahoo.com/quote/032830.KS?p=032830.KS
url = f'https://finance.yahoo.com/quote/{stock2}?p={stock2}'
try:
self.logger.info(f'正在采集:{url}')
self.driver.get(url)
# 等待页面加载完成
wait = WebDriverWait(self.driver, 300)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
time.sleep(5)
doc_resp = pq(self.driver.page_source)
unit=doc_resp('div[id="quote-header-info"]>div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span')
currency = unit.text().split("Currency in ")[1]
market_cap=doc_resp('td[data-test="MARKET_CAP-value"]')
marketcap=market_cap.text()
if marketcap and marketcap!='N/A':
# 获取当前时间
current_time = datetime.datetime.now()
currentdate = current_time.strftime("%Y-%m-%d")
print(f'信用代码:{social_credit_code} 股票代码:{stock} 币种:{currency} 市值:{marketcap} 日期:{currentdate}')
# market_url = f'http://192.168.1.39:8088/sync/marketValue'
market_url = f'http://114.115.236.206:8088/sync/marketValue'
param= {
"socialCreditCode": social_credit_code,
"stockCode": stock,
"marketValue": marketcap,
"originalUnit": currency,
"valueTime": currentdate
}
try:
resp = requests.post(market_url,json=param)
# 检查响应状态码
if resp.status_code == 200:
print("请求成功")
# 打印响应内容
print(resp.content)
else:
print("请求失败")
except:
with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f:
f.write(stock + '\n')
except Exception as e:
self.driver.close()
self.driver.quit()
self.driver=self.get_webdriver()
print(e)
except Exception as e:
print(e)
self.driver.close()
self.driver.quit()
self.driver=self.get_webdriver()
if __name__ == '__main__':
shizhi=Shizhi()
shizhi.getmarketCap()
\ No newline at end of file
# -*- coding: utf-8 -*-
import datetime
import time
import pymysql
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from pyquery import PyQuery as pq
from openpyxl import Workbook
import pandas as pd
class WanfangSpider(object):
def __init__(self):
pass
def req(self,url):
header={
"accept":"*/*",
"connection":"Keep-Alive",
"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}
res = requests.get(url,headers=header)
if res.status_code==200:
text=res.text
print('请求成功!')
else:
text=''
print('请求失败!')
return text
# 将html中的相对地址转换成绝对地址
def paserUrl(self,html,listurl):
soup = BeautifulSoup(html, 'html.parser')
# 获取所有的<a>标签和<img>标签
links = soup.find_all(['a', 'img'])
# 遍历标签,将相对地址转换为绝对地址
for link in links:
if 'href' in link.attrs:
link['href'] = urljoin(listurl, link['href'])
elif 'src' in link.attrs:
link['src'] = urljoin(listurl, link['src'])
return soup
def pageList(self):
listmsg=[]
for num in range(1,73):
url=f'https://kms.wanfangdata.com.cn/IndustryYJ/Search/Cecdb?q=%E5%86%B6%E9%87%91%2B%E5%86%B6%E7%82%BC%20%E6%9C%BA%E6%9E%84%3Acsi&f=Inst.Type&PageNumber={num}'
html=self.req(url)
soup=self.paserUrl(html,url)
text=str(soup.prettify())
doc=pq(text)
liTag=doc('li[class="rt-wrap"]')
# print(liTag)
for li in liTag:
lidoc=pq(li)
title=lidoc('a[class="title"]').text()
turl=lidoc('a[class="title"]').attr('href')
msg={
'title':title,
'turl':turl
}
listmsg.append(msg)
return listmsg
def detailMsg(self,msg):
detailList=[]
turl = msg['turl']
title = msg['title']
html=self.req(turl)
soup=self.paserUrl(html,turl)
dtext=str(soup.prettify())
ddoc=pq(dtext)
a1=ddoc('table[class="detail-md"]>tr:nth-child(2)>td:nth-child(1)').text().replace(":","")
institutionType=ddoc('table[class="detail-md"]>tr:nth-child(2)>td:nth-child(2)').text()
a2=ddoc('table[class="detail-md"]>tr:nth-child(3)>td:nth-child(1)').text().replace(":","")
formerName=ddoc('table[class="detail-md"]>tr:nth-child(3)>td:nth-child(2)').text()
a3=ddoc('table[class="detail-md"]>tr:nth-child(4)>td:nth-child(1)').text().replace(":","")
leader=ddoc('table[class="detail-md"]>tr:nth-child(4)>td:nth-child(2)').text()
a4=ddoc('table[class="detail-md"]>tr:nth-child(5)>td:nth-child(1)').text().replace(":","")
establishmentDate=ddoc('table[class="detail-md"]>tr:nth-child(5)>td:nth-child(2)').text()
a5=ddoc('table[class="detail-md"]>tr:nth-child(6)>td:nth-child(1)').text().replace(":","")
introduction=ddoc('table[class="detail-md"]>tr:nth-child(6)>td:nth-child(2)').text()
a6=ddoc('table[class="detail-md"]>tr:nth-child(7)>td:nth-child(1)').text().replace(":","")
classification=ddoc('table[class="detail-md"]>tr:nth-child(7)>td:nth-child(2)').text()
a7=ddoc('table[class="detail-md"]>tr:nth-child(8)>td:nth-child(1)').text().replace(":","")
keywords=ddoc('table[class="detail-md"]>tr:nth-child(8)>td:nth-child(2)').text()
a8=ddoc('table[class="detail-md"]>tr:nth-child(9)>td:nth-child(1)').text().replace(":","")
researchEquipment=ddoc('table[class="detail-md"]>tr:nth-child(9)>td:nth-child(2)').text()
a9=ddoc('table[class="detail-md"]>tr:nth-child(10)>td:nth-child(1)').text().replace(":","")
researchAreas=ddoc('table[class="detail-md"]>tr:nth-child(10)>td:nth-child(2)').text()
a10=ddoc('table[class="detail-md"]>tr:nth-child(11)>td:nth-child(1)').text().replace(":","")
awards=ddoc('table[class="detail-md"]>tr:nth-child(11)>td:nth-child(2)').text()
a11=ddoc('table[class="detail-md"]>tr:nth-child(12)>td:nth-child(1)').text().replace(":","")
internalDepartments=ddoc('table[class="detail-md"]>tr:nth-child(12)>td:nth-child(2)').text()
a12=ddoc('table[class="detail-md"]>tr:nth-child(13)>td:nth-child(1)').text().replace(":","")
subsidiaryInstitutions=ddoc('table[class="detail-md"]>tr:nth-child(13)>td:nth-child(2)').text()
a13=ddoc('table[class="detail-md"]>tr:nth-child(14)>td:nth-child(1)').text().replace(":","")
productInformation=ddoc('table[class="detail-md"]>tr:nth-child(14)>td:nth-child(2)').text()
a14=ddoc('table[class="detail-md"]>tr:nth-child(15)>td:nth-child(1)').text().replace(":","")
publicationJournals=ddoc('table[class="detail-md"]>tr:nth-child(15)>td:nth-child(2)').text()
a15=ddoc('table[class="detail-md"]>tr:nth-child(16)>td:nth-child(1)').text().replace(":","")
mailingAddress=ddoc('table[class="detail-md"]>tr:nth-child(16)>td:nth-child(2)').text()
a16=ddoc('table[class="detail-md"]>tr:nth-child(17)>td:nth-child(1)').text().replace(":","")
tel=ddoc('table[class="detail-md"]>tr:nth-child(17)>td:nth-child(2)').text()
a17=ddoc('table[class="detail-md"]>tr:nth-child(18)>td:nth-child(1)').text().replace(":","")
faxNumber=ddoc('table[class="detail-md"]>tr:nth-child(18)>td:nth-child(2)').text()
a18=ddoc('table[class="detail-md"]>tr:nth-child(19)>td:nth-child(1)').text().replace(":","")
email=ddoc('table[class="detail-md"]>tr:nth-child(19)>td:nth-child(2)').text()
a19=ddoc('table[class="detail-md"]>tr:nth-child(20)>td:nth-child(1)').text().replace(":","")
website=ddoc('table[class="detail-md"]>tr:nth-child(20)>td:nth-child(2)').text()
detailmsg={
'title':title,
a1:institutionType,
a2:formerName,
a3:leader,
a4:establishmentDate,
a5:introduction,
a6:classification,
a7:keywords,
a8:researchEquipment,
a9:researchAreas,
a10:awards,
a11:internalDepartments,
a12:subsidiaryInstitutions,
a13:productInformation,
a14:publicationJournals,
a15:mailingAddress,
a16:tel,
a17:faxNumber,
a18:email,
a19:website
}
detailList.append(detailmsg)
self.writerToExcel(detailList)
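detailMsg() above addresses each of the 19 label/value rows with its own nth-child selector. A loop over the table rows could produce the same dictionary; this is only a sketch, assuming every data row of table.detail-md carries a label cell and a value cell as those selectors imply.

# Hypothetical loop-based variant of the row-by-row selectors above.
detailmsg = {'title': title}
for tr in ddoc('table[class="detail-md"] tr').items():
    tds = tr('td')
    if len(tds) < 2:
        continue                                   # skip header/irregular rows
    label = tds.eq(0).text().replace(':', '')     # e.g. 机构类型, 负责人, ...
    value = tds.eq(1).text()
    if label:
        detailmsg[label] = value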
def conn144(self):
conn = pymysql.Connect(host='114.115.159.144', port=3306, user='caiji', passwd='zzsn9988', db='caiji',
charset='utf8')
cursor = conn.cursor()
return conn,cursor
def dataToSql(self,detailmsg):
conn,cursor=self.conn144()
try:
# 检查记录是否存在
# 获取当前时间
current_time = datetime.datetime.now()
# 将时间转换为字符串
currentdate = current_time.strftime("%Y-%m-%d %H:%M:%S")
except Exception as e:
print('+++++')
finally:
cursor.close()
conn.close()
# 将数据追加到excel
def writerToExcel(self,detailList):
# filename='baidu搜索.xlsx'
# 读取已存在的xlsx文件
existing_data = pd.read_excel(filename)
# 创建新的数据
new_data = pd.DataFrame(data=detailList)
# 将新数据添加到现有数据的末尾
combined_data = existing_data.append(new_data, ignore_index=True)
# 将结果写入到xlsx文件
combined_data.to_excel(filename, index=False)
if __name__ == '__main__':
filename='机构.xlsx'
# # 创建一个工作簿
workbook = Workbook()
workbook.save(filename)
wanfang=WanfangSpider()
lsitmsg=wanfang.pageList()
for msg in lsitmsg:
wanfang.detailMsg(msg)
\ No newline at end of file
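writerToExcel() grows the workbook with DataFrame.append, which pandas 2.x no longer provides. A sketch of the same read-append-write step using pd.concat (the function name is illustrative; the behaviour is otherwise unchanged):

import pandas as pd

def writer_to_excel(detail_list, filename):
    existing_data = pd.read_excel(filename)              # rows already saved
    new_data = pd.DataFrame(data=detail_list)            # rows from this page
    combined = pd.concat([existing_data, new_data], ignore_index=True)
    combined.to_excel(filename, index=False)             # overwrite with the merged sheet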