提交 e13aa13e 作者: 刘伟刚

代码修改4

上级 07b5b32c
...@@ -16,9 +16,9 @@ from openpyxl import Workbook ...@@ -16,9 +16,9 @@ from openpyxl import Workbook
import langid import langid
#创建连接池 #创建连接池
import pymysql
from pymysql import connections from pymysql import connections
from DBUtils.PooledDB import PooledDB from DBUtils.PooledDB import PooledDB
import pymysql
# 注意 程序退出前 调用BaseCore.close() 关闭相关资源 # 注意 程序退出前 调用BaseCore.close() 关闭相关资源
......
...@@ -9,7 +9,7 @@ r = basecore.r ...@@ -9,7 +9,7 @@ r = basecore.r
def conn11(): def conn11():
conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project', conn = pymysql.Connect(host='114.116.44.11', port=3306, user='caiji', passwd='f7s0&7qqtK', db='clb_project',
charset='utf8') charset='utf8')
cursor = conn.cursor() cursor = conn.cursor()
return conn,cursor return conn,cursor
...@@ -25,6 +25,7 @@ def yahooCodeFromSql(): ...@@ -25,6 +25,7 @@ def yahooCodeFromSql():
print('=======') print('=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('NoticeEnterprise:securities_code', item) r.rpush('NoticeEnterprise:securities_code', item)
print('将股票代码放入redis结束')
except Exception as e: except Exception as e:
log.info("数据查询异常") log.info("数据查询异常")
finally: finally:
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# @Author: MENG # @Author: MENG
# @Time : 2022-4-9 # @Time : 2022-4-9
import datetime
import xlrd import xlrd
from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support.wait import WebDriverWait
from tqdm import tqdm from tqdm import tqdm
...@@ -42,6 +44,7 @@ create_time ...@@ -42,6 +44,7 @@ create_time
""" """
class YahooCaiwu(object): class YahooCaiwu(object):
def __init__(self): def __init__(self):
self.config = configparser.ConfigParser() self.config = configparser.ConfigParser()
# 读取配置文件 # 读取配置文件
...@@ -125,6 +128,7 @@ class YahooCaiwu(object): ...@@ -125,6 +128,7 @@ class YahooCaiwu(object):
all_dict['内容'] = content_dict all_dict['内容'] = content_dict
return all_dict return all_dict
def get_webdriver(self): def get_webdriver(self):
chrome_options = webdriver.ChromeOptions() chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--disable-gpu') chrome_options.add_argument('--disable-gpu')
...@@ -173,7 +177,7 @@ class YahooCaiwu(object): ...@@ -173,7 +177,7 @@ class YahooCaiwu(object):
stock2=stock2[1:] stock2=stock2[1:]
url = f'https://finance.yahoo.com/quote/{stock2}/financials?p={stock2}' url = f'https://finance.yahoo.com/quote/{stock2}/financials?p={stock2}'
try: try:
self.logger.info(f'正在采集:{url}') print(f'正在采集:{url}')
self.driver.get(url) self.driver.get(url)
# 等待页面加载完成 # 等待页面加载完成
wait = WebDriverWait(self.driver, 300) wait = WebDriverWait(self.driver, 300)
...@@ -293,13 +297,13 @@ class YahooCaiwu(object): ...@@ -293,13 +297,13 @@ class YahooCaiwu(object):
try: try:
resp = requests.get(get_url) resp = requests.get(get_url)
print(resp.text) print(resp.text)
self.logger.info('调用接口成功!!') print('调用接口成功!!')
except: except:
with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f: with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f:
f.write(stock + '\n') f.write(stock + '\n')
except Exception as e: except Exception as e:
print(e) print(e)
self.logger.info(f'采集:{url}失败') print(f'采集:{url}失败')
self.driver.quit() self.driver.quit()
time.sleep(10) time.sleep(10)
self.driver=self.get_webdriver() self.driver=self.get_webdriver()
...@@ -307,15 +311,16 @@ class YahooCaiwu(object): ...@@ -307,15 +311,16 @@ class YahooCaiwu(object):
# time.sleep(60 * 10) # time.sleep(60 * 10)
self.driver.quit() self.driver.quit()
time.sleep(10) time.sleep(10)
driver=self.get_webdriver() self.driver=self.get_webdriver()
self.logger.info('出错,重试中!') print('出错,重试中!')
continue continue
# driver.close() # self.driver.close()
def dataToSql(self,conn,cursor,ipo_data): def dataToSql(self,conn,cursor,ipo_data):
try: try:
social_credit_code=ipo_data['social_credit_code'] social_credit_code=ipo_data['social_credit_code']
stock=str(ipo_data['stock']) stock=ipo_data['stock']
securities_short_name=ipo_data['securities_short_name'] securities_short_name=ipo_data['securities_short_name']
content=ipo_data['content'] content=ipo_data['content']
level_relation=ipo_data['level_relation'] level_relation=ipo_data['level_relation']
...@@ -331,23 +336,27 @@ class YahooCaiwu(object): ...@@ -331,23 +336,27 @@ class YahooCaiwu(object):
select_sql=f"SELECT * FROM config_finance_data_sync WHERE stock_code='{stock}'" select_sql=f"SELECT * FROM config_finance_data_sync WHERE stock_code='{stock}'"
cursor.execute(select_sql) cursor.execute(select_sql)
existing_record = cursor.fetchone() existing_record = cursor.fetchone()
# 获取当前时间
current_time = datetime.datetime.now()
# 将时间转换为字符串
currentdate = current_time.strftime("%Y-%m-%d %H:%M:%S")
if existing_record: if existing_record:
# 记录已存在,执行更新操作 # 记录已存在,执行更新操作
update_param=(social_credit_code,content,level_relation,origin_type,stock) update_param=(social_credit_code,content,level_relation,origin_type,currentdate,stock)
update_sql=f"UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s WHERE stock_code=%s " update_sql=f"UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s ,create_time=%s WHERE stock_code=%s "
cursor.execute(update_sql,update_param) cursor.execute(update_sql,update_param)
print('更新成功')
else: else:
insert_param=(social_credit_code,content,level_relation,unit,stock,origin_type) insert_param=(social_credit_code,content,level_relation,unit,stock,origin_type,currentdate)
insert_sql=f"INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,unit,stock_code,origin_type) VALUES ( %s, %s, %s, %s, %s, %s)" insert_sql=f"INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,unit,stock_code,origin_type,create_time) VALUES ( %s,%s, %s, %s, %s, %s, %s)"
# 记录不存在,执行插入操作 # 记录不存在,执行插入操作
cursor.execute(insert_sql,insert_param) cursor.execute(insert_sql,insert_param)
print('插入成功')
# 提交事务 # 提交事务
conn.commit() conn.commit()
except Exception as e: except Exception as e:
return False return False
finally:
cursor.close()
conn.close()
return True return True
def get_unit(self,doc_resp): def get_unit(self,doc_resp):
...@@ -366,9 +375,13 @@ if __name__ == '__main__': ...@@ -366,9 +375,13 @@ if __name__ == '__main__':
#get_content1() #get_content1()
yahoo=YahooCaiwu() yahoo=YahooCaiwu()
while True: while True:
securitiescode=''
try: try:
securitiescode=yahoo.getCodeFromRedis() securitiescode=yahoo.getCodeFromRedis()
yahoo.get_content2(securitiescode) yahoo.get_content2(securitiescode)
except Exception as e: except Exception as e:
yahoo.r.rpush('NoticeEnterprise:securities_code',securitiescode) if securitiescode:
yahoo.r.rpush('NoticeEnterprise:securities_code',securitiescode)
else:
time.sleep(300)
...@@ -5,7 +5,7 @@ pass=clbzzsn ...@@ -5,7 +5,7 @@ pass=clbzzsn
[mysql] [mysql]
host=114.115.159.144 host=114.115.159.144
username=root username=caiji
password=zzsn9988 password=zzsn9988
database=caiji database=caiji
url=jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false url=jdbc:mysql://114.115.159.144:3306/caiji?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false
......
# -*- coding: utf-8 -*-
import datetime
from selenium.webdriver.support.wait import WebDriverWait
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import configparser
import redis
class CurrencyRate(object):
    """Collect exchange rates with a Selenium-driven browser and post them to the sync service.

    Settings (redis host/port/password, Chrome binary and chromedriver
    paths) are read from ``config.ini`` in the current working directory.
    """

    # Endpoint that receives the collected rates as one JSON array.
    # Alternative endpoint kept from the original source:
    #   http://192.168.1.39:8088/sync/currencyRate
    MARKET_URL = 'http://114.115.236.206:8088/sync/currencyRate'
    # Seconds to wait for the sync endpoint; without a timeout requests can block forever.
    POST_TIMEOUT = 60

    def __init__(self):
        """Load ``config.ini``, create the redis client and start a browser."""
        self.config = configparser.ConfigParser()
        # Read the configuration file.
        self.config.read('config.ini')
        self.r = redis.Redis(host=self.config.get('redis', 'host'),
                             port=self.config.get('redis', 'port'),
                             password=self.config.get('redis', 'pass'), db=6)
        self.driver = self.get_webdriver()

    def get_webdriver(self):
        """Create and return a new Chrome WebDriver configured from ``config.ini``."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        chrome_options.add_argument("--start-maximized")
        # chrome_options.add_argument('--headless')
        chrome_options.binary_location = self.config.get('selenium', 'binary_location')
        executable_path = self.config.get('selenium', 'chrome_driver')
        # NOTE(review): the ``executable_path`` keyword is not accepted by newer
        # Selenium 4 releases -- confirm the pinned Selenium version.
        driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
        return driver

    @staticmethod
    def _parse_rate(money):
        """Convert the scraped cell text into a rate.

        Returns the text unchanged when it is exactly ``'1'``, a float rounded
        to 4 decimals when it is numeric, and ``None`` when it cannot be parsed.
        """
        if money == '1':
            return money
        try:
            return round(float(money), 4)
        except (TypeError, ValueError):
            return None

    def _restart_driver(self):
        """Shut the current browser down (best effort) and start a fresh one."""
        # Each shutdown step is guarded on its own so that a failing close()
        # no longer prevents quit() from ending the chromedriver process.
        for shutdown in (self.driver.close, self.driver.quit):
            try:
                shutdown()
            except Exception as e:
                print(e)
        self.driver = self.get_webdriver()

    def getRate(self):
        """Scrape the rate of every configured currency and post the list to ``MARKET_URL``.

        Reads two module-level names: ``result_list1`` (items whose index 0 is
        the display name and index 1 the currency code) and ``result_list2``
        (target currency codes; the rate for index 0 is stored as the USD rate,
        any other index as the CNY rate).  A currency is skipped when either
        rate could not be obtained.
        """
        rateList = []
        for result1 in result_list1:
            currency_name = result1[0]
            currency = result1[1]
            to_USD = ''
            to_CNY = ''
            for i, result2 in enumerate(result_list2):
                # e.g. https://qq.ip138.com/hl.asp?from=CNY&to=USD&q=1
                url = f'''https://qq.ip138.com/hl.asp?from={currency}&to={result2}&q=1'''
                try:
                    self.driver.get(url)
                    # Wait until the page has loaded.
                    wait = WebDriverWait(self.driver, 300)
                    wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
                    time.sleep(1)
                    doc_resp = pq(self.driver.page_source)
                    money = doc_resp('table tr:nth-child(3) td:nth-child(3)').text()
                    money_result = self._parse_rate(money)
                    if money_result is None:
                        # Cell was empty or not numeric; leave this rate unset.
                        continue
                    if i == 0:
                        to_USD = money_result
                    else:
                        to_CNY = money_result
                except Exception as e:
                    # Report the failure instead of swallowing it, then recover
                    # with a fresh browser session.
                    print(e)
                    self._restart_driver()
            now = datetime.datetime.now()
            now_time = now.strftime('%Y-%m-%d')
            if to_USD == '' or to_CNY == '':
                continue
            result_dict = {
                '币种': currency_name,
                '币简称': currency,
                '对美元': to_USD,
                '对人民币': to_CNY,
                '更新时间': now_time}
            print(result_dict)
            rate = {
                "currencyName": currency_name,
                "currencyCode": currency,
                "rateToUSD": to_USD,
                "rateToCNY": to_CNY,
                "createDate": now_time
            }
            rateList.append(rate)
        try:
            resp = requests.post(self.MARKET_URL, json=rateList, timeout=self.POST_TIMEOUT)
            # Check the response status code.
            if resp.status_code == 200:
                print("请求成功")
                # Print the response body.
                print(resp.content)
            else:
                print("请求失败")
        except Exception as e:
            print(e)
if __name__ == '__main__':
    # Source currencies as [display name, currency code].
    # CurrencyRate.getRate reads this module-level name.
    result_list1 = [
        ['人民币', 'CNY'],
        ['美元', 'USD'],
        ['欧元', 'EUR'],
        ['瑞士法郎', 'CHF'],
        ['加元', 'CAD'],
        ['波兰兹罗提', 'PLN'],
        ['英镑', 'GBP'],
        ['澳元', 'AUD'],
        ['泰铢', 'THB'],
        ['沙特里亚尔', 'SAR'],
        ['巴西里亚伊', 'BRL'],
        ['新土耳其新里拉', 'TRY'],
        ['新台币', 'TWD'],
        ['印度卢比', 'INR'],
        ['墨西哥比索', 'MXN'],
        ['日元', 'JPY'],
        ['瑞典克朗', 'SEK'],
        ['韩元', 'KRW'],
        ['俄罗斯卢布', 'RUB'],
        ['新加坡元', 'SGD'],
        ['港币', 'HKD'],
    ]
    # Target currencies; getRate treats index 0 as the USD rate and the rest as the CNY rate.
    result_list2 = ['USD', 'CNY']
    currenRate = CurrencyRate()
    try:
        currenRate.getRate()
    finally:
        # Always release the browser, even when getRate() raises, so no
        # Chrome/chromedriver process is left behind.
        currenRate.driver.quit()
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import datetime
import xlrd
from selenium.webdriver.support.wait import WebDriverWait
from tqdm import tqdm
import pymongo
import pymysql
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
import json
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import re
from BaseCore import BaseCore
import configparser
import redis
class Shizhi(object):
    """Scrape market capitalisation from Yahoo Finance and post it to the sync service.

    Settings (redis host/port/password, Chrome binary and chromedriver
    paths) are read from ``config.ini`` in the current working directory.
    """

    # Endpoint that receives one market-value record per request.
    # Alternative endpoint kept from the original source:
    #   http://192.168.1.39:8088/sync/marketValue
    MARKET_URL = 'http://114.115.236.206:8088/sync/marketValue'
    # Seconds to wait for the sync endpoint; without a timeout requests can block forever.
    POST_TIMEOUT = 60
    # Stock codes whose POST failed are appended to this file, one per line.
    FAILED_ID_FILE = '雅虎财经-财务数据_发送错误ID.txt'

    def __init__(self):
        """Load ``config.ini``, set up logging and redis, and start a browser."""
        self.config = configparser.ConfigParser()
        # Read the configuration file.
        self.config.read('config.ini')
        baseCore = BaseCore()
        self.logger = baseCore.getLogger()
        self.r = redis.Redis(host=self.config.get('redis', 'host'),
                             port=self.config.get('redis', 'port'),
                             password=self.config.get('redis', 'pass'), db=6)
        self.driver = self.get_webdriver()

    def get_webdriver(self):
        """Create and return a new Chrome WebDriver configured from ``config.ini``."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        chrome_options.add_argument("--start-maximized")
        # chrome_options.add_argument('--headless')
        chrome_options.binary_location = self.config.get('selenium', 'binary_location')
        executable_path = self.config.get('selenium', 'chrome_driver')
        # NOTE(review): the ``executable_path`` keyword is not accepted by newer
        # Selenium 4 releases -- confirm the pinned Selenium version.
        driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
        return driver

    def conn11(self):
        """Open a MySQL connection to ``clb_project`` and return ``(conn, cursor)``.

        The caller is responsible for closing both objects.
        """
        # NOTE(review): credentials are hard-coded and connect as ``root``;
        # consider moving them to config.ini and using a least-privileged account.
        conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
                               charset='utf8')
        cursor = conn.cursor()
        return conn, cursor

    @staticmethod
    def _yahoo_symbol(stock):
        """Return the ticker as used in Yahoo URLs.

        The value is converted to ``str``; when it ends with ``HK`` and starts
        with ``0`` (case-insensitive check) the first character is dropped.
        """
        symbol = str(stock)
        upper = symbol.upper()
        if upper.endswith("HK") and upper.startswith("0"):
            symbol = symbol[1:]
        return symbol

    def _restart_driver(self):
        """Shut the current browser down (best effort) and start a fresh one."""
        # Guard each step: close()/quit() raise when the session is already
        # dead, which previously escaped the error handler and ended the run.
        for shutdown in (self.driver.close, self.driver.quit):
            try:
                shutdown()
            except Exception as e:
                print(e)
        self.driver = self.get_webdriver()

    def _post_market_value(self, param, stock):
        """POST one record to ``MARKET_URL``; record ``stock`` in ``FAILED_ID_FILE`` on error."""
        try:
            resp = requests.post(self.MARKET_URL, json=param, timeout=self.POST_TIMEOUT)
            # Check the response status code.
            if resp.status_code == 200:
                print("请求成功")
                # Print the response body.
                print(resp.content)
            else:
                print("请求失败")
        except Exception:
            with open(self.FAILED_ID_FILE, 'a', encoding='utf8') as f:
                # Format instead of concatenating so a non-str stock code
                # cannot raise TypeError while recording the failure.
                f.write(f'{stock}\n')

    def getmarketCap(self):
        """Fetch the enterprise list from MySQL, scrape each market cap and post it.

        Returns ``None``.  When the query fails the error is logged and the
        method returns early.  A failure while processing one enterprise is
        printed, the browser is restarted, and processing continues with the
        next enterprise.
        """
        conn, cursor = self.conn11()
        try:
            sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where category in ('4','5','6') """  # and stock_code = "SYNH"
            cursor.execute(sql1)
            result_data = cursor.fetchall()
        except Exception as e:
            self.logger.info("数据查询异常!")
            return
        finally:
            # All rows are already fetched, so release the connection before
            # the long-running scrape (it was previously never closed).
            cursor.close()
            conn.close()
        for data in result_data:
            try:
                data_list = list(data)
                print(data_list)
                social_credit_code = data_list[0]
                stock = data_list[1]
                stock2 = self._yahoo_symbol(stock)
                # e.g. https://finance.yahoo.com/quote/032830.KS?p=032830.KS
                url = f'https://finance.yahoo.com/quote/{stock2}?p={stock2}'
                self.logger.info(f'正在采集:{url}')
                self.driver.get(url)
                # Wait until the page has loaded.
                wait = WebDriverWait(self.driver, 300)
                wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
                time.sleep(5)
                doc_resp = pq(self.driver.page_source)
                unit = doc_resp('div[id="quote-header-info"]>div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span')
                # Raises IndexError when the marker text is absent; handled below.
                currency = unit.text().split("Currency in ")[1]
                market_cap = doc_resp('td[data-test="MARKET_CAP-value"]')
                marketcap = market_cap.text()
                if marketcap and marketcap != 'N/A':
                    # Current date used as the value timestamp.
                    current_time = datetime.datetime.now()
                    currentdate = current_time.strftime("%Y-%m-%d")
                    print(f'信用代码:{social_credit_code} 股票代码:{stock} 币种:{currency} 市值:{marketcap} 日期:{currentdate}')
                    param = {
                        "socialCreditCode": social_credit_code,
                        "stockCode": stock,
                        "marketValue": marketcap,
                        "originalUnit": currency,
                        "valueTime": currentdate
                    }
                    self._post_market_value(param, stock)
            except Exception as e:
                print(e)
                self._restart_driver()
if __name__ == '__main__':
    shizhi = Shizhi()
    try:
        shizhi.getmarketCap()
    finally:
        # Release the browser when the run ends or fails; previously the
        # Chrome/chromedriver process was left running.
        shizhi.driver.quit()
\ No newline at end of file
# -*- coding: utf-8 -*-
import datetime
import time
import pymysql
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from pyquery import PyQuery as pq
from openpyxl import Workbook
import pandas as pd
class WanfangSpider(object):
    """Crawl institution listings from kms.wanfangdata.com.cn and append the details to an Excel file."""

    # Seconds to wait for an HTTP response; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 60
    # CSS selector template for one cell of the detail table.
    _CELL_SELECTOR = 'table[class="detail-md"]>tr:nth-child({row})>td:nth-child({col})'

    def __init__(self):
        pass

    def req(self, url):
        """GET ``url`` and return the response text, or ``''`` when the status is not 200."""
        header = {
            "accept": "*/*",
            "connection": "Keep-Alive",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
        }
        res = requests.get(url, headers=header, timeout=self.REQUEST_TIMEOUT)
        if res.status_code == 200:
            text = res.text
            print('请求成功!')
        else:
            text = ''
            print('请求失败!')
        return text

    def paserUrl(self, html, listurl):
        """Parse ``html`` and rewrite relative link targets against ``listurl``.

        For every ``<a>``/``<img>`` tag the ``href`` attribute is made absolute
        when present; otherwise the ``src`` attribute is.  Returns the soup.
        """
        soup = BeautifulSoup(html, 'html.parser')
        # Collect all <a> and <img> tags.
        links = soup.find_all(['a', 'img'])
        # Convert relative addresses to absolute ones.
        for link in links:
            if 'href' in link.attrs:
                link['href'] = urljoin(listurl, link['href'])
            elif 'src' in link.attrs:
                link['src'] = urljoin(listurl, link['src'])
        return soup

    def pageList(self):
        """Walk result pages 1-72 and return a list of ``{'title', 'turl'}`` dicts."""
        listmsg = []
        for num in range(1, 73):
            url = f'https://kms.wanfangdata.com.cn/IndustryYJ/Search/Cecdb?q=%E5%86%B6%E9%87%91%2B%E5%86%B6%E7%82%BC%20%E6%9C%BA%E6%9E%84%3Acsi&f=Inst.Type&PageNumber={num}'
            html = self.req(url)
            soup = self.paserUrl(html, url)
            text = str(soup.prettify())
            doc = pq(text)
            liTag = doc('li[class="rt-wrap"]')
            for li in liTag:
                lidoc = pq(li)
                title = lidoc('a[class="title"]').text()
                turl = lidoc('a[class="title"]').attr('href')
                msg = {
                    'title': title,
                    'turl': turl
                }
                listmsg.append(msg)
        return listmsg

    def detailMsg(self, msg):
        """Fetch the detail page in ``msg['turl']`` and append its fields to the Excel file.

        Rows 2-20 of the ``detail-md`` table are read as label/value pairs:
        the first cell (with ``:`` removed) becomes the key, the second cell
        the value.  A repeated label overwrites the earlier value.
        """
        turl = msg['turl']
        title = msg['title']
        html = self.req(turl)
        soup = self.paserUrl(html, turl)
        ddoc = pq(str(soup.prettify()))
        detailmsg = {'title': title}
        # NOTE(review): the original code named these rows, in order,
        # institutionType, formerName, leader, establishmentDate, introduction,
        # classification, keywords, researchEquipment, researchAreas, awards,
        # internalDepartments, subsidiaryInstitutions, productInformation,
        # publicationJournals, mailingAddress, tel, faxNumber, email, website
        # -- presumably the page layout; verify against a live page.
        for row in range(2, 21):
            label = ddoc(self._CELL_SELECTOR.format(row=row, col=1)).text().replace(":", "")
            value = ddoc(self._CELL_SELECTOR.format(row=row, col=2)).text()
            detailmsg[label] = value
        self.writerToExcel([detailmsg])

    def conn144(self):
        """Open a MySQL connection to the ``caiji`` database and return ``(conn, cursor)``.

        The caller is responsible for closing both objects.
        """
        # NOTE(review): credentials are hard-coded; consider moving them to configuration.
        conn = pymysql.Connect(host='114.115.159.144', port=3306, user='caiji', passwd='zzsn9988', db='caiji',
                               charset='utf8')
        cursor = conn.cursor()
        return conn, cursor

    def dataToSql(self, detailmsg):
        """Placeholder for database persistence.

        Currently only opens a connection, formats the current time and closes
        the connection again; ``detailmsg`` is not written anywhere.
        """
        conn, cursor = self.conn144()
        try:
            # Current time, formatted as a string.
            current_time = datetime.datetime.now()
            currentdate = current_time.strftime("%Y-%m-%d %H:%M:%S")
        except Exception as e:
            print('+++++')
        finally:
            cursor.close()
            conn.close()

    @staticmethod
    def _append_rows(existing_data, detailList):
        """Return ``existing_data`` with the dict rows in ``detailList`` appended and the index renumbered."""
        new_data = pd.DataFrame(data=detailList)
        # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
        return pd.concat([existing_data, new_data], ignore_index=True)

    # Append data to the Excel file.
    def writerToExcel(self, detailList):
        """Append the dict rows in ``detailList`` to the workbook named by module-level ``filename``.

        The whole workbook is read, extended and written back on every call.
        """
        # Read the existing xlsx file.
        existing_data = pd.read_excel(filename)
        # Add the new rows after the existing ones.
        combined_data = self._append_rows(existing_data, detailList)
        # Write the result back to the xlsx file.
        combined_data.to_excel(filename, index=False)
if __name__ == '__main__':
    # Target spreadsheet; WanfangSpider.writerToExcel reads this module-level name.
    filename = '机构.xlsx'
    # Start from a freshly created workbook so the first read_excel call has a file to open.
    workbook = Workbook()
    workbook.save(filename)
    wanfang = WanfangSpider()
    # Collect every listing entry first, then fetch and store each detail page.
    for entry in wanfang.pageList():
        wanfang.detailMsg(entry)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论