# 雅虎财经企业动态获取
import os
import time

import pandas as pd
import pymysql
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

from base.BaseCore import BaseCore

# Shared project helper instance; this file uses it to build browser drivers,
# look up company data, measure elapsed time and record run logs.
baseCore = BaseCore()
# Module-wide logger.
# NOTE(review): getLogger is invoked on the BaseCore class, not on the
# baseCore instance — confirm it is a static/class method.
log= BaseCore.getLogger()
# Fetch one article's detail page and store it
def getZx(xydm,url,title,cnx):
    """Scrape a single article page and insert it into ``brpa_source_article``.

    A dedicated browser is built through ``baseCore.buildDriver``, ``url`` is
    loaded, the "collapse-button" element is clicked when present, and the
    author, publish time and body text are read from the page. One row is then
    inserted through ``cnx`` and committed.

    Args:
        xydm: value written to the ``social_credit_code`` column.
        url: article URL to open; also written to ``source_address``.
        title: article title written to the ``title`` column.
        cnx: open database connection providing ``cursor()`` and ``commit()``.

    Returns:
        ``''`` on success. On failure, the exception text followed by
        ``'.........保存数据库失败'`` (insert/commit failed) or
        ``'.........获取正文失败'`` (anything else failed).
    """
    start_time_content = time.time()
    try:
        path = r'E:\chromedriver_win32\chromedriver.exe'
        driverContent = baseCore.buildDriver(path)
        try:
            driverContent.get(url)
            try:
                driverContent.find_element(By.CLASS_NAME,"collapse-button").click()
            except Exception:
                # Best effort: clicking the button is optional, a missing or
                # unclickable button must not abort the scrape.
                pass
            # Short pause after the click before reading the page.
            time.sleep(0.5)

            authorElement = driverContent.find_element(By.CLASS_NAME,"caas-author-byline-collapse")
            timeElement = driverContent.find_element(By.CLASS_NAME,"caas-attr-time-style").find_element(By.TAG_NAME,"time")
            contentElement = driverContent.find_element(By.CLASS_NAME,"caas-body")

            # The values are bound as query parameters below, so the database
            # driver escapes them; doubling quotes by hand here would store
            # every apostrophe twice.
            author = authorElement.text.strip()
            # Turn the "datetime" attribute into "YYYY-MM-DD HH:MM:SS" by
            # replacing the "T" separator and keeping the first 19 characters.
            pub_time = timeElement.get_attribute("datetime").strip().replace("T"," ")[0:19]
            content = contentElement.text.strip()
        finally:
            # Always end the browser session, including when scraping fails;
            # otherwise every failed article leaves a browser running.
            driverContent.quit()

        # Column order matches the insert statement below.
        row = (
            xydm,
            title,
            '',
            content,
            pub_time,
            url,
            '雅虎财经',
            author,
            '2',
            'zh',
        )
        with cnx.cursor() as cursor:
            try:
                insert_sql = '''insert into brpa_source_article(social_credit_code,title,summary,content,publish_date,source_address,origin,author,type,lang) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
                cursor.execute(insert_sql, row)
                cnx.commit()
            except Exception as e1:
                log.error("保存数据库失败")
                return str(e1) + '.........保存数据库失败'

        log.info(f"文章耗时，耗时{baseCore.getTimeCost(start_time_content,time.time())}")
    except Exception as e:
        log.error("获取正文失败")
        return str(e)+'.........获取正文失败'
    return ''


# Browser used by the main loop to load each company's listing page.
path = r'E:\chromedriver_win32\chromedriver.exe'
driver = baseCore.buildDriver(path)
# Database connection used by the main loop's duplicate check and passed to
# getZx for inserts. Each setting can be overridden through an environment
# variable so credentials need not live in source control; the literals are
# the legacy fallbacks and keep existing deployments working unchanged.
# NOTE(review): the fallback password is committed in plain text — rotate it
# and drop the default once the environment variables are in place.
cnx = pymysql.connect(
    host=os.environ.get('YAHOO_NEWS_DB_HOST', '114.116.44.11'),
    user=os.environ.get('YAHOO_NEWS_DB_USER', 'root'),
    password=os.environ.get('YAHOO_NEWS_DB_PASSWORD', 'f7s0&7qqtK'),
    db=os.environ.get('YAHOO_NEWS_DB_NAME', 'dbScore'),
    charset='utf8mb4',
)


# Scroll the page down repeatedly (30 times by default) to pull in company news
def scroll(driver, times=30, pause=0.1):
    """Push the page's scroll position far down, ``times`` times in a row.

    Each pass runs a script that sets ``document.documentElement.scrollTop``
    to 100000 and then sleeps for ``pause`` seconds.

    Args:
        driver: object exposing ``execute_script(js)``.
        times: number of scroll passes; defaults to 30.
        pause: seconds to sleep after each pass; defaults to 0.1. Must not be
            negative (``time.sleep`` rejects negative values).
    """
    js = "var q=document.documentElement.scrollTop=100000"
    for _ in range(times):
        driver.execute_script(js)
        time.sleep(pause)




# Main collection loop: for each company, load its press-release listing,
# skip articles already stored, and hand every new article to getZx.
# The try/finally guarantees that resources are released when the loop is
# left through an exception or Ctrl+C; a plain statement after `while True`
# could never be reached.
try:
    # Duplicate check used for every article link.
    sel_sql = '''select social_credit_code from brpa_source_article where source_address = %s and social_credit_code=%s '''
    while True:
        # The social credit code is meant to come from Redis and is then used
        # to look up the company's basic information.
        # NOTE(review): no Redis read is wired in here — the value is always
        # empty, so as written the loop only sleeps. Restore the fetch before
        # relying on this script.
        social_code = ''
        # Nothing queued: wait and poll again.
        if social_code == '':
            time.sleep(20)
            continue
        data = baseCore.getInfomation(social_code)
        name = data[1]
        enname = data[5]
        gpdm = data[3]
        xydm = data[2]
        taskType = '企业动态'
        # Number of collection runs recorded so far for this company.
        count = data[17]
        start_time = time.time()
        if gpdm == '':
            log.error(f"{name}--股票代码为空 跳过")
            e = '.......股票代码为空 跳过'
            state = 0
            takeTime = baseCore.getTimeCost(start_time,time.time())
            baseCore.recordLog(xydm,taskType,state,takeTime,'',e)
            continue
        url = f"https://finance.yahoo.com/quote/{gpdm}/press-releases?p={gpdm}"
        driver.get(url)
        scroll(driver)
        try:
            news_div = driver.find_element(By.ID, 'summaryPressStream-0-Stream')
        except Exception as e:
            log.error(f"{name}--{gpdm}--没找到新闻元素")
            e = str(e) + '.......没找到新闻元素'
            state = 0
            takeTime = baseCore.getTimeCost(start_time,time.time())
            baseCore.recordLog(xydm,taskType,state,takeTime,url,e)
            continue
        news_lis = news_div.find_elements(By.XPATH,"./ul/li")
        log.info(f"{name}--{gpdm}--{len(news_lis)}条信息")
        for i, news_li in enumerate(news_lis):
            try:
                a_ele = news_li.find_element(By.XPATH,"./div[1]/div[1]/div[2]/h3[1]/a")
            except Exception as e:
                log.error(f"{name}--{gpdm}--{i}----a标签没找到")
                e = str(e) + '.......a标签没找到'
                state = 0
                takeTime = baseCore.getTimeCost(start_time,time.time())
                baseCore.recordLog(xydm,taskType,state,takeTime,url,e)
                continue
            # A link without an href is treated like any other non-matching
            # link instead of crashing the whole run. The URL and title are
            # bound as query parameters, so no manual quote escaping is done.
            news_url = (a_ele.get_attribute("href") or '').strip()
            # Only links that stay on finance.yahoo.com are collected.
            if not news_url.startswith("https://finance.yahoo.com"):
                continue
            # Skip articles already stored for this company.
            with cnx.cursor() as cursor:
                cursor.execute(sel_sql, (news_url,xydm))
                selects = cursor.fetchall()
            if selects:
                log.error(f"{name}--{gpdm}--网址已经存在----{news_url}")
                e = '网址已存在'
                state = 0
                takeTime = baseCore.getTimeCost(start_time,time.time())
                baseCore.recordLog(xydm,taskType,state,takeTime,news_url,e)
                continue
            title = a_ele.text.strip()
            # getZx returns '' on success and an error description otherwise.
            e = getZx(xydm,news_url,title,cnx)
            state = 1 if e == '' else 0
            takeTime = baseCore.getTimeCost(start_time,time.time())
            baseCore.recordLog(xydm,taskType,state,takeTime,news_url,e)
            log.info(f"{name}--{gpdm}--{i}----{news_url}----------{news_url}")

        log.info(f"{name}--{gpdm}--企业整体，耗时{baseCore.getTimeCost(start_time,time.time())}")

        # Collection finished for this company: bump its run counter.
        runType = 'NewsRunCount'
        count += 1
        baseCore.updateRun(social_code,runType,count)
finally:
    # Release resources. Each release is attempted independently so that one
    # failure does not prevent the others.
    for release in (driver.quit, cnx.close, baseCore.close):
        try:
            release()
        except Exception as close_err:
            log.error(f"释放资源失败: {close_err}")