Commit e694b41b by XveLingKun

5/10

Parent 6da55a3e
...
@@ -891,6 +891,7 @@ class BaseCore:
                page_size = doc.page_count
                for page in doc.pages():
                    retData['content'] += page.get_text()
        except:
            log = self.getLogger()
            log.error(f'文件损坏')
...
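The hunk above reads PDF text with what appears to be PyMuPDF (`page_count`, `pages()`, `get_text()` are fitz APIs). A minimal standalone sketch of that pattern, not part of the commit; `sample.pdf` is a placeholder path and the broad except mirrors the original's handling:

import fitz  # PyMuPDF

def read_pdf_text(path):
    """Return the concatenated text of all pages, or '' if the file cannot be parsed."""
    content = ''
    try:
        with fitz.open(path) as doc:
            page_count = doc.page_count  # same attribute the hunk above reads
            for page in doc.pages():
                content += page.get_text()
    except Exception:
        print('文件损坏')  # the original logs this via log.error
    return content

if __name__ == '__main__':
    print(len(read_pdf_text('sample.pdf')))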
...
@@ -324,7 +324,7 @@ def AnnualEnterprise():
    gn_social_list = [item[0] for item in gn_result]
    print('=======')
    for item in gn_social_list:
-        r.rpush('AnnualEnterprise:gnqy_socialCode', item)
+        r.rpush('AnnualEnterprise:zjh_socialCode', item)
    closeSql(cnx,cursor)
#企业年报定时任务
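For context, these task functions feed a Redis list that a separate crawler later drains. A minimal sketch of that push/pop pattern, not part of the commit; the connection settings below are placeholders, not the project's:

import redis

# placeholder connection settings, not the project's real ones
r = redis.Redis(host='127.0.0.1', port=6379, db=6)

def enqueue(codes):
    # producer side: push each social credit code onto the task list
    for code in codes:
        r.rpush('AnnualEnterprise:zjh_socialCode', code)

def dequeue_one():
    # consumer side: pop one code, or None when the list is empty
    item = r.lpop('AnnualEnterprise:zjh_socialCode')
    return item.decode() if item else None

if __name__ == '__main__':
    enqueue(['91370212MA3MJMA0XW'])
    print(dequeue_one())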
...
@@ -514,7 +514,7 @@ def NQEnterprise():
    for item in nq_social_list:
        #新三板企业财务数据 上市信息 核心人员已采集 企业动态、企业公告未采集 企业公告脚本已开发,企业动态需要每天放入redis
        # r.rpush('NQEnterprise:nq_Ipo', item)
-        r.rpush('NQEnterprise:nq_finance',item)
+        r.rpush('NQEnterprise:nq_finance', item)
        # r.rpush('NQEnterprise:nq_notice',item)
    closeSql(cnx_, cursor_)
...
@@ -674,10 +674,11 @@ if __name__ == "__main__":
    # BaseInfoEnterprise()
    # FBS()
    # MengZhi()
-    NQEnterprise()
+    # NQEnterprise()
    # SEC_CIK()
    # dujioashou()
    # omeng()
+    AnnualEnterprise()
    # AnnualEnterpriseUS()
    # NoticeEnterprise_task()
    # AnnualEnterprise_task()
...
...
@@ -88,6 +88,12 @@ def doJob():
            'version': 'TYC-Web'
        }
        cookies_list, id_cookie, user_name = token.get_cookies()
+        if cookies_list:
+            pass
+        else:
+            log.info("没有账号了,等待30分钟")
+            time.sleep(30 * 60)
+            return '', '', ''
        log.info(f'=====当前使用的是{user_name}的cookie======')
        cookies = {}
        for cookie in cookies_list:
...
@@ -97,7 +103,7 @@ def doJob():
        # 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
        # social_code = baseCore.redicPullData('CorPersonEnterprise:gnqy_socialCode')
        # 判断 如果Redis中已经没有数据,则等待
-        social_code = '91110108780992804C'
+        social_code = '91370212MA3MJMA0XW'
        if social_code == None:
            time.sleep(20)
            continue
...
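The loop after `cookies = {}` presumably converts the Selenium-style cookie list returned by `token.get_cookies()` into a dict usable by requests. A small sketch under that assumption, not part of the commit; the cookie names and URL are illustrative placeholders:

import requests

def cookie_list_to_dict(cookies_list):
    # Selenium's driver.get_cookies() returns dicts with 'name' and 'value' keys
    return {c['name']: c['value'] for c in cookies_list}

if __name__ == '__main__':
    # illustrative values only
    cookies_list = [{'name': 'auth_token', 'value': 'xxx'}, {'name': 'tyc-user-info', 'value': 'yyy'}]
    cookies = cookie_list_to_dict(cookies_list)
    resp = requests.get('https://www.tianyancha.com/', cookies=cookies, timeout=20)  # placeholder URL
    print(resp.status_code)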
...
@@ -26,7 +26,7 @@ if __name__ == "__main__":
    name = input('所属用户:')
    driver = create_driver()
    driver.get(url)
-    time.sleep(80)
+    time.sleep(60)
    cookies = driver.get_cookies()
    # print(driver.get_cookies())
...
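The captured cookies are handed off by a token module that is not shown in this diff. A purely hypothetical sketch of persisting `driver.get_cookies()` to Redis as JSON; the key name and connection details are invented for illustration only:

import json
import redis

def save_cookies(cookies, user_name):
    # hypothetical store: one JSON blob per user inside a Redis hash
    r = redis.Redis(host='127.0.0.1', port=6379, db=0)
    r.hset('tyc:cookies', user_name, json.dumps(cookies, ensure_ascii=False))

# usage after the manual login wait in the script above:
# save_cookies(driver.get_cookies(), name)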
title gwyRelevantDocuments
chcp 65001
cd /d %~dp0
python38 gwyRelevantDocuments.py
\ No newline at end of file

title gwyfile
chcp 65001
cd /d %~dp0
python38 gwyfile.py
\ No newline at end of file

title gwyparts
chcp 65001
cd /d %~dp0
python38 gwyparts.py
import redis

from base.BaseCore import BaseCore

baseCore = BaseCore()
log = baseCore.getLogger()
cnx = baseCore.cnx
cursor = baseCore.cursor
r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn', db=6)


def query():
    sql = "select id from wx_link where state = '300' order by publish_time desc"
    cursor.execute(sql)
    result = cursor.fetchall()
    return result


if __name__ == "__main__":
    result = query()
# result = ['1990264','1990265','1998085','1998086','2039312','2067942','2087699','2087700','2087701','2087774','2087775','2087776','2087777','2088091','2088092','2088093','2088445','2088446','2088447','2088455','2121977','2385237','2385238','2386227','2678376','2678377','2678421','2678422','2678425','2731944','2731945','2731946','2732184','2732185','2732205','2732206','2732313','2732314','2732317','2732318','2732319','2732320','2732321','2732323','2732438','2732439','2732440','2732453','2732455','2732456','2732483','2732497','2732910','2732911','2732912','2732913','2732915','2732918','2732952','2732953','2732958','2732959','2732960','2733052','2733053','2733097','2733100','2733101','2733120','2733121','2733123','2733124','2733127','2733128','2733130','2733146','2733147','2733148','2733149','2733150','2733151','2733152','2733153','2733154','2733155','2733156','2733157','2733328','2733345','2733346','2733515','2733518','2733519','2733534','2733536','2733537','2733565','2733566','2733595','2733596','2733598','2733627','2733703','2733705','2733706','2733814','2733958','2733959','2733960','2734035','2734062','2734113','2734180','2734182','2734270','2734271','2734272','2734273','2734274','2734275','2734276','2734307','2734308','2734311','2734312','2734313','2734314','2734315','2734316','2734317','2734324','2734325','2734326','2734328','2734329','2734330','2734339','2734340','2734341','2734388','2734389','2734536','2734537','2734538','2735181','2735182','2735183','2735184','2735185','2735186','2735187','2735188','2735190','2735191','2735194','2735196','2735266','2735267','2735268','2735269','2735270','2735271','2735272','2735276','2735277','2735278','2735279','2735280','2735281','2735282','2735283','2735297','2735561','2735625','2735627','2735628','2735662','2735663','2736211','2736212','2736213','2736214','2736215','2736216','2736544','2736545','2736546','2736559','2736677','2736678','2736817','2736819','2736820','2736821','2736823','2736824','2736825','2736828','2736905','2736906','2736907','2736912','2736913','2736914','2736915','2736916','2736917','2736963','2736964','2736988','2736989','2736990','2736991','2737108','2737111','2737600','2737601','2737604','2737701','2737702','2737703','2737759','2737928','2737930','2737931','2738046','2738050','2738051','2738052','2738053','2738356','2738357','2738358','2738456','2738460','2738461','2738485','2738486','2738607','2738608','2738609','2739613','2739614','2739615','2739649','2739650','2739651','2739908','2739909','2739910','2739911','2739912','2739913','2740019','2740022','2740023','2740123','2740207','2740208','2740209','2740252','2740255','2740256','2740269','2740270','2740271','2740412','2740413','2740485','2740486','2740487','2740535','2740536','2740537','2740538','2740539','2740540','2740541','2740542','2740543','2740544','2740545','2740546','2740547','2740548','2740549','2740659','2740660','2740661','2740662','2740663','2740664','2740924','2740926','2740927','2740964','2740965','2740966','2741091','2741092','2741093','2741098','2741099','2741100','2741129','2741130','2741131','2744702','2744703','2744704','2744705','2759363','2759364','2759365','2759546','2759547','2759548','2759549','2759550','2759551','2759552','2759553','2759554','2759555','2759556','2759800','2759801','2759802','2759803','2759805','2759806','2759829','2760062','2760063','2760064','2760729','2760730','2760733','2760899','2760900','2760902','2760903','2760904','2760905','2761327','2761328','2761332','2761783','2761784','2761785','2761795','2761797','2761799','2761805','27
61819','2761820','2761821','2761822','2761891','2761892','2761893','2761894','2761895','2761896','2761897','2761900','2762070','2762071','2762072','2762073','2762074','2762075','2762076','2762077','2762078','2762079','2762080','2762081','2762082','2762083','2762084','2762085','2762087','2762088','2762089','2762090','2762091','2762092','2762093','2762125','2762126','2762127','2762137','2762138','2762139','2762160','2762161','2762162','2762195','2762196','2762197','2762410','2762411','2762419','2762470','2762471','2762472','2762873','2762875','2762877','2762930','2762931','2762937','2762938','2762939','2762940','2762941','2763276','2763277','2763278','2763304','2763305','2763307','2763308','2763309','2763310','2763312','2763697','2763698','2763699','2763700','2763701','2763702','2763703','2764035','2764036','2764037','2764039','2764040','2764041','2764042','2764043','2764044','2764045','2764046','2764047','2764048','2764049','2764050','2764051','2764055','2764056','2764057','2764059','2764060','2764062','2764063','2764064','2764065','2764164','2764165','2764369','2764370','2764567','2764568','2764570','2764618','2764619','2764620','2764744','2764745','2764748','2764770','2764771','2764772','2764869','2764870','2764871','2764898','2764899','2764900','2764901','2764902','2764903','2764904','2764905','2764906','2764907','2764908','2764909','2764910','2764911','2764912','2764913','2764914','2764915','2764916','2764917','2764918','2764933','2764934','2764935','2764936','2764937','2764938','2764939','2764957','2764958','2764959','2764960','2764961','2764963','2764964','2764965','2764966','2765020','2765021','2765022','2765023','2765024','2765026','2765229','2765230','2765231','2765232','2765233','2765293','2765294','2765295','2765296','2765297','2765298','2765299','2765300','2765301','2765302','2765303','2765304','2765305','2765306','2765307','2765308','2765414','2765416','2765424','2765571','2765572','2765573','2765796','2765797','2765798','2765804','2765805','2765807','2765808','2765809','2765810','2765811','2765812','2765813','2765814','2765815','2765816','2765820','2765821','2765822','2766021','2766022','2766023','2766024','2766025','2766048','2766060','2766061','2766062','2766063','2766064','2766066','2766068','2766069','2766071','2766072','2766073','2766074','2766075','2766169','2766194','2766195','2766197','2766208','2766209','2766244','2766245','2766246','2766536','2766537','2766538','2766539','2766540','2766547','2766669','2766670','2766671','2766673','2766674','2766675','2766676','2766677','2766678','2766679','2766680','2766681','2766682','2766790','2766792','2766826','2766827','2767032','2767120','2767121','2767122','2767123','2767126','2767127','2767128','2767129','2767130','2767131','2767132','2767133','2767134','2767135','2767136','2767137','2767138','2767139','2767173','2767174','2767408','2767409','2767410','2767411','2767502','2767503','2767534','2767535','2767545','2767546','2767547','2767548','2767600','2767602','2767642','2767643','2767655','2767656','2767717','2767718','2767719','2767720','2767732','2767740','2767741','2767756','2767758','2767766','2767767','2767807','2767808','2767809','2767810','2767817','2767818','2767825','2767827','2767828','2767829','2767840','2767887','2767898','2767899','2767900','2767901','2767902','2767903','2767906','2767907','2767908','2767955','2768155','2768156','2768166','2768167','2768168','2768170','2768284','2768360','2768368','2768378','2768826','2768827','2768845','2768846','2768847','2768848','2768849','2768850','2768851','2768852','2768871
','2768872','2768877','2768878','2768879','2768880','2768888','2768912','2768913','2768914','2768916','2768917','2768918','2768919','2768920','2768921','2768922','2768923','2768924','2769071','2769075','2769240','2769258','2769270','2769271','2769272','2769399','2769400','2769427','2769428','2769429','2769430','2769489','2769491','2769542','2769543','2769568','2769569','2770721','2770722','2770723','2770724','2770725','2770726','2770728','2770729','2770730','2770731','2770732','2770733','2770734','2770738','2770739','2770793','2770796','2770903','2770904','2770906','2770907','2770909','2770977','2770978','2771278','2771280','2771661','2771662','2771929','2771932','2772086','2772087','2772088','2772089','2772090','2772130','2772132','2772319','2772320','2772409','2772410','2772423','2772424','2772425','2772426','2772630','2772632','2772721','2772723','2772724','2772725','2772737','2772738','2772749','2772750','2772751','2772752','2772753','2773224','2773227','2773253','2773254','2773287','2773289','2773346','2773347','2773348','2773349','2773350','2773351','2773385','2773386','2773387','2773388','2773558','2773559','2773563','2773564','2773565','2773566','2773567','2773568','2773777','2773778','2773805','2773806','2773808','2773834','2773836','2773837','2773838','2773839','2773841','2774222','2774223','2774302','2774303','2774319','2774320','2774334','2774335','2774440','2774512','2774513','2774527','2774528','2774529','2774531','2774660','2774807','2774830','2774847','2775022','2775023','2775164','2775165','2775350','2775351','2775386','2775387','2775482','2775483','2775677','2775680','2776678','2776681','2776869','2776872','2776873','2776881','2776882','2776883','2776884','2776885','2776993','2776994','2776996','2776997','2777106','2777115','2777123','2777124','2777125','2777126','2777127','2777128','2777129','2777130','2777131','2777132','2777237','2777240','2777814','2777815','2777817','2777819','2777820','2777821','2777822','2777823','2777824','2777825','2777826','2777827','2777828','2777829','2777830','2778099','2778100','2778267','2778268','2778484','2778486','2778601','2778603','2778707','2778708','2779029','2779030','2779031','2779032','2779033','2779034','2779035','2779047','2779048','2779253','2779369','2779371','2779528','2779529','2779530','2779533','2779591','2779592','2779790','2779791','2780196','2780501','2780504']
    for id in result:
        # 放入redis
        r.rpush("WeiXinGZH:linkid_fail", id[0])
        # r.rpush("WeiXinGZH:linkid_fail", id)
\ No newline at end of file
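The list filled above is presumably drained elsewhere by the WeChat crawler. A minimal consumer sketch against the same key, not part of the commit; the re-crawl step is a placeholder:

import redis

r = redis.Redis(host="114.116.90.53", port=6380, password='clbzzsn', db=6)

def drain():
    while True:
        link_id = r.lpop("WeiXinGZH:linkid_fail")
        if link_id is None:
            break  # queue is empty
        # placeholder for the real re-crawl logic keyed by wx_link.id
        print(f"re-crawl link id {link_id.decode()}")

if __name__ == "__main__":
    drain()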
"""获取每天失败的列表--返回给数据组"""
import datetime
import time
import pandas as pd
import smtplib
from email.header import Header
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pymysql
from base.BaseCore import BaseCore
baseCore = BaseCore()
log = baseCore.getLogger()
# cnx = baseCore.cnx
# cursor = baseCore.cursor
def sendEmail(file_name, receiver, filename):
    file = open(file_name, 'rb').read()
    # 发送邮箱地址
    sender = '1195236739@qq.com'
    # 发送邮箱登录 账户 密码
    username = '1195236739@qq.com'
    password = 'gatvszshadvpgjci'
    smtpserver = 'smtp.qq.com'
    # # 接收邮箱地址
    # receiver = 'fujunxue@ciglobal.cn'
    maile_title = filename
    message = MIMEMultipart()
    message['From'] = sender
    message['To'] = receiver
    message['Subject'] = Header(maile_title, 'utf-8')
    message.attach(MIMEText(filename, 'plain', 'utf-8'))
    xlsxApart = MIMEApplication(file)
    xlsxApart.add_header('Content-Disposition', 'attachment', filename=filename)
    message.attach(xlsxApart)
    smtpObj = smtplib.SMTP_SSL(smtpserver)  # 注意:如果遇到发送失败的情况(提示远程主机拒接连接),这里要使用SMTP_SSL方法
    smtpObj.connect(smtpserver, port=465)
    smtpObj.login(username, password)
    smtpObj.sendmail(sender, receiver, message.as_string())
    print("邮件发送成功!!!")
    smtpObj.quit()
# 解析失败
def get_failed_list(today_time):
    cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
                          charset='utf8mb4')
    cursor = cnx.cursor()
    sql = f"select * from wx_link where (state = '300' or state = '600' or state = '200') and create_time >= '{today_time}' "
    # sql = f"select * from wx_link where state='800' "
    print(sql)
    cursor.execute(sql)
    result = cursor.fetchall()
    cursor.close()
    cnx.close()
    return result
# 发布内容不存在
def get_null_list(today_time):
    cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
                          charset='utf8mb4')
    cursor = cnx.cursor()
    sql = f"select * from wx_link where (state='800' or state='500') and create_time >= '{today_time}' "
    cursor.execute(sql)
    result = cursor.fetchall()
    cursor.close()
    cnx.close()
    return result
def get_info(result):
    fail_list = []
    for info in result:
        site_name = info[3]  # 公众号
        info_source_code = info[4]
        title = info[5]
        publish_time = info[6]
        link = info[7]
        # 写入dataframe
        # 创建一个字典,其中包含当前行的数据
        row = {
            '公众号': site_name,
            '公众号编码': info_source_code,
            '标题': title,
            '发布时间': publish_time,
            '链接': link
        }
        fail_list.append(row)
    return fail_list
if __name__ == "__main__":
    # 创建一个空的DataFrame,其中包含你需要的列名
    while True:
        # 计算今天的时间
        now = datetime.datetime.now()
        print(now)
        time.sleep(1)
        print(datetime.datetime.now().replace(hour=23, minute=59, second=59, microsecond=0))
        if now >= datetime.datetime.now().replace(hour=23, minute=59, second=59, microsecond=0):
            pass
        else:
            continue
        today_time = str(now.strftime("%Y-%m-%d 00:00:00"))
        print(today_time)
        result = get_failed_list(today_time)
        result_null = get_null_list(today_time)
        if result:
            fail_list = get_info(result)
            result_df = pd.DataFrame(fail_list)
            result_df.to_excel(f"./database/{today_time[:10]}_微信公众号采集失败列表.xlsx", index=False)
            sendEmail(f"./database/{today_time[:10]}_微信公众号采集失败列表.xlsx", "fujunxue@ciglobal.cn", "微信公众号采集失败列表")
            sendEmail(f"./database/{today_time[:10]}_微信公众号采集失败列表.xlsx", "mr@ciglobal.cn", "微信公众号采集失败列表")
        else:
            log.info(f'{today_time} 没有采集失败的文章')
        if result_null:
            null_list = get_info(result_null)
            null_df = pd.DataFrame(null_list)
            null_df.to_excel(f"./database/{today_time[:10]}_微信公众号文章内容为空列表.xlsx", index=False)
            sendEmail(f"./database/{today_time[:10]}_微信公众号文章内容为空列表.xlsx", "fujunxue@ciglobal.cn", "微信公众号文章内容为空列表")
            sendEmail(f"./database/{today_time[:10]}_微信公众号文章内容为空列表.xlsx", "mr@ciglobal.cn", "微信公众号文章内容为空列表")
        else:
            log.info(f'{today_time} 没有采集到空的文章')
# -*- coding: utf-8 -*-
'''
-成功100 发送数据失败200 请求失败400 文章内容为空500 处理style标签失败700
+成功100 发送数据失败200 请求失败400 文章内容为空500 处理style标签失败700 发布内容不存在800 图片处理失败300、600
'''
import re
...
@@ -118,6 +118,10 @@ def get_info(dict_json, linkid):
        # updatewxLink(url_news, info_source_code, 400)
        return False
    soup_news = BeautifulSoup(res_news.content, 'html.parser')
+    if '此内容发送失败无法查看' in soup_news.text or '该页面不存在' in soup_news.text or '该内容已被发布者删除' in soup_news.text or '此内容因违规无法查看' in soup_news.text:
+        log.info(f'--errorCode:800--{origin}---{news_date}---{news_title}----内容无法查看')
+        updatewxLink(url_news, info_source_code, 800)
+        return False
    try:
        news_html = soup_news.find('div', {'id': 'js_content'})
        news_html['style'] = 'width: 814px ; margin: 0 auto;'
...
@@ -228,7 +232,7 @@ def get_info(dict_json, linkid):
    }
    for nnn in range(0, 3):
        try:
-            producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
+            producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 7, 0))
            kafka_result = producer.send("crawlerInfo", json.dumps(dic_info, ensure_ascii=False).encode('utf8'))
            kafka_time_out = kafka_result.get(timeout=10)
            # add_url(sid, url_news)
...
@@ -252,7 +256,7 @@ def get_info(dict_json, linkid):
    }
    for nnn2 in range(0, 3):
        try:
-            producer2 = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
+            producer2 = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'], api_version=(2, 7, 0))
            kafka_result2 = producer2.send("collectionAndDispatcherInfo",
                                           json.dumps(dic_info2, ensure_ascii=False).encode('utf8'))
            break
...
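The only change in the two Kafka hunks above is pinning `api_version` on the kafka-python producer, which skips the broker version probe that can fail during auto-detection. A minimal sketch of the same send-with-retries pattern, not part of the commit:

import json
from kafka import KafkaProducer

def send_info(dic_info, topic="crawlerInfo"):
    # pinning api_version skips kafka-python's broker version probe
    for _ in range(3):
        try:
            producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'],
                                     api_version=(2, 7, 0))
            future = producer.send(topic, json.dumps(dic_info, ensure_ascii=False).encode('utf8'))
            future.get(timeout=10)
            return True
        except Exception as e:
            print(f'kafka send failed: {e}')
    return False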
-import time
-import pandas as pd
+import re
+import requests
+from bs4 import BeautifulSoup
+from retry import retry
-# def writeaa():
-#     detailList=[]
-#     aa={
-#         'id':3,
-#         'name':'qqqwe'
-#     }
-#     detailList.append(aa)
-#     writerToExcel(detailList)
-# 将数据追加到excel
-# def writerToExcel(detailList):
-#     # filename='baidu搜索.xlsx'
-#     # 读取已存在的xlsx文件
-#     existing_data = pd.read_excel(filename,engine='openpyxl')
-#     # 创建新的数据
-#     new_data = pd.DataFrame(data=detailList)
-#     # 将新数据添加到现有数据的末尾
-#     combined_data = existing_data.append(new_data, ignore_index=True)
-#     # 将结果写入到xlsx文件
-#     combined_data.to_excel(filename, index=False)
-#
-# from openpyxl import Workbook
-#
-# if __name__ == '__main__':
-#     filename='test1.xlsx'
-#     # # 创建一个工作簿
-#     workbook = Workbook(filename)
-#     workbook.save(filename)
-#     writeaa()
-# gpdm = '01109.HK'
-# if 'HK' in str(gpdm):
-#     tmp_g = str(gpdm).split('.')[0]
-#     if len(tmp_g) == 5:
-#         gpdm = str(gpdm)[1:]
-#         print(gpdm)
-# else:
-#     pass
from base.BaseCore import BaseCore
baseCore = BaseCore()
-r = baseCore.r
-# #自增并设置过期时间
-# while True:
-#     key = 'mykey'
-#     expiration_time = 60 # 设置过期时间 60秒
-#     #设置自增
-#     r.incr(key)
-#
-#
-#     value = int(r.get(key).decode())
-#     if value > 10:
-#         print(value)
-#         # 设置过期时间
-#         r.expire(key, expiration_time)
-#         time.sleep(70)
-#         print('------------------')
-#         continue
-#     # print(value)
-#
-#     print("==========")
-# expiration_time = 60
-# # 创建PubSub对象
-# p = r.pubsub()
-#
-# # 订阅过期事件
-# p.psubscribe('__keyevent@6__:expired')
-# aa = p.listen()
-# # 监听过期事件
-# for message in p.listen():
-#     if message['type'] == 'pmessage':
-#         expired_key = message['data'].decode()
-#         print('过期的key:', expired_key)
-#         if expired_key == 'counter':
-#             # 执行重置操作
-#             r.set('counter', 0)
-#             print('计数器已重置为0')
-# # 设置自增
-# r.incr('counter')
-# # 设置过期时间
-# r.expire('counter', expiration_time)
-for i in range(0, 24, 5):
-    print(i)
+@retry(tries=3, delay=2)
+def getrequest(url_news):
+    ip = baseCore.get_proxy()
+    res_news = requests.get(url_news, proxies=ip, timeout=20)
+    if res_news.status_code != 200:
+        raise
+    return res_news
+
+
+def rm_style_attr(soup):
+    # 查找所有含有style属性的标签
+    style_tags = soup.find_all(style=True)
+    # 遍历每个style标签
+    for style_tag in style_tags:
+        try:
+            # 使用正则表达式替换
+            styleattr = style_tag['style']
+            styleattr = re.sub(r'visibility:(?s).{1,}?;', '', styleattr)
+            styleattr = re.sub(r'font-family:(?s).{1,}?;', '', styleattr)
+            styleattr = re.sub(r'color:(?s).{1,}?;', '', styleattr)
+            styleattr = re.sub(r'font-size:(?s).{1,}?;', '', styleattr)
+            style_tag['style'] = styleattr
+        except:
+            continue
+
+    # first_div = soup.select('div[id="js_content"]')
+    # # 设置style属性
+    # first_div['style'] = 'width: 814px ; margin: 0 auto;'
+
+    first_div = soup.select('div[id="js_content"]')
+    if first_div:
+        first_div = first_div[0]  # 获取第一个匹配的元素
+        first_div['style'] = 'width: 814px ; margin: 0 auto;'  # 设置style属性
+
+    return soup
+
+
+if __name__ == "__main__":
+    # url_news = "http://mp.weixin.qq.com/s?__biz=MjM5NDMxOTMwNg==&mid=2653175413&idx=1&sn=8c0853ddab6e27799c4452e0b6e63156&chksm=bd5900d08a2e89c698de51f102b7423b33a27522966ca2218ca1b8ef290837b0087173c74bcb#rd"
+    url_news = "http://mp.weixin.qq.com/s?__biz=MzU4ODQwNTIxMw==&mid=2247528290&idx=4&sn=370655b44dfd31b99984e2eeeb4868e0&chksm=fddf6fd0caa8e6c63a0b5e4fece250415fcb56f03f305450b1434978769b443eaa416342326e#rd"
+    # 修改请求方法,retry 3次
+    try:
+        res_news = getrequest(url_news)
+        print(res_news)
+    except:
+        try:
+            res_news = requests.get(url_news, timeout=20)
+            print('请求成功')
+        except:
+            res_news = None
+            pass
+    soup_news = BeautifulSoup(res_news.content, 'html.parser')
+    if '此内容发送失败无法查看' in soup_news.text or '该页面不存在' in soup_news.text or '该内容已被发布者删除' in soup_news.text or '此内容因违规无法查看' in soup_news.text:
+        print('失败')
+    try:
+        news_html = soup_news.find('div', {'id': 'js_content'})
+        news_html['style'] = 'width: 814px ; margin: 0 auto;'
+        # del news_html['style']
+        news_html = rm_style_attr(news_html)
+        # print(news_html)
+        del news_html['id']
+        del news_html['class']
+    except Exception as e:
+        print(e)
+        news_html = None
+    # print(news_html)
+    news_content = news_html.text
+    list_img = news_html.find_all('img')
+    for num_img in range(len(list_img)):
+        img_one = list_img[num_img]
+        url_src = img_one.get('data-src')
+        # print(url_src)
+        if url_src and 'gif' in url_src:
+            url_img = ''
+            img_one.extract()
+        else:
+            try:
+                try:
+                    name_img = url_src.split('/')[-2] + '.' + url_src.split('wx_fmt=')[1]
+                except:
+                    img_one.extract()
+                    continue
+                try:
+                    res = requests.get(url_src, timeout=20)
+                except:
+                    img_one.extract()
+                    continue
+            except Exception as e:
+                print(f'--error--{url_news}-----------{e}')
+    for tag in news_html.descendants:
+        try:
+            del tag['style']
+        except:
+            pass
+    list_section = news_html.find_all('section')
+    for section in list_section:
+        section.name = 'div'
+    print(news_html)
\ No newline at end of file