提交 88209302 作者: 薛凌堃

REITs政策法规

上级 049e8a5e
...@@ -13,8 +13,8 @@ from reits import Policy ...@@ -13,8 +13,8 @@ from reits import Policy
policy = Policy() policy = Policy()
topic = 'policy' topic = 'research_center_fourth'
webname = '重庆市人民政府' webname = '重庆市人民政府_'
headers = { headers = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
...@@ -124,7 +124,10 @@ def getContent(url): ...@@ -124,7 +124,10 @@ def getContent(url):
contentWithTag = soup.find('div', class_='view') contentWithTag = soup.find('div', class_='view')
if not contentWithTag: if not contentWithTag:
contentWithTag = soup.find('div',class_='document') contentWithTag = soup.find('div',class_='document')
try:
contentWithTag.find('div',class_='item').decompose() contentWithTag.find('div',class_='item').decompose()
except:
pass
try: try:
scripts = contentWithTag.find_all('script') scripts = contentWithTag.find_all('script')
for script in scripts: for script in scripts:
...@@ -168,14 +171,17 @@ def getData(data_, num): ...@@ -168,14 +171,17 @@ def getData(data_, num):
content, contentWithTag = getContent(href) content, contentWithTag = getContent(href)
contentWithTag_str = str(contentWithTag) contentWithTag_str = str(contentWithTag)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
lang = baseCore.detect_language(content)
dic_info = { dic_info = {
'attachmentIds': [], 'attachmentIds': [],
'subjectId': '1729315113088765953',
'lang': lang,
'author': '', 'author': '',
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1, 'checkStatus': 1,
'id': '', 'id': '1729315113088765953'+str(int(time.time())),
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
'origin': origin, 'origin': origin,
......
...@@ -19,7 +19,7 @@ from reits import Policy ...@@ -19,7 +19,7 @@ from reits import Policy
policy = Policy() policy = Policy()
topic = 'policy' topic = 'research_center_fourth'
webname = '广东省人民政府' webname = '广东省人民政府'
headers = { headers = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
...@@ -144,14 +144,17 @@ def getData(data_, num,sid): ...@@ -144,14 +144,17 @@ def getData(data_, num,sid):
content, contentWithTag, id_list = getContent(href, publishDate, num) content, contentWithTag, id_list = getContent(href, publishDate, num)
contentWithTag_str = str(contentWithTag) contentWithTag_str = str(contentWithTag)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
lang = baseCore.detect_language(content)
dic_info = { dic_info = {
'attachmentIds': id_list, 'attachmentIds': id_list,
'subjectId': '1729315113088765953',
'lang': lang,
'author': '', 'author': '',
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1, 'checkStatus': 1,
'id': '', 'id': '1729315113088765953'+str(int(time.time())),
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
'origin': origin, 'origin': origin,
......
...@@ -15,7 +15,7 @@ from reits import Policy ...@@ -15,7 +15,7 @@ from reits import Policy
policy = Policy() policy = Policy()
topic = 'policy' topic = 'research_center_fourth'
webname = '江西省人民政府' webname = '江西省人民政府'
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
...@@ -130,14 +130,17 @@ def doJob(): ...@@ -130,14 +130,17 @@ def doJob():
num += 1 num += 1
contentWithTag_str = str(contentWithTag) contentWithTag_str = str(contentWithTag)
time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
lang = baseCore.detect_language(content)
dic_info = { dic_info = {
'attachmentIds': id_list, 'attachmentIds': id_list,
'subjectId': '1729315113088765953',
'lang': lang,
'author': '', 'author': '',
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1, 'checkStatus': 1,
'id': '', 'id': '1729315113088765953'+str(int(time.time())),
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
'origin': origin, 'origin': origin,
...@@ -150,6 +153,8 @@ def doJob(): ...@@ -150,6 +153,8 @@ def doJob():
'createDate': time_now, 'createDate': time_now,
'sid': '1729043445107838978' 'sid': '1729043445107838978'
} }
# print(dic_info['id'])
# print(publishDate)
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
baseCore.r.sadd('REITs::' + webname, href) baseCore.r.sadd('REITs::' + webname, href)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论