提交 5cebbe1d 作者: 薛凌堃

REITs专题脚本维护

上级 2e31675f
import re import re
...@@ -89,6 +89,7 @@ def doJob(): ...@@ -89,6 +89,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -133,6 +134,7 @@ def doJob(): ...@@ -133,6 +134,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': '', 'contentWithTag': '',
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
......
import os import os
...@@ -142,6 +142,7 @@ def doJob(): ...@@ -142,6 +142,7 @@ def doJob():
'content': fjcontent, 'content': fjcontent,
'contentWithTag': '', 'contentWithTag': '',
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -172,6 +173,7 @@ def doJob(): ...@@ -172,6 +173,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': str(contentWithTag), 'contentWithTag': str(contentWithTag),
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
......
import os import os
...@@ -117,6 +117,7 @@ def doJob(): ...@@ -117,6 +117,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': str(contentWithTag), 'contentWithTag': str(contentWithTag),
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
......
import os import os
...@@ -79,6 +79,7 @@ def doJob(): ...@@ -79,6 +79,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': str(contentWithTag), 'contentWithTag': str(contentWithTag),
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
......
import os import os
...@@ -129,8 +129,8 @@ def getFjContent(url): ...@@ -129,8 +129,8 @@ def getFjContent(url):
# 北京市人民政府 https://www.beijing.gov.cn/so/s?siteCode=1100000088&tab=zcfg&qt=REITs # 北京市人民政府 https://www.beijing.gov.cn/so/s?siteCode=1100000088&tab=zcfg&qt=REITs
def beijing(): def beijing():
if not os.path.exists('./相关政策/北京市人民政府/政策文件'): # if not os.path.exists('./相关政策/北京市人民政府/政策文件'):
os.makedirs('./相关政策/北京市人民政府/政策文件') # os.makedirs('./相关政策/北京市人民政府/政策文件')
policy1 = Policy1() policy1 = Policy1()
url = 'https://www.beijing.gov.cn/so/ss/query/s' url = 'https://www.beijing.gov.cn/so/ss/query/s'
payload = { payload = {
...@@ -288,6 +288,7 @@ def beijing(): ...@@ -288,6 +288,7 @@ def beijing():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -299,7 +300,7 @@ def beijing(): ...@@ -299,7 +300,7 @@ def beijing():
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729041207245328385', 'sid': '1729041207245328385'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
...@@ -311,6 +312,6 @@ def beijing(): ...@@ -311,6 +312,6 @@ def beijing():
time.sleep(random.randint(10, 20)) time.sleep(random.randint(10, 20))
num += 1 num += 1
if __name__ == '__main__': # if __name__ == '__main__':
beijing() # beijing()
baseCore.close() # baseCore.close()
import json import json
...@@ -174,6 +174,7 @@ def getData(data_, num): ...@@ -174,6 +174,7 @@ def getData(data_, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -185,7 +186,7 @@ def getData(data_, num): ...@@ -185,7 +186,7 @@ def getData(data_, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729045755020103681', 'sid': '1729045755020103681'
} }
try: try:
......
import time import time
...@@ -148,6 +148,7 @@ def doJob(): ...@@ -148,6 +148,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -159,7 +160,7 @@ def doJob(): ...@@ -159,7 +160,7 @@ def doJob():
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729043067106865154', 'sid': '1729043067106865154'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import datetime import datetime
...@@ -150,6 +150,7 @@ def getData(data_, num,sid): ...@@ -150,6 +150,7 @@ def getData(data_, num,sid):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -161,7 +162,7 @@ def getData(data_, num,sid): ...@@ -161,7 +162,7 @@ def getData(data_, num,sid):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': sid, 'sid': sid
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import json import json
...@@ -123,6 +123,7 @@ def getData(data_, num): ...@@ -123,6 +123,7 @@ def getData(data_, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -134,7 +135,7 @@ def getData(data_, num): ...@@ -134,7 +135,7 @@ def getData(data_, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729045187128119298', 'sid': '1729045187128119298'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -156,6 +156,7 @@ def getData(data_, num): ...@@ -156,6 +156,7 @@ def getData(data_, num):
'content': content, 'content': content,
'contentWithTag': str(contentWithTag), 'contentWithTag': str(contentWithTag),
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -167,7 +168,7 @@ def getData(data_, num): ...@@ -167,7 +168,7 @@ def getData(data_, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729028548502597633', 'sid': '1729028548502597633'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import time import time
...@@ -114,6 +114,7 @@ def getData(div, num): ...@@ -114,6 +114,7 @@ def getData(div, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -125,7 +126,7 @@ def getData(div, num): ...@@ -125,7 +126,7 @@ def getData(div, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729045345312100353', 'sid': '1729045345312100353'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
#coding=utf-8 #coding=utf-8
...@@ -110,6 +110,7 @@ def getContent(num, title, publishDate, summary, id, pub_hao, organ,type): ...@@ -110,6 +110,7 @@ def getContent(num, title, publishDate, summary, id, pub_hao, organ,type):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -121,7 +122,7 @@ def getContent(num, title, publishDate, summary, id, pub_hao, organ,type): ...@@ -121,7 +122,7 @@ def getContent(num, title, publishDate, summary, id, pub_hao, organ,type):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729042585839841281', 'sid': '1729042585839841281'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -120,6 +120,7 @@ def getData(driver, data_, num): ...@@ -120,6 +120,7 @@ def getData(driver, data_, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -131,7 +132,7 @@ def getData(driver, data_, num): ...@@ -131,7 +132,7 @@ def getData(driver, data_, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary.replace('</em>', '').replace('<em>', ''), 'summary': summary.replace('</em>', '').replace('<em>', ''),
'createDate': time_now, 'createDate': time_now,
'sid': '1729044085724860418', 'sid': '1729044085724860418'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -91,6 +91,7 @@ def getContentA(url, num, publishDate, title, origin, summary): ...@@ -91,6 +91,7 @@ def getContentA(url, num, publishDate, title, origin, summary):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -102,7 +103,7 @@ def getContentA(url, num, publishDate, title, origin, summary): ...@@ -102,7 +103,7 @@ def getContentA(url, num, publishDate, title, origin, summary):
'issuedNumber': '', 'issuedNumber': '',
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729042894974537730', 'sid': '1729042894974537730'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import time import time
...@@ -136,6 +136,7 @@ def doJob(): ...@@ -136,6 +136,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -147,7 +148,7 @@ def doJob(): ...@@ -147,7 +148,7 @@ def doJob():
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729043445107838978', 'sid': '1729043445107838978'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -161,6 +161,7 @@ def getData(num, title, url, origin, publishDate, summary): ...@@ -161,6 +161,7 @@ def getData(num, title, url, origin, publishDate, summary):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -172,7 +173,7 @@ def getData(num, title, url, origin, publishDate, summary): ...@@ -172,7 +173,7 @@ def getData(num, title, url, origin, publishDate, summary):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729042375596158978', 'sid': '1729042375596158978'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import time import time
...@@ -69,6 +69,7 @@ def doJob(): ...@@ -69,6 +69,7 @@ def doJob():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -80,7 +81,7 @@ def doJob(): ...@@ -80,7 +81,7 @@ def doJob():
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729042213737967618', 'sid': '1729042213737967618'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -194,6 +194,7 @@ def getContent(num, data): ...@@ -194,6 +194,7 @@ def getContent(num, data):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -205,7 +206,7 @@ def getContent(num, data): ...@@ -205,7 +206,7 @@ def getContent(num, data):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729041959772860417', 'sid': '1729041959772860417'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import time import time
...@@ -137,6 +137,7 @@ def getData(soup, num): ...@@ -137,6 +137,7 @@ def getData(soup, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -148,7 +149,7 @@ def getData(soup, num): ...@@ -148,7 +149,7 @@ def getData(soup, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729043593615560705', 'sid': '1729043593615560705'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import json import json
...@@ -117,6 +117,7 @@ def getData(data_, driver, num): ...@@ -117,6 +117,7 @@ def getData(data_, driver, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -128,7 +129,7 @@ def getData(data_, driver, num): ...@@ -128,7 +129,7 @@ def getData(data_, driver, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729042751554506754', 'sid': '1729042751554506754'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -136,6 +136,7 @@ def getContent(num, data): ...@@ -136,6 +136,7 @@ def getContent(num, data):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -147,7 +148,7 @@ def getContent(num, data): ...@@ -147,7 +148,7 @@ def getContent(num, data):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729041791539326977', 'sid': '1729041791539326977'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import time import time
...@@ -112,6 +112,7 @@ def getData(data_, num): ...@@ -112,6 +112,7 @@ def getData(data_, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -123,7 +124,7 @@ def getData(data_, num): ...@@ -123,7 +124,7 @@ def getData(data_, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729046053927178241', 'sid': '1729046053927178241'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -143,6 +143,7 @@ def getContent(num, title, pub_time, origin, organ, url, pub_hao, summary): ...@@ -143,6 +143,7 @@ def getContent(num, title, pub_time, origin, organ, url, pub_hao, summary):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': pub_time, 'publishDate': pub_time,
...@@ -154,7 +155,7 @@ def getContent(num, title, pub_time, origin, organ, url, pub_hao, summary): ...@@ -154,7 +155,7 @@ def getContent(num, title, pub_time, origin, organ, url, pub_hao, summary):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729041400674045953', 'sid': '1729041400674045953'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -155,6 +155,7 @@ def getData(div, num): ...@@ -155,6 +155,7 @@ def getData(div, num):
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -166,7 +167,7 @@ def getData(div, num): ...@@ -166,7 +167,7 @@ def getData(div, num):
'issuedNumber': pub_hao, 'issuedNumber': pub_hao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729046848292892673', 'sid': '1729046848292892673'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -508,6 +508,7 @@ def getDatas(page): ...@@ -508,6 +508,7 @@ def getDatas(page):
'content': content, 'content': content,
'contentWithTag': contentWithTag, 'contentWithTag': contentWithTag,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -519,7 +520,7 @@ def getDatas(page): ...@@ -519,7 +520,7 @@ def getDatas(page):
'issuedNumber': '', 'issuedNumber': '',
'summary': '', 'summary': '',
'createDate': time_now, 'createDate': time_now,
'sid': '1730472253306552321', 'sid': '1730472253306552321'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
......
import os import os
...@@ -284,6 +284,7 @@ def reform(): ...@@ -284,6 +284,7 @@ def reform():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate_, 'publishDate': publishDate_,
...@@ -295,7 +296,7 @@ def reform(): ...@@ -295,7 +296,7 @@ def reform():
'issuedNumber': pubHao, 'issuedNumber': pubHao,
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729029275400646658', 'sid': '1729029275400646658'
} }
# DataList.append(dic_info) # DataList.append(dic_info)
try: try:
...@@ -421,6 +422,7 @@ def zhengquanqihuo(): ...@@ -421,6 +422,7 @@ def zhengquanqihuo():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -432,7 +434,7 @@ def zhengquanqihuo(): ...@@ -432,7 +434,7 @@ def zhengquanqihuo():
'summary': '', 'summary': '',
'topicClassification': '', 'topicClassification': '',
'createDate': time_now, 'createDate': time_now,
'sid': '1729030277461815298', 'sid': '1729030277461815298'
} }
try: try:
baseCore.sendkafka(dic_info, topic) baseCore.sendkafka(dic_info, topic)
...@@ -482,7 +484,8 @@ def sse(): ...@@ -482,7 +484,8 @@ def sse():
# if not os.path.exists(path): # if not os.path.exists(path):
# os.makedirs(path) # os.makedirs(path)
for page in range(0, int(total_page)): for page in range(0, int(total_page)):
url_page = f'http://query.sse.com.cn/search/getESSearchDoc.do?page={page}&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C8685%2C9348%2C12632%2C12768%2C12769%2C12770%2C12771%2C12772%2C12773%2C12774%2C12775%2C12776%2C12777%2C12778%2C12779%2C12780%2C12781%2C12782%2C12783%2C12784%2C12785%2C12786%2C12787%2C12788%2C12789%2C12790%2C12791%2C12792%2C12793%2C12794%2C12795%2C12796%2C12797%2C12798%2C12799%2C12800%2C12801%2C12802%2C12803%2C12804%2C12805%2C12806%2C12807%2C12808%2C12809%2C12810%2C12811%2C12812%2C13061%2C13282%2C13283%2C13284%2C13285%2C13286%2C13287%2C13288%2C13289%2C13294%2C13364%2C13365%2C13366%2C13367%2C14595%2C14596%2C14597%2C14598%2C14599%2C14600%2C14601%2C14602%2C14603%2C14604%2C14605%2C14606&trackId=50619067167713018335655119683810&_=1699508921761' t = int(time.time())
url_page = f'http://query.sse.com.cn/search/getESSearchDoc.do?page={page}&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C12632&trackId=24278800487459370386559742313666&_={t}'
data = policy.getrequest_json(headers, url_page) data = policy.getrequest_json(headers, url_page)
newslist = data['data']['knowledgeList'] newslist = data['data']['knowledgeList']
# print(newslist) # print(newslist)
...@@ -534,6 +537,7 @@ def sse(): ...@@ -534,6 +537,7 @@ def sse():
'content': content, 'content': content,
'contentWithTag': '', 'contentWithTag': '',
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -545,7 +549,7 @@ def sse(): ...@@ -545,7 +549,7 @@ def sse():
'issuedNumber': '', 'issuedNumber': '',
'summary': summary, 'summary': summary,
'createDate': time_now, 'createDate': time_now,
'sid': '1729035244826374145', 'sid': '1729035244826374145'
} }
# DataList.append(dic_info) # DataList.append(dic_info)
try: try:
...@@ -603,6 +607,7 @@ def sse(): ...@@ -603,6 +607,7 @@ def sse():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title, 'title': title,
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -625,7 +630,7 @@ def sse(): ...@@ -625,7 +630,7 @@ def sse():
except Exception as e: except Exception as e:
log.info(f"error!!!{newsUrl}") log.info(f"error!!!{newsUrl}")
log.info(e) log.info(e)
log.info(f'====第{page}页====处理结束,已采集{num}条数据=================') log.info(f'====第{page}页====处理结束,================')
# 河北省人民政府 # 河北省人民政府
...@@ -636,7 +641,7 @@ def hebei(): ...@@ -636,7 +641,7 @@ def hebei():
num = 0 num = 0
webname = '河北省人民政府' webname = '河北省人民政府'
url = "https://www.hebei.gov.cn/search/pcRender?pageId=b97a38833f7343cebc31dec44544f684" url = "https://www.hebei.gov.cn/search/pcRender?pageId=b97a38833f7343cebc31dec44544f684"
appNames = ['信息公开'] appNames = ['信息公开', '热点专题']
for appName in appNames: for appName in appNames:
payload = {'qAnd': ' ', payload = {'qAnd': ' ',
'qOr': ' ', 'qOr': ' ',
...@@ -820,6 +825,9 @@ def hebei(): ...@@ -820,6 +825,9 @@ def hebei():
'attachmentIds':id_list, 'attachmentIds':id_list,
'author': '', 'author': '',
'content': content, 'content': content,
'checkStatus': 1,
'deleteFlag': 0,
'id': '',
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'title': title.replace('\n', ''), 'title': title.replace('\n', ''),
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -933,6 +941,7 @@ def guizhou(): ...@@ -933,6 +941,7 @@ def guizhou():
'content': content, 'content': content,
'contentWithTag': contentWithTag_str, 'contentWithTag': contentWithTag_str,
'deleteFlag': 0, 'deleteFlag': 0,
'checkStatus': 1,
'id': '', 'id': '',
'title': title.replace('\n', ''), 'title': title.replace('\n', ''),
'publishDate': publishDate, 'publishDate': publishDate,
...@@ -966,7 +975,10 @@ if __name__=="__main__": ...@@ -966,7 +975,10 @@ if __name__=="__main__":
reform() reform()
# shenzhen() # shenzhen()
zhengquanqihuo() zhengquanqihuo()
try:
sse() sse()
except:
pass
hebei() hebei()
guizhou() guizhou()
......
import reits
import reits
import policy_beijing, policy_chongqing, policy_fujian, policy_guangdong
import policy_guangxi, policy_gwy, policy_hainan, policy_heilongjiang, policy_hubei, policy_jiangsu
import policy_jiangxi, policy_jilin, policy_liaoning, policy_neimenggu, policy_shandong, policy_hubei
import policy_shanxi, policy_sichuan, policy_tianjin, policy_yunnan, policy_zhejiang
import RuleGuide_shanghai, RuleGuide_shenzhen
import LawRules_shenzhen, LawRules_2_shenzhen
# Script entry point: run every REITs-topic scraper one after another.
# The guard must compare against "__main__" -- the previous spelling
# ("__mian__") never matched, so executing this file did nothing.
if __name__ == "__main__":
    # Beijing municipal government policy search.
    policy_beijing.beijing()

    # Sources implemented in the shared `reits` module.
    reits.sse()
    reits.reform()
    reits.hebei()
    reits.guizhou()
    reits.zhengquanqihuo()

    # Per-province / per-source policy scrapers, each exposing doJob().
    policy_chongqing.doJob()
    policy_fujian.doJob()
    policy_guangdong.doJob()
    policy_guangxi.doJob()
    policy_gwy.doJob()
    policy_hainan.doJob()
    policy_heilongjiang.doJob()
    policy_hubei.doJob()
    policy_jiangsu.doJob()
    policy_jiangxi.doJob()
    policy_jilin.doJob()
    policy_liaoning.doJob()
    policy_neimenggu.doJob()
    policy_shandong.doJob()
    # NOTE(review): policy_hubei is invoked a second time here (and is listed
    # twice in the imports). This looks like a copy/paste slip for a different
    # province module -- confirm the intended module before changing it.
    policy_hubei.doJob()
    policy_shanxi.doJob()
    policy_sichuan.doJob()
    policy_tianjin.doJob()
    policy_yunnan.doJob()
    policy_zhejiang.doJob()

    # Exchange rule guides and law/regulation collections.
    RuleGuide_shanghai.doJob()
    RuleGuide_shenzhen.doJob()
    LawRules_shenzhen.doJob()
    LawRules_2_shenzhen.doJob()
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论