提交 5cebbe1d 作者: 薛凌堃

REITs专题脚本维护

上级 2e31675f
import re
import re
......@@ -89,6 +89,7 @@ def doJob():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -133,6 +134,7 @@ def doJob():
'content': content,
'contentWithTag': '',
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......
import os
import os
......@@ -142,6 +142,7 @@ def doJob():
'content': fjcontent,
'contentWithTag': '',
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -172,6 +173,7 @@ def doJob():
'content': content,
'contentWithTag': str(contentWithTag),
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......
import os
import os
......@@ -117,6 +117,7 @@ def doJob():
'content': content,
'contentWithTag': str(contentWithTag),
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......
import os
import os
......@@ -79,6 +79,7 @@ def doJob():
'content': content,
'contentWithTag': str(contentWithTag),
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......
import os
import os
......@@ -129,8 +129,8 @@ def getFjContent(url):
# 北京市人民政府 https://www.beijing.gov.cn/so/s?siteCode=1100000088&tab=zcfg&qt=REITs
def beijing():
if not os.path.exists('./相关政策/北京市人民政府/政策文件'):
os.makedirs('./相关政策/北京市人民政府/政策文件')
# if not os.path.exists('./相关政策/北京市人民政府/政策文件'):
# os.makedirs('./相关政策/北京市人民政府/政策文件')
policy1 = Policy1()
url = 'https://www.beijing.gov.cn/so/ss/query/s'
payload = {
......@@ -288,6 +288,7 @@ def beijing():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -299,7 +300,7 @@ def beijing():
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729041207245328385',
'sid': '1729041207245328385'
}
try:
baseCore.sendkafka(dic_info, topic)
......@@ -311,6 +312,6 @@ def beijing():
time.sleep(random.randint(10, 20))
num += 1
if __name__ == '__main__':
beijing()
baseCore.close()
# if __name__ == '__main__':
# beijing()
# baseCore.close()
import json
import json
......@@ -174,6 +174,7 @@ def getData(data_, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -185,7 +186,7 @@ def getData(data_, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729045755020103681',
'sid': '1729045755020103681'
}
try:
......
import time
import time
......@@ -148,6 +148,7 @@ def doJob():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -159,7 +160,7 @@ def doJob():
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729043067106865154',
'sid': '1729043067106865154'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import datetime
import datetime
......@@ -150,6 +150,7 @@ def getData(data_, num,sid):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -161,7 +162,7 @@ def getData(data_, num,sid):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': sid,
'sid': sid
}
try:
baseCore.sendkafka(dic_info, topic)
......
import json
import json
......@@ -123,6 +123,7 @@ def getData(data_, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -134,7 +135,7 @@ def getData(data_, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729045187128119298',
'sid': '1729045187128119298'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -156,6 +156,7 @@ def getData(data_, num):
'content': content,
'contentWithTag': str(contentWithTag),
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -167,7 +168,7 @@ def getData(data_, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729028548502597633',
'sid': '1729028548502597633'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import time
import time
......@@ -114,6 +114,7 @@ def getData(div, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -125,7 +126,7 @@ def getData(div, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729045345312100353',
'sid': '1729045345312100353'
}
try:
baseCore.sendkafka(dic_info, topic)
......
#coding=utf-8
#coding=utf-8
......@@ -110,6 +110,7 @@ def getContent(num, title, publishDate, summary, id, pub_hao, organ,type):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -121,7 +122,7 @@ def getContent(num, title, publishDate, summary, id, pub_hao, organ,type):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729042585839841281',
'sid': '1729042585839841281'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -120,6 +120,7 @@ def getData(driver, data_, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -131,7 +132,7 @@ def getData(driver, data_, num):
'issuedNumber': pub_hao,
'summary': summary.replace('</em>', '').replace('<em>', ''),
'createDate': time_now,
'sid': '1729044085724860418',
'sid': '1729044085724860418'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -91,6 +91,7 @@ def getContentA(url, num, publishDate, title, origin, summary):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -102,7 +103,7 @@ def getContentA(url, num, publishDate, title, origin, summary):
'issuedNumber': '',
'summary': summary,
'createDate': time_now,
'sid': '1729042894974537730',
'sid': '1729042894974537730'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import time
import time
......@@ -136,6 +136,7 @@ def doJob():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -147,7 +148,7 @@ def doJob():
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729043445107838978',
'sid': '1729043445107838978'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -161,6 +161,7 @@ def getData(num, title, url, origin, publishDate, summary):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -172,7 +173,7 @@ def getData(num, title, url, origin, publishDate, summary):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729042375596158978',
'sid': '1729042375596158978'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import time
import time
......@@ -69,6 +69,7 @@ def doJob():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -80,7 +81,7 @@ def doJob():
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729042213737967618',
'sid': '1729042213737967618'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -194,6 +194,7 @@ def getContent(num, data):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -205,7 +206,7 @@ def getContent(num, data):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729041959772860417',
'sid': '1729041959772860417'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import time
import time
......@@ -137,6 +137,7 @@ def getData(soup, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -148,7 +149,7 @@ def getData(soup, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729043593615560705',
'sid': '1729043593615560705'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import json
import json
......@@ -117,6 +117,7 @@ def getData(data_, driver, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -128,7 +129,7 @@ def getData(data_, driver, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729042751554506754',
'sid': '1729042751554506754'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -136,6 +136,7 @@ def getContent(num, data):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -147,7 +148,7 @@ def getContent(num, data):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729041791539326977',
'sid': '1729041791539326977'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import time
import time
......@@ -112,6 +112,7 @@ def getData(data_, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -123,7 +124,7 @@ def getData(data_, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729046053927178241',
'sid': '1729046053927178241'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -143,6 +143,7 @@ def getContent(num, title, pub_time, origin, organ, url, pub_hao, summary):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': pub_time,
......@@ -154,7 +155,7 @@ def getContent(num, title, pub_time, origin, organ, url, pub_hao, summary):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729041400674045953',
'sid': '1729041400674045953'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -155,6 +155,7 @@ def getData(div, num):
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -166,7 +167,7 @@ def getData(div, num):
'issuedNumber': pub_hao,
'summary': summary,
'createDate': time_now,
'sid': '1729046848292892673',
'sid': '1729046848292892673'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -508,6 +508,7 @@ def getDatas(page):
'content': content,
'contentWithTag': contentWithTag,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -519,7 +520,7 @@ def getDatas(page):
'issuedNumber': '',
'summary': '',
'createDate': time_now,
'sid': '1730472253306552321',
'sid': '1730472253306552321'
}
try:
baseCore.sendkafka(dic_info, topic)
......
import os
import os
......@@ -284,6 +284,7 @@ def reform():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate_,
......@@ -295,7 +296,7 @@ def reform():
'issuedNumber': pubHao,
'summary': summary,
'createDate': time_now,
'sid': '1729029275400646658',
'sid': '1729029275400646658'
}
# DataList.append(dic_info)
try:
......@@ -421,6 +422,7 @@ def zhengquanqihuo():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -432,7 +434,7 @@ def zhengquanqihuo():
'summary': '',
'topicClassification': '',
'createDate': time_now,
'sid': '1729030277461815298',
'sid': '1729030277461815298'
}
try:
baseCore.sendkafka(dic_info, topic)
......@@ -482,7 +484,8 @@ def sse():
# if not os.path.exists(path):
# os.makedirs(path)
for page in range(0, int(total_page)):
url_page = f'http://query.sse.com.cn/search/getESSearchDoc.do?page={page}&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C8685%2C9348%2C12632%2C12768%2C12769%2C12770%2C12771%2C12772%2C12773%2C12774%2C12775%2C12776%2C12777%2C12778%2C12779%2C12780%2C12781%2C12782%2C12783%2C12784%2C12785%2C12786%2C12787%2C12788%2C12789%2C12790%2C12791%2C12792%2C12793%2C12794%2C12795%2C12796%2C12797%2C12798%2C12799%2C12800%2C12801%2C12802%2C12803%2C12804%2C12805%2C12806%2C12807%2C12808%2C12809%2C12810%2C12811%2C12812%2C13061%2C13282%2C13283%2C13284%2C13285%2C13286%2C13287%2C13288%2C13289%2C13294%2C13364%2C13365%2C13366%2C13367%2C14595%2C14596%2C14597%2C14598%2C14599%2C14600%2C14601%2C14602%2C14603%2C14604%2C14605%2C14606&trackId=50619067167713018335655119683810&_=1699508921761'
t = int(time.time())
url_page = f'http://query.sse.com.cn/search/getESSearchDoc.do?page={page}&limit=10&publishTimeEnd=&publishTimeStart=&orderByDirection=DESC&orderByKey=score&searchMode=fuzzy&spaceId=3&keyword=REITs&siteName=sse&keywordPosition=title%2Cpaper_content&channelId=10001&channelCode=8640%2C8641%2C8642%2C8643%2C8644%2C8645%2C8646%2C8647%2C8648%2C8649%2C8650%2C8651%2C8652%2C8653%2C8654%2C8655%2C8656%2C8657%2C8658%2C8659%2C8660%2C8661%2C12632&trackId=24278800487459370386559742313666&_={t}'
data = policy.getrequest_json(headers, url_page)
newslist = data['data']['knowledgeList']
# print(newslist)
......@@ -534,6 +537,7 @@ def sse():
'content': content,
'contentWithTag': '',
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -545,7 +549,7 @@ def sse():
'issuedNumber': '',
'summary': summary,
'createDate': time_now,
'sid': '1729035244826374145',
'sid': '1729035244826374145'
}
# DataList.append(dic_info)
try:
......@@ -603,6 +607,7 @@ def sse():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title,
'publishDate': publishDate,
......@@ -625,7 +630,7 @@ def sse():
except Exception as e:
log.info(f"error!!!{newsUrl}")
log.info(e)
log.info(f'====第{page}页====处理结束,已采集{num}条数据=================')
log.info(f'====第{page}页====处理结束,================')
# 河北省人民政府
......@@ -636,7 +641,7 @@ def hebei():
num = 0
webname = '河北省人民政府'
url = "https://www.hebei.gov.cn/search/pcRender?pageId=b97a38833f7343cebc31dec44544f684"
appNames = ['信息公开']
appNames = ['信息公开', '热点专题']
for appName in appNames:
payload = {'qAnd': ' ',
'qOr': ' ',
......@@ -820,6 +825,9 @@ def hebei():
'attachmentIds':id_list,
'author': '',
'content': content,
'checkStatus': 1,
'deleteFlag': 0,
'id': '',
'contentWithTag': contentWithTag_str,
'title': title.replace('\n', ''),
'publishDate': publishDate,
......@@ -933,6 +941,7 @@ def guizhou():
'content': content,
'contentWithTag': contentWithTag_str,
'deleteFlag': 0,
'checkStatus': 1,
'id': '',
'title': title.replace('\n', ''),
'publishDate': publishDate,
......@@ -966,7 +975,10 @@ if __name__=="__main__":
reform()
# shenzhen()
zhengquanqihuo()
sse()
try:
sse()
except:
pass
hebei()
guizhou()
......
import reits
import reits
import policy_beijing, policy_chongqing, policy_fujian, policy_guangdong
import policy_guangxi, policy_gwy, policy_hainan, policy_heilongjiang, policy_hubei, policy_jiangsu
import policy_jiangxi, policy_jilin, policy_liaoning, policy_neimenggu, policy_shandong, policy_hubei
import policy_shanxi, policy_sichuan, policy_tianjin, policy_yunnan, policy_zhejiang
import RuleGuide_shanghai, RuleGuide_shenzhen
import LawRules_shenzhen, LawRules_2_shenzhen
# Script entry point: run every REITs policy / rule collector once, in order.
# The calls are sequential; an exception raised by one collector stops the
# ones listed after it.
#
# Fix: the guard previously compared against "__mian__", which never equals
# the module name, so executing this file ran nothing at all.
if __name__ == "__main__":
    # Collectors exposed as individually named functions.
    # NOTE(review): policy_beijing's own (now commented-out) script entry also
    # called baseCore.close() after beijing(); confirm whether that cleanup is
    # still needed when it is driven from here.
    policy_beijing.beijing()
    reits.sse()
    reits.reform()
    reits.hebei()
    reits.guizhou()
    reits.zhengquanqihuo()

    # Provincial / national policy collectors, each exposing doJob().
    policy_chongqing.doJob()
    policy_fujian.doJob()
    policy_guangdong.doJob()
    policy_guangxi.doJob()
    policy_gwy.doJob()
    policy_hainan.doJob()
    policy_heilongjiang.doJob()
    policy_hubei.doJob()
    policy_jiangsu.doJob()
    policy_jiangxi.doJob()
    policy_jilin.doJob()
    policy_liaoning.doJob()
    policy_neimenggu.doJob()
    policy_shandong.doJob()
    # The original list invoked policy_hubei.doJob() a second time at this
    # point; it is dropped so the same collector is not run twice.
    # NOTE(review): this slot may have been meant for a different province
    # module — confirm against the list of available policy_* scripts.
    policy_shanxi.doJob()
    policy_sichuan.doJob()
    policy_tianjin.doJob()
    policy_yunnan.doJob()
    policy_zhejiang.doJob()

    # Exchange rule-guide and law/regulation collectors.
    RuleGuide_shanghai.doJob()
    RuleGuide_shenzhen.doJob()
    LawRules_shenzhen.doJob()
    LawRules_2_shenzhen.doJob()
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论