Commit 7bf6c3cf by 薛凌堃

1/24

Parent 0ce37662
@@ -932,7 +932,7 @@ class BaseCore:
         # sender email address
         sender = '1195236739@qq.com'
         # receiver email address
-        receiver = '1007765445@qq.com'
+        receiver = 'fujunxue@ciglobal.cn'
         smtpserver = 'smtp.qq.com'
         # sender mailbox login: account and password
         username = '1195236739@qq.com'
...
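The only change in this hunk is the alert recipient. The sending method itself sits outside the hunk; below is a minimal sketch of how these settings are typically wired into smtplib. The method name, subject/body, and the password placeholder are assumptions, not code from this repository.

```python
# Sketch only: the real method in BaseCore is not shown in this hunk.
import smtplib
from email.mime.text import MIMEText
from email.header import Header

def send_alert(subject: str, body: str) -> None:
    sender = '1195236739@qq.com'
    receiver = 'fujunxue@ciglobal.cn'   # new receiver from this commit
    smtpserver = 'smtp.qq.com'
    username = '1195236739@qq.com'
    password = '...'                    # SMTP auth code, not shown in the diff

    msg = MIMEText(body, 'plain', 'utf-8')
    msg['Subject'] = Header(subject, 'utf-8')
    msg['From'] = sender
    msg['To'] = receiver

    # QQ Mail requires SSL on port 465
    with smtplib.SMTP_SSL(smtpserver, 465) as server:
        server.login(username, password)
        server.sendmail(sender, [receiver], msg.as_string())
```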
@@ -902,7 +902,7 @@ def qianyanzhishiku():
 def shijiejingjiluntan():
     allnum = {'一': '01', '二': '02', '三': '03', '四': '04', '五': '05', '六': '06', '七': '07', '八': '08', '九': '09', '十': '10', '十一': '11', '十二': '12'}
-    for i in range(2, 3):
+    for i in range(1, 3):
         # res = requests.get(url)
         # soup = BeautifulSoup(res.content,'html.parser')
...
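The loop-bound change means this scraper now covers pages 1 and 2 instead of page 2 only, since `range(1, 3)` yields 1 and 2. The `allnum` map normalizes Chinese month names to two-digit strings; a minimal usage sketch follows, in which the input date format is an assumption:

```python
# Sketch: normalizing a Chinese month name with the allnum map.
allnum = {'一': '01', '二': '02', '三': '03', '四': '04', '五': '05', '六': '06',
          '七': '07', '八': '08', '九': '09', '十': '10', '十一': '11', '十二': '12'}

date_cn = '2023年十一月'                        # assumed input format
month_cn = date_cn.split('年')[1].rstrip('月')  # -> '十一'
print(allnum[month_cn])                         # -> '11'
```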
@@ -169,7 +169,7 @@ def get_content1():
     'id': '', #
     'labels': [{'relationId': "1766", 'relationName': "国务院文件", 'labelMark': "policy"}],
     # related-label id / related-label name / related-label mark
-    'origin': '',  # policy publishing authority
+    'origin': '中华人民共和国中央人民政府',  # policy publishing authority
     'organ': pub_org,  # policy issuing organ
     'topicClassification': child_type,  # policy document classification
     'issuedNumber': pub_code,  # document issue number
...
@@ -151,7 +151,7 @@ def get_content2():
     'id': '', #
     'labels': [{'relationId': "1699", 'relationName': "国务院各部委文件", 'labelMark': "policy"}],
     # related-label id / related-label name / related-label mark
-    'origin': '',  # policy publishing authority
+    'origin': '中华人民共和国中央人民政府',  # policy publishing authority
     'organ': pub_org,  # policy issuing organ
     'topicClassification': child_type,  # policy document classification
     'issuedNumber': pub_code,  # document issue number
...
@@ -119,15 +119,15 @@ if __name__=='__main__':
             # or '中共' in author or '记者' in author or '新闻社' in author\
             # or '党委' in author or '调研组' in author or '研究中心' in author\
             # or '委员会' in author or '博物' in author or '大学' in author or '联合会' in author :
-            if '(' in author or '本刊' in author \
-                    or '记者' in author or '新闻社' in author \
-                    or '”' in author\
-                    or '大学' in author or '洛桑江村' in author:
-                continue
-            # if '国资委党委' in author:
-            #     pass
-            # else:
-            #     continue
+            # if '(' in author or '本刊' in author \
+            #         or '记者' in author or '新闻社' in author \
+            #         or '”' in author\
+            #         or '大学' in author or '洛桑江村' in author:
+            #     continue
+            if '国资委党委' in author:
+                pass
+            else:
+                continue
             new_href = new.find('a')['href']
             is_member = r.sismember('qiushileaderspeech_two::' + period_title, new_href)
             if is_member:
...
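This hunk inverts the author filter: the old blocklist (skip authors containing '记者', '大学', and similar) is commented out, and only items whose author contains '国资委党委' are kept. The `pass`/`else`/`continue` pattern is equivalent to a single guard; a sketch of the same logic in isolation follows, where the loop variable and author extraction are assumptions since the hunk shows only part of the surrounding loop:

```python
# Sketch: the new allowlist filter, equivalent to the pass/else/continue above.
def should_skip(author: str) -> bool:
    # Keep only items authored by '国资委党委'; skip everything else.
    return '国资委党委' not in author

for new in news_items:              # news_items is a placeholder name
    author = new.find('span').text  # placeholder for the real extraction
    if should_skip(author):
        continue
    new_href = new.find('a')['href']
    # ... dedupe via r.sismember(...) and crawl, as in the surrounding code
```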
-# from baiduSpider import BaiduSpider
-# from baiduSpider import BaiduSpider
-# searchkw, wordsCode, sid = '', '', ''
-# baidu = BaiduSpider(searchkw, wordsCode, sid)
+from baiduSpider import BaiduSpider
+from baiduSpider import BaiduSpider
+searchkw, wordsCode, sid = '', '', ''
+baidu = BaiduSpider(searchkw, wordsCode, sid)
 import requests
 # url = 'https://baijiahao.baidu.com/s?id=1784907851792547880&wfr=spider&for=pc'
-# title = '“一带一路”商学院联盟副秘书长解奕炯:临沂在国际化物流建设中一定能“先行一步”'
-# try:
-#     detailurl = url
-#     title = title
-#     content, contentWithTag = baidu.extractorMsg(detailurl, title)
-#     contentWithTag = baidu.rmTagattr(contentWithTag, detailurl)
-# except Exception as e:
-#     content = ''
-#     contentWithTag = ''
-#
-# detailmsg = {
-#     'title': title,
-#     'detailurl': url,
-#     'content': content,
-#     'contentHtml': contentWithTag,
-# }
-# print(detailmsg)
-headers = {
-    'Accept': '*/*',
-    'Accept-Encoding': 'gzip, deflate, br',
-    'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
-    'Cache-Control': 'no-cache',
-    'Connection': 'keep-alive',
-    'Host': 'search-api-web.eastmoney.com',
-    'Pragma': 'no-cache',
-    'Sec-Fetch-Dest': 'script',
-    'Sec-Fetch-Mode': 'no-cors',
-    'Sec-Fetch-Site': 'same-site',
-    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
-    'sec-ch-ua-mobile': '?0',
-    'sec-ch-ua-platform': '"Windows"'
-}
-url = 'https://search-api-web.eastmoney.com/search/jsonp?cb=jQuery35103326233792363984_1702455623969&param=%7B%22uid%22%3A%22%22%2C%22keyword%22%3A%22%E7%A7%91%E8%BE%BE%E8%87%AA%E6%8E%A7%22%2C%22type%22%3A%5B%22researchReport%22%5D%2C%22client%22%3A%22web%22%2C%22clientVersion%22%3A%22curr%22%2C%22clientType%22%3A%22web%22%2C%22param%22%3A%7B%22researchReport%22%3A%7B%22client%22%3A%22web%22%2C%22pageSize%22%3A10%2C%22pageIndex%22%3A1%7D%7D%7D&_=1702455623970'
-# res = requests.get(url).text[1:-1]
-res = requests.get(url=url, headers=headers)
-res_json = res.text
-print(res_json)
\ No newline at end of file
+url = 'https://www.163.com/dy/article/IKD3M2P20514IPKH.html'
+title = '“一带一路”商学院联盟副秘书长解奕炯:临沂在国际化物流建设中一定能“先行一步”'
+try:
+    detailurl = url
+    title = title
+    content, contentWithTag = baidu.extractorMsg(detailurl, title)
+    contentWithTag = baidu.rmTagattr(contentWithTag, detailurl)
+except Exception as e:
+    content = ''
+    contentWithTag = ''
+
+detailmsg = {
+    'title': title,
+    'detailurl': url,
+    'content': content,
+    'contentHtml': contentWithTag,
+}
+print(detailmsg)
+# headers = {
+#     'Accept': '*/*',
+#     'Accept-Encoding': 'gzip, deflate, br',
+#     'Accept-Language': 'zh-CN,zh-TW;q=0.9,zh;q=0.8',
+#     'Cache-Control': 'no-cache',
+#     'Connection': 'keep-alive',
+#     'Host': 'search-api-web.eastmoney.com',
+#     'Pragma': 'no-cache',
+#     'Sec-Fetch-Dest': 'script',
+#     'Sec-Fetch-Mode': 'no-cors',
+#     'Sec-Fetch-Site': 'same-site',
+#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+#     'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
+#     'sec-ch-ua-mobile': '?0',
+#     'sec-ch-ua-platform': '"Windows"'
+# }
+# url = 'https://search-api-web.eastmoney.com/search/jsonp?cb=jQuery35103326233792363984_1702455623969&param=%7B%22uid%22%3A%22%22%2C%22keyword%22%3A%22%E7%A7%91%E8%BE%BE%E8%87%AA%E6%8E%A7%22%2C%22type%22%3A%5B%22researchReport%22%5D%2C%22client%22%3A%22web%22%2C%22clientVersion%22%3A%22curr%22%2C%22clientType%22%3A%22web%22%2C%22param%22%3A%7B%22researchReport%22%3A%7B%22client%22%3A%22web%22%2C%22pageSize%22%3A10%2C%22pageIndex%22%3A1%7D%7D%7D&_=1702455623970'
+# # res = requests.get(url).text[1:-1]
+# res = requests.get(url=url, headers=headers)
+#
+# res_json = res.text
+# print(res_json)
\ No newline at end of file
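In the now commented-out Eastmoney block, `requests.get(...).text` returns JSONP rather than JSON: the payload is wrapped in the callback named by the `cb` query parameter, which is why the earlier attempt sliced the text with `[1:-1]`. A sketch of unwrapping it properly follows; the `result.researchReport` path is an assumption about the response shape, not something confirmed by the diff:

```python
# Sketch: unwrap a JSONP response like jQuery35103326233792363984_...({...})
import json
import re

def jsonp_to_json(text: str) -> dict:
    # Strip the callback name and the surrounding parentheses.
    m = re.search(r'^[^(]*\((.*)\)\s*;?\s*$', text, re.S)
    if m is None:
        raise ValueError('not a JSONP payload')
    return json.loads(m.group(1))

# data = jsonp_to_json(requests.get(url=url, headers=headers).text)
# reports = data['result']['researchReport']  # assumed response shape
```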