提交 95091024 作者: LiJunMing

智能解析笔记

上级 e4cf70b6
"""sorted(title_extracted_by_h,key = lambda x:similarity(x,title_extracted_by_title),reverse=True)
# Module-level setup: build the project's BaseCore helper and keep a handle
# to its `r` attribute for the queue operations performed later in the file.
from base import BaseCore
# Original intent (translated): using the social credit code taken from Redis,
# look up the corresponding basic information in the database.
import time
baseCore = BaseCore.BaseCore()
# log = baseCore.getLogger()
# `r` is used further down with `rpush`, so it is presumably a Redis client
# -- TODO confirm against base.BaseCore.
r = baseCore.r
# social_code = baseCore.redicPullData('NewsEnterprise:gwqy_socialCode')
#
# # Check: if Redis has no data left, wait
# if not social_code:
#
# print('1:')
# if social_code == 'None':
#
# print('2')
#
# if social_code == None:
# print('3:')
# Sample values kept for reference: 9131000010000595XD 91100000100003962T
# item = '91110000101690725E'
# Hard-coded value, presumably a social credit code used for manual testing.
item = '91100000100003962T'
etree.strip_elements(elements,tag) #tag是要删除的标签名
for child in children(element): #children(element)是一个函数,获取当前HTML元素的所有子元素
child.tag.lower() #获取标签名称
etree.strip_tags(child,'span') #删除标签对
# Identifiers to seed into the queue, kept as strings and in push order.
# Bound to `codes` instead of `list` so the builtin `list` type stays usable
# in the rest of the module.
codes = [
    '86', '122', '142', '168', '270', '282', '364', '407', '416', '563',
    '566', '588', '595', '597', '737', '750', '810', '838', '860', '875',
    '885', '886', '1003', '1250', '1272', '1362', '1379', '1381', '1382', '1392',
    '1476', '1583', '1639', '1748', '1764', '1775', '1801', '1839', '2018', '2260',
    '2356', '2471', '2563', '2703', '2800', '2815', '2934', '3162', '3376', '3474',
    '3737', '3782', '3939', '4118', '4509', '4675', '4801', '4818', '4943', '5149',
    '5195', '5429', '7023', '7025', '7026', '7039', '7053', '7058', '7059', '7060',
    '7062', '7066', '7067', '7069', '7073', '7077', '7083', '7091', '7095', '7103',
    '7105', '7107', '7109', '7110', '7113', '7131', '7135', '7136', '7138', '7140',
    '7141', '7142', '7143', '7144', '7145', '7147', '7150', '7151', '7152', '7156',
    '7157', '7160', '7162', '7165', '7169', '7174', '7180', '7187', '7193', '7197',
    '7198', '7199', '7201', '7202', '7203', '7204', '7205', '7206', '7208', '7209',
    '7211', '7212', '7213', '7214',
]

# Push every identifier onto the 'NewsEnterpriseFbs:gnqy_socialCode' key, one
# rpush call per value, preserving the order of `codes`.
# NOTE(review): `r` comes from earlier in the file (baseCore.r); presumably a
# Redis client, in which case rpush appends to the tail of the list -- confirm.
for item in codes:
    r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item)
\ No newline at end of file
element.getparent() #获取给定元素的父元素
"""
# from io import StringIO
#
# import pandas as pd
# data = '"1234","456\r7","897"'
# print(data)
# aa = pd.read_csv(StringIO(data),escapechar='\r')
# print(aa)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论