智能解析笔记

95091024 · LiJunMing · e4cf70b6 · 95091024
--- a/test.py
+++ b/test.py

+"""sorted(title_extracted_by_h,key = lambda x:similarity(x,title_extracted_by_title),reverse=True)

-from base import BaseCore
-# 根据从Redis中拿到的社会信用代码,在数据库中获取对应基本信息
-import time
-baseCore = BaseCore.BaseCore()
-# log = baseCore.getLogger()
-r = baseCore.r
-# social_code = baseCore.redicPullData('NewsEnterprise:gwqy_socialCode')
-#
-# # 判断 如果Redis中已经没有数据，则等待
-# if not social_code:
-#
-#     print('1:')
-# if social_code == 'None':
-#
-#     print('2')
-#
-# if social_code == None:
-#     print('3:')
-#9131000010000595XD 91100000100003962T
-# item = '91110000101690725E'
-item = '91100000100003962T'
+etree.strip_elements(elements,tag) #删除所有标签名为tag的元素及其整个子树（默认连同元素后的尾部文本一并删除）
+for child in children(element):  #children(element)是一个函数，获取当前HTML元素的所有子元素
+    child.tag.lower() #获取小写形式的标签名称
+    etree.strip_tags(child,'span') #删除span标签对，但保留其中的文本和子元素（并入父元素）

-list = ['86',
-'122',
-'142',
-'168',
-'270',
-'282',
-'364',
-'407',
-'416',
-'563',
-'566',
-'588',
-'595',
-'597',
-'737',
-'750',
-'810',
-'838',
-'860',
-'875',
-'885',
-'886',
-'1003',
-'1250',
-'1272',
-'1362',
-'1379',
-'1381',
-'1382',
-'1392',
-'1476',
-'1583',
-'1639',
-'1748',
-'1764',
-'1775',
-'1801',
-'1839',
-'2018',
-'2260',
-'2356',
-'2471',
-'2563',
-'2703',
-'2800',
-'2815',
-'2934',
-'3162',
-'3376',
-'3474',
-'3737',
-'3782',
-'3939',
-'4118',
-'4509',
-'4675',
-'4801',
-'4818',
-'4943',
-'5149',
-'5195',
-'5429',
-'7023',
-'7025',
-'7026',
-'7039',
-'7053',
-'7058',
-'7059',
-'7060',
-'7062',
-'7066',
-'7067',
-'7069',
-'7073',
-'7077',
-'7083',
-'7091',
-'7095',
-'7103',
-'7105',
-'7107',
-'7109',
-'7110',
-'7113',
-'7131',
-'7135',
-'7136',
-'7138',
-'7140',
-'7141',
-'7142',
-'7143',
-'7144',
-'7145',
-'7147',
-'7150',
-'7151',
-'7152',
-'7156',
-'7157',
-'7160',
-'7162',
-'7165',
-'7169',
-'7174',
-'7180',
-'7187',
-'7193',
-'7197',
-'7198',
-'7199',
-'7201',
-'7202',
-'7203',
-'7204',
-'7205',
-'7206',
-'7208',
-'7209',
-'7211',
-'7212',
-'7213',
-'7214']
-for item in list:
-    r.rpush('NewsEnterpriseFbs:gnqy_socialCode', item)
\ No newline at end of file
+element.getparent() #获取给定元素的父元素
+
+
+"""
+# from io import StringIO
+#
+# import pandas as pd
+# data = '"1234","456\r7","897"'
+# print(data)
+# aa = pd.read_csv(StringIO(data),escapechar='\r')
+# print(aa)
\ No newline at end of file