
import json

"""
Elasticsearch 客户端安装（使用 7.8.1 版本）：
pip install elasticsearch==7.8.1
使用方法请参考以下文章：
https://blog.csdn.net/yangbisheng1121/article/details/128528112
https://blog.csdn.net/qiuweifan/article/details/128610083
"""
from elasticsearch import Elasticsearch

class EsMethod(object):
    """Small helper around an Elasticsearch client with sample queries.

    Every query method prints the raw response and also returns it so
    callers can work with the result programmatically.
    """

    def __init__(self, hosts=None, http_auth=None, timeout=300,
                 index_name='researchreportdata'):
        """Create the Elasticsearch client.

        Args:
            hosts: List of node URLs. Defaults to the original endpoint.
            http_auth: ``(user, password)`` tuple. Defaults to the original
                account.
            timeout: Client timeout passed to ``Elasticsearch``.
            index_name: Default index stored on ``self.index_name``.
        """
        # NOTE(security): the fallback endpoint and credentials are
        # hard-coded in source to keep ``EsMethod()`` working as before;
        # prefer passing them in from configuration.
        if hosts is None:
            hosts = ['http://114.116.19.92:9700']
        if http_auth is None:
            http_auth = ('elastic', 'zzsn9988')
        self.es = Elasticsearch(hosts, http_auth=http_auth, timeout=timeout)
        self.index_name = index_name

    def _search(self, index_name, body, filter_path):
        """Run a search, print the response and return it."""
        # doc_type is kept so the request is identical to what this class
        # has always sent.
        # NOTE(review): typed search endpoints appear to be deprecated in
        # Elasticsearch 7.x -- confirm before upgrading the server.
        result = self.es.search(index=index_name,
                                doc_type='_doc',
                                filter_path=filter_path,
                                body=body)
        print(result)
        return result

    def match(self, index_name, pnum):
        """Fuzzy (analyzed) search: the query text is tokenized.

        For example, searching "我爱你中国" also finds documents that only
        contain "我爱你" or only "中国".

        Args:
            index_name: Index to search.
            pnum: Offset of the first hit (the ``from`` value).

        Returns:
            The search response, reduced to the title and id of each hit.
        """
        # A single ``match`` clause accepts exactly one field, so each
        # field gets its own clause inside ``bool.must``.
        # NOTE(review): AND semantics are assumed here -- confirm.
        body = {
            'query': {
                'bool': {
                    'must': [
                        {'match': {'title': '.pdf'}},
                        {'match': {'origin': '雪球网'}},
                        {'match': {'type': '1'}},
                    ]
                }
            },
            'from': pnum,
            'size': 20,
        }
        filter_path = [
            'hits.hits._source.title',
            'hits.hits._source.id',
        ]
        return self._search(index_name, body, filter_path)

    def match_phrase(self, index_name):
        """Phrase search: the query text is matched as a whole phrase.

        Searching "我爱你中国" finds documents containing "我爱你中国".

        Args:
            index_name: Index to search.

        Returns:
            The search response, reduced to title, id, sourceAddress and
            publishDate of each hit.
        """
        body = {
            'query': {
                'match_phrase': {
                    'm_ext1': 'XXXXXX'  # search keyword
                }
            }
        }
        filter_path = [
            'hits.hits._source.title',
            'hits.hits._source.id',
            'hits.hits._source.sourceAddress',
            'hits.hits._source.publishDate',
        ]
        return self._search(index_name, body, filter_path)

    def term(self, index_name):
        """Exact-value search on a single field.

        Args:
            index_name: Index to search.

        Returns:
            The search response, reduced to m_ext1 and m_ext2 of each hit.
        """
        body = {
            'query': {
                'term': {
                    'm_slhm': 'XXXXXX'
                }
            }
        }
        filter_path = [
            'hits.hits._source.m_ext1',
            'hits.hits._source.m_ext2',
        ]
        return self._search(index_name, body, filter_path)

    def terms(self, index_name):
        """Exact-value search accepting several candidate values.

        Args:
            index_name: Index to search.

        Returns:
            The search response, reduced to m_ext1 and m_slhm of each hit.
        """
        body = {
            'query': {
                'terms': {
                    'm_slhm': ['13XXXXXX ', '13XXXXXX']
                }
            }
        }
        filter_path = [
            'hits.hits._source.m_ext1',
            'hits.hits._source.m_slhm',
        ]
        return self._search(index_name, body, filter_path)

    def multi_must(self, index_name):
        """Search where every condition has to match (AND).

        Args:
            index_name: Index to search.

        Returns:
            The search response, reduced to m_ext1 and m_slhm of each hit.
        """
        body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'m_slhm': '13XXXXXXX'}},
                        {'terms': {'m_slhm': ['13XXXXXX']}},
                    ]
                }
            }
        }
        filter_path = [
            'hits.hits._source.m_ext1',
            'hits.hits._source.m_slhm',
        ]
        return self._search(index_name, body, filter_path)

    def update(self, index_name):
        """Partially update a sample document.

        Args:
            index_name: Index holding the document.

        Returns:
            The update response.
        """
        # The Update API expects the changed fields under a 'doc' key (or
        # a script); updateaunn below already follows that shape.
        result = self.es.update(index=index_name,
                                id='20220901-XXXXX',
                                body={'doc': {'serialno': 'XXXXXX'}})
        print('更新结果:%s' % result)
        return result

    def add(self, index_name):
        """Index (create or replace) a sample document.

        Args:
            index_name: Index to write to.

        Returns:
            The index response.
        """
        result = self.es.index(index=index_name,
                               id='20220901-XXXXXX',
                               body={'serialno': 'XXXXXX'})
        print('新增结果:%s' % result)
        return result

    def delete(self, index_name):
        """Delete a sample document.

        Args:
            index_name: Index holding the document.

        Returns:
            The delete response.
        """
        result = self.es.delete(index=index_name,
                                doc_type="_doc",
                                id='20220901-XXXXXX')
        print('删除结果 %s' % result)
        return result

    def multi_should(self, index_name, pnum, size=6000):
        """Search titles matching '.pdf', with optional origin/type clauses.

        NOTE(review): this was labelled an OR query, but because the bool
        query also has a ``must`` clause, the ``should`` clauses are
        optional per the Elasticsearch bool-query documentation and only
        affect scoring. Set ``minimum_should_match`` if at least one of
        them is meant to be required.

        Args:
            index_name: Index to search.
            pnum: Offset of the first hit (the ``from`` value).
            size: Maximum number of hits to return.

        Returns:
            The search response, reduced to title and id of each hit plus
            the total hit count.
        """
        body = {
            'query': {
                'bool': {
                    'should': [
                        {'term': {'origin': '雪球网'}},
                        {'term': {'type': 1}},
                    ],
                    'must': [
                        {'match': {'title': '.pdf'}}
                    ]
                }
            },
            'from': pnum,
            'size': size,
        }
        filter_path = [
            'hits.hits._source.title',
            'hits.hits._source.id',
            'hits.total.value',
        ]
        return self._search(index_name, body, filter_path)

    def updateaunn(self, index_name, id, utitle):
        """Set the title of one document.

        Args:
            index_name: Index holding the document.
            id: Document id.
            utitle: New title value.

        Returns:
            The update response.
        """
        body = {
            'doc': {
                'title': utitle
            }
        }
        result = self.es.update(index=index_name,
                                id=id,
                                body=body)
        print('更新结果:%s' % result)
        return result

    def getFileds(self, index_name):
        """Print and return the field names in the mapping of an index.

        Args:
            index_name: Concrete index name; it is used as the key into
                the mapping response.

        Returns:
            A list of the top-level property names.
        """
        mapping = self.es.indices.get_mapping(index=index_name)
        fields = mapping[index_name]['mappings']['properties'].keys()

        print(fields)
        return list(fields)


if __name__ == '__main__':
    # One-off maintenance script: find documents whose title matches
    # '.pdf' and rewrite each title with every '.pdf' occurrence removed.
    esMethod = EsMethod()
    # esMethod.getFileds(index_name=esMethod.index_name)
    num = 1  # number of result pages to process
    for pnum in range(0, num):
        # NOTE(review): the offset advances by 20 per page while
        # multi_should requests up to 6000 hits per call, so pages would
        # overlap once num > 1 -- confirm the intended page size.
        p = pnum * 20
        print(f'第{pnum}页数据')
        result = esMethod.multi_should(index_name=esMethod.index_name, pnum=p)
        # filter_path removes empty parts of the response, so a search
        # without hits has no 'hits.hits' entry; treat that as an empty page
        # instead of failing with KeyError.
        msglist = result.get('hits', {}).get('hits', [])
        print(msglist)
        for mms in msglist:
            source = mms.get('_source', {})
            doc_id = source.get('id')
            title = source.get('title')
            if doc_id is None or title is None:
                # Fields missing from a document are absent from the
                # filtered response; such a hit cannot be updated.
                print(f'skip hit without id/title: {mms}')
                continue
            utitle = title.replace('.pdf', '')
            print(f'id:{doc_id}---title:{title}--utitle:{utitle}')
            esMethod.updateaunn(esMethod.index_name, str(doc_id), utitle)
            print('跟新成功！！')












