Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
Z
zzsn_spider
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
1
合并请求
1
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
丁双波
zzsn_spider
Commits
01e0d716
提交
01e0d716
authored
8月 25, 2023
作者:
薛凌堃
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/master'
上级
32c31bd1
8be2a4ec
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
943 行增加
和
98 行删除
+943
-98
BaseCore.py
base/BaseCore.py
+10
-2
雅虎财经_企业基本信息_高管信息.py
comData/yhcj/雅虎财经_企业基本信息_高管信息.py
+95
-96
googleSpider.py
google_comm/googleSpider.py
+34
-0
sougouSpider.py
sougou_comm/sougouSpider.py
+1
-0
__init__.py
tmp/__init__.py
+0
-0
__init__.py
tmp/usVsRussia/__init__.py
+0
-0
ofac.py
tmp/usVsRussia/ofac.py
+803
-0
没有找到文件。
base/BaseCore.py
浏览文件 @
01e0d716
...
...
@@ -369,7 +369,7 @@ class BaseCore:
if
beginStr
==
''
:
pass
else
:
begin
=
str
.
find
(
beginStr
)
begin
=
str
.
r
find
(
beginStr
)
if
begin
==-
1
:
begin
=
0
str
=
str
[
begin
:]
...
...
@@ -425,11 +425,18 @@ class BaseCore:
IP
=
socket
.
gethostbyname
(
socket
.
gethostname
())
return
IP
def
mkPath
(
self
,
path
):
folder
=
os
.
path
.
exists
(
path
)
if
not
folder
:
# 判断是否存在文件夹如果不存在则创建为文件夹
os
.
makedirs
(
path
)
# makedirs 创建文件时如果路径不存在会创建这个路径
else
:
pass
# 生成google模拟浏览器 必须传入值为googledriver位置信息
# headless用于决定是否为无头浏览器,初始默认为无头浏览器
# 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集
# 无头浏览器用于后续对信息采集时不会有浏览器一直弹出,
def
buildDriver
(
self
,
path
,
headless
=
True
):
service
=
Service
(
path
)
chrome_options
=
webdriver
.
ChromeOptions
()
if
headless
:
...
...
@@ -442,7 +449,7 @@ class BaseCore:
chrome_options
.
add_argument
(
'user-agent='
+
self
.
getRandomUserAgent
())
# 'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
driver
=
webdriver
.
Chrome
(
chrome_
options
=
chrome_options
,
service
=
service
)
driver
=
webdriver
.
Chrome
(
options
=
chrome_options
,
service
=
service
)
# with open(r'F:\zzsn\zzsn_spider\base\stealth.min.js') as f:
# js = f.read()
#
...
...
@@ -586,3 +593,4 @@ class BaseCore:
comData/yhcj/雅虎财经_企业基本信息_高管信息.py
浏览文件 @
01e0d716
impor
t
json
impor
t
json
import
json
import
time
import
numpy
as
np
import
pandas
as
pd
import
pymysql
import
requests
import
sys
from
bs4
import
BeautifulSoup
from
kafka
import
KafkaProducer
from
NewsYahoo
import
news
from
base.BaseCore
import
BaseCore
sys
.
path
.
append
(
r'F:\zzsn\zzsn_spider\base'
)
import
BaseCore
import
urllib3
urllib3
.
disable_warnings
(
urllib3
.
exceptions
.
InsecureRequestWarning
)
taskType
=
'企业基本信息/雅虎财经'
baseCore
=
BaseCore
()
baseCore
=
BaseCore
.
BaseCore
()
r
=
baseCore
.
r
log
=
baseCore
.
getLogger
()
headers
=
{
...
...
@@ -38,7 +34,7 @@ headers = {
# 根据股票代码 获取企业基本信息 高管信息
def
getInfo
(
name
,
enname
,
gpdm
,
xydm
,
start
):
def
getInfo
(
enname
,
gpdm
,
xydm
,
start
):
if
'HK'
in
str
(
gpdm
):
tmp_g
=
str
(
gpdm
)
.
split
(
'.'
)[
0
]
if
len
(
tmp_g
)
==
5
:
...
...
@@ -49,17 +45,9 @@ def getInfo(name,enname,gpdm, xydm, start):
gpdm_
=
gpdm
retData
=
{}
retData
[
'base_info'
]
=
{
'公司名称'
:
name
,
'公司名称'
:
en
name
,
'英文名'
:
enname
,
'信用代码'
:
xydm
,
'股票代码'
:
gpdm
,
'地址'
:
''
,
'电话'
:
''
,
'公司网站'
:
''
,
'部门'
:
''
,
'行业'
:
''
,
'员工人数'
:
''
,
'公司简介'
:
''
}
retData
[
'people_info'
]
=
[]
# https://finance.yahoo.com/quote/VOW3.DE/profile?p=VOW3.DE
...
...
@@ -76,22 +64,36 @@ def getInfo(name,enname,gpdm, xydm, start):
log
.
error
(
f
"{gpdm}---第{i}次---获取基本信息接口返回失败:{response.status_code}"
)
except
:
continue
if
(
response
.
status_code
==
200
):
pass
else
:
try
:
if
'lookup'
in
response
.
url
:
log
.
error
(
f
"{gpdm}------股票代码错误:{response.status_code}"
)
exeception
=
'股票代码错误'
state
=
1
takeTime
=
baseCore
.
getTimeCost
(
start
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
0
,
takeTime
,
url
,
exeception
)
return
[
state
,
retData
]
elif
response
.
status_code
!=
200
:
log
.
error
(
f
"{gpdm}------获取基本信息接口重试后依然失败失败:{response.status_code}"
)
exeception
=
'获取基本信息接口返回失败'
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
url
,
exeception
)
baseCore
.
rePutIntoR
(
'BaseInfoEnterprise:gwqy_socialCode'
,
xydm
)
return
[
state
,
retData
]
except
:
log
.
error
(
f
"{gpdm}------获取基本信息接口重试后依然失败失败:{response.status_code}"
)
exeception
=
'获取基本信息接口返回失败'
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
url
,
exeception
)
rePutIntoR
(
''
)
return
[
state
,
retData
]
baseCore
.
rePutIntoR
(
'BaseInfoEnterprise:gwqy_socialCode'
,
xydm
)
return
[
state
,
retData
]
state
=
1
soup
=
BeautifulSoup
(
response
.
content
,
'html.parser'
)
page
=
soup
.
find
(
'div'
,
{
'id'
:
'Col1-0-Profile-Proxy'
})
name
=
page
.
find
(
'h3'
,{
'class'
:
'Fz(m) Mb(10px)'
})
.
text
name
=
page
.
find
(
'h3'
,
{
'class'
:
'Fz(m) Mb(10px)'
})
.
text
try
:
com_info
=
page
.
find
(
'div'
,
{
'class'
:
'Mb(25px)'
})
except
:
...
...
@@ -126,7 +128,7 @@ def getInfo(name,enname,gpdm, xydm, start):
com_jianjie
=
''
dic_com_info
=
{
'公司名称'
:
name
,
'英文名'
:
en
name
,
'英文名'
:
name
,
'信用代码'
:
xydm
,
'股票代码'
:
gpdm
,
'地址'
:
com_address
,
...
...
@@ -189,24 +191,31 @@ def getInfo(name,enname,gpdm, xydm, start):
retData
[
'people_info'
]
=
retPeople
log
.
info
(
f
"获取基本信息--{gpdm},耗时{baseCore.getTimeCost(start, time.time())}"
)
response
.
close
()
return
[
state
,
retData
]
return
[
state
,
retData
]
# 保存基本信息
def
saveBaseInfo
(
info
,
start
):
def
saveBaseInfo
(
info
,
start
):
# 基本信息发送到kafka
company_dict
=
{
'name'
:
info
[
'base_info'
][
'公司名称'
],
# 企业名称
'shortName'
:
''
,
# 企业简称
'socialCreditCode'
:
info
[
'base_info'
][
'信用代码'
],
# 统一社会信用代码
'officialPhone'
:
info
[
'base_info'
][
'电话'
],
# 电话
'officialUrl'
:
info
[
'base_info'
][
'公司网站'
],
# 官网
'briefInfo'
:
info
[
'base_info'
][
'公司简介'
],
# 简介
'industry'
:
info
[
'base_info'
][
'行业'
],
# 所属行业
'englishName'
:
info
[
'base_info'
][
'英文名'
],
# 英文名
'address'
:
info
[
'base_info'
][
'地址'
],
# 地址
'status'
:
0
,
# 状态
}
try
:
company_dict
=
{
'name'
:
info
[
'base_info'
][
'公司名称'
],
# 企业名称
'shortName'
:
''
,
# 企业简称
'socialCreditCode'
:
info
[
'base_info'
][
'信用代码'
],
# 统一社会信用代码
'officialPhone'
:
info
[
'base_info'
][
'电话'
],
# 电话
'officialUrl'
:
info
[
'base_info'
][
'公司网站'
],
# 官网
'briefInfo'
:
info
[
'base_info'
][
'公司简介'
],
# 简介
'industry'
:
info
[
'base_info'
][
'行业'
],
# 所属行业
'englishName'
:
info
[
'base_info'
][
'英文名'
],
# 英文名
'address'
:
info
[
'base_info'
][
'地址'
],
# 地址
'status'
:
0
,
# 状态
}
except
:
company_dict
=
{
'name'
:
info
[
'base_info'
][
'公司名称'
],
# 企业名称
'socialCreditCode'
:
info
[
'base_info'
][
'信用代码'
],
# 统一社会信用代码
'englishName'
:
info
[
'base_info'
][
'英文名'
],
# 英文名
}
# print(company_dict)
producer
=
KafkaProducer
(
bootstrap_servers
=
[
'114.115.159.144:9092'
],
api_version
=
(
2
,
0
,
2
))
kafka_result
=
producer
.
send
(
"regionInfo"
,
json
.
dumps
(
company_dict
,
ensure_ascii
=
False
)
.
encode
(
'utf8'
))
...
...
@@ -216,7 +225,7 @@ def saveBaseInfo(info,start):
# 保存高管信息
def
savePeopleInfo
(
info
,
start
):
def
savePeopleInfo
(
info
,
start
):
# 高管信息调用接口
list_people
=
info
[
'people_info'
]
list_one_info
=
[]
...
...
@@ -240,6 +249,7 @@ def savePeopleInfo(info,start):
json_updata
=
json
.
dumps
(
list_one_info
)
# print(json_updata)
if
json_updata
==
'[]'
:
log
.
info
(
"没有高管"
)
pass
else
:
for
i
in
range
(
0
,
3
):
...
...
@@ -274,18 +284,6 @@ def savePeopleInfo(info,start):
return
state
def
rePutIntoR
(
item
):
r
.
rpush
(
'BaseInfoEnterprise:gwqy_socialCode'
,
item
)
# def getInfomation(social_code):
# sql = f"SELECT * FROM EnterpriseInfo WHERE SocialCode = '{social_code}'"
# cursor.execute(sql)
# data = cursor.fetchone()
# return data
# 采集工作
def
beginWork
():
while
True
:
...
...
@@ -298,65 +296,66 @@ def beginWork():
continue
# 数据库中获取基本信息
data
=
baseCore
.
getInfomation
(
social_code
)
name
=
data
[
1
]
enname
=
data
[
5
]
gpdm
=
data
[
3
]
gpdm
=
'0123'
xydm
=
data
[
2
]
# 获取该企业对应项目的采集次数
count
=
data
[
13
]
start_time
=
time
.
time
()
# 股票代码为空跳过
if
gpdm
is
None
:
log
.
error
(
f
"{name}--股票代码为空 跳过"
)
exception
=
'股票代码为空'
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
exception
)
continue
try
:
retData
=
getInfo
(
name
,
enname
,
gpdm
,
xydm
,
start_time
)
# 基本信息采集成功 进行数据入库,否则不入库
if
retData
[
0
]
==
1
:
# 企业基本信息入库
try
:
saveBaseInfo
(
retData
[
1
],
start_time
)
except
:
log
.
error
(
f
'{name}....企业基本信息Kafka操作失败'
)
exception
=
'Kafka操作失败'
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
exception
)
# 企业高管信息入库
state
=
savePeopleInfo
(
retData
[
1
],
start_time
)
# 只有企业高管信息和企业基本信息都采集到,该企业才算采集成功
if
state
==
1
:
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
''
)
if
gpdm
==
''
:
info
=
{
"base_info"
:
{
'公司名称'
:
enname
,
'英文名'
:
enname
,
'信用代码'
:
xydm
,
}}
log
.
error
(
f
'{xydm}....股票代码为空'
)
try
:
saveBaseInfo
(
info
,
start_time
)
except
:
log
.
error
(
f
'{enname}....企业基本信息Kafka操作失败'
)
exception
=
'Kafka操作失败'
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
exception
)
else
:
try
:
retData
=
getInfo
(
enname
,
gpdm
,
xydm
,
start_time
)
# 基本信息采集成功 进行数据入库,否则不入库
if
retData
[
0
]
==
1
:
# 企业基本信息入库
try
:
saveBaseInfo
(
retData
[
1
],
start_time
)
except
:
log
.
error
(
f
'{enname}....企业基本信息Kafka操作失败'
)
exception
=
'Kafka操作失败'
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
exception
)
# 企业高管信息入库
state
=
savePeopleInfo
(
retData
[
1
],
start_time
)
# 只有企业高管信息和企业基本信息都采集到,该企业才算采集成功
if
state
==
1
:
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
''
)
else
:
pass
else
:
pass
else
:
pass
except
Exception
as
e
:
# 若出现尚未发现的错误,则保存错误信息以及出错位置
ee
=
e
.
__traceback__
.
tb_lineno
log
.
error
(
f
'{name}...{xydm}...{gpdm}.....数据采集失败,原因:{ee}行 {e}'
)
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
f
'数据采集失败,原因:{ee}行 {e}'
)
except
Exception
as
e
:
# 若出现尚未发现的错误,则保存错误信息以及出错位置
ee
=
e
.
__traceback__
.
tb_lineno
log
.
error
(
f
'{enname}...{xydm}...{gpdm}.....数据采集失败,原因:{ee}行 {e}'
)
state
=
0
takeTime
=
baseCore
.
getTimeCost
(
start_time
,
time
.
time
())
baseCore
.
recordLog
(
xydm
,
taskType
,
state
,
takeTime
,
''
,
f
'数据采集失败,原因:{ee}行 {e}'
)
# 企业数据采集完成,采集次数加一
count
+=
1
runType
=
'BaseInfoRunCount'
baseCore
.
updateRun
(
social_code
,
runType
,
count
)
baseCore
.
updateRun
(
social_code
,
runType
,
count
)
# 释放资源
baseCore
.
close
()
if
__name__
==
'__main__'
:
cnx
=
pymysql
.
connect
(
host
=
'114.115.159.144'
,
user
=
'root'
,
password
=
'zzsn9988'
,
db
=
'caiji'
,
charset
=
'utf8mb4'
)
cursor
=
cnx
.
cursor
()
beginWork
()
cursor
.
close
()
cnx
.
close
()
google_comm/googleSpider.py
浏览文件 @
01e0d716
from
urllib.parse
import
urljoin
import
langid
import
pymysql
...
...
@@ -407,12 +408,45 @@ class GoogleSpider(object):
else
:
break
# time.sleep(5)
def
rmTagattr
(
self
,
html
,
url
):
# 使用BeautifulSoup解析网页内容
# soup = BeautifulSoup(html, 'html.parser')
soup
=
self
.
paserUrl
(
html
,
url
)
# 遍历所有标签,并去掉属性
for
tag
in
soup
.
find_all
(
True
):
if
tag
.
name
==
'img'
:
tag
.
attrs
=
{
key
:
value
for
key
,
value
in
tag
.
attrs
.
items
()
if
key
==
'src'
}
elif
tag
.
name
!=
'img'
:
tag
.
attrs
=
{
key
:
value
for
key
,
value
in
tag
.
attrs
.
items
()
if
key
==
'src'
}
else
:
tag
.
attrs
=
{
key
:
value
for
key
,
value
in
tag
.
attrs
.
items
()}
# 打印去掉属性后的网页内容
# print(soup.prettify())
html
=
soup
.
prettify
()
return
html
# 将html中的相对地址转换成绝对地址
def
paserUrl
(
self
,
html
,
listurl
):
soup
=
BeautifulSoup
(
html
,
'html.parser'
)
# 获取所有的<a>标签和<img>标签
links
=
soup
.
find_all
([
'a'
,
'img'
])
# 遍历标签,将相对地址转换为绝对地址
for
link
in
links
:
if
'href'
in
link
.
attrs
:
link
[
'href'
]
=
urljoin
(
listurl
,
link
[
'href'
])
elif
'src'
in
link
.
attrs
:
link
[
'src'
]
=
urljoin
(
listurl
,
link
[
'src'
])
return
soup
#获取资讯内容信息
def
getDetailmsg
(
self
,
detailmsg
):
try
:
detailurl
=
detailmsg
[
'detailUrl'
]
title
=
detailmsg
[
'title'
]
content
,
contentWithTag
=
self
.
extractorMsg
(
detailurl
,
title
)
contentWithTag
=
self
.
rmTagattr
(
contentWithTag
)
except
Exception
as
e
:
content
=
''
contentWithTag
=
''
...
...
sougou_comm/sougouSpider.py
浏览文件 @
01e0d716
...
...
@@ -122,6 +122,7 @@ class SougouSpider(object):
"user-agent"
:
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"
}
# url = 'https://www.sogou.com/link?url=hedJjaC291NbWrwHYHKCyPQj_ei8OKC13fJZ5YRQyvgjcXe6RUhCEXfbi95UdEys0ztd7q5nl6o.'
url
=
f
"https://www.sogou.com{url}"
res
=
requests
.
get
(
url
,
headers
=
header
)
text
=
res
.
text
# 定义正则表达式
...
...
tmp/__init__.py
0 → 100644
浏览文件 @
01e0d716
tmp/usVsRussia/__init__.py
0 → 100644
浏览文件 @
01e0d716
tmp/usVsRussia/ofac.py
0 → 100644
浏览文件 @
01e0d716
#OFAC:美国财政部外国资产控制办公室 (OFAC),数量在200左右,四个类型里的所有带黑点、PDF文件都要。https://ofac.treasury.gov/
# 美国对俄罗斯相关制裁
# 俄罗斯有害外国活动制裁
# https://ofac.treasury.gov/sanctions-programs-and-country-information/russian-harmful-foreign-activities-sanctions
# 乌克兰/俄罗斯有害外国活动制裁
# https://ofac.treasury.gov/sanctions-programs-and-country-information/ukraine-russia-related-sanctions
# 2017年制裁
# https://ofac.treasury.gov/sanctions-programs-and-country-information/countering-americas-adversaries-through-sanctions-act-related-sanctions
# 马格尼茨基制裁
# https://ofac.treasury.gov/sanctions-programs-and-country-information/the-magnitsky-sanctions
import
os
import
pandas
as
pd
import
pymysql
import
requests
from
bs4
import
BeautifulSoup
from
pymysql.converters
import
escape_string
from
selenium.webdriver.common.by
import
By
from
base.BaseCore
import
BaseCore
baseCore
=
BaseCore
()
log
=
baseCore
.
getLogger
()
headers
=
{
'accept'
:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
,
'accept-encoding'
:
'gzip, deflate, br'
,
'accept-language'
:
'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7'
,
'cache-control'
:
'max-age=0'
,
# 'cookie': 'maex=%7B%22v2%22%3A%7B%7D%7D; GUC=AQEBBwFjY49jkEIa8gQo&s=AQAAABw20C7P&g=Y2JIFQ; A1=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A3=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A1S=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc&j=WORLD; PRF=t%3D6954.T%252BTEL%252BSOLB.BR%252BSTM%252BEMR%252BGT%252BAMD%252BSYM.DE%252BPEMEX%252BSGO.PA%252BLRLCF%252BSYNH%252B001040.KS; cmp=t=1669714927&j=0&u=1---',
'sec-ch-ua'
:
'"Chromium";v="106", "Google Chrome";v="106", "Not;A=Brand";v="99"'
,
'sec-ch-ua-mobile'
:
'?0'
,
'sec-ch-ua-platform'
:
"Windows"
,
'sec-fetch-dest'
:
'document'
,
'sec-fetch-mode'
:
'navigate'
,
'sec-fetch-site'
:
'same-origin'
,
'sec-fetch-user'
:
'?1'
,
'upgrade-insecure-requests'
:
'1'
,
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}
# usvsrussia
cnx
=
pymysql
.
connect
(
host
=
'114.115.159.144'
,
user
=
'caiji'
,
password
=
'zzsn9988'
,
db
=
'caiji'
,
charset
=
'utf8mb4'
)
cursor
=
cnx
.
cursor
()
def
job1
():
log
.
info
(
"开始采集----俄罗斯有害外国活动制裁"
)
path
=
r'E:\chromedriver_win32\115\chromedriver.exe'
driverContent
=
baseCore
.
buildDriver
(
path
,
headless
=
False
)
url
=
'https://ofac.treasury.gov/sanctions-programs-and-country-information/russian-harmful-foreign-activities-sanctions'
driverContent
.
get
(
url
)
ftype
=
"Russian Harmful Foreign Activities Sanctions"
# IMPORTANT ADVISORIES AND INFORMATION 重要建议和信息
stype
=
'IMPORTANT ADVISORIES AND INFORMATION'
log
.
info
(
f
"开始采集栏目---{stype}"
)
# //*[@id="node-35986"]/div/ul[1]/li
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[1]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
#a标签
text
=
aEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
liEle
.
text
.
replace
(
text
,
''
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
#log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#Price Cap Policies //*[@id="node-35986"]/div/ul[2]/li
stype
=
'Price Cap Policies'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[2]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# INTERPRETIVE GUIDANCE 解释指导
#INTERPRETIVE GUIDANCE 单独处理
#FREQUENTLY ASKED QUESTIONS 单独处理
#RUSSIAN HARMFUL FOREIGN ACTIVITIES SANCTIONS DIRECTIVES
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="directives"]/ul/li'
)
stype
=
'RUSSIAN HARMFUL FOREIGN ACTIVITIES SANCTIONS DIRECTIVES'
log
.
info
(
f
"开始采集栏目---{stype}"
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#APPLYING FOR A SPECIFIC OFAC LICENSE
#GUIDANCE ON OFAC LICENSING POLICY
stype
=
'GUIDANCE ON OFAC LICENSING POLICY'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[6]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#GENERAL LICENSES
stype
=
'GENERAL LICENSES'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[7]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#Executive Orders
stype
=
'Executive Orders'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[8]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#Determinations
stype
=
'Determinations'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[9]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#Statutes
stype
=
'Statutes'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[10]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#Code of Federal Regulations
#Federal Register Notices
stype
=
'Federal Register Notices'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-35986"]/div/ul[12]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{ftype}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
driverContent
.
close
()
def
job2
():
log
.
info
(
"开始采集----乌克兰-俄罗斯有害外国活动制裁"
)
path
=
r'E:\chromedriver_win32\115\chromedriver.exe'
driverContent
=
baseCore
.
buildDriver
(
path
,
headless
=
False
)
url
=
'https://ofac.treasury.gov/sanctions-programs-and-country-information/ukraine-russia-related-sanctions'
driverContent
.
get
(
url
)
ftype
=
"Ukraine-/Russia-related Sanctions"
# IMPORTANT ADVISORIES
stype
=
'IMPORTANT ADVISORIES'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[1]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# IMPORTANT ADVISORIES
stype
=
'SANCTIONS BROCHURES'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[1]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
aEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#ADDITIONAL UKRAINE-/RUSSIA-RELATED SANCTIONS INFORMATION
#FREQUENTLY ASKED QUESTIONS
#SECTORAL SANCTIONS IDENTIFICATIONS (SSI) LIST
stype
=
'SECTORAL SANCTIONS IDENTIFICATIONS (SSI) LIST'
log
.
info
(
f
"开始采集栏目---{stype}"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="directives"]/ul[1]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# Archived Directives
stype
=
'SECTORAL SANCTIONS IDENTIFICATIONS (SSI) LIST'
log
.
info
(
f
"开始采集栏目---{stype}---Archived Directives"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="directives"]/ul[2]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','Archived Directives',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#INTERPRETIVE GUIDANCE
stype
=
'INTERPRETIVE GUIDANCE'
log
.
info
(
f
"开始采集栏目---{stype}---"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[5]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
#GUIDANCE ON OFAC LICENSING POLICY
stype
=
'GUIDANCE ON OFAC LICENSING POLICY'
log
.
info
(
f
"开始采集栏目---{stype}---"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[7]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
#time = baseCore.getSubStr(text, '(', ')')
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# GENERAL LICENSES
stype
=
'GENERAL LICENSES'
log
.
info
(
f
"开始采集栏目---{stype}---"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[8]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
#time = ''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# Executive Orders
stype
=
'Executive Orders'
log
.
info
(
f
"开始采集栏目---{stype}---"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[9]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
time
=
baseCore
.
getSubStr
(
text
,
'('
,
')'
)
# time = ''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# Determinations
stype
=
'Determinations'
log
.
info
(
f
"开始采集栏目---{stype}---"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[10]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
#time = baseCore.getSubStr(text, '(', ')')
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# Statutes
stype
=
'Statutes'
log
.
info
(
f
"开始采集栏目---{stype}---"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[11]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
# time = baseCore.getSubStr(text, '(', ')')
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
# Federal Register Notices
stype
=
'Federal Register Notices'
log
.
info
(
f
"开始采集栏目---{stype}---"
)
liEles
=
driverContent
.
find_elements
(
By
.
XPATH
,
'//*[@id="node-6416"]/div/ul[13]/li'
)
for
liEle
in
liEles
:
aEle
=
liEle
.
find_element
(
By
.
TAG_NAME
,
'a'
)
# a标签
text
=
liEle
.
text
href
=
aEle
.
get_attribute
(
'href'
)
# time = baseCore.getSubStr(text, '(', ')')
time
=
''
selectCountSql
=
f
"select count(1) from usvsrussia where ftype='{escape_string(ftype)}' and stype='{stype}' and url='{href} '"
cursor
.
execute
(
selectCountSql
)
count
=
cursor
.
fetchone
()[
0
]
if
count
>
0
:
log
.
info
(
"已采集,跳过"
)
continue
else
:
pass
insertSql
=
f
"insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) values ("
\
f
"'美国财政部外国资产控制办公室','{escape_string(ftype)}','{stype}','',"
\
f
"'{href}','{escape_string(text)}','{time}',0)"
# log.info(insertSql)
cursor
.
execute
(
insertSql
)
cnx
.
commit
()
driverContent
.
close
()
def job3():
    """采集 OFAC "2017年制裁" (CAATSA) 页面各栏目的链接并写入 usvsrussia 表。

    Scrapes the CAATSA-related sanctions page of the U.S. Treasury OFAC
    site with a (non-headless) Chrome driver and inserts every link not
    already stored. Relies on module-level ``baseCore``, ``log``,
    ``cursor`` and ``cnx``.
    """
    log.info("开始采集----2017年制裁")
    path = r'E:\chromedriver_win32\115\chromedriver.exe'
    driverContent = baseCore.buildDriver(path, headless=False)

    ftype = "Countering America's Adversaries Through Sanctions Act of 2017 (CAATSA)"

    def _save_item(stype, href, title, pub_time):
        """Insert one (ftype, stype, url) record unless it already exists.

        Uses parameterized SQL so quotes in scraped text cannot break the
        statement (the previous code interpolated unescaped ``href`` and
        ``time`` directly into the query string).
        Returns True when a new row was inserted, False when skipped.
        """
        # FIX: the old duplicate check compared url='{href} ' with a stray
        # trailing space, so it never matched the stored value and every
        # run re-inserted the same rows.
        cursor.execute(
            "select count(1) from usvsrussia "
            "where ftype=%s and stype=%s and url=%s",
            (ftype, stype, href),
        )
        if cursor.fetchone()[0] > 0:
            log.info("已采集,跳过")
            return False
        cursor.execute(
            "insert into usvsrussia "
            "(website,ftype,stype,ttype,url,title,pub_time,state) "
            "values (%s,%s,%s,'',%s,%s,%s,0)",
            ('美国财政部外国资产控制办公室', ftype, stype, href, title, pub_time),
        )
        cnx.commit()
        return True

    def _collect_section(stype, li_xpath, with_time=True):
        """Save every <li><a> matched by ``li_xpath`` under column ``stype``.

        The publication date is the parenthesized part of the <li> text
        when ``with_time`` is True, otherwise left empty (some sections
        carry no date in their item text).
        """
        log.info(f"开始采集栏目---{stype}---")
        for liEle in driverContent.find_elements(By.XPATH, li_xpath):
            aEle = liEle.find_element(By.TAG_NAME, 'a')  # link element (a标签)
            text = liEle.text
            href = aEle.get_attribute('href')
            pub_time = baseCore.getSubStr(text, '(', ')') if with_time else ''
            _save_item(stype, href, text, pub_time)

    try:
        driverContent.get(
            'https://ofac.treasury.gov/sanctions-programs-and-country-information/'
            'countering-americas-adversaries-through-sanctions-act-related-sanctions'
        )

        # The act itself: a fixed, hand-maintained entry (not scraped).
        _save_item(
            'Countering Americas Adversaries Through Sanctions Act-Related Sanctions',
            'https://congress.gov/115/plaws/publ44/PLAW-115publ44.pdf',
            'Countering America’s Adversaries Through Sanctions Act” (Public Law 115-44) (CAATSA)',
            'August 2, 2017',
        )
        _collect_section('Other Documents Related to the Implementation of Section 105',
                         '//*[@id="node-7161"]/div/ul[2]/li')
        _collect_section('Ukraine-/Russia-related Directives',
                         '//*[@id="node-7161"]/div/ul[4]/li')
        _collect_section('ADDITIONAL CAATSA GUIDANCE AND INFORMATION',
                         '//*[@id="node-7161"]/div/ul[6]/li')
        _collect_section('Executive Orders',
                         '//*[@id="node-7161"]/div/ul[8]/li')
        _collect_section('Statutes',
                         '//*[@id="node-7161"]/div/ul[9]/li', with_time=False)
    finally:
        # Always release the browser, even when scraping raises
        # (previously a mid-run exception leaked the Chrome instance).
        driverContent.close()
def job4():
    """采集 OFAC "马格尼茨基制裁" (Magnitsky Sanctions) 页面各栏目的链接并写入 usvsrussia 表。

    Scrapes the Magnitsky Sanctions page of the U.S. Treasury OFAC site
    with a (non-headless) Chrome driver and inserts every link not
    already stored. Relies on module-level ``baseCore``, ``log``,
    ``cursor`` and ``cnx``.
    """
    log.info("开始采集----马格尼茨基制裁")
    path = r'E:\chromedriver_win32\115\chromedriver.exe'
    driverContent = baseCore.buildDriver(path, headless=False)

    ftype = "Magnitsky Sanctions"

    def _save_item(stype, href, title, pub_time):
        """Insert one (ftype, stype, url) record unless it already exists.

        Parameterized SQL replaces the old string interpolation, which
        left ``href`` and ``time`` unescaped. Returns True on insert.
        """
        # FIX: the old duplicate check compared url='{href} ' with a stray
        # trailing space, so it never matched the stored value and every
        # run re-inserted the same rows.
        cursor.execute(
            "select count(1) from usvsrussia "
            "where ftype=%s and stype=%s and url=%s",
            (ftype, stype, href),
        )
        if cursor.fetchone()[0] > 0:
            log.info("已采集,跳过")
            return False
        cursor.execute(
            "insert into usvsrussia "
            "(website,ftype,stype,ttype,url,title,pub_time,state) "
            "values (%s,%s,%s,'',%s,%s,%s,0)",
            ('美国财政部外国资产控制办公室', ftype, stype, href, title, pub_time),
        )
        cnx.commit()
        return True

    def _collect_section(stype, li_xpath, with_time=True):
        """Save every <li><a> matched by ``li_xpath`` under column ``stype``.

        The publication date is the parenthesized part of the <li> text
        when ``with_time`` is True, otherwise left empty.
        """
        log.info(f"开始采集栏目---{stype}---")
        for liEle in driverContent.find_elements(By.XPATH, li_xpath):
            aEle = liEle.find_element(By.TAG_NAME, 'a')  # link element (a标签)
            text = liEle.text
            href = aEle.get_attribute('href')
            pub_time = baseCore.getSubStr(text, '(', ')') if with_time else ''
            _save_item(stype, href, text, pub_time)

    try:
        driverContent.get(
            'https://ofac.treasury.gov/sanctions-programs-and-country-information/'
            'the-magnitsky-sanctions'
        )
        _collect_section('INTERPRETIVE GUIDANCE',
                         '//*[@id="node-6306"]/div/ul[2]/li')
        _collect_section('GUIDANCE ON OFAC LICENSING POLICY',
                         '//*[@id="node-6306"]/div/ul[4]/li', with_time=False)
        _collect_section('Statutes',
                         '//*[@id="node-6306"]/div/ul[5]/li', with_time=False)
        _collect_section('Federal Register Notices',
                         '//*[@id="node-6306"]/div/ul[7]/li', with_time=False)
    finally:
        # Always release the browser, even when scraping raises
        # (previously a mid-run exception leaked the Chrome instance).
        driverContent.close()
if __name__ == '__main__':
    # Entry point: run the four OFAC scraping jobs in sequence, then
    # release shared resources (baseCore helpers and the MySQL cursor /
    # connection) even if one of the jobs raises — previously an
    # exception in any job leaked the DB connection and driver helpers.
    log.info("美国财政部外国资产控制办公室 (OFAC)网站开始采集")
    try:
        job1()
        job2()
        job3()
        job4()
    finally:
        baseCore.close()
        cursor.close()
        cnx.close()
\ No newline at end of file
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论