美国俄罗斯数据采集

22a5f6f6 · 丁双波 · eaa6815d · 22a5f6f6 · 22a5f6f6 · 22a5f6f6
--- a/base/BaseCore.py
+++ b/base/BaseCore.py
@@ -369,7 +369,7 @@ class BaseCore:
        if beginStr=='':
            pass
        else:
-            begin=str.find(beginStr)
+            begin=str.rfind(beginStr)
            if begin==-1:
                begin=0
            str=str[begin:]
@@ -425,11 +425,18 @@ class BaseCore:
        IP = socket.gethostbyname(socket.gethostname())
        return IP

+    def mkPath(self, path):
+        """Create directory ``path`` (and any missing parents) if it is absent."""
+        # exist_ok=True keeps the call idempotent and closes the race between a
+        # separate os.path.exists() check and makedirs(); unlike the old check it
+        # raises FileExistsError when ``path`` already exists as a regular file.
+        os.makedirs(path, exist_ok=True)
    # 生成google模拟浏览器  必须传入值为googledriver位置信息
    # headless用于决定是否为无头浏览器,初始默认为无头浏览器
    # 正常浏览器可用于开始对页面解析使用或一些网站无头时无法正常采集
    # 无头浏览器用于后续对信息采集时不会有浏览器一直弹出，
    def buildDriver(self, path, headless=True):
+
        service = Service(path)
        chrome_options = webdriver.ChromeOptions()
        if headless:
@@ -442,7 +449,7 @@ class BaseCore:

        chrome_options.add_argument('user-agent=' + self.getRandomUserAgent())
        # 'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
-        driver = webdriver.Chrome(chrome_options=chrome_options, service=service)
+        driver = webdriver.Chrome(options=chrome_options, service=service)
        # with open(r'F:\zzsn\zzsn_spider\base\stealth.min.js') as f:
        #     js = f.read()
        #
@@ -578,3 +585,4 @@ class BaseCore:



+
--- a/tmp/__init__.py
+++ b/tmp/__init__.py
--- a/tmp/usVsRussia/__init__.py
+++ b/tmp/usVsRussia/__init__.py
--- a/tmp/usVsRussia/ofac.py
+++ b/tmp/usVsRussia/ofac.py
+#OFAC：美国财政部外国资产控制办公室 (OFAC)，数量在200左右，四个类型里的所有带黑点、PDF文件都要。https://ofac.treasury.gov/
+# 美国对俄罗斯相关制裁
+# 俄罗斯有害外国活动制裁
+# https://ofac.treasury.gov/sanctions-programs-and-country-information/russian-harmful-foreign-activities-sanctions
+# 乌克兰/俄罗斯有害外国活动制裁
+# https://ofac.treasury.gov/sanctions-programs-and-country-information/ukraine-russia-related-sanctions
+# 2017年制裁
+# https://ofac.treasury.gov/sanctions-programs-and-country-information/countering-americas-adversaries-through-sanctions-act-related-sanctions
+# 马格尼茨基制裁
+# https://ofac.treasury.gov/sanctions-programs-and-country-information/the-magnitsky-sanctions
+import os
+
+import pandas as pd
+import pymysql
+import requests
+from bs4 import BeautifulSoup
+from pymysql.converters import escape_string
+from selenium.webdriver.common.by import By
+
+from base.BaseCore import BaseCore
+
+# Shared project helper; the jobs below use it for logging, building the
+# Selenium driver and substring extraction.
+baseCore = BaseCore()
+log =baseCore.getLogger()
+# Browser-like HTTP request headers (Chrome 106 on Windows).
+# NOTE(review): not referenced by the jobs visible in this part of the file;
+# presumably used by requests-based code further down - confirm before removing.
+headers = {
+    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
+    'accept-encoding': 'gzip, deflate, br',
+    'accept-language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
+    'cache-control': 'max-age=0',
+    # 'cookie': 'maex=%7B%22v2%22%3A%7B%7D%7D; GUC=AQEBBwFjY49jkEIa8gQo&s=AQAAABw20C7P&g=Y2JIFQ; A1=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A3=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A1S=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc&j=WORLD; PRF=t%3D6954.T%252BTEL%252BSOLB.BR%252BSTM%252BEMR%252BGT%252BAMD%252BSYM.DE%252BPEMEX%252BSGO.PA%252BLRLCF%252BSYNH%252B001040.KS; cmp=t=1669714927&j=0&u=1---',
+    'sec-ch-ua': '"Chromium";v="106", "Google Chrome";v="106", "Not;A=Brand";v="99"',
+    'sec-ch-ua-mobile': '?0',
+    'sec-ch-ua-platform': "Windows",
+    'sec-fetch-dest': 'document',
+    'sec-fetch-mode': 'navigate',
+    'sec-fetch-site': 'same-origin',
+    'sec-fetch-user': '?1',
+    'upgrade-insecure-requests': '1',
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
+}
+# Module-level MySQL connection and cursor shared by every job; all rows go to
+# the `usvsrussia` table. The connection is opened as a side effect of import.
+# NOTE(review): host, user and password are hard-coded in source control -
+# move them to configuration / environment variables and rotate the password.
+cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
+                      charset='utf8mb4')
+cursor = cnx.cursor()
+# ---------------------------------------------------------------------------
+# Shared settings and helpers for the OFAC collection jobs below.
+# ---------------------------------------------------------------------------
+# Location of the chromedriver binary handed to baseCore.buildDriver().
+_CHROMEDRIVER_PATH = r'E:\chromedriver_win32\115\chromedriver.exe'
+# Value stored in the `website` column for every row written by these jobs.
+_WEBSITE = '美国财政部外国资产控制办公室'
+# Parameterized statements: values are passed separately to cursor.execute(),
+# so quotes in scraped titles/URLs can no longer break or inject into the SQL,
+# and the URL is compared exactly (the old query appended a stray space).
+_SELECT_COUNT_SQL = "select count(1) from usvsrussia where ftype=%s and stype=%s and url=%s"
+_INSERT_SQL = "insert into  usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) " \
+              "values (%s,%s,%s,%s,%s,%s,%s,0)"
+# Ways of deriving pub_time from a list item:
+_TIME_NONE = 'none'            # store an empty string
+_TIME_PAREN = 'paren'          # baseCore.getSubStr(<li text>, '(', ')')
+_TIME_REMAINDER = 'remainder'  # <li text> with the link text removed
+
+
+def _saveRecord(ftype, stype, ttype, href, title, pubTime):
+    """Insert one row into `usvsrussia` unless it was collected before.
+
+    A row counts as already collected when a row with the same
+    (ftype, stype, url) exists; `ttype` is not part of that key.
+
+    :param ftype: program name (first-level category).
+    :param stype: section heading (second-level category).
+    :param ttype: optional third-level category, '' when unused.
+    :param href: absolute URL of the document.
+    :param title: text stored in the `title` column.
+    :param pubTime: text stored in the `pub_time` column.
+    :return: True when a row was inserted, False when it was skipped.
+    """
+    cursor.execute(_SELECT_COUNT_SQL, (ftype, stype, href))
+    if cursor.fetchone()[0] > 0:
+        log.info("已采集，跳过")
+        return False
+    cursor.execute(_INSERT_SQL, (_WEBSITE, ftype, stype, ttype, href, title, pubTime))
+    cnx.commit()
+    return True
+
+
+def _collectSection(driver, ftype, stype, xpath, ttype='', timeMode=_TIME_PAREN,
+                    titleFromAnchor=False, logSuffix=''):
+    """Store every linked list item matched by `xpath` on the loaded page.
+
+    :param driver: Selenium driver that already has the program page open.
+    :param ftype: program name stored with each row.
+    :param stype: section heading stored with each row.
+    :param xpath: XPath selecting the <li> elements of the section.
+    :param ttype: optional third-level category stored with each row.
+    :param timeMode: one of _TIME_NONE / _TIME_PAREN / _TIME_REMAINDER.
+    :param titleFromAnchor: store the link text as title instead of the
+        whole list-item text.
+    :param logSuffix: text appended to the "section started" log line.
+    """
+    log.info(f"开始采集栏目---{stype}{logSuffix}")
+    for liEle in driver.find_elements(By.XPATH, xpath):
+        # find_elements() returns [] instead of raising, so one list item
+        # without a link no longer aborts the whole job.
+        anchors = liEle.find_elements(By.TAG_NAME, 'a')
+        href = anchors[0].get_attribute('href') if anchors else None
+        if not href:
+            log.info("未找到链接，跳过")
+            continue
+        liText = liEle.text
+        anchorText = anchors[0].text
+        title = anchorText if titleFromAnchor else liText
+        if timeMode == _TIME_PAREN:
+            pubTime = baseCore.getSubStr(liText, '(', ')')
+        elif timeMode == _TIME_REMAINDER:
+            pubTime = liText.replace(anchorText, '')
+        else:
+            pubTime = ''
+        _saveRecord(ftype, stype, ttype, href, title, pubTime)
+
+
+def job1():
+    """Collect the "Russian Harmful Foreign Activities Sanctions" program page.
+
+    Opens the page in a (non-headless) Chrome driver and stores the links of
+    each listed section in `usvsrussia`. The driver is always shut down, even
+    when scraping or a database call fails.
+    """
+    log.info("开始采集----俄罗斯有害外国活动制裁")
+    driverContent = baseCore.buildDriver(_CHROMEDRIVER_PATH, headless=False)
+    try:
+        url = 'https://ofac.treasury.gov/sanctions-programs-and-country-information/russian-harmful-foreign-activities-sanctions'
+        driverContent.get(url)
+        ftype = "Russian Harmful Foreign Activities Sanctions"
+        # The sections are consecutive <ul> lists inside the page's node div.
+        ulPath = '//*[@id="node-35986"]/div/ul[{}]/li'
+        # IMPORTANT ADVISORIES AND INFORMATION: title is the link text and
+        # pub_time is whatever else the list item contains.
+        _collectSection(driverContent, ftype, 'IMPORTANT ADVISORIES AND INFORMATION',
+                        ulPath.format(1), timeMode=_TIME_REMAINDER, titleFromAnchor=True)
+        _collectSection(driverContent, ftype, 'Price Cap Policies', ulPath.format(2))
+        # INTERPRETIVE GUIDANCE and FREQUENTLY ASKED QUESTIONS are handled separately.
+        _collectSection(driverContent, ftype,
+                        'RUSSIAN HARMFUL FOREIGN ACTIVITIES SANCTIONS DIRECTIVES',
+                        '//*[@id="directives"]/ul/li')
+        # No items are collected for APPLYING FOR A SPECIFIC OFAC LICENSE in this job.
+        _collectSection(driverContent, ftype, 'GUIDANCE ON OFAC LICENSING POLICY',
+                        ulPath.format(6), timeMode=_TIME_NONE)
+        _collectSection(driverContent, ftype, 'GENERAL LICENSES', ulPath.format(7))
+        _collectSection(driverContent, ftype, 'Executive Orders', ulPath.format(8))
+        _collectSection(driverContent, ftype, 'Determinations', ulPath.format(9))
+        _collectSection(driverContent, ftype, 'Statutes', ulPath.format(10),
+                        timeMode=_TIME_NONE)
+        # No items are collected for Code of Federal Regulations in this job.
+        _collectSection(driverContent, ftype, 'Federal Register Notices',
+                        ulPath.format(12), timeMode=_TIME_NONE)
+    finally:
+        # quit() (rather than close()) also ends the chromedriver process.
+        driverContent.quit()
+
+
+def job2():
+    """Collect the "Ukraine-/Russia-related Sanctions" program page.
+
+    Same procedure as job1 for a different page; archived directives are
+    stored with ttype 'Archived Directives'.
+    """
+    log.info("开始采集----乌克兰-俄罗斯有害外国活动制裁")
+    driverContent = baseCore.buildDriver(_CHROMEDRIVER_PATH, headless=False)
+    try:
+        url = 'https://ofac.treasury.gov/sanctions-programs-and-country-information/ukraine-russia-related-sanctions'
+        driverContent.get(url)
+        ftype = "Ukraine-/Russia-related Sanctions"
+        ulPath = '//*[@id="node-6416"]/div/ul[{}]/li'
+        _collectSection(driverContent, ftype, 'IMPORTANT ADVISORIES', ulPath.format(1))
+        # NOTE(review): SANCTIONS BROCHURES reads the same ul[1] list as
+        # IMPORTANT ADVISORIES above, which looks like a copy-paste slip; the
+        # XPath is kept unchanged until the real list index is confirmed on the page.
+        _collectSection(driverContent, ftype, 'SANCTIONS BROCHURES', ulPath.format(1),
+                        timeMode=_TIME_NONE, titleFromAnchor=True)
+        # No items are collected for ADDITIONAL UKRAINE-/RUSSIA-RELATED SANCTIONS
+        # INFORMATION or FREQUENTLY ASKED QUESTIONS in this job.
+        _collectSection(driverContent, ftype,
+                        'SECTORAL SANCTIONS IDENTIFICATIONS (SSI) LIST',
+                        '//*[@id="directives"]/ul[1]/li')
+        # Archived directives share the SSI stype and are tagged through ttype.
+        _collectSection(driverContent, ftype,
+                        'SECTORAL SANCTIONS IDENTIFICATIONS (SSI) LIST',
+                        '//*[@id="directives"]/ul[2]/li',
+                        ttype='Archived Directives', timeMode=_TIME_NONE,
+                        logSuffix='---Archived Directives')
+        _collectSection(driverContent, ftype, 'INTERPRETIVE GUIDANCE', ulPath.format(5),
+                        logSuffix='---')
+        _collectSection(driverContent, ftype, 'GUIDANCE ON OFAC LICENSING POLICY',
+                        ulPath.format(7), timeMode=_TIME_NONE, logSuffix='---')
+        _collectSection(driverContent, ftype, 'GENERAL LICENSES', ulPath.format(8),
+                        logSuffix='---')
+        _collectSection(driverContent, ftype, 'Executive Orders', ulPath.format(9),
+                        logSuffix='---')
+        _collectSection(driverContent, ftype, 'Determinations', ulPath.format(10),
+                        timeMode=_TIME_NONE, logSuffix='---')
+        _collectSection(driverContent, ftype, 'Statutes', ulPath.format(11),
+                        timeMode=_TIME_NONE, logSuffix='---')
+        _collectSection(driverContent, ftype, 'Federal Register Notices',
+                        ulPath.format(13), timeMode=_TIME_NONE, logSuffix='---')
+    finally:
+        # quit() (rather than close()) also ends the chromedriver process.
+        driverContent.quit()
+
+
+def job3():
+    """Collect the CAATSA (2017 sanctions act) program page.
+
+    Stores one fixed record for the act itself (Public Law 115-44) and then
+    the links of each listed section, as job1 does.
+    """
+    log.info("开始采集----2017年制裁")
+    driverContent = baseCore.buildDriver(_CHROMEDRIVER_PATH, headless=False)
+    try:
+        url = 'https://ofac.treasury.gov/sanctions-programs-and-country-information/countering-americas-adversaries-through-sanctions-act-related-sanctions'
+        driverContent.get(url)
+        ftype = "Countering America's Adversaries Through Sanctions Act of 2017 (CAATSA)"
+        # The act itself is recorded from fixed values, not scraped from the page.
+        _saveRecord(ftype,
+                    'Countering Americas Adversaries Through Sanctions Act-Related Sanctions',
+                    '',
+                    "https://congress.gov/115/plaws/publ44/PLAW-115publ44.pdf",
+                    "Countering America’s Adversaries Through Sanctions Act” (Public Law 115-44) (CAATSA)",
+                    'August 2, 2017')
+        ulPath = '//*[@id="node-7161"]/div/ul[{}]/li'
+        _collectSection(driverContent, ftype,
+                        'Other Documents Related to the Implementation of Section 105',
+                        ulPath.format(2), logSuffix='---')
+        _collectSection(driverContent, ftype, 'Ukraine-/Russia-related Directives',
+                        ulPath.format(4), logSuffix='---')
+        _collectSection(driverContent, ftype, 'ADDITIONAL CAATSA GUIDANCE AND INFORMATION',
+                        ulPath.format(6), logSuffix='---')
+        _collectSection(driverContent, ftype, 'Executive Orders', ulPath.format(8),
+                        logSuffix='---')
+        _collectSection(driverContent, ftype, 'Statutes', ulPath.format(9),
+                        timeMode=_TIME_NONE, logSuffix='---')
+    finally:
+        # quit() (rather than close()) also ends the chromedriver process.
+        driverContent.quit()
+def _job4_collect_section(driver, ftype, stype, xpath, parse_time):
+    """Store every linked <li> item of one page section into `usvsrussia`.
+
+    Args:
+        driver: Selenium WebDriver with the target page already loaded.
+        ftype: first-level category stored with each row.
+        stype: second-level category (the section heading) stored with each row.
+        xpath: XPath selecting the <li> elements of the section.
+        parse_time: when True, the publication time is taken from the item
+            text via baseCore.getSubStr(text, '(', ')'); otherwise it is
+            stored as an empty string.
+    """
+    log.info(f"开始采集栏目---{stype}---")
+    # Parameterized statements: the driver does the quoting, so URLs, titles
+    # or dates containing quotes can no longer break (or inject into) the SQL.
+    # NOTE(review): assumes a DB-API cursor using the %s paramstyle
+    # (PyMySQL / mysql-connector) - confirm against where `cursor` is created.
+    select_sql = "select count(1) from usvsrussia where ftype=%s and stype=%s and url=%s"
+    insert_sql = (
+        "insert into usvsrussia (website,ftype,stype,ttype,url,title,pub_time,state) "
+        "values (%s,%s,%s,%s,%s,%s,%s,%s)"
+    )
+    for li_ele in driver.find_elements(By.XPATH, xpath):
+        # find_elements returns an empty list instead of raising, so a bullet
+        # without a link is skipped rather than aborting the whole job.
+        links = li_ele.find_elements(By.TAG_NAME, 'a')
+        href = links[0].get_attribute('href') if links else None
+        if not href:
+            log.info("未找到链接，跳过")
+            continue
+        text = li_ele.text
+        pub_time = baseCore.getSubStr(text, '(', ')') if parse_time else ''
+        # Dedup on the exact URL. The previous query compared against
+        # href + ' ' (stray space inside the quotes), which does not match
+        # stored rows on NO PAD collations and led to duplicate inserts.
+        cursor.execute(select_sql, (ftype, stype, href))
+        if cursor.fetchone()[0] > 0:
+            log.info("已采集，跳过")
+            continue
+        cursor.execute(
+            insert_sql,
+            ('美国财政部外国资产控制办公室', ftype, stype, '', href, text, pub_time, 0),
+        )
+        cnx.commit()
+
+
+def job4():
+    """Collect the OFAC "Magnitsky Sanctions" page into `usvsrussia`.
+
+    Opens the page in a visible Chrome instance and stores the linked items
+    of four sections (Interpretive Guidance, Guidance on OFAC Licensing
+    Policy, Statutes, Federal Register Notices). Items whose URL is already
+    stored for the same ftype/stype are skipped.
+    """
+    log.info("开始采集----马格尼茨基制裁")
+    path = r'E:\chromedriver_win32\115\chromedriver.exe'
+    driverContent = baseCore.buildDriver(path, headless=False)
+    try:
+        url = 'https://ofac.treasury.gov/sanctions-programs-and-country-information/the-magnitsky-sanctions'
+        driverContent.get(url)
+        ftype = "Magnitsky Sanctions"
+        # (section name, XPath of its <li> items, whether the item text
+        # carries a "(date)" that should be stored as pub_time)
+        sections = (
+            ('INTERPRETIVE GUIDANCE', '//*[@id="node-6306"]/div/ul[2]/li', True),
+            ('GUIDANCE ON OFAC LICENSING POLICY', '//*[@id="node-6306"]/div/ul[4]/li', False),
+            ('Statutes', '//*[@id="node-6306"]/div/ul[5]/li', False),
+            ('Federal Register Notices', '//*[@id="node-6306"]/div/ul[7]/li', False),
+        )
+        for stype, xpath, parse_time in sections:
+            _job4_collect_section(driverContent, ftype, stype, xpath, parse_time)
+    finally:
+        # quit() ends the WebDriver session and the chromedriver process even
+        # when scraping fails; close() only closes the current window.
+        driverContent.quit()
+
+if __name__ == '__main__':
+    # Script entry point: run the four OFAC collection jobs in order.
+    log.info("美国财政部外国资产控制办公室 (OFAC)网站开始采集")
+    try:
+        job1()
+        job2()
+        job3()
+        job4()
+    finally:
+        # Release shared resources even if a job raised. Keeping this inside
+        # the __main__ guard means a plain import of the module no longer
+        # closes the cursor/connection it has just set up.
+        baseCore.close()
+        cursor.close()
+        cnx.close()
\ No newline at end of file