下载pdf文件

0c9bb0cd · 丁双波 · 8be2a4ec · 0c9bb0cd · 0c9bb0cd
--- a/tmp/usVsRussia/downPdf.py
+++ b/tmp/usVsRussia/downPdf.py
+#下载pdf文件
+import os
+from datetime import time
+
+import pymysql
+import requests
+import urllib3
+from pymysql.converters import escape_string
+
+from base.BaseCore import BaseCore
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+
+# Request headers sent with every download made by downFile(). The values
+# imitate a desktop Chrome 106 browser on Windows (see 'user-agent' and the
+# 'sec-ch-ua*' entries).
+headers = {
+    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
+    'accept-encoding': 'gzip, deflate, br',
+    'accept-language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
+    'cache-control': 'max-age=0',
+    # Cookie header is intentionally disabled; kept only as a sample value.
+    # 'cookie': 'maex=%7B%22v2%22%3A%7B%7D%7D; GUC=AQEBBwFjY49jkEIa8gQo&s=AQAAABw20C7P&g=Y2JIFQ; A1=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A3=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc; A1S=d=AQABBBIpnmICEOnPTXZVmK6DESXgxq3niTMFEgEBBwGPY2OQYysNb2UB_eMBAAcIEimeYq3niTM&S=AQAAAobGawhriFKqJdu9-rSz9nc&j=WORLD; PRF=t%3D6954.T%252BTEL%252BSOLB.BR%252BSTM%252BEMR%252BGT%252BAMD%252BSYM.DE%252BPEMEX%252BSGO.PA%252BLRLCF%252BSYNH%252B001040.KS; cmp=t=1669714927&j=0&u=1---',
+    'sec-ch-ua': '"Chromium";v="106", "Google Chrome";v="106", "Not;A=Brand";v="99"',
+    'sec-ch-ua-mobile': '?0',
+    'sec-ch-ua-platform': "Windows",
+    'sec-fetch-dest': 'document',
+    'sec-fetch-mode': 'navigate',
+    'sec-fetch-site': 'same-origin',
+    'sec-fetch-user': '?1',
+    'upgrade-insecure-requests': '1',
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
+}
+
+# Shared project helper. In this file it supplies the logger, directory
+# creation (mkPath), fallback file-name sequence numbers (getNextSeq) and a
+# close() hook called at the end of the script.
+baseCore = BaseCore()
+log =baseCore.getLogger()
+# The MySQL connection and cursor are created at import time and shared by the
+# whole script; they are closed at the end of the __main__ section.
+# NOTE(review): host, user and password are hard-coded in source control --
+# consider loading them from configuration or environment variables.
+cnx = pymysql.connect(host='114.115.159.144', user='caiji', password='zzsn9988', db='caiji',
+                      charset='utf8mb4')
+cursor = cnx.cursor()
+
+def get_file_name(headers):
+    """Choose a local file name for a downloaded response.
+
+    Scans every parameter of the ``Content-Disposition`` header for
+    ``filename=...`` (not just the first one after the disposition type),
+    removes surrounding quotes, and keeps only the last path component so a
+    server-supplied value such as ``../../x.pdf`` cannot escape the target
+    directory.
+
+    Args:
+        headers: Mapping of response headers (for example the ``headers``
+            attribute of a ``requests`` response).
+
+    Returns:
+        The file name taken from the header, or
+        ``baseCore.getNextSeq() + ".pdf"`` when the header is missing or
+        carries no usable name.
+    """
+    disposition = headers.get('Content-Disposition') or ''
+    filename = ''
+    # The first segment is the disposition type (inline/attachment), not a
+    # parameter, so it is skipped.
+    for segment in disposition.split(';')[1:]:
+        # partition() splits on the first '=' only, so names that contain
+        # '=' themselves are preserved in full.
+        key, sep, value = segment.strip().partition('=')
+        if not sep or key.strip().lower() != 'filename':
+            continue
+        value = value.strip().strip('"\'')
+        # Drop any directory part, whichever separator the server used.
+        filename = value.replace('\\', '/').rsplit('/', 1)[-1].strip()
+        if filename:
+            break
+    if filename in ('', '.', '..'):
+        return baseCore.getNextSeq()+".pdf"
+    return filename
+
+def downFile(url,path):
+    """Download ``url`` through the local proxy and save it under ``path``.
+
+    The target directory is created via ``baseCore.mkPath``. The file name
+    comes from ``get_file_name`` applied to the response headers.
+
+    Args:
+        url: Address of the document to fetch.
+        path: Directory in which the file is stored.
+
+    Returns:
+        The name of the saved file on success, or ``False`` when anything
+        fails (network error, HTTP error status, unusable file name, write
+        error). Failures are logged, never raised.
+    """
+    try:
+        baseCore.mkPath(path)
+        proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}
+        # NOTE(review): verify=False disables TLS certificate validation.
+        response = requests.get(url, proxies=proxy, headers=headers, verify=False,timeout=10)
+        # Treat 4xx/5xx as failure so an HTML error page is never stored and
+        # reported as a successfully downloaded PDF.
+        response.raise_for_status()
+        # The name is chosen by the remote server; keep only its last path
+        # component so the file cannot be written outside ``path``.
+        fileName = os.path.basename(get_file_name(response.headers))
+        if not fileName:
+            raise ValueError(f"unusable file name for {url}")
+        with open(os.path.join(path, fileName), "wb") as pyFile:
+            # The body is already fully loaded (no stream=True), so one
+            # write is equivalent to iterating over chunks.
+            pyFile.write(response.content)
+    except Exception as e:
+        log.error(f"出错了----------{e}")
+        return False
+    return fileName
+if __name__ == '__main__':
+    # Root directory under which the per-website / per-type folders live.
+    base_path = r'D:\美国VS俄罗斯制裁'
+    try:
+        # Process pending rows (state=0) one at a time, oldest id first.
+        # Each row ends up with state=1 (downloaded) or state=2 (failed), so
+        # the loop terminates once no state=0 row remains.
+        while True:
+            selectSql = "select id,url,website,ftype,stype,ttype from usvsrussia where state=0  order by id asc limit 1"
+            cursor.execute(selectSql)
+            data = cursor.fetchone()
+            if not data:
+                log.info("数据处理完毕，程序退出")
+                break
+            row_id, url, website, ftype, stype, ttype = data
+            log.info(f"开始处理{url}----")
+            # Nest the target directory by every non-empty category column.
+            path = os.path.join(base_path, *[part for part in (website, ftype, stype, ttype) if part])
+
+            fileName = downFile(url, path)
+            if fileName:
+                # Parameterized query: the file name comes from a remote
+                # server and must never be interpolated into SQL text.
+                cursor.execute(
+                    "update usvsrussia set state=1,pdf_name=%s,pdf_path=%s where id=%s",
+                    (fileName, path, row_id),
+                )
+                log.info(f"开始处理{url}----处理ok")
+            else:
+                cursor.execute("update usvsrussia set state=2 where id=%s", (row_id,))
+                log.info(f"开始处理{url}----处理error")
+            cnx.commit()
+        url = 'https://ofac.treasury.gov/media/931946/download?inline'
+        log.info(f"{url}----开始下载")
+        # The original call omitted the required ``path`` argument and raised
+        # TypeError; the file is stored in the root download directory.
+        downFile(url, base_path)
+        log.info(f"{url}----开始下载，下载完成")
+    finally:
+        # Release resources even when the loop above raises.
+        baseCore.close()
+        cursor.close()
+        cnx.close()
\ No newline at end of file
--- a/tmp/usVsRussia/ofac.py
+++ b/tmp/usVsRussia/ofac.py