提交 fe5f0932 作者: 刘伟刚

python相关脚本提交

上级 a5dbff9a
...@@ -15,6 +15,23 @@ def conn11(): ...@@ -15,6 +15,23 @@ def conn11():
return conn,cursor return conn,cursor
#企业公告 #企业公告
def shizhiCodeFromSql():
    """Load securities codes from MySQL and append them to a Redis list.

    Selects ``securities_code`` from ``sys_base_enterprise_ipo`` for rows
    whose ``category`` is '4', '5' or '6' and pushes every code onto the
    Redis list ``NoticeEnterprise:shizhi_code``.

    Any error raised by the query or by Redis is logged (including the
    error text) and swallowed, so a scheduler calling this function keeps
    running.  The cursor and connection are always closed.
    """
    conn, cursor = conn11()
    try:
        gn_query = "select securities_code from sys_base_enterprise_ipo where category in ('4','5','6') "
        cursor.execute(gn_query)
        # Each row is a 1-tuple; keep only the code itself.
        codes = [row[0] for row in cursor.fetchall()]
        print('shizhiCodeFromSql开始将股票代码放入redis=======')
        for code in codes:
            r.rpush('NoticeEnterprise:shizhi_code', code)
        print('shizhiCodeFromSql将股票代码放入redis结束')
    except Exception as e:
        # Include the error itself: the bare message alone gives no way to
        # tell a failed query from a failed Redis push.
        log.info(f"数据查询异常: {e}")
    finally:
        cursor.close()
        conn.close()
#企业公告
def yahooCodeFromSql(): def yahooCodeFromSql():
conn,cursor=conn11() conn,cursor=conn11()
try: try:
...@@ -22,10 +39,10 @@ def yahooCodeFromSql(): ...@@ -22,10 +39,10 @@ def yahooCodeFromSql():
cursor.execute(gn_query) cursor.execute(gn_query)
gn_result = cursor.fetchall() gn_result = cursor.fetchall()
gn_social_list = [item[0] for item in gn_result] gn_social_list = [item[0] for item in gn_result]
print('=======') print('yahooCodeFromSql开始将股票代码放入redis=======')
for item in gn_social_list: for item in gn_social_list:
r.rpush('NoticeEnterprise:securities_code', item) r.rpush('NoticeEnterprise:securities_code', item)
print('将股票代码放入redis结束') print('yahooCodeFromSql将股票代码放入redis结束')
except Exception as e: except Exception as e:
log.info("数据查询异常") log.info("数据查询异常")
finally: finally:
...@@ -38,28 +55,23 @@ def yahooCode_task(): ...@@ -38,28 +55,23 @@ def yahooCode_task():
# 每天执行一次 # 每天执行一次
# scheduler.add_job(yahooCodeFromSql, 'cron', hour=0,minute=0) # scheduler.add_job(yahooCodeFromSql, 'cron', hour=0,minute=0)
#3天执行一次 #3天执行一次
scheduler.add_job(yahooCodeFromSql, 'interval', days=3) scheduler.add_job(yahooCodeFromSql, 'cron', day='*/3', hour=0, minute=0)
# 每天执行一次
scheduler.add_job(shizhiCodeFromSql, 'cron', hour=10,minute=0)
try: try:
yahooCodeFromSql() # 定时开始前执行一次 yahooCodeFromSql() # 定时开始前执行一次
shizhiCodeFromSql() # 定时开始前执行一次
scheduler.start() scheduler.start()
except Exception as e: except Exception as e:
print('定时采集异常', e) print('定时采集异常', e)
pass pass
if __name__ == "__main__": if __name__ == "__main__":
start = time.time() start = time.time()
# NoticeEnterprise()
# AnnualEnterpriseIPO()
# AnnualEnterprise()
# BaseInfoEnterpriseAbroad()
# NewsEnterprise_task()
# NewsEnterprise()
# BaseInfoEnterprise()
# FBS()
# NoticeEnterprise_task()
# AnnualEnterprise_task()
# NoticeEnterprise()
yahooCode_task() yahooCode_task()
# yahooShizhiCode_task()
log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===') log.info(f'====={basecore.getNowTime(1)}=====添加数据成功======耗时:{basecore.getTimeCost(start,time.time())}===')
# cnx.close() # cnx.close()
# cursor.close() # cursor.close()
......
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# @Author: MENG
# @Time : 2022-4-9
import xlrd
from selenium.webdriver.support.wait import WebDriverWait
from tqdm import tqdm
import pymongo
import pymysql
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
import json
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import re
import datetime
"""
将请求链接分成三个
https://finance.yahoo.com/quote/WMT/financials?p=WMT
https://finance.yahoo.com/quote/WMT/balance-sheet?p=WMT
https://finance.yahoo.com/quote/WMT/cash-flow?p=WMT
雅虎财务数据流程修改
1.从sys_base_enterprise_ipo获取到国外上市和台湾企业的股票代码 category 5,6
2.从雅虎财经上请求获取财务数据和币种单位。
3.将数据插入更新到表config_finance_data_sync
信息更新的字段
social_credit_code
name
stock_code
content
level_relation
unit
create_time
4.将采集结果通知接口进行拉取数据处理
"""
# 雅虎财经处理表格
# Number of nesting levels followed below each top-level row of the table.
_MAX_NESTED_LEVELS = 6


def _split_row_header(row):
    """Return ``(label, values)`` taken from the first child of *row*.

    The text of the first child is split on newlines: the first piece is the
    row label, the remaining pieces are the row's values.
    """
    pieces = pq(pq(row).children()[0]).text().split('\n')
    return pieces[0], pieces[1:]


def _collect_child_rows(row, content_dict, levels_left):
    """Record the rows nested under *row* and return their catalogue.

    The nested rows are the elements obtained by iterating over the second
    child of *row*.  Every nested row with a non-empty label is stored in
    *content_dict* (label -> values) and in the returned dict
    (label -> catalogue of its own nested rows).  *levels_left* bounds the
    descent: at the last level the nested catalogue is always ``{}`` and the
    row's children are not inspected.
    """
    catalogue = {}
    for child in pq(row).children()[1]:
        label, values = _split_row_header(child)
        if not label:
            continue
        content_dict[label] = values
        if levels_left > 1:
            catalogue[label] = _collect_child_rows(child, content_dict, levels_left - 1)
        else:
            catalogue[label] = {}
    return catalogue


def deal_table(doc_resp):
    """Parse one Yahoo Finance financial-statement table.

    :param doc_resp: PyQuery document of the loaded page.
    :return: dict with three entries:
        ``'表头'`` - list of header cell texts,
        ``'目录'`` - nested dict describing the row hierarchy
        (label -> dict of child labels),
        ``'内容'`` - flat dict mapping every row label to its list of values.
    :raises IndexError: if neither candidate selector yields an element with
        a header child and a rows child, or a row lacks the expected
        children.
    """
    primary = '#Col1-1-Financials-Proxy section div:nth-child(3)>div>div'
    fallback = '#Col1-1-Financials-Proxy section div:nth-child(4)>div>div'

    table_parts = doc_resp(primary).children()
    # Use the fallback position both when the primary table has no rows and
    # when the primary selector does not yield a header/rows pair at all
    # (indexing it directly would raise before the fallback could be tried).
    if len(table_parts) < 2 or len(pq(table_parts[1]).children()) < 1:
        table_parts = doc_resp(fallback).children()
    catalogue_title = pq(table_parts[0]).text().split('\n')
    doc_items = pq(table_parts[1]).children()

    catalogue_dict = {}
    content_dict = {}
    for doc_item in doc_items:
        # Top-level rows are skipped only when they carry no text at all.
        if pq(doc_item).text() == '':
            continue
        label, values = _split_row_header(doc_item)
        content_dict[label] = values
        catalogue_dict[label] = _collect_child_rows(doc_item, content_dict, _MAX_NESTED_LEVELS)

    return {
        '表头': catalogue_title,
        '目录': catalogue_dict,
        '内容': content_dict,
    }
def get_webdriver():
    """Create and return a Chrome WebDriver for the scraper.

    The browser binary and the chromedriver executable are taken from fixed
    Windows paths.  The ``enable-automation`` switch is excluded and the
    ``AutomationControlled`` blink feature is disabled; headless mode is not
    enabled.
    """
    options = webdriver.ChromeOptions()
    options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    command_line_flags = (
        '--disable-gpu',
        '--ignore-certificate-errors',
        "--disable-blink-features=AutomationControlled",
        "--start-maximized",
    )
    for flag in command_line_flags:
        options.add_argument(flag)
    # NOTE(review): the `executable_path` keyword is deprecated in Selenium 4
    # and removed in later 4.x releases - confirm the pinned Selenium version.
    return webdriver.Chrome(
        options=options,
        executable_path=r'D:\yahoo\yh2\cmd6\chromedriver.exe',
    )
# 雅虎财经
# "Expand All" toggle as located right after the page is first loaded
# (anchored to the financials container) and after switching tabs.
_FIRST_EXPAND_ALL_XPATH = '//div[@id="Col1-1-Financials-Proxy"]/section/div[2]/button/div/span[text()="Expand All"]'
_EXPAND_ALL_XPATH = '//div/span[text()="Expand All"]'
# Labels clicked, in order, after the initially displayed table is parsed.
_FOLLOW_UP_TABS = ('Quarterly', 'Balance Sheet', 'Quarterly', 'Cash Flow', 'Quarterly')
# Separator placed between the six tables in the stored text fields.
_TABLE_SEPARATOR = '&&&&'


def _open_tab_and_parse(driver, tab_label):
    """Click the span labelled *tab_label*, expand all rows, parse the table."""
    driver.find_element(By.XPATH, f'//div/span[text()="{tab_label}"]').click()
    time.sleep(5)
    try:
        driver.find_element(By.XPATH, _EXPAND_ALL_XPATH).click()
        time.sleep(5)
    except Exception as e:
        # Best effort: if the toggle cannot be clicked the page is parsed as-is.
        print(e)
    return deal_table(pq(driver.page_source))


def _scrape_statement_tables(driver, url):
    """Load *url* and return ``(unit, tables)``.

    *tables* holds six ``deal_table`` results: the initially displayed table
    followed by one table per entry of ``_FOLLOW_UP_TABS``.
    """
    print(f'正在采集:{url}')
    driver.get(url)
    WebDriverWait(driver, 300).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
    time.sleep(2)
    try:
        driver.find_element(By.XPATH, _FIRST_EXPAND_ALL_XPATH).click()
        WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        time.sleep(2)
    except Exception as e:
        # Best effort: if the toggle cannot be clicked the page is parsed as-is.
        print(e)
    first_page = pq(driver.page_source)
    unit = get_unit(first_page)
    tables = [deal_table(first_page)]
    for tab_label in _FOLLOW_UP_TABS:
        tables.append(_open_tab_and_parse(driver, tab_label))
    return unit, tables


def _build_level_relation(tables):
    """Serialise every table's row hierarchy to JSON, joined by the separator."""
    return _TABLE_SEPARATOR.join(
        json.dumps(table['目录'], ensure_ascii=False, indent=4) for table in tables
    )


def _build_content(tables):
    """Flatten headers, row labels and values of all tables into one string.

    Within a table every item is on its own line; tables are separated by
    ``&&&&`` with no surrounding newlines.
    """
    lines = []
    for table in tables:
        lines.extend(table['表头'])
        for label, values in table['内容'].items():
            lines.append(label)
            lines.extend(values)
        lines.append(_TABLE_SEPARATOR)
    # The whole text is stripped once, after all tables were appended, so the
    # '\n&&&&\n' separators between tables are still intact for the replace.
    financials = ''.join(line + '\n' for line in lines).strip()
    # Drop the trailing separator, then collapse the inner ones.
    return financials[:-4].strip().replace('\n&&&&\n', '&&&&')


def _restart_driver(driver):
    """Quit *driver*, pause, and return a freshly started WebDriver."""
    driver.quit()
    time.sleep(10)
    return get_webdriver()


# 雅虎财经
def get_content2():
    """Scrape Yahoo Finance statements for category-'4' companies and store them.

    For every row selected from ``sys_base_enterprise_ipo`` the financials
    page is opened, six tables are parsed, and the result is written through
    ``dataToSql``.  When the write succeeds the sync endpoint is called; if
    that call fails the stock code is appended to
    ``雅虎财经-财务数据_发送错误ID.txt``.

    A failure while handling one company restarts the browser and moves on
    to the next company.  The cursor, the connection and the browser are
    released when the function exits, whether normally or through an error
    (including Ctrl-C).
    """
    driver = get_webdriver()
    conn = None
    cursor = None
    try:
        # NOTE(review): host and credentials are hard-coded here; they should
        # come from configuration rather than source control.
        conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
                               charset='utf8')
        cursor = conn.cursor()
        sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where category in ('4') order by social_credit_code desc """
        cursor.execute(sql1)
        result_data = cursor.fetchall()
        for data in result_data:
            try:
                data_list = list(data)
                print(data_list)
                social_credit_code = data_list[0]
                stock = data_list[1]
                securities_short_name = data_list[2] if data_list[2] is not None else ""
                # Codes ending in "HK" that start with "0" are requested
                # without that first character.
                ticker = str(stock)
                if ticker.upper().endswith("HK") and ticker.upper().startswith("0"):
                    ticker = ticker[1:]
                url = f'https://finance.yahoo.com/quote/{ticker}/financials?p={ticker}'
                try:
                    unit, tables = _scrape_statement_tables(driver, url)
                    ipo_data = {
                        'social_credit_code': social_credit_code,
                        'stock': stock,
                        'securities_short_name': securities_short_name,
                        'content': _build_content(tables),
                        'level_relation': _build_level_relation(tables),
                        'unit': unit,
                        'origin_type': 1
                    }
                    if dataToSql(conn, cursor, ipo_data):
                        get_url = f'http://114.115.236.206:8088/sync/finance/yh?securitiesCode={stock}'
                        try:
                            # NOTE(review): no timeout is set, so a stalled
                            # endpoint blocks the whole run - consider one.
                            resp = requests.get(get_url)
                            print(resp.text)
                            print('调用接口成功!!')
                        except Exception:
                            with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8') as failed_log:
                                failed_log.write(stock + '\n')
                except Exception as e:
                    print(e)
                    print(f'采集:{url}失败')
                    driver = _restart_driver(driver)
            except Exception:
                # Despite the message, the current company is not retried;
                # the loop continues with the next one.
                driver = _restart_driver(driver)
                print('出错,重试中!')
                continue
    finally:
        # Release everything; a failure to release one resource must not
        # prevent releasing the others or hide the original error.
        for resource, release in ((cursor, 'close'), (conn, 'close'), (driver, 'quit')):
            if resource is None:
                continue
            try:
                getattr(resource, release)()
            except Exception as e:
                print(e)
def dataToSql(conn,cursor,ipo_data):
    """Insert or update one company's data in ``config_finance_data_sync``.

    :param conn: open DB-API connection; used for commit and rollback.
    :param cursor: cursor created from *conn*.
    :param ipo_data: dict with the keys ``social_credit_code``, ``stock``,
        ``content``, ``level_relation``, ``unit`` and ``origin_type``.
    :return: ``True`` when a row was inserted or updated and committed;
        ``False`` when ``unit``, ``content`` or ``level_relation`` is empty,
        or when any error occurred (the transaction is then rolled back).
    """
    try:
        social_credit_code = ipo_data['social_credit_code']
        stock = ipo_data['stock']
        content = ipo_data['content']
        level_relation = ipo_data['level_relation']
        unit = ipo_data['unit']
        origin_type = ipo_data['origin_type']
        # Nothing useful to store without all three scraped fields.
        if not unit or not content or not level_relation:
            return False

        # Existence check.  The stock code is passed as a bound parameter so
        # that quotes in it can neither break nor alter the statement, and
        # only a constant is selected to avoid fetching the large columns.
        cursor.execute(
            "SELECT 1 FROM config_finance_data_sync WHERE stock_code=%s LIMIT 1",
            (stock,),
        )
        existing_record = cursor.fetchone()

        currentdate = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        if existing_record:
            # NOTE(review): `unit` is written on insert but not on update -
            # confirm that is intended.
            update_sql = "UPDATE config_finance_data_sync SET social_credit_code=%s , content=%s , level_relation=%s,origin_type=%s ,create_time=%s WHERE stock_code=%s "
            update_param = (social_credit_code, content, level_relation, origin_type, currentdate, stock)
            cursor.execute(update_sql, update_param)
            print('更新成功')
        else:
            insert_sql = "INSERT INTO config_finance_data_sync (social_credit_code, content,level_relation,unit,stock_code,origin_type,create_time) VALUES ( %s,%s, %s, %s, %s, %s, %s)"
            insert_param = (social_credit_code, content, level_relation, unit, stock, origin_type, currentdate)
            cursor.execute(insert_sql, insert_param)
            print('插入成功')
        conn.commit()
    except Exception as e:
        # Report the failure instead of hiding it, and leave the shared
        # connection without a half-finished transaction.
        print('dataToSql failed:', e)
        try:
            conn.rollback()
        except Exception as rollback_error:
            print('dataToSql rollback failed:', rollback_error)
        return False
    return True
def get_unit(doc_resp):
    """Extract the currency unit from the quote header of the page.

    :param doc_resp: PyQuery document of the loaded page.
    :return: the text following ``"Currency in "`` with ``'(千)'``
        ("thousand") appended; the header text unchanged when it does not
        contain ``"Currency in"``; ``''`` when anything goes wrong (for
        example the header element is missing).
    """
    selector = '#quote-header-info >div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span'
    try:
        header_text = pq(doc_resp(selector)[0]).text()
        if 'Currency in' not in header_text:
            # NOTE(review): header text without a currency is returned as
            # the "unit" - confirm whether '' would be the intended result.
            return header_text
        matches = re.findall(r'(?<=Currency in\s).*', header_text)
        return matches[0] + '(千)'
    except Exception:
        return ''
if __name__ == '__main__':
# parse_excel()
#get_content1()
get_content2()
# conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
# charset='utf8')
# cursor = conn.cursor()
# ipo_data={'social_credit_code': 'ZZSN22080900000014', 'stock': '005930.KS', 'securities_short_name': '', 'content': "Breakdown\nttm\n12/30/2022\n12/30/2021\n12/30/2020\n12/30/2019\nTotal Revenue\n302,231,360,000\n302,231,360,000\n279,604,799,000\n236,806,988,000\n230,400,881,000\nOperating Revenue\n302,231,360,000\n302,231,360,000\n279,604,799,000\n236,806,988,000\n230,400,881,000\nCost of Revenue\n190,041,770,000\n190,041,770,000\n166,411,342,000\n144,488,296,000\n147,239,549,000\nGross Profit\n112,189,590,000\n112,189,590,000\n113,193,457,000\n92,318,692,000\n83,161,332,000\nOperating Expense\n68,812,960,000\n68,812,960,000\n61,559,601,000\n56,324,816,000\n55,392,823,000\nSelling General and Administrative\n33,560,956,000\n33,560,956,000\n29,528,020,000\n26,084,793,000\n27,022,180,000\nGeneral & Administrative Expense\n5,993,246,000\n5,993,246,000\n4,840,946,000\n4,688,270,000\n5,172,398,000\nSelling & Marketing Expense\n27,567,710,000\n27,567,710,000\n24,687,074,000\n26,084,793,000\n27,022,180,000\nResearch & Development\n24,919,198,000\n24,919,198,000\n22,401,726,000\n21,111,490,000\n19,907,236,000\nOperating Income\n43,376,630,000\n43,376,630,000\n51,633,856,000\n35,993,876,000\n27,768,509,000\nNet Non Operating Interest Income Expense\n1,957,464,000\n1,957,464,000\n846,738,000\n1,391,445,000\n1,973,668,000\nInterest Income Non Operating\n2,720,479,000\n2,720,479,000\n1,278,278,000\n1,974,458,000\n2,660,024,000\nInterest Expense Non Operating\n763,015,000\n763,015,000\n431,540,000\n583,013,000\n686,356,000\nPretax Income\n46,440,474,000\n46,440,474,000\n53,351,827,000\n36,345,117,000\n30,432,189,000\nTax Provision\n-9,213,603,000\n-9,213,603,000\n13,444,377,000\n9,937,285,000\n8,693,324,000\nNet Income Common Stockholders\n54,730,018,000\n54,730,018,000\n39,243,791,000\n22,929,390,000\n18,899,137,000\nNet Income\n54,730,018,000\n54,730,018,000\n39,243,791,000\n26,090,846,000\n21,505,054,000\nNet Income Including Non-Controlling 
Interests\n55,654,077,000\n55,654,077,000\n39,907,450,000\n26,407,832,000\n21,738,865,000\nNet Income Continuous Operations\n55,654,077,000\n55,654,077,000\n39,907,450,000\n26,407,832,000\n21,738,865,000\nMinority Interests\n-924,059,000\n-924,059,000\n-663,659,000\n-316,986,000\n-233,811,000\nPreferred Stock Dividends\n-\n-\n-\n3,161,456,000\n2,605,917,000\nOtherunder Preferred Stock Dividend\n-\n-\n-\n0\n0\nDiluted NI Available to Com Stockholders\n-\n-\n-\n22,929,390,000\n18,899,137,000\nBasic EPS\n-\n8.06k\n5.78k\n3.84k\n3.17k\nDiluted EPS\n-\n8.06k\n5.78k\n3.84k\n3.17k\nBasic Average Shares\n-\n5,969,783\n6,793,109\n6,792,722\n6,792,500\nDiluted Average Shares\n-\n5,969,783\n6,793,109\n6,792,722\n6,792,500\nTotal Operating Income as Reported\n43,376,630,000\n43,376,630,000\n51,633,856,000\n35,993,876,000\n27,768,509,000\nTotal Expenses\n258,854,730,000\n258,854,730,000\n227,970,943,000\n200,813,112,000\n202,632,372,000\nNet Income from Continuing & Discontinued Operation\n54,730,018,000\n54,730,018,000\n39,243,791,000\n26,090,846,000\n21,505,054,000\nNormalized Income\n54,785,448,400\n54,785,448,400\n39,284,034,434\n25,897,644,758\n21,359,049,769\nInterest Income\n2,720,479,000\n2,720,479,000\n1,278,278,000\n1,974,458,000\n2,660,024,000\nInterest Expense\n763,015,000\n763,015,000\n431,540,000\n583,013,000\n686,356,000\nNet Interest Income\n1,957,464,000\n1,957,464,000\n846,738,000\n1,391,445,000\n1,973,668,000\nEBIT\n47,203,489,000\n47,203,489,000\n53,783,367,000\n36,928,130,000\n31,118,545,000\nEBITDA\n86,311,148,000\n-\n-\n-\n-\nReconciled Cost of Revenue\n190,041,770,000\n190,041,770,000\n166,411,342,000\n144,488,296,000\n147,239,549,000\nReconciled Depreciation\n39,107,659,000\n39,107,659,000\n34,247,361,000\n30,335,616,000\n29,597,638,000\nNet Income from Continuing Operation Net Minority Interest\n54,730,018,000\n54,730,018,000\n39,243,791,000\n26,090,846,000\n21,505,054,000\nTotal Unusual Items Excluding 
Goodwill\n-92,384,000\n-92,384,000\n-53,801,000\n265,903,000\n204,391,000\nTotal Unusual Items\n-92,384,000\n-92,384,000\n-53,801,000\n265,903,000\n204,391,000\nNormalized EBITDA\n86,403,532,000\n86,403,532,000\n88,084,529,000\n66,997,843,000\n60,511,792,000\nTax Rate for Calcs\n0\n0\n0\n0\n0\nTax Effect of Unusual Items\n-36,953,600\n-36,953,600\n-13,557,566\n72,701,758\n58,386,769&&&&Breakdown\nttm\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Revenue\n302,231,360,000\n63,745,371,000\n70,464,575,000\n76,781,680,000\n77,203,607,000\nOperating Revenue\n302,231,360,000\n63,745,371,000\n70,464,575,000\n76,781,680,000\n77,203,607,000\nCost of Revenue\n190,041,770,000\n46,007,093,000\n48,627,728,000\n48,072,237,000\n46,269,748,000\nGross Profit\n112,189,590,000\n17,738,278,000\n21,836,847,000\n28,709,443,000\n30,933,859,000\nOperating Expense\n68,812,960,000\n17,098,100,000\n17,530,716,000\n17,857,398,000\n16,836,814,000\nSelling General and Administrative\n33,560,956,000\n7,804,860,000\n8,474,365,000\n8,961,809,000\n8,078,864,000\nGeneral & Administrative Expense\n5,993,246,000\n1,446,500,000\n1,627,015,000\n1,576,562,000\n1,516,418,000\nSelling & Marketing Expense\n27,567,710,000\n6,358,360,000\n6,847,350,000\n7,385,247,000\n6,562,446,000\nResearch & Development\n24,919,198,000\n6,578,734,000\n6,472,511,000\n6,269,581,000\n6,254,874,000\nOperating Income\n43,376,630,000\n640,178,000\n4,306,131,000\n10,852,045,000\n14,097,045,000\nNet Non Operating Interest Income Expense\n1,957,464,000\n768,769,000\n790,554,000\n582,979,000\n364,130,000\nInterest Income Non Operating\n2,720,479,000\n1,060,446,000\n1,064,811,000\n799,442,000\n487,194,000\nInterest Expense Non Operating\n763,015,000\n291,677,000\n274,257,000\n216,463,000\n123,064,000\nTotal Other Finance Cost\n-\n-\n-\n-657,417,000\n-240,906,000\nPretax Income\n46,440,474,000\n1,826,380,000\n5,054,918,000\n11,854,958,000\n14,460,758,000\nTax 
Provision\n-9,213,603,000\n251,780,000\n-18,786,528,000\n2,465,760,000\n3,361,953,000\nNet Income Common Stockholders\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nNet Income\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nNet Income Including Non-Controlling Interests\n55,654,077,000\n1,574,600,000\n23,841,446,000\n9,389,198,000\n11,098,805,000\nNet Income Continuous Operations\n55,654,077,000\n1,574,600,000\n23,841,446,000\n9,389,198,000\n11,098,805,000\nMinority Interests\n-924,059,000\n-173,348,000\n-338,937,000\n-245,298,000\n-144,290,000\nBasic EPS\n-\n206.00\n-\n1.35k\n1.61k\nDiluted EPS\n-\n206.00\n-\n1.35k\n1.61k\nBasic Average Shares\n-\n5,969,783\n-\n5,969,783\n5,969,783\nDiluted Average Shares\n-\n5,969,783\n-\n5,969,783\n5,969,783\nTotal Operating Income as Reported\n43,376,630,000\n640,178,000\n4,306,131,000\n10,852,045,000\n14,097,045,000\nTotal Expenses\n258,854,730,000\n63,105,193,000\n66,158,444,000\n65,929,635,000\n63,106,562,000\nNet Income from Continuing & Discontinued Operation\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nNormalized Income\n54,785,448,400\n1,305,451,577\n23,577,614,600\n9,179,200,500\n10,939,510,141\nInterest Income\n2,720,479,000\n1,060,446,000\n1,064,811,000\n799,442,000\n487,194,000\nInterest Expense\n763,015,000\n291,677,000\n274,257,000\n216,463,000\n123,064,000\nNet Interest Income\n1,957,464,000\n768,769,000\n790,554,000\n582,979,000\n364,130,000\nEBIT\n47,203,489,000\n2,118,057,000\n5,329,175,000\n12,071,421,000\n14,583,822,000\nEBITDA\n86,311,148,000\n-\n-\n-\n-\nReconciled Cost of Revenue\n190,041,770,000\n46,007,093,000\n48,627,728,000\n48,072,237,000\n46,269,748,000\nReconciled Depreciation\n39,107,659,000\n9,586,493,000\n9,649,058,000\n9,837,727,000\n9,841,366,000\nNet Income from Continuing Operation Net Minority Interest\n54,730,018,000\n1,401,252,000\n23,502,509,000\n9,143,900,000\n10,954,515,000\nTotal Unusual 
Items Excluding Goodwill\n-92,384,000\n111,119,000\n-125,176,000\n-44,571,000\n19,550,000\nTotal Unusual Items\n-92,384,000\n111,119,000\n-125,176,000\n-44,571,000\n19,550,000\nNormalized EBITDA\n86,403,532,000\n11,593,431,000\n15,103,409,000\n21,953,719,000\n24,405,638,000\nTax Rate for Calcs\n0\n0\n0\n0\n0\nTax Effect of Unusual Items\n-36,953,600\n15,318,577\n-50,070,400\n-9,270,500\n4,545,141&&&&Breakdown\n12/30/2022\n12/30/2021\n12/30/2020\n12/30/2019\nTotal Assets\n448,424,507,000\n426,621,158,000\n378,235,718,000\n352,564,497,000\nCurrent Assets\n218,470,581,000\n218,163,185,000\n198,215,579,000\n181,385,260,000\nCash, Cash Equivalents & Short Term Investments\n115,227,286,000\n124,150,192,000\n124,652,843,000\n108,779,703,000\nCash And Cash Equivalents\n49,680,710,000\n39,031,415,000\n29,382,578,000\n26,885,999,000\nCash\n-\n-\n-\n32,861,000\nCash Equivalents\n49,680,710,000\n39,031,415,000\n29,382,578,000\n26,853,138,000\nOther Short Term Investments\n65,546,576,000\n85,118,777,000\n95,270,265,000\n81,893,704,000\nInventory\n52,187,866,000\n41,384,404,000\n32,043,145,000\n26,766,464,000\nRaw Materials\n16,268,974,000\n14,864,486,000\n10,837,169,000\n8,764,714,000\nWork in Process\n21,612,965,000\n13,967,331,000\n11,818,090,000\n9,886,634,000\nFinished Goods\n18,625,019,000\n14,445,566,000\n9,387,886,000\n8,115,116,000\nInventories Adjustments Allowances\n-4,319,092,000\n-1,892,979,000\n-1,324,492,000\n-1,424,906,000\nPrepaid Assets\n2,867,823,000\n2,336,252,000\n2,266,100,000\n3,833,053,000\nAssets Held for Sale Current\n-\n-\n929,432,000\n-\nOther Current Assets\n6,316,834,000\n5,081,665,000\n7,359,001,000\n6,874,697,000\nTotal non-current assets\n229,953,926,000\n208,457,973,000\n180,020,139,000\n171,179,237,000\nNet PPE\n168,045,388,000\n149,928,539,000\n128,952,892,000\n119,825,474,000\nGross PPE\n427,595,058,000\n377,471,994,000\n328,606,313,000\n304,075,769,000\nProperties\n0\n0\n0\n0\nLand And 
Improvements\n10,024,569,000\n9,943,570,000\n9,850,942,000\n9,828,309,000\nBuildings And Improvements\n67,713,808,000\n62,651,459,000\n55,026,369,000\n48,839,439,000\nMachinery Furniture Equipment\n303,000,627,000\n274,909,571,000\n233,056,501,000\n211,416,021,000\nOther Properties\n13,248,490,000\n11,958,070,000\n10,496,584,000\n10,061,981,000\nConstruction in Progress\n33,607,564,000\n18,009,324,000\n20,175,917,000\n23,930,019,000\nAccumulated Depreciation\n-259,549,670,000\n-227,543,455,000\n-199,653,421,000\n-184,250,295,000\nGoodwill And Other Intangible Assets\n20,217,754,000\n20,236,244,000\n18,468,502,000\n20,703,504,000\nGoodwill\n6,014,422,000\n5,844,259,000\n5,673,642,000\n6,250,439,000\nOther Intangible Assets\n14,203,332,000\n14,391,985,000\n12,794,860,000\n14,453,065,000\nDefined Pension Benefit\n5,851,972,000\n2,809,590,000\n1,355,502,000\n589,832,000\nOther Non Current Assets\n6,012,671,000\n5,571,099,000\n5,113,279,000\n7,994,050,000\nTotal Liabilities Net Minority Interest\n93,674,903,000\n121,721,227,000\n102,287,702,000\n89,684,076,000\nCurrent Liabilities\n78,344,852,000\n88,117,133,000\n75,604,351,000\n63,782,764,000\nCurrent Provisions\n5,844,907,000\n5,372,872,000\n4,349,563,000\n4,068,627,000\nCurrent Debt And Capital Lease Obligation\n6,236,477,000\n15,017,761,000\n17,269,528,000\n15,239,558,000\nCurrent Debt\n5,362,458,000\n14,205,859,000\n16,558,747,000\n14,393,468,000\nCurrent Capital Lease Obligation\n874,019,000\n811,902,000\n710,781,000\n805,068,000\nOther Current Liabilities\n1,951,354,000\n1,492,239,000\n1,127,719,000\n1,037,030,000\nTotal Non Current Liabilities Net Minority Interest\n15,330,051,000\n33,604,094,000\n26,683,351,000\n25,901,312,000\nLong Term Provisions\n1,928,518,000\n2,306,994,000\n1,051,428,000\n611,100,000\nLong Term Debt And Capital Lease Obligation\n4,096,765,000\n3,374,388,000\n2,947,853,000\n3,172,479,000\nLong Term Debt\n569,939,000\n509,732,000\n948,137,000\n975,298,000\nLong Term Capital Lease 
Obligation\n3,526,826,000\n2,864,656,000\n1,999,716,000\n2,197,181,000\nTradeand Other Payables Non Current\n2,753,305,000\n2,991,440,000\n1,682,910,000\n2,184,249,000\nOther Non Current Liabilities\n1,171,761,000\n1,267,183,000\n1,725,857,000\n2,408,896,000\nTotal Equity Gross Minority Interest\n354,749,604,000\n304,899,931,000\n275,948,016,000\n262,880,421,000\nStockholders' Equity\n345,186,142,000\n296,237,697,000\n267,670,331,000\n254,915,472,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n897,514,000\n897,514,000\nAdditional Paid in Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n337,946,407,000\n293,064,763,000\n271,068,211,000\n254,582,894,000\nOther Equity Interest\n-\n-\n26,726,000\n60,429,000\nMinority Interest\n9,563,462,000\n8,662,234,000\n8,277,685,000\n7,964,949,000\nTotal Capitalization\n345,756,081,000\n296,747,429,000\n268,618,468,000\n255,890,770,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n345,066,675,000\n296,118,230,000\n267,670,331,000\n254,915,472,000\nCapital Lease Obligations\n4,400,845,000\n3,676,558,000\n2,710,497,000\n2,197,181,000\nNet Tangible Assets\n324,968,388,000\n276,001,453,000\n249,201,829,000\n234,211,968,000\nWorking Capital\n140,125,729,000\n130,046,052,000\n122,611,228,000\n117,602,496,000\nInvested Capital\n350,999,072,000\n310,833,821,000\n285,177,215,000\n270,284,238,000\nTangible Book Value\n324,848,921,000\n275,881,986,000\n249,201,829,000\n234,211,968,000\nTotal Debt\n10,333,242,000\n18,392,149,000\n20,217,381,000\n18,412,037,000\nShare Issued\n6,792,669\n5,969,783\n5,969,783\n5,970,148\nOrdinary Shares Number\n6,792,669\n5,969,783\n5,969,783\n5,969,783\nPreferred Shares Number\n822,887\n822,887\n822,887\n822,887\nTreasury Shares 
Number\n0\n0\n0\n0&&&&Breakdown\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Assets\n454,091,777,000\n448,424,507,000\n470,278,409,000\n448,040,650,000\nCurrent Assets\n214,442,141,000\n218,470,581,000\n250,880,637,000\n236,287,491,000\nCash, Cash Equivalents & Short Term Investments\n108,182,472,000\n115,227,286,000\n128,816,099,000\n125,320,664,000\nCash And Cash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nCash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nOther Short Term Investments\n35,233,095,000\n65,546,576,000\n84,300,678,000\n85,737,523,000\nInventory\n54,419,586,000\n52,187,866,000\n57,319,848,000\n52,092,241,000\nRaw Materials\n16,195,959,000\n16,268,974,000\n20,342,418,000\n18,865,194,000\nWork in Process\n25,120,646,000\n21,612,965,000\n19,542,019,000\n16,005,040,000\nFinished Goods\n19,498,692,000\n18,625,019,000\n21,498,093,000\n20,192,551,000\nInventories Adjustments Allowances\n-6,395,711,000\n-4,319,092,000\n-4,062,682,000\n-2,970,544,000\nPrepaid Assets\n3,416,279,000\n2,867,823,000\n3,837,266,000\n3,141,748,000\nOther Current Assets\n6,063,246,000\n6,316,834,000\n7,514,181,000\n6,016,127,000\nTotal non-current assets\n239,649,636,000\n229,953,926,000\n219,397,772,000\n211,753,159,000\nNet PPE\n171,857,516,000\n168,045,388,000\n160,343,568,000\n154,254,576,000\nGross PPE\n171,857,516,000\n427,595,058,000\n160,343,568,000\n154,254,576,000\nProperties\n-\n0\n-\n-\nLand And Improvements\n-\n10,024,569,000\n-\n-\nBuildings And Improvements\n-\n67,713,808,000\n-\n-\nMachinery Furniture Equipment\n-\n303,000,627,000\n-\n-\nOther Properties\n171,857,516,000\n13,248,490,000\n160,343,568,000\n154,254,576,000\nConstruction in Progress\n-\n33,607,564,000\n-\n-\nAccumulated Depreciation\n-\n-259,549,670,000\n-\n-\nGoodwill And Other Intangible Assets\n23,617,703,000\n20,217,754,000\n21,484,857,000\n20,096,926,000\nGoodwill\n-\n6,014,422,000\n-\n-\nOther Intangible 
Assets\n23,617,703,000\n14,203,332,000\n21,484,857,000\n20,096,926,000\nDefined Pension Benefit\n5,268,877,000\n5,851,972,000\n1,805,994,000\n2,131,474,000\nOther Non Current Assets\n7,935,941,000\n6,012,671,000\n6,635,316,000\n6,780,128,000\nTotal Liabilities Net Minority Interest\n94,292,361,000\n93,674,903,000\n125,371,520,000\n120,133,986,000\nCurrent Liabilities\n76,057,448,000\n78,344,852,000\n85,285,669,000\n83,362,268,000\nCurrent Provisions\n7,011,788,000\n5,844,907,000\n5,965,963,000\n5,995,790,000\nCurrent Debt And Capital Lease Obligation\n5,640,162,000\n6,236,477,000\n8,671,917,000\n13,941,999,000\nCurrent Debt\n4,692,764,000\n5,362,458,000\n7,766,222,000\n13,093,570,000\nCurrent Capital Lease Obligation\n947,398,000\n874,019,000\n905,695,000\n848,429,000\nOther Current Liabilities\n1,895,698,000\n1,951,354,000\n2,236,141,000\n1,859,476,000\nTotal Non Current Liabilities Net Minority Interest\n18,234,913,000\n15,330,051,000\n40,085,851,000\n36,771,718,000\nLong Term Provisions\n2,180,502,000\n1,928,518,000\n2,367,173,000\n2,278,231,000\nLong Term Debt And Capital Lease Obligation\n4,301,816,000\n4,096,765,000\n3,790,088,000\n3,496,991,000\nLong Term Debt\n4,301,816,000\n569,939,000\n622,640,000\n562,283,000\nLong Term Capital Lease Obligation\n-\n3,526,826,000\n3,167,448,000\n2,934,708,000\nTradeand Other Payables Non Current\n5,108,064,000\n2,753,305,000\n3,128,781,000\n2,871,992,000\nOther Non Current Liabilities\n1,771,822,000\n1,171,761,000\n1,327,785,000\n1,265,979,000\nTotal Equity Gross Minority Interest\n359,799,416,000\n354,749,604,000\n344,906,889,000\n327,906,664,000\nStockholders' Equity\n350,019,928,000\n345,186,142,000\n335,470,176,000\n318,830,612,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n778,047,000\n778,047,000\nAdditional Paid in 
Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n336,881,852,000\n337,946,407,000\n316,890,272,000\n310,216,785,000\nMinority Interest\n9,779,488,000\n9,563,462,000\n9,436,713,000\n9,076,052,000\nTotal Capitalization\n354,321,744,000\n345,756,081,000\n336,092,816,000\n319,392,895,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n349,900,461,000\n345,066,675,000\n335,350,709,000\n318,711,145,000\nCapital Lease Obligations\n947,398,000\n4,400,845,000\n4,073,143,000\n3,783,137,000\nNet Tangible Assets\n326,402,225,000\n324,968,388,000\n313,985,319,000\n298,733,686,000\nWorking Capital\n138,384,693,000\n140,125,729,000\n165,594,968,000\n152,925,223,000\nInvested Capital\n358,895,041,000\n350,999,072,000\n343,739,571,000\n332,366,998,000\nTangible Book Value\n326,282,758,000\n324,848,921,000\n313,865,852,000\n298,614,219,000\nTotal Debt\n9,941,978,000\n10,333,242,000\n12,462,005,000\n17,438,990,000\nShare Issued\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nOrdinary Shares Number\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nPreferred Shares Number\n0\n822,887\n822,887\n822,887\nTreasury Shares Number\n0\n0\n0\n0&&&&Breakdown\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Assets\n454,091,777,000\n448,424,507,000\n470,278,409,000\n448,040,650,000\nCurrent Assets\n214,442,141,000\n218,470,581,000\n250,880,637,000\n236,287,491,000\nCash, Cash Equivalents & Short Term Investments\n108,182,472,000\n115,227,286,000\n128,816,099,000\n125,320,664,000\nCash And Cash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nCash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nOther Short Term Investments\n35,233,095,000\n65,546,576,000\n84,300,678,000\n85,737,523,000\nInventory\n54,419,586,000\n52,187,866,000\n57,319,848,000\n52,092,241,000\nRaw Materials\n16,195,959,000\n16,268,974,000\n20,342,418,000\n18,865,194,000\nWork in 
Process\n25,120,646,000\n21,612,965,000\n19,542,019,000\n16,005,040,000\nFinished Goods\n19,498,692,000\n18,625,019,000\n21,498,093,000\n20,192,551,000\nInventories Adjustments Allowances\n-6,395,711,000\n-4,319,092,000\n-4,062,682,000\n-2,970,544,000\nPrepaid Assets\n3,416,279,000\n2,867,823,000\n3,837,266,000\n3,141,748,000\nOther Current Assets\n6,063,246,000\n6,316,834,000\n7,514,181,000\n6,016,127,000\nTotal non-current assets\n239,649,636,000\n229,953,926,000\n219,397,772,000\n211,753,159,000\nNet PPE\n171,857,516,000\n168,045,388,000\n160,343,568,000\n154,254,576,000\nGross PPE\n171,857,516,000\n427,595,058,000\n160,343,568,000\n154,254,576,000\nProperties\n-\n0\n-\n-\nLand And Improvements\n-\n10,024,569,000\n-\n-\nBuildings And Improvements\n-\n67,713,808,000\n-\n-\nMachinery Furniture Equipment\n-\n303,000,627,000\n-\n-\nOther Properties\n171,857,516,000\n13,248,490,000\n160,343,568,000\n154,254,576,000\nConstruction in Progress\n-\n33,607,564,000\n-\n-\nAccumulated Depreciation\n-\n-259,549,670,000\n-\n-\nGoodwill And Other Intangible Assets\n23,617,703,000\n20,217,754,000\n21,484,857,000\n20,096,926,000\nGoodwill\n-\n6,014,422,000\n-\n-\nOther Intangible Assets\n23,617,703,000\n14,203,332,000\n21,484,857,000\n20,096,926,000\nDefined Pension Benefit\n5,268,877,000\n5,851,972,000\n1,805,994,000\n2,131,474,000\nOther Non Current Assets\n7,935,941,000\n6,012,671,000\n6,635,316,000\n6,780,128,000\nTotal Liabilities Net Minority Interest\n94,292,361,000\n93,674,903,000\n125,371,520,000\n120,133,986,000\nCurrent Liabilities\n76,057,448,000\n78,344,852,000\n85,285,669,000\n83,362,268,000\nCurrent Provisions\n7,011,788,000\n5,844,907,000\n5,965,963,000\n5,995,790,000\nCurrent Debt And Capital Lease Obligation\n5,640,162,000\n6,236,477,000\n8,671,917,000\n13,941,999,000\nCurrent Debt\n4,692,764,000\n5,362,458,000\n7,766,222,000\n13,093,570,000\nCurrent Capital Lease Obligation\n947,398,000\n874,019,000\n905,695,000\n848,429,000\nOther Current 
Liabilities\n1,895,698,000\n1,951,354,000\n2,236,141,000\n1,859,476,000\nTotal Non Current Liabilities Net Minority Interest\n18,234,913,000\n15,330,051,000\n40,085,851,000\n36,771,718,000\nLong Term Provisions\n2,180,502,000\n1,928,518,000\n2,367,173,000\n2,278,231,000\nLong Term Debt And Capital Lease Obligation\n4,301,816,000\n4,096,765,000\n3,790,088,000\n3,496,991,000\nLong Term Debt\n4,301,816,000\n569,939,000\n622,640,000\n562,283,000\nLong Term Capital Lease Obligation\n-\n3,526,826,000\n3,167,448,000\n2,934,708,000\nTradeand Other Payables Non Current\n5,108,064,000\n2,753,305,000\n3,128,781,000\n2,871,992,000\nOther Non Current Liabilities\n1,771,822,000\n1,171,761,000\n1,327,785,000\n1,265,979,000\nTotal Equity Gross Minority Interest\n359,799,416,000\n354,749,604,000\n344,906,889,000\n327,906,664,000\nStockholders' Equity\n350,019,928,000\n345,186,142,000\n335,470,176,000\n318,830,612,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n778,047,000\n778,047,000\nAdditional Paid in Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n336,881,852,000\n337,946,407,000\n316,890,272,000\n310,216,785,000\nMinority Interest\n9,779,488,000\n9,563,462,000\n9,436,713,000\n9,076,052,000\nTotal Capitalization\n354,321,744,000\n345,756,081,000\n336,092,816,000\n319,392,895,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n349,900,461,000\n345,066,675,000\n335,350,709,000\n318,711,145,000\nCapital Lease Obligations\n947,398,000\n4,400,845,000\n4,073,143,000\n3,783,137,000\nNet Tangible Assets\n326,402,225,000\n324,968,388,000\n313,985,319,000\n298,733,686,000\nWorking Capital\n138,384,693,000\n140,125,729,000\n165,594,968,000\n152,925,223,000\nInvested Capital\n358,895,041,000\n350,999,072,000\n343,739,571,000\n332,366,998,000\nTangible Book 
Value\n326,282,758,000\n324,848,921,000\n313,865,852,000\n298,614,219,000\nTotal Debt\n9,941,978,000\n10,333,242,000\n12,462,005,000\n17,438,990,000\nShare Issued\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nOrdinary Shares Number\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nPreferred Shares Number\n0\n822,887\n822,887\n822,887\nTreasury Shares Number\n0\n0\n0\n0&&&&Breakdown\n3/30/2023\n12/30/2022\n9/29/2022\n6/29/2022\nTotal Assets\n454,091,777,000\n448,424,507,000\n470,278,409,000\n448,040,650,000\nCurrent Assets\n214,442,141,000\n218,470,581,000\n250,880,637,000\n236,287,491,000\nCash, Cash Equivalents & Short Term Investments\n108,182,472,000\n115,227,286,000\n128,816,099,000\n125,320,664,000\nCash And Cash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nCash Equivalents\n72,949,377,000\n49,680,710,000\n44,515,421,000\n39,583,141,000\nOther Short Term Investments\n35,233,095,000\n65,546,576,000\n84,300,678,000\n85,737,523,000\nInventory\n54,419,586,000\n52,187,866,000\n57,319,848,000\n52,092,241,000\nRaw Materials\n16,195,959,000\n16,268,974,000\n20,342,418,000\n18,865,194,000\nWork in Process\n25,120,646,000\n21,612,965,000\n19,542,019,000\n16,005,040,000\nFinished Goods\n19,498,692,000\n18,625,019,000\n21,498,093,000\n20,192,551,000\nInventories Adjustments Allowances\n-6,395,711,000\n-4,319,092,000\n-4,062,682,000\n-2,970,544,000\nPrepaid Assets\n3,416,279,000\n2,867,823,000\n3,837,266,000\n3,141,748,000\nOther Current Assets\n6,063,246,000\n6,316,834,000\n7,514,181,000\n6,016,127,000\nTotal non-current assets\n239,649,636,000\n229,953,926,000\n219,397,772,000\n211,753,159,000\nNet PPE\n171,857,516,000\n168,045,388,000\n160,343,568,000\n154,254,576,000\nGross PPE\n171,857,516,000\n427,595,058,000\n160,343,568,000\n154,254,576,000\nProperties\n-\n0\n-\n-\nLand And Improvements\n-\n10,024,569,000\n-\n-\nBuildings And Improvements\n-\n67,713,808,000\n-\n-\nMachinery Furniture Equipment\n-\n303,000,627,000\n-\n-\nOther 
Properties\n171,857,516,000\n13,248,490,000\n160,343,568,000\n154,254,576,000\nConstruction in Progress\n-\n33,607,564,000\n-\n-\nAccumulated Depreciation\n-\n-259,549,670,000\n-\n-\nGoodwill And Other Intangible Assets\n23,617,703,000\n20,217,754,000\n21,484,857,000\n20,096,926,000\nGoodwill\n-\n6,014,422,000\n-\n-\nOther Intangible Assets\n23,617,703,000\n14,203,332,000\n21,484,857,000\n20,096,926,000\nDefined Pension Benefit\n5,268,877,000\n5,851,972,000\n1,805,994,000\n2,131,474,000\nOther Non Current Assets\n7,935,941,000\n6,012,671,000\n6,635,316,000\n6,780,128,000\nTotal Liabilities Net Minority Interest\n94,292,361,000\n93,674,903,000\n125,371,520,000\n120,133,986,000\nCurrent Liabilities\n76,057,448,000\n78,344,852,000\n85,285,669,000\n83,362,268,000\nCurrent Provisions\n7,011,788,000\n5,844,907,000\n5,965,963,000\n5,995,790,000\nCurrent Debt And Capital Lease Obligation\n5,640,162,000\n6,236,477,000\n8,671,917,000\n13,941,999,000\nCurrent Debt\n4,692,764,000\n5,362,458,000\n7,766,222,000\n13,093,570,000\nCurrent Capital Lease Obligation\n947,398,000\n874,019,000\n905,695,000\n848,429,000\nOther Current Liabilities\n1,895,698,000\n1,951,354,000\n2,236,141,000\n1,859,476,000\nTotal Non Current Liabilities Net Minority Interest\n18,234,913,000\n15,330,051,000\n40,085,851,000\n36,771,718,000\nLong Term Provisions\n2,180,502,000\n1,928,518,000\n2,367,173,000\n2,278,231,000\nLong Term Debt And Capital Lease Obligation\n4,301,816,000\n4,096,765,000\n3,790,088,000\n3,496,991,000\nLong Term Debt\n4,301,816,000\n569,939,000\n622,640,000\n562,283,000\nLong Term Capital Lease Obligation\n-\n3,526,826,000\n3,167,448,000\n2,934,708,000\nTradeand Other Payables Non Current\n5,108,064,000\n2,753,305,000\n3,128,781,000\n2,871,992,000\nOther Non Current Liabilities\n1,771,822,000\n1,171,761,000\n1,327,785,000\n1,265,979,000\nTotal Equity Gross Minority Interest\n359,799,416,000\n354,749,604,000\n344,906,889,000\n327,906,664,000\nStockholders' 
Equity\n350,019,928,000\n345,186,142,000\n335,470,176,000\n318,830,612,000\nCapital Stock\n897,514,000\n897,514,000\n897,514,000\n897,514,000\nPreferred Stock\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock\n778,047,000\n778,047,000\n778,047,000\n778,047,000\nAdditional Paid in Capital\n4,403,893,000\n4,403,893,000\n4,403,893,000\n4,403,893,000\nRetained Earnings\n336,881,852,000\n337,946,407,000\n316,890,272,000\n310,216,785,000\nMinority Interest\n9,779,488,000\n9,563,462,000\n9,436,713,000\n9,076,052,000\nTotal Capitalization\n354,321,744,000\n345,756,081,000\n336,092,816,000\n319,392,895,000\nPreferred Stock Equity\n119,467,000\n119,467,000\n119,467,000\n119,467,000\nCommon Stock Equity\n349,900,461,000\n345,066,675,000\n335,350,709,000\n318,711,145,000\nCapital Lease Obligations\n947,398,000\n4,400,845,000\n4,073,143,000\n3,783,137,000\nNet Tangible Assets\n326,402,225,000\n324,968,388,000\n313,985,319,000\n298,733,686,000\nWorking Capital\n138,384,693,000\n140,125,729,000\n165,594,968,000\n152,925,223,000\nInvested Capital\n358,895,041,000\n350,999,072,000\n343,739,571,000\n332,366,998,000\nTangible Book Value\n326,282,758,000\n324,848,921,000\n313,865,852,000\n298,614,219,000\nTotal Debt\n9,941,978,000\n10,333,242,000\n12,462,005,000\n17,438,990,000\nShare Issued\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nOrdinary Shares Number\n6,792,669\n6,792,669\n5,969,783\n5,969,783\nPreferred Shares Number\n0\n822,887\n822,887\n822,887\nTreasury Shares Number\n0\n0\n0\n0", 'level_relation': '{\n "Total Revenue": {\n "Operating Revenue": {}\n },\n "Cost of Revenue": {},\n "Gross Profit": {},\n "Operating Expense": {\n "Selling General and Administrative": {\n "General & Administrative Expense": {},\n "Selling & Marketing Expense": {}\n },\n "Research & Development": {}\n },\n "Operating Income": {},\n "Net Non Operating Interest Income Expense": {\n "Interest Income Non Operating": {},\n "Interest Expense Non Operating": {}\n },\n "Pretax Income": 
{},\n "Tax Provision": {},\n "Net Income Common Stockholders": {\n "Net Income": {\n "Net Income Including Non-Controlling Interests": {\n "Net Income Continuous Operations": {}\n },\n "Minority Interests": {}\n },\n "Preferred Stock Dividends": {},\n "Otherunder Preferred Stock Dividend": {}\n },\n "Diluted NI Available to Com Stockholders": {},\n "Basic EPS": {},\n "Diluted EPS": {},\n "Basic Average Shares": {},\n "Diluted Average Shares": {},\n "Total Operating Income as Reported": {},\n "Total Expenses": {},\n "Net Income from Continuing & Discontinued Operation": {},\n "Normalized Income": {},\n "Interest Income": {},\n "Interest Expense": {},\n "Net Interest Income": {},\n "EBIT": {},\n "EBITDA": {},\n "Reconciled Cost of Revenue": {},\n "Reconciled Depreciation": {},\n "Net Income from Continuing Operation Net Minority Interest": {},\n "Total Unusual Items Excluding Goodwill": {},\n "Total Unusual Items": {},\n "Normalized EBITDA": {},\n "Tax Rate for Calcs": {},\n "Tax Effect of Unusual Items": {}\n}&&&&{\n "Total Revenue": {\n "Operating Revenue": {}\n },\n "Cost of Revenue": {},\n "Gross Profit": {},\n "Operating Expense": {\n "Selling General and Administrative": {\n "General & Administrative Expense": {},\n "Selling & Marketing Expense": {}\n },\n "Research & Development": {}\n },\n "Operating Income": {},\n "Net Non Operating Interest Income Expense": {\n "Interest Income Non Operating": {},\n "Interest Expense Non Operating": {},\n "Total Other Finance Cost": {}\n },\n "Pretax Income": {},\n "Tax Provision": {},\n "Net Income Common Stockholders": {\n "Net Income": {\n "Net Income Including Non-Controlling Interests": {\n "Net Income Continuous Operations": {}\n },\n "Minority Interests": {}\n }\n },\n "Basic EPS": {},\n "Diluted EPS": {},\n "Basic Average Shares": {},\n "Diluted Average Shares": {},\n "Total Operating Income as Reported": {},\n "Total Expenses": {},\n "Net Income from Continuing & Discontinued Operation": {},\n "Normalized Income": 
{},\n "Interest Income": {},\n "Interest Expense": {},\n "Net Interest Income": {},\n "EBIT": {},\n "EBITDA": {},\n "Reconciled Cost of Revenue": {},\n "Reconciled Depreciation": {},\n "Net Income from Continuing Operation Net Minority Interest": {},\n "Total Unusual Items Excluding Goodwill": {},\n "Total Unusual Items": {},\n "Normalized EBITDA": {},\n "Tax Rate for Calcs": {},\n "Tax Effect of Unusual Items": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash": {},\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Assets Held for Sale Current": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n "Tradeand Other Payables Non Current": {},\n "Other Non Current Liabilities": {}\n }\n },\n "Total Equity Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n 
"Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {},\n "Other Equity Interest": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n "Tradeand Other Payables Non Current": {},\n "Other Non Current Liabilities": {}\n }\n },\n "Total Equity 
Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n "Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n "Tradeand Other Payables Non Current": {},\n "Other Non 
Current Liabilities": {}\n }\n },\n "Total Equity Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n "Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}&&&&{\n "Total Assets": {\n "Current Assets": {\n "Cash, Cash Equivalents & Short Term Investments": {\n "Cash And Cash Equivalents": {\n "Cash Equivalents": {}\n },\n "Other Short Term Investments": {}\n },\n "Inventory": {\n "Raw Materials": {},\n "Work in Process": {},\n "Finished Goods": {},\n "Inventories Adjustments Allowances": {}\n },\n "Prepaid Assets": {},\n "Other Current Assets": {}\n },\n "Total non-current assets": {\n "Net PPE": {\n "Gross PPE": {\n "Properties": {},\n "Land And Improvements": {},\n "Buildings And Improvements": {},\n "Machinery Furniture Equipment": {},\n "Other Properties": {},\n "Construction in Progress": {}\n },\n "Accumulated Depreciation": {}\n },\n "Goodwill And Other Intangible Assets": {\n "Goodwill": {},\n "Other Intangible Assets": {}\n },\n "Defined Pension Benefit": {},\n "Other Non Current Assets": {}\n }\n },\n "Total Liabilities Net Minority Interest": {\n "Current Liabilities": {\n "Current Provisions": {},\n "Current Debt And Capital Lease Obligation": {\n "Current Debt": {},\n "Current Capital Lease Obligation": {}\n },\n "Other Current Liabilities": {}\n },\n "Total Non Current Liabilities Net Minority Interest": {\n "Long Term Provisions": {},\n "Long Term Debt And Capital Lease Obligation": {\n "Long Term Debt": {},\n "Long Term Capital Lease Obligation": {}\n },\n 
"Tradeand Other Payables Non Current": {},\n "Other Non Current Liabilities": {}\n }\n },\n "Total Equity Gross Minority Interest": {\n "Stockholders\' Equity": {\n "Capital Stock": {\n "Preferred Stock": {},\n "Common Stock": {}\n },\n "Additional Paid in Capital": {},\n "Retained Earnings": {}\n },\n "Minority Interest": {}\n },\n "Total Capitalization": {},\n "Preferred Stock Equity": {},\n "Common Stock Equity": {},\n "Capital Lease Obligations": {},\n "Net Tangible Assets": {},\n "Working Capital": {},\n "Invested Capital": {},\n "Tangible Book Value": {},\n "Total Debt": {},\n "Share Issued": {},\n "Ordinary Shares Number": {},\n "Preferred Shares Number": {},\n "Treasury Shares Number": {}\n}', 'unit': 'KRW(千)'}
# dataToSql(conn,cursor,ipo_data)
...@@ -115,69 +115,155 @@ class CurrencyRate(object): ...@@ -115,69 +115,155 @@ class CurrencyRate(object):
if __name__ == '__main__': if __name__ == '__main__':
result_list1 = [ result_list1 = [
[ ['人民币','CNY'],
'人民币', ['港元','HKD'],
'CNY'], ['台币','TWD'],
[ ['欧元','EUR'],
'美元', ['美元','USD'],
'USD'], ['英镑','GBP'],
[ ['澳元','AUD'],
'欧元', ['韩元','KRW'],
'EUR'], ['日元','JPY'],
[ ['澳元','AUD'],
'瑞士法郎', ['阿尔巴尼亚列克','ALL'],
'CHF'], ['阿尔及利亚第纳尔','DZD'],
[ ['阿根廷比索','ARS'],
'加元', ['阿鲁巴岛弗罗林','AWG'],
'CAD'], ['澳元','AUD'],
[ ['埃及镑','EGP'],
'波兰兹罗提', ['埃塞俄比亚比尔','ETB'],
'PLN'], ['澳门元','MOP'],
[ ['阿曼里亚尔','OMR'],
'英镑', ['阿联酋迪拉姆','AED'],
'GBP'], ['巴哈马元','BSD'],
[ ['巴林第纳尔','BHD'],
'澳元', ['巴巴多斯元','BBD'],
'AUD'], ['白俄罗斯卢布','BYR'],
[ ['伯利兹元','BZD'],
'泰铢', ['百慕大元','BMD'],
'THB'], ['不丹卢比','BTN'],
[ ['玻利维亚诺','BOB'],
'沙特里亚尔', ['博茨瓦纳普拉','BWP'],
'SAR'], ['巴西里亚伊','BRL'],
[ ['保加利亚列瓦','BGN'],
'巴西里亚伊', ['布隆迪法郎','BIF'],
'BRL'], ['冰岛克朗','ISK'],
[ ['巴基斯坦卢比','PKR'],
'新土耳其新里拉', ['巴拿马巴尔博亚','PAB'],
'TRY'], ['巴布亚新几内亚基那','PGK'],
[ ['巴拉圭瓜拉尼','PYG'],
'新台币', ['波兰兹罗提','PLN'],
'TWD'], ['朝鲜圆','KPW'],
[ ['多哥非洲共同体法郎','XOF'],
'印度卢比', ['丹麦克朗','DKK'],
'INR'], ['多米尼加比索','DOP'],
[ ['俄罗斯卢布','RUB'],
'墨西哥比索', ['佛得角埃斯库多','CVE'],
'MXN'], ['福克兰群岛镑','FKP'],
[ ['斐济元','FJD'],
'日元', ['菲律宾比索','PHP'],
'JPY'], ['港元','HKD'],
[ ['刚果中非共同体法郎','XAF'],
'瑞典克朗', ['哥伦比亚比索','COP'],
'SEK'], ['哥斯达黎加科朗','CRC'],
[ ['古巴比索','CUP'],
'韩元', ['格林纳达东加勒比元','XCD'],
'KRW'], ['冈比亚达拉西','GMD'],
[ ['圭亚那元','GYD'],
'俄罗斯卢布', ['韩元','KRW'],
'RUB'], ['海地古德','HTG'],
[ ['洪都拉斯伦皮拉','HNL'],
'新加坡元', ['哈萨克斯坦腾格','KZT'],
'SGD'], ['柬埔寨利尔斯','KHR'],
[ ['加拿大元','CAD'],
'港币', ['捷克克朗','CZK'],
'HKD']] ['吉布提法郎','DJF'],
['几内亚法郎','GNF'],
['科摩罗法郎','KMF'],
['克罗地亚库纳','HRK'],
['肯尼亚先令','KES'],
['科威特第纳尔','KWD'],
['卡塔尔利尔','QAR'],
['老挝基普','LAK'],
['拉脱维亚拉图','LVL'],
['黎巴嫩镑','LBP'],
['莱索托洛提','LSL'],
['利比里亚元','LRD'],
['利比亚第纳尔','LYD'],
['立陶宛里塔斯','LTL'],
['列斯荷兰盾','ANG'],
['罗马尼亚新列伊','RON'],
['卢旺达法郎','RWF'],
['美元','USD'],
['孟加拉塔卡','BDT'],
['马其顿第纳尔','MKD'],
['马拉维克瓦查','MWK'],
['马来西亚林吉特','MYR'],
['马尔代夫卢非亚','MVR'],
['毛里塔尼亚乌吉亚','MRO'],
['毛里求斯卢比','MUR'],
['墨西哥比索','MXN'],
['摩尔多瓦列伊','MDL'],
['蒙古图格里克','MNT'],
['摩洛哥道拉姆','MAD'],
['缅甸元','MMK'],
['秘鲁索尔','PEN'],
['纳米比亚元','NAD'],
['尼泊尔卢比','NPR'],
['尼加拉瓜科多巴','NIO'],
['尼日利亚奈拉','NGN'],
['挪威克朗','NOK'],
['南非兰特','ZAR'],
['欧元','EUR'],
['日元','JPY'],
['瑞典克朗','SEK'],
['瑞士法郎','CHF'],
['萨尔瓦多科朗','SVC'],
['萨摩亚塔拉','WST'],
['圣多美多布拉','STD'],
['沙特阿拉伯里亚尔','SAR'],
['塞舌尔法郎','SCR'],
['塞拉利昂利昂','SLL'],
['所罗门群岛元','SBD'],
['索马里先令','SOS'],
['斯里兰卡卢比','LKR'],
['圣赫勒拿群岛磅','SHP'],
['斯威士兰里兰吉尼','SZL'],
['台币','TWD'],
['土耳其新里拉','TRY'],
['太平洋法郎','XPF'],
['坦桑尼亚先令','TZS'],
['泰国铢','THB'],
['汤加潘加','TOP'],
['特立尼达和多巴哥元','TTD'],
['突尼斯第纳尔','TND'],
['文莱元','BND'],
['危地马拉格查尔','GTQ'],
['乌克兰赫夫米','UAH'],
['乌拉圭新比索','UYU'],
['瓦努阿图瓦图','VUV'],
['越南盾','VND'],
['匈牙利福林','HUF'],
['新西兰元','NZD'],
['新加坡元','SGD'],
['叙利亚镑','SYP'],
['英镑','GBP'],
['印度卢比','INR'],
['印度尼西亚卢比(盾)','IDR'],
['伊朗里亚尔','IRR'],
['伊拉克第纳尔','IQD'],
['以色列镑','ILS'],
['牙买加元','JMD'],
['约旦第纳尔','JOD'],
['也门里亚尔','YER'],
['智利比索','CLP'],
['直布罗陀镑','GIP'],
['铜价盎司','XCP'],
['金价盎司','XAU'],
['钯价盎司','XPD'],
['铂价盎司','XPT'],
['银价盎司','XAG']
]
result_list2 = [ result_list2 = [
'USD', 'USD',
'CNY'] 'CNY']
......
# -*- coding: utf-8 -*-
import datetime
from selenium.webdriver.support.wait import WebDriverWait
import time
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
from requests.packages import urllib3
urllib3.disable_warnings()
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import configparser
import redis
class CurrencyRate(object):
    """Scrape currency conversion rates from waihui999.com and post them to the sync service.

    ``getRate`` reads the module-level ``result_list1`` (``[name, code]`` pairs)
    and ``result_list2`` (target currency codes) that the script entry point
    defines, so both must exist before it is called.
    """

    # Upper bound, in seconds, for any single HTTP request so that one stalled
    # connection cannot hang the whole run.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Load ``config.ini`` and open the Redis connection described in it."""
        self.config = configparser.ConfigParser()
        # Connection settings are read from config.ini in the working directory.
        self.config.read('config.ini')
        self.r = redis.Redis(host=self.config.get('redis', 'host'),
                             port=self.config.get('redis', 'port'),
                             password=self.config.get('redis', 'pass'), db=6)
        # No Selenium driver is started here: pages are fetched with requests
        # (see resHtml). get_webdriver() is kept for callers that want one.

    def get_webdriver(self):
        """Build a Chrome WebDriver from the ``selenium`` section of the config.

        Returns:
            The started ``webdriver.Chrome`` instance.
        """
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        chrome_options.add_argument("--start-maximized")
        chrome_options.binary_location = self.config.get('selenium', 'binary_location')
        executable_path = self.config.get('selenium', 'chrome_driver')
        driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
        return driver

    def resHtml(self, url):
        """GET ``url`` with browser-like headers and return the body decoded as UTF-8.

        Args:
            url: Page address to fetch.

        Returns:
            The response text.

        Raises:
            requests.RequestException: on connection errors or when the request
                exceeds ``REQUEST_TIMEOUT`` seconds.
        """
        header = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Cookie': 'PHPSESSID=vii39tk7djrlebgqbdson80oj0; Hm_lvt_431865d92b681d30c9b5170d5d64ea44=1693794941; Hm_lpvt_431865d92b681d30c9b5170d5d64ea44=1693795260',
            'Host': 'www.waihui999.com',
            'Pragma': 'no-cache',
            'Referer': 'https://www.waihui999.com/HUFcny/',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"'
        }
        res = requests.get(url, headers=header, timeout=self.REQUEST_TIMEOUT)
        res.encoding = 'utf-8'
        return res.text

    @staticmethod
    def _parse_rate(money):
        """Turn the scraped rate text into the value stored in the payload.

        Args:
            money: Text of the rate cell.

        Returns:
            The string ``'1'`` unchanged, a float rounded to four decimals for
            any other numeric text, or ``None`` when the text is not a number.
        """
        if money == '1':
            return money
        try:
            return round(float(money), 4)
        except (TypeError, ValueError):
            return None

    def getRate(self):
        """Collect the rate of every listed currency against each target and post the batch.

        For each ``[name, code]`` pair in ``result_list1`` the page
        ``https://www.waihui999.com/{code}{target}/`` is fetched for every
        target in ``result_list2``; the first target fills ``rateToUSD`` and
        any later one fills ``rateToCNY``. Currencies missing either value are
        left out. The collected list is then POSTed as JSON to the sync
        endpoint.
        """
        rateList = []
        for currency_name, currency in result_list1:
            to_USD = ''
            to_CNY = ''
            for i, result2 in enumerate(result_list2):
                url = f'https://www.waihui999.com/{currency}{result2}/#100'.lower()
                try:
                    # Pause between requests to stay gentle on the site.
                    time.sleep(2)
                    html = self.resHtml(url)
                    doc_resp = pq(html)
                    money = doc_resp('div[class="ft"]>table>tbody>tr>td:nth-child(2)').text()
                    money_result = self._parse_rate(money)
                    if money_result is None:
                        # Nothing usable on the page; leave this target empty.
                        continue
                    if i == 0:
                        to_USD = money_result
                    else:
                        to_CNY = money_result
                except Exception as e:
                    print('请求异常!!')
                    # Show the cause so failed pairs can be diagnosed.
                    print(e)
            now_time = datetime.datetime.now().strftime('%Y-%m-%d')
            if to_USD == '' or to_CNY == '':
                continue
            result_dict = {
                '币种': currency_name,
                '币简称': currency,
                '对美元': to_USD,
                '对人民币': to_CNY,
                '更新时间': now_time}
            print(result_dict)
            rateList.append({
                "currencyName": currency_name,
                "currencyCode": currency,
                "rateToUSD": to_USD,
                "rateToCNY": to_CNY,
                "createDate": now_time
            })
        market_url = 'http://114.115.236.206:8088/sync/currencyRate'
        try:
            resp = requests.post(market_url, json=rateList, timeout=self.REQUEST_TIMEOUT)
            # Report success only after the status code has been checked.
            if resp.status_code == 200:
                print("请求成功")
                print(resp.content)
            else:
                print("请求失败")
        except Exception as e:
            print(e)
if __name__ == '__main__':
    # [display name, currency code] pairs to collect; getRate() reads this
    # module-level name. Each code appears once so that no pair is fetched
    # and posted twice (the frequently used currencies stay at the top).
    result_list1 = [
        ['人民币','CNY'],
        ['港元','HKD'],
        ['台币','TWD'],
        ['欧元','EUR'],
        ['美元','USD'],
        ['英镑','GBP'],
        ['澳元','AUD'],
        ['韩元','KRW'],
        ['日元','JPY'],
        ['阿尔巴尼亚列克','ALL'],
        ['阿尔及利亚第纳尔','DZD'],
        ['阿根廷比索','ARS'],
        ['阿鲁巴岛弗罗林','AWG'],
        ['埃及镑','EGP'],
        ['埃塞俄比亚比尔','ETB'],
        ['澳门元','MOP'],
        ['阿曼里亚尔','OMR'],
        ['阿联酋迪拉姆','AED'],
        ['巴哈马元','BSD'],
        ['巴林第纳尔','BHD'],
        ['巴巴多斯元','BBD'],
        ['白俄罗斯卢布','BYR'],
        ['伯利兹元','BZD'],
        ['百慕大元','BMD'],
        ['不丹卢比','BTN'],
        ['玻利维亚诺','BOB'],
        ['博茨瓦纳普拉','BWP'],
        ['巴西里亚伊','BRL'],
        ['保加利亚列瓦','BGN'],
        ['布隆迪法郎','BIF'],
        ['冰岛克朗','ISK'],
        ['巴基斯坦卢比','PKR'],
        ['巴拿马巴尔博亚','PAB'],
        ['巴布亚新几内亚基那','PGK'],
        ['巴拉圭瓜拉尼','PYG'],
        ['波兰兹罗提','PLN'],
        ['朝鲜圆','KPW'],
        ['多哥非洲共同体法郎','XOF'],
        ['丹麦克朗','DKK'],
        ['多米尼加比索','DOP'],
        ['俄罗斯卢布','RUB'],
        ['佛得角埃斯库多','CVE'],
        ['福克兰群岛镑','FKP'],
        ['斐济元','FJD'],
        ['菲律宾比索','PHP'],
        ['刚果中非共同体法郎','XAF'],
        ['哥伦比亚比索','COP'],
        ['哥斯达黎加科朗','CRC'],
        ['古巴比索','CUP'],
        ['格林纳达东加勒比元','XCD'],
        ['冈比亚达拉西','GMD'],
        ['圭亚那元','GYD'],
        ['海地古德','HTG'],
        ['洪都拉斯伦皮拉','HNL'],
        ['哈萨克斯坦腾格','KZT'],
        ['柬埔寨利尔斯','KHR'],
        ['加拿大元','CAD'],
        ['捷克克朗','CZK'],
        ['吉布提法郎','DJF'],
        ['几内亚法郎','GNF'],
        ['科摩罗法郎','KMF'],
        ['克罗地亚库纳','HRK'],
        ['肯尼亚先令','KES'],
        ['科威特第纳尔','KWD'],
        ['卡塔尔利尔','QAR'],
        ['老挝基普','LAK'],
        ['拉脱维亚拉图','LVL'],
        ['黎巴嫩镑','LBP'],
        ['莱索托洛提','LSL'],
        ['利比里亚元','LRD'],
        ['利比亚第纳尔','LYD'],
        ['立陶宛里塔斯','LTL'],
        ['列斯荷兰盾','ANG'],
        ['罗马尼亚新列伊','RON'],
        ['卢旺达法郎','RWF'],
        ['孟加拉塔卡','BDT'],
        ['马其顿第纳尔','MKD'],
        ['马拉维克瓦查','MWK'],
        ['马来西亚林吉特','MYR'],
        ['马尔代夫卢非亚','MVR'],
        ['毛里塔尼亚乌吉亚','MRO'],
        ['毛里求斯卢比','MUR'],
        ['墨西哥比索','MXN'],
        ['摩尔多瓦列伊','MDL'],
        ['蒙古图格里克','MNT'],
        ['摩洛哥道拉姆','MAD'],
        ['缅甸元','MMK'],
        ['秘鲁索尔','PEN'],
        ['纳米比亚元','NAD'],
        ['尼泊尔卢比','NPR'],
        ['尼加拉瓜科多巴','NIO'],
        ['尼日利亚奈拉','NGN'],
        ['挪威克朗','NOK'],
        ['南非兰特','ZAR'],
        ['瑞典克朗','SEK'],
        ['瑞士法郎','CHF'],
        ['萨尔瓦多科朗','SVC'],
        ['萨摩亚塔拉','WST'],
        ['圣多美多布拉','STD'],
        ['沙特阿拉伯里亚尔','SAR'],
        ['塞舌尔法郎','SCR'],
        ['塞拉利昂利昂','SLL'],
        ['所罗门群岛元','SBD'],
        ['索马里先令','SOS'],
        ['斯里兰卡卢比','LKR'],
        ['圣赫勒拿群岛磅','SHP'],
        ['斯威士兰里兰吉尼','SZL'],
        ['土耳其新里拉','TRY'],
        ['太平洋法郎','XPF'],
        ['坦桑尼亚先令','TZS'],
        ['泰国铢','THB'],
        ['汤加潘加','TOP'],
        ['特立尼达和多巴哥元','TTD'],
        ['突尼斯第纳尔','TND'],
        ['文莱元','BND'],
        ['危地马拉格查尔','GTQ'],
        ['乌克兰赫夫米','UAH'],
        ['乌拉圭新比索','UYU'],
        ['瓦努阿图瓦图','VUV'],
        ['越南盾','VND'],
        ['匈牙利福林','HUF'],
        ['新西兰元','NZD'],
        ['新加坡元','SGD'],
        ['叙利亚镑','SYP'],
        ['印度卢比','INR'],
        ['印度尼西亚卢比(盾)','IDR'],
        ['伊朗里亚尔','IRR'],
        ['伊拉克第纳尔','IQD'],
        ['以色列镑','ILS'],
        ['牙买加元','JMD'],
        ['约旦第纳尔','JOD'],
        ['也门里亚尔','YER'],
        ['智利比索','CLP'],
        ['直布罗陀镑','GIP'],
        ['铜价盎司','XCP'],
        ['金价盎司','XAU'],
        ['钯价盎司','XPD'],
        ['铂价盎司','XPT'],
        ['银价盎司','XAG']
    ]
    # Target currencies, in the order getRate() expects: USD first, then CNY.
    result_list2 = [
        'USD',
        'CNY']
    currenRate = CurrencyRate()
    currenRate.getRate()
    # Driver creation is commented out in CurrencyRate.__init__, so the
    # instance normally has no ``driver`` attribute; quit one only if present
    # instead of failing with AttributeError after the run has finished.
    driver = getattr(currenRate, 'driver', None)
    if driver is not None:
        driver.quit()
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
...@@ -50,11 +50,15 @@ class Shizhi(object): ...@@ -50,11 +50,15 @@ class Shizhi(object):
charset='utf8') charset='utf8')
cursor = conn.cursor() cursor = conn.cursor()
return conn,cursor return conn,cursor
def getCodeFromRedis(self):
    """Pop the next securities code from the ``NoticeEnterprise:shizhi_code`` Redis list.

    Returns:
        The popped value decoded from UTF-8 bytes to ``str``.

    Raises:
        ValueError: when the list is empty (``lpop`` returned ``None``).
    """
    raw_code = self.r.lpop('NoticeEnterprise:shizhi_code')
    if raw_code is None:
        # State the real condition instead of failing with an AttributeError
        # on ``None.decode``.
        raise ValueError('NoticeEnterprise:shizhi_code is empty')
    return raw_code.decode('utf-8')
def getmarketCap(self): def getmarketCap(self,securitiescode):
conn,cursor=self.conn11() conn,cursor=self.conn11()
try: try:
sql1 = """select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where category in ('4','5','6') """ # and stock_code = "SYNH" sql1 = f"select social_credit_code,securities_code,securities_short_name from sys_base_enterprise_ipo where securities_code='{securitiescode}' " # and stock_code = "SYNH"
cursor.execute(sql1) cursor.execute(sql1)
result_data = cursor.fetchall() result_data = cursor.fetchall()
except Exception as e: except Exception as e:
...@@ -75,21 +79,24 @@ class Shizhi(object): ...@@ -75,21 +79,24 @@ class Shizhi(object):
url = f'https://finance.yahoo.com/quote/{stock2}?p={stock2}' url = f'https://finance.yahoo.com/quote/{stock2}?p={stock2}'
try: try:
self.logger.info(f'正在采集:{url}') self.logger.info(f'正在采集:{url}')
# 设置页面加载超时时间为10秒
self.driver.set_page_load_timeout(60)
self.driver.get(url) self.driver.get(url)
# 等待页面加载完成 # 等待页面加载完成
wait = WebDriverWait(self.driver, 300) wait = WebDriverWait(self.driver, 60)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "body"))) wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
time.sleep(5) time.sleep(5)
doc_resp = pq(self.driver.page_source) doc_resp = pq(self.driver.page_source)
unit=doc_resp('div[id="quote-header-info"]>div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span') unit=doc_resp('div[id="quote-header-info"]>div:nth-child(2)>div:nth-child(1)>div:nth-child(2)>span')
currency = unit.text().split("Currency in ")[1] currency = unit.text().split("Currency in ")[1]
currency =currency.split("(")[0]
market_cap=doc_resp('td[data-test="MARKET_CAP-value"]') market_cap=doc_resp('td[data-test="MARKET_CAP-value"]')
marketcap=market_cap.text() marketcap=market_cap.text()
if marketcap and marketcap!='N/A': if marketcap and marketcap!='N/A':
# 获取当前时间 # 获取当前时间
current_time = datetime.datetime.now() current_time = datetime.datetime.now()
currentdate = current_time.strftime("%Y-%m-%d") currentdate = current_time.strftime("%Y-%m-%d")
print(f'信用代码:{social_credit_code} 股票代码:{stock} 币种:{currency} 市值:{marketcap} 日期:{currentdate}') self.logger.info(f'信用代码:{social_credit_code} 股票代码:{stock} 币种:{currency} 市值:{marketcap} 日期:{currentdate}')
# market_url = f'http://192.168.1.39:8088/sync/marketValue' # market_url = f'http://192.168.1.39:8088/sync/marketValue'
market_url = f'http://114.115.236.206:8088/sync/marketValue' market_url = f'http://114.115.236.206:8088/sync/marketValue'
param= { param= {
...@@ -103,26 +110,33 @@ class Shizhi(object): ...@@ -103,26 +110,33 @@ class Shizhi(object):
resp = requests.post(market_url,json=param) resp = requests.post(market_url,json=param)
# 检查响应状态码 # 检查响应状态码
if resp.status_code == 200: if resp.status_code == 200:
print("请求成功") self.logger.info("调用接口请求成功")
# 打印响应内容 # 打印响应内容
print(resp.content) self.logger.info(resp.content)
else: else:
print("请求失败") self.logger.info("调用接口请求失败")
except: except:
with open('雅虎财经-财务数据_发送错误ID.txt', 'a', encoding='utf8')as f: self.logger.info("调用接口请求失败")
f.write(stock + '\n')
except Exception as e: except Exception as e:
self.driver.close() self.logger.info('请求异常!重新打开浏览器')
self.driver.quit() self.driver.quit()
self.driver=self.get_webdriver() self.driver=self.get_webdriver()
print(e)
except Exception as e: except Exception as e:
print(e)
self.driver.close()
self.driver.quit() self.driver.quit()
self.driver=self.get_webdriver() self.driver=self.get_webdriver()
self.logger.info(f'{securitiescode}股票的市值采集结束')
if __name__ == '__main__': if __name__ == '__main__':
shizhi=Shizhi() shizhi=Shizhi()
shizhi.getmarketCap() # shizhi.getmarketCap()
\ No newline at end of file while True:
securitiescode=''
try:
securitiescode=shizhi.getCodeFromRedis()
shizhi.getmarketCap(securitiescode)
except Exception as e:
shizhi.logger.info("redis为空等待5分钟")
if securitiescode:
shizhi.r.rpush('NoticeEnterprise:shizhi_code',securitiescode)
else:
time.sleep(300)
\ No newline at end of file
import datetime
import time
import redis
import requests
import urllib3
from pyquery import PyQuery as pq
import json
from kafka import KafkaProducer
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# 国务院政策问答平台最新发布信息采集
def reqHtml(url,data,header):
    """POST ``data`` as a JSON body to ``url`` and return the response text.

    Args:
        url: Endpoint to call.
        data: JSON-serialisable payload.
        header: Request headers.

    Returns:
        The response body, or ``''`` when serialising or sending fails for
        any reason.
    """
    try:
        json_data = json.dumps(data)
        # Certificate verification is disabled, matching the warning
        # suppression at module level.
        response = requests.post(url, data=json_data, headers=header, verify=False, timeout=10)
        print(response.status_code)
        return response.text
    except Exception as e:
        # The '' result is all the caller sees, so print the cause here
        # rather than dropping it.
        print(e)
        return ''
def page_list(first_page=1, last_page=444):
    """Walk the policy list pages and pass every not-yet-collected item to ``detailpaser``.

    Args:
        first_page: First page number to request (inclusive).
        last_page: Last page number to request (inclusive). The defaults
            cover the same pages as the previously hard-coded range.

    Each list entry gets ``url`` (detail API endpoint) and ``sourceAddress``
    (public detail page) added before it is handed on. Entries whose
    ``sourceAddress`` is already in the Redis set ``IN-20230829-0146`` are
    skipped. Uses the module-level Redis client ``r``.
    """
    header = {
        'Host':'xcx.www.gov.cn',
        'Connection':'keep-alive',
        'Content-Length':'72',
        'x-tif-openid':'ojyj-41lGcemgsREMHBh1ac7iZUw',
        'x-tif-did':'pb5XUGL1Zm',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8379',
        'x-tif-sid':'e1436792814f1c6845af4d84cbc4ad9957',
        'Content-Type':'application/json',
        'xweb_xhr':'1',
        'dgd-pre-release':'0',
        'x-yss-page':'publicService/pages/policyQALibrary/index/index',
        'x-yss-city-code':'4400',
        'Accept':'*/*',
        'Sec-Fetch-Site':'cross-site',
        'Sec-Fetch-Mode':'cors',
        'Sec-Fetch-Dest':'empty',
        'Referer':'https://servicewechat.com/wxbebb3cdd9b331046/713/page-frame.html',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh'
    }
    url = 'https://xcx.www.gov.cn/ebus/gwymp/api/r/faqlib/GetPolicyList'
    durl = 'https://xcx.www.gov.cn/ebus/gwymp/api/r/faqlib/GetPolicy'
    for page in range(first_page, last_page + 1):
        print(f'采集第{page}页数据')
        payload = {"filterType": "", "departmentid": "", "keyword": "", "page_size": 15, "page": page}
        try:
            lhtml = reqHtml(url, payload, header)
            # A null list is treated like an empty page instead of breaking
            # the loop below.
            items = json.loads(lhtml)['data']['list'] or []
        except Exception as e:
            print(e)
            # Back off for a minute before moving on to the next page.
            time.sleep(60)
            continue
        for ss in items:
            policy_id = ss['id']
            sourceAddress = f'https://bmfw.www.gov.cn/zcdwpt/index.html#/detail?id={policy_id}'
            try:
                if r.sismember('IN-20230829-0146', sourceAddress):
                    print('信息已采集入库过')
                    continue
            except Exception as e:
                # Without the dedup check the item is skipped; print why.
                print(e)
                continue
            ss['url'] = durl
            ss['sourceAddress'] = sourceAddress
            detailpaser(ss)
def detailpaser(dmsg):
    """Fetch one policy's detail JSON and forward it to ``sendTokafka``.

    Args:
        dmsg: List entry carrying at least ``url`` (detail API endpoint) and
            ``id`` (policy id).

    Any failure is printed together with whatever response text had been
    received; nothing is raised to the caller.
    """
    hh = {
        'Host':'xcx.www.gov.cn',
        'Connection':'keep-alive',
        'Content-Length':'14',
        'x-tif-openid':'ojyj-41lGcemgsREMHBh1ac7iZUw',
        'x-tif-did':'pb5XUGL1Zm',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8379',
        'x-tif-sid':'e1436792814f1c6845af4d84cbc4ad9957',
        'Content-Type':'application/json',
        'xweb_xhr':'1',
        'dgd-pre-release':'0',
        'x-yss-page':'publicService/pages/policyQALibrary/detail/detail',
        'x-yss-city-code':'4400',
        'Accept':'*/*',
        'Sec-Fetch-Site':'cross-site',
        'Sec-Fetch-Mode':'cors',
        'Sec-Fetch-Dest':'empty',
        'Referer':'https://servicewechat.com/wxbebb3cdd9b331046/713/page-frame.html',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh'
    }
    # Bound before the try so the handler can always print it, even when the
    # failure happens before any response arrives.
    dhtml = ''
    try:
        durl = dmsg['url']
        payload = json.dumps({"id": str(dmsg['id'])})
        response = requests.post(durl, data=payload, headers=hh, verify=False, timeout=10)
        dhtml = response.text
        sendTokafka(json.loads(dhtml))
    except Exception as e:
        print(e)
        print(dhtml)
def sendTokafka(ddata):
    """Publish one policy record to Kafka and mark its URL as collected in Redis.

    Args:
        ddata: Parsed detail response; the record is read from
            ``ddata['data']`` (``title``, ``id``, ``content``,
            ``publishTime``, ``departmentName``).

    Raises:
        KeyError: when an expected field is missing.
        ValueError: when ``publishTime`` is not in ``%Y年%m月%d日`` form.

    The URL is added to the Redis set only after the broker has acknowledged
    the message, so a failed delivery is picked up again on the next run.
    Uses the module-level Redis client ``r``.
    """
    dd = ddata['data']
    title = dd['title']
    policy_id = dd['id']
    # The API's ``content`` field fills both payload slots.
    content = dd['content']
    contentWithTag = dd['content']
    publishDate = str(datetime.datetime.strptime(dd['publishTime'], '%Y年%m月%d日'))
    origin = dd['departmentName']
    sourceAddress = f'https://bmfw.www.gov.cn/zcdwpt/index.html#/detail?id={policy_id}'
    sid = '1696404919115825153'
    info_code = 'IN-20230829-0146'
    aa_dict = {
        'content': content,
        'contentWithTag': contentWithTag,
        'id': '',
        'sid': sid,
        'origin': origin,
        'publishDate': publishDate,
        'sourceAddress': sourceAddress,
        'title': title,
        'source': 'python定制采集',
        'type': ''
    }
    producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
    try:
        kafka_result = producer.send("crawlerInfo", json.dumps(aa_dict, ensure_ascii=False).encode('utf8'))
        # send() only queues the record; block until the broker answers so a
        # delivery error lands in the handler below rather than being lost.
        kafka_result.get(timeout=10)
        r.sadd(info_code, sourceAddress)
        print('发送kafka成功!')
    except Exception as e:
        print(e)
    finally:
        producer.close()
if __name__ == '__main__':
    # Module-level Redis handle (db 5); page_list() and sendTokafka() reach it
    # through the global name ``r``.
    # NOTE(review): host and password are hard-coded here - consider moving
    # them to configuration.
    r = redis.Redis(
        host='114.115.236.206',
        port=6379,
        password='clbzzsn',
        db=5,
    )
    page_list()
from datetime import datetime
from urllib.parse import urljoin
import redis
import requests
import urllib3
from bs4 import BeautifulSoup
from kafka import KafkaProducer
from pyquery import PyQuery as pq
import json
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def reqHtml(url):
    """GET ``url`` with browser-like headers and return the response text.

    Args:
        url: Address to fetch.

    Returns:
        The response body, or ``''`` when the request fails for any reason.
    """
    header = {
        'Accept':'*/*',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh;q=0.9',
        'Cache-Control':'no-cache',
        'Connection':'keep-alive',
        'Cookie':'__jsluid_s=d344baee4a1e027b745a48855ff6539d',
        'Host':'www.miit.gov.cn',
        'Pragma':'no-cache',
        'Referer':'https://www.miit.gov.cn/zwgk/zcjd/index.html',
        'Sec-Fetch-Dest':'empty',
        'Sec-Fetch-Mode':'cors',
        'Sec-Fetch-Site':'same-origin',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'X-Requested-With':'XMLHttpRequest',
        'sec-ch-ua':'"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile':'?0',
        'sec-ch-ua-platform':'"Windows"'
    }
    try:
        # Certificate verification is disabled, matching the warning
        # suppression at module level.
        response = requests.get(url, headers=header, verify=False, timeout=10)
        code = response.status_code
        print(f'url:{url} 信息的采集状态码{code}')
        return response.text
    except Exception as e:
        # The '' result is all the caller sees, so print the cause here
        # rather than dropping it.
        print(e)
        return ''
# Convert the relative addresses inside an HTML fragment to absolute ones.
def paserUrl(html,listurl):
    """Parse ``html`` and resolve link targets against ``listurl``.

    Args:
        html: HTML text to parse.
        listurl: Base URL that relative addresses are joined to.

    Returns:
        The ``BeautifulSoup`` tree in which every ``<a>``/``<img>`` tag has
        had its ``href`` (or, when it has no ``href``, its ``src``) replaced
        by the absolute URL.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup.find_all(['a', 'img']):
        attrs = tag.attrs
        # href takes precedence; src is only rewritten on tags without href.
        if 'href' in attrs:
            target = 'href'
        elif 'src' in attrs:
            target = 'src'
        else:
            continue
        tag[target] = urljoin(listurl, tag[target])
    return soup
def page_list(first_page=1, last_page=26):
    """Walk the MIIT policy-interpretation list pages and pass new items to ``detail``.

    Args:
        first_page: First page number to request (inclusive).
        last_page: Last page number to request (inclusive). The defaults
            cover the same pages as the previously hard-coded range.

    Items whose URL is already in the Redis set ``IN-20230829-0199`` are
    skipped. Uses the module-level Redis client ``r``.
    """
    aurl = 'https://www.miit.gov.cn/api-gateway/jpaas-publish-server/front/page/build/unit?webId=8d828e408d90447786ddbe128d495e9e&pageId=1b56e5adc362428299dfc3eb444fe23a&parseType=buildstatic&pageType=column&tagId=右侧内容&tplSetId=209741b2109044b5b7695700b2bec37e&paramJson={"pageNo":[i],"pageSize":"24"}'
    for page in range(first_page, last_page + 1):
        print(f"采集到第{page}页!!")
        url = aurl.replace('[i]', str(page))
        html = reqHtml(url)
        try:
            html = json.loads(html)['data']['html']
        except Exception as e:
            # reqHtml returns '' on failure; skip this page instead of letting
            # the JSON error end the whole crawl.
            print(e)
            continue
        soup = paserUrl(html, 'https://www.miit.gov.cn/zwgk/zcjd/index.html')
        doc = pq(str(soup.prettify()))
        for item in doc('li[class="cf"]'):
            ldoc = pq(item)
            title = ldoc('a').text()
            detail_url = ldoc('a').attr('href')
            try:
                if r.sismember('IN-20230829-0199', detail_url):
                    print(f'信息已采集入库{title}')
                    continue
            except Exception as e:
                # Without the dedup check the item is skipped; print why.
                print(e)
                continue
            publishdate = ldoc('span').text()
            dmsg = {
                'title': title,
                'url': detail_url,
                'publishdate': publishdate
            }
            print(f'列表信息: title:{title} url:{detail_url} time:{publishdate}')
            detail(dmsg)
def detail(dmsg):
    """Download one article page, extract its body and forward it to ``sendTokafka``.

    Args:
        dmsg: List entry with ``url``, ``title`` and ``publishdate``.

    The body is the first ``div#con_con`` element; when it has no text, the
    tagged HTML is used as the plain content too. Any failure is printed and
    nothing is raised to the caller.
    """
    try:
        durl, title, publishTime = dmsg['url'], dmsg['title'], dmsg['publishdate']
        page = paserUrl(reqHtml(durl), durl)
        body = page.select('div[id="con_con"]')[0]
        tagged = body.prettify()
        # Fall back to the tagged HTML when the element carries no text.
        plain = body.text or tagged
        sendTokafka({
            'title': title,
            'publishTime': publishTime,
            'sourceAddress': durl,
            'content': plain,
            'contentWithTag': tagged,
            'origin': '中华人民共和国工业和信息化部-政务公开-政策解读',
        })
    except Exception as err:
        print(err)
def sendTokafka(ddata):
    """Publish one article record to Kafka and mark its URL as collected in Redis.

    Args:
        ddata: Record with ``title``, ``content``, ``contentWithTag``,
            ``publishTime`` (``%Y-%m-%d``), ``sourceAddress`` and ``origin``.

    Raises:
        KeyError: when an expected field is missing.
        ValueError: when ``publishTime`` does not match ``%Y-%m-%d``.

    The URL is added to the Redis set only after the broker has acknowledged
    the message, so a failed delivery is picked up again on the next run.
    Uses the module-level Redis client ``r``.
    """
    title = ddata['title']
    content = ddata['content']
    contentWithTag = ddata['contentWithTag']
    publishTime = ddata['publishTime']
    sourceAddress = ddata['sourceAddress']
    origin = ddata['origin']
    publishDate = str(datetime.strptime(publishTime, '%Y-%m-%d'))
    sid = '1696452056436424706'
    info_code = 'IN-20230829-0199'
    aa_dict = {
        'content': content,
        'contentWithTag': contentWithTag,
        'id': '',
        'sid': sid,
        'origin': origin,
        'publishDate': publishDate,
        'sourceAddress': sourceAddress,
        'title': title,
        'source': 'python定制采集',
        'type': ''
    }
    producer = KafkaProducer(bootstrap_servers=['114.115.159.144:9092'])
    try:
        kafka_result = producer.send("crawlerInfo", json.dumps(aa_dict, ensure_ascii=False).encode('utf8'))
        # send() only queues the record; block until the broker answers so a
        # delivery error lands in the handler below rather than being lost.
        kafka_result.get(timeout=10)
        r.sadd(info_code, sourceAddress)
        print('发送kafka结束')
    except Exception as e:
        print(e)
        print('发送kafka异常!')
    finally:
        producer.close()
if __name__ == '__main__':
    # Module-level Redis handle (db 5); page_list() and sendTokafka() reach it
    # through the global name ``r``.
    # NOTE(review): host and password are hard-coded here - consider moving
    # them to configuration.
    r = redis.Redis(
        host='114.115.236.206',
        port=6379,
        password='clbzzsn',
        db=5,
    )
    page_list()
    print('采集结束===')
\ No newline at end of file
主要采集了两个网站
1、中国政府网-最新发布:数据从微信小程序接口获取,与网站上的数据一致 http://bmfw.www.gov.cn/zcdwpt/index.html#/
2、中华人民共和国工业和信息化部-政务公开-政策解读 https://www.miit.gov.cn/zwgk/zcjd/index.html
主要是获取对应的资讯信息
import time
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
from pyquery import PyQuery as pq
import json
import re
from openpyxl import Workbook
import pandas as pd
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
class Chaoxing(object):
    """Selenium-driven scraper for qikan.chaoxing.com journal search results.

    ``pageList`` walks the paginated result list of a fixed search query,
    opens every article's detail page and appends the extracted fields to the
    Excel file named by the module-level ``filename``.
    """

    def __init__(self):
        # NOTE(review): hard-coded session cookie; it is only sent by
        # resGetHtml, the Selenium path relies on the Chrome profile instead.
        self.cookie = (
            '__dxca=d1027170-19cf-4c12-850d-a2491d365f21; cookiecheck=true; AID_dsr=7209; msign_dsr=1693016311917; search_uuid=93063aab%2d552d%2d44a5%2d97dd%2d69291e006407; mqs=19e3b526c24d63961c594850f09cc91fb2684007674e38588d7c2e084ce6404e6d58f013518d47b7966204b865bf74394b6aea37199a56bd226a4f6e20ecf6dd7a56bfaeb2a1db23e4446bf79392c19a3b7ac3c5ffa4f00ee4b101eb614dfa827ff7dd70f5c2e05994258c957fef2435; qkindustry=; lv=0; chaoxinguser=1; uname=""; _uid=294788899; uf=f9866f9a46b70622f1364c77ecd4d7131ab59cdd60bf892e7a4fb73fc6375849f0d3025d42acdc547f3affbfde451bc49b0594e13f4b452fbdd6b93a43158491db9a01fd759e1b9870b8e6462cc1afdfe506c5241298ab1b; _d=1693016401839; UID=294788899; vc=F998B685897E257FBE4CBDEB36BC4781; vc2=CEE00B2BE090389C97B44000FEFC16C6; vc3=H4qn7owTnyWvR0ubBUMWGf4zX3U0pgoj59Bk4URCwnrBZc1M4ywPJxorV%2B2PhJeMN6sb2DBo7XuPQ%2BEpdtQuXWg1XLj8Z2ZYbFY0X2fYHunmK9tjFteI8BN1V0nXUCUOxAIkpIBcwaPx3D%2BXqRilmQRaYTS66L7i2VoD9GfiQjQ%3D312e1dbb9c6d3a7eea186e0579ee47b6; cx_p_token=17e6b9b56f8636b05c074933cc668ced; xxtenc=ec455945739f206ee2b2e416e997970e; DSSTASH_LOG=C_0-UN_0-US_294788899-T_1693016401841; duxiu=userName%5fdsr%2c%3dzhizhentest%2c%21userid%5fdsr%2c%3d21498%2c%21char%5fdsr%2c%3d%2c%21metaType%2c%3d260%2c%21dsr%5ffrom%2c%3d0%2c%21logo%5fdsr%2c%3dlogo0408%2ejpg%2c%21logosmall%5fdsr%2c%3dsmall0408%2ejpg%2c%21title%5fdsr%2c%3d%u8d85%u661f%u53d1%u73b0%2c%21url%5fdsr%2c%3d%2c%21compcode%5fdsr%2c%3d%2c%21province%5fdsr%2c%3d%u5176%u5b83%2c%21readDom%2c%3d0%2c%21isdomain%2c%3d17153%2c%21showcol%2c%3d0%2c%21hu%2c%3d0%2c%21uscol%2c%3d0%2c%21isfirst%2c%3d0%2c%21istest%2c%3d1%2c%21cdb%2c%3d0%2c%21og%2c%3d0%2c%21ogvalue%2c%3d0%2c%21testornot%2c%3d1%2c%21remind%2c%3d0%2c%21datecount%2c%3d2315%2c%21userIPType%2c%3d2%2c%21lt%2c%3d0%2c%21ttt%2c%3dfxlogin%2echaoxing%2c%21enc%5fdsr%2c%3dFCCD81D85DEC4A17B4F4F17DFD4F5515; historySearchWord=%25E5%2586%25B6%25E9%2587%2591%252C6%252C0831%253B%25E4%25BA%25A4%25E4%25BA%2592%25E6%2596%25B0%25E9%2597%25BB%252C1%252C0826; '
            'JSESSIONID=E6662D3AE489E4BFB69B272D6216A15E.fx4210'
        )
        self.driver = self.webdriver()

    def resGetHtml(self, url):
        """Fetch ``url`` with requests, sending the stored cookie.

        Returns:
            The response body as text, or '' when the request raises.
        """
        header = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Cookie': self.cookie,
            'Host': 'qikan.chaoxing.com',
            'Pragma': 'no-cache',
            'Referer': 'https://qikan.chaoxing.com/searchjour?sw=%E5%86%B6%E9%87%91&topsearch=0&size=50',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"'
        }
        try:
            response = requests.get(url, headers=header, verify=False, timeout=10)
            print(f"请求返回的code码{response.status_code}")
            html = response.text
            print(html)
        except Exception as e:
            # Report the failure instead of hiding it; callers still get ''.
            print(e)
            html = ''
        return html

    def webdriver(self):
        """Start and return a Chrome WebDriver bound to a local user profile.

        Inside this method the name ``webdriver`` resolves to the imported
        selenium module, not to this method.
        """
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        chrome_options.add_argument("--start-maximized")
        # chrome_options.add_argument('--headless')
        # Point Chrome at an existing user profile directory.
        profile_path = r'C:\Users\WIN10\AppData\Local\Google\Chrome\User Data\Default'
        chrome_options.add_argument(f'--user-data-dir={profile_path}')
        chrome_options.binary_location = r'D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe'
        executable_path = r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
        driver = webdriver.Chrome(options=chrome_options, executable_path=executable_path)
        return driver

    def paserUrl(self, html, listurl):
        """Parse ``html`` and make every <a> href / <img> src absolute.

        Args:
            html: markup to parse.
            listurl: base URL that relative addresses are resolved against.

        Returns:
            The BeautifulSoup tree with rewritten addresses.
        """
        soup = BeautifulSoup(html, 'html.parser')
        for link in soup.find_all(['a', 'img']):
            # A tag carrying both attributes only has its href rewritten.
            if 'href' in link.attrs:
                link['href'] = urljoin(listurl, link['href'])
            elif 'src' in link.attrs:
                link['src'] = urljoin(listurl, link['src'])
        return soup

    def rmTagattr(self, html, url):
        """Return prettified ``html`` with every attribute except ``src`` removed.

        Addresses are made absolute first (see paserUrl); after that each tag
        keeps at most its ``src`` attribute, so links lose their ``href``.
        """
        soup = self.paserUrl(html, url)
        for tag in soup.find_all(True):
            tag.attrs = {key: value for key, value in tag.attrs.items() if key == 'src'}
        return soup.prettify()

    def _loadPage(self, url):
        """Return the rendered source of ``url``, or None if loading failed.

        On failure the current browser window is closed and a fresh driver is
        started so the following pages can still be fetched.
        """
        try:
            self.driver.get(url)
            time.sleep(5)  # give the page time to render
            return self.driver.page_source
        except Exception as e:
            print(e)
            try:
                self.driver.close()
            except Exception as close_error:
                # The browser may already be gone; a new one is started anyway.
                print(close_error)
            self.driver = self.webdriver()
            return None

    @staticmethod
    def _cellText(row, index):
        """Return the text of the ``index``-th <td> of ``row`` without whitespace."""
        return re.sub(r'\s+', '', row(f"td:nth-child({index})").text())

    def pageList(self):
        """Iterate result pages 1-99 and process every listed article."""
        for i in range(1, 100):
            url = f'https://qikan.chaoxing.com/searchjour?sw=%E5%86%B6%E9%87%91&nosim=1&size=50&x=0_7209&pages={i}'
            print(f'采集第{i}页数据')
            html = self._loadPage(url)
            if html is None:
                # The page could not be loaded; there is nothing to parse.
                continue
            soup = self.paserUrl(html, url)
            doc = pq(soup.prettify())
            rows = pq(doc('table[class="listTable"]>tbody>tr'))
            for tr in rows:
                time.sleep(5)
                row = pq(tr)
                turl = row("td:nth-child(2)>a").attr('href')
                if not turl:
                    # Row without a detail link: no detail page to open.
                    continue
                detailmsg = {
                    'turl': turl,
                    'yearqi': self._cellText(row, 6),
                    'type': self._cellText(row, 7),
                    'yincount': self._cellText(row, 8),
                    'readcount': self._cellText(row, 9),
                }
                print(f"解析详情页面地址:{turl}")
                self.paserDetail(detailmsg)

    def paserDetail(self, detailmsg):
        """Open the article at ``detailmsg['turl']`` and save its fields.

        The fields read from the detail page are added to ``detailmsg`` in
        place and the record is appended to the Excel file. Nothing is written
        when the page cannot be loaded.
        """
        durl = detailmsg['turl']
        html = self._loadPage(durl)
        if html is None:
            return
        soup = self.paserUrl(html, durl)
        ddoc = pq(soup.prettify())
        title = ddoc('h1[class="F_titel"]').text()
        # Drop the superscript markers before reading the author names.
        ddoc('p[class="F_name"]>sup').empty()
        author = ddoc('p[class="F_name"]').text()
        # NOTE(review): the original also read table row 1 into a local named
        # `jigou` but never stored it; confirm whether it should be a column.
        detailmsg['title'] = title
        detailmsg['author'] = author
        # Second cell of table rows 2-7, in the column order of the output.
        rowFields = (
            ('source', 2),
            ('classnum', 3),
            ('classnav', 4),
            ('keyword', 5),
            ('jijin', 6),
            ('summ', 7),
        )
        for field, rowNo in rowFields:
            detailmsg[field] = ddoc.find(f'tr:nth-child({rowNo})>td:nth-child(2)').text()
        pdfurl = ddoc.find('a[class="pdfdown"]').attr('href')
        contentTag = ddoc.find('div[id="FtextCon"]')
        detailmsg['pdfurl'] = pdfurl
        detailmsg['content'] = contentTag.text()
        detailmsg['contentWithTag'] = self.rmTagattr(str(contentTag), durl)
        print(f"详情数据入口{pdfurl}")
        self.writerToExcel([detailmsg])

    def writerToExcel(self, detailList):
        """Append the records in ``detailList`` to the Excel file ``filename``.

        ``filename`` is a module-level name set by the script entry point; the
        file must already exist.
        """
        existing_data = pd.read_excel(filename)
        new_data = pd.DataFrame(data=detailList)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        combined_data = pd.concat([existing_data, new_data], ignore_index=True)
        combined_data.to_excel(filename, index=False)
        print('保存成功!!')
if __name__ == '__main__':
    # Chaoxing.writerToExcel reads this module-level name.
    filename = '超星期刊.xlsx'
    # Create an empty workbook so the first append has a file to read.
    Workbook().save(filename)
    spider = Chaoxing()
    spider.pageList()
# -*- coding: utf-8 -*-
import datetime
import time
import pymysql
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from pyquery import PyQuery as pq
from openpyxl import Workbook
import pandas as pd
class WanfangSpider(object):
    """Scraper for record lists and detail tables on kms.wanfangdata.com.cn.

    ``pageList`` collects title/link pairs from the search result pages and
    ``detailMsg`` reads one record's detail table and appends it to the Excel
    file named by the module-level ``filename``.
    """

    def __init__(self):
        pass

    def req(self, url):
        """GET ``url`` and return the body text, or '' on any failure.

        A failure is either a network error / timeout or a status code other
        than 200.
        """
        header = {
            "accept": "*/*",
            "connection": "Keep-Alive",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
        }
        try:
            # Without a timeout a stalled connection would block the crawl forever.
            res = requests.get(url, headers=header, timeout=30)
        except requests.RequestException as e:
            print(e)
            print('请求失败!')
            return ''
        if res.status_code == 200:
            text = res.text
            print('请求成功!')
        else:
            text = ''
            print('请求失败!')
        return text

    def paserUrl(self, html, listurl):
        """Parse ``html`` and make every <a> href / <img> src absolute.

        Args:
            html: markup to parse.
            listurl: base URL that relative addresses are resolved against.

        Returns:
            The BeautifulSoup tree with rewritten addresses.
        """
        soup = BeautifulSoup(html, 'html.parser')
        for link in soup.find_all(['a', 'img']):
            # A tag carrying both attributes only has its href rewritten.
            if 'href' in link.attrs:
                link['href'] = urljoin(listurl, link['href'])
            elif 'src' in link.attrs:
                link['src'] = urljoin(listurl, link['src'])
        return soup

    def pageList(self, start, end):
        """Collect the records listed on result pages ``start`` .. ``end - 1``.

        Returns:
            A list of dicts with the keys 'title' and 'turl'.
        """
        listmsg = []
        for num in range(start, end):
            url = f'https://kms.wanfangdata.com.cn/IndustryYJ/Search/Cecdb?q=%E5%86%B6%E9%87%91%2B%E5%86%B6%E7%82%BC&PageNumber={num}'
            html = self.req(url)
            if not html:
                # Failed request: this page contributes no records.
                continue
            soup = self.paserUrl(html, url)
            doc = pq(str(soup.prettify()))
            for li in doc('li[class="rt-wrap"]'):
                lidoc = pq(li)
                title = lidoc('a[class="title"]').text()
                turl = lidoc('a[class="title"]').attr('href')
                # Log the record's own link rather than the list-page URL.
                print(f'title:{title} url:{turl}')
                listmsg.append({'title': title, 'turl': turl})
        return listmsg

    def detailMsg(self, msg):
        """Read the detail table of one record and append it to the Excel file.

        Rows 2-21 of ``table.detail-md`` are read as label/value pairs: the
        first cell (with ':' removed) becomes the column name and the second
        cell the value. A later row with the same label overwrites an earlier
        one. When the page cannot be fetched only 'title' and 'turl' are saved.

        Args:
            msg: dict with the keys 'title' and 'turl', as built by pageList.
        """
        turl = msg['turl']
        title = msg['title']
        detailmsg = {'title': title, 'turl': turl}
        html = self.req(turl)
        if html:
            soup = self.paserUrl(html, turl)
            ddoc = pq(str(soup.prettify()))
            for rowNo in range(2, 22):
                rowSel = f'table[class="detail-md"]>tr:nth-child({rowNo})'
                label = ddoc(f'{rowSel}>td:nth-child(1)').text().replace(":", "")
                detailmsg[label] = ddoc(f'{rowSel}>td:nth-child(2)').text()
        self.writerToExcel([detailmsg])

    def conn144(self):
        """Open a MySQL connection and return ``(connection, cursor)``."""
        conn = pymysql.Connect(host='114.115.159.144', port=3306, user='caiji', passwd='zzsn9988', db='caiji',
                               charset='utf8')
        cursor = conn.cursor()
        return conn, cursor

    def dataToSql(self, detailmsg):
        """Stub: opens a database connection and closes it again.

        No statement is executed and ``detailmsg`` is not used; only a
        timestamp string is computed.
        """
        conn, cursor = self.conn144()
        try:
            current_time = datetime.datetime.now()
            currentdate = current_time.strftime("%Y-%m-%d %H:%M:%S")
        except Exception as e:
            print('+++++')
        finally:
            cursor.close()
            conn.close()

    def writerToExcel(self, detailList):
        """Append the records in ``detailList`` to the Excel file ``filename``.

        ``filename`` is a module-level name set by the script entry point; the
        file must already exist.
        """
        existing_data = pd.read_excel(filename)
        new_data = pd.DataFrame(data=detailList)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        combined_data = pd.concat([existing_data, new_data], ignore_index=True)
        combined_data.to_excel(filename, index=False)
        print('保存成功!!')
if __name__ == '__main__':
    wanfang = WanfangSpider()
    # One workbook per batch of 100 result pages.
    # NOTE(review): the last batch starts at 1301 and therefore requests pages
    # up to 1400, beyond the 1321 bound of the range — confirm this is wanted.
    for num in range(801, 1321, 100):
        # WanfangSpider.writerToExcel reads this module-level name.
        filename = f'企业_{num}.xlsx'
        # Create an empty workbook so the first append has a file to read.
        Workbook().save(filename)
        for msg in wanfang.pageList(num, num + 100):
            wanfang.detailMsg(msg)
\ No newline at end of file
采集说明:
机构和企业的数据采集是从万方网站上获取冶金相关的机构和企业的基本信息
万方的地址:https://kms.wanfangdata.com.cn/
<img alt="img.png" height="200" src="img.png" width="200"/>
入口
地址
机构:https://kms.wanfangdata.com.cn/IndustryYJ/Search/Cecdb?q=%E5%86%B6%E9%87%91%2B%E5%86%B6%E7%82%BC%20%E6%9C%BA%E6%9E%84%3Acsi&f=Inst.Type
企业:https://kms.wanfangdata.com.cn/IndustryYJ/Search/Cecdb?q=%E5%86%B6%E9%87%91%2B%E5%86%B6%E7%82%BC%20%E6%9C%BA%E6%9E%84%3Acecdb&f=Inst.Type
<img alt="img_1.png" height="200" src="img_1.png" width="200"/>
主要是获取基本的信息
# -*- coding: utf-8 -*-
import pymysql
import pandas as pd
from tqdm import tqdm
import xlsxwriter
import openpyxl
from urllib.parse import urlparse
# Read the enterprise rows from the workbook: the first three columns of each
# row are stored under the keys 'yname', 'name' and 'url'.
workbook = openpyxl.load_workbook('name.xlsx')
worksheet = workbook.active
qiyedatas = [
    {'yname': row[0], 'name': row[1], 'url': row[2]}
    for row in worksheet.iter_rows(values_only=True)
]

conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
                       charset='utf8')
cursor = conn.cursor()
# The LIKE pattern is passed as a query parameter so that a domain read from
# the spreadsheet can never alter the statement.
sql1 = """select id, info_source_code, web_site_name, site_name , site_uri from info_source WHERE site_uri like %s """
cont = 1
qynot = []  # enterprises with no usable URL or no matching info source
qyin = []   # one entry per matching info_source row
try:
    for qy in qiyedatas:
        url = qy['url']
        if url is None:
            qynot.append(qy)
            continue
        try:
            domain = urlparse(url).netloc
        except Exception as e:
            # e.g. the cell does not hold a string
            print(e)
            qynot.append(qy)
            continue
        if domain == '':
            qynot.append(qy)
            continue
        pattern = '%' + domain + '%'
        cursor.execute(sql1, (pattern,))
        result_data = cursor.fetchall()
        if not result_data:
            qynot.append(qy)
            continue
        cont += 1
        print(cont)
        for row2 in tqdm(result_data):
            try:
                rd = {'id': row2[0],
                      '编码': row2[1],
                      '网站名称': row2[2],
                      '栏目名称': row2[3],
                      '栏目地址': row2[4],
                      '企业名称': qy['name']
                      }
                qyin.append(rd)
            except Exception as e:
                print(e)
                print("查询失败!!" + sql1 + pattern)
finally:
    # Release the database resources even when a query fails.
    cursor.close()
    conn.close()

df_out = pd.DataFrame(data=qyin)
df_out.to_excel('企业情况在平台中有数据.xlsx', engine='xlsxwriter', index=False)
df_out = pd.DataFrame(data=qynot)
df_out.to_excel('企业情况在平台中没有数据.xlsx', engine='xlsxwriter', index=False)
# for row in tqdm(result_data):
#
# urls=['www.shtsp.com',
# 'www.onlyedu.com',
# 'www.shanghai-electric.com',
# 'www.skmic.sh.cn',
# 'www.szby.cn',
# 'www.xatourism.com',
# 'www.netac.com.cn',
# 'www.improve-medical.com',
# 'www.shanghaiyatong.com',
# 'www.nsrd.com.cn',
# 'www.400mn.com',
# 'www.efort.com.cn',
# 'www.jbdc.com.cn',
# 'www.wanma-cable.cn',
# 'www.lugangwool.com',
# 'www.cohc.citic',
# 'www.yafco.com',
# 'www.yccable.cn',
# 'www.ctv-media.com.cn',
# 'www.dehong.com.cn',
# 'www.tong-feng.com',
# 'www.bankgy.cn',
# 'www.viti.net.cn',
# 'www.hcsemitek.com',
# 'www.hrdq.cn',
# 'www.jlgsgl.com',
# 'www.cachet.com.cn',
# 'www.alcha.com',
# 'nj-port.com',
# 'www.qt300061.com',
# 'www.rrlgou.com',
# 'www.ronglian.com',
# 'www.oumasoft.com',
# 'www.cseg.cn',
# 'www.fjgs.com.cn',
# 'www.cr-jh.cn',
# 'www.tckg.cn',
# 'htk.hrbrail.cn',
# 'www.hnainfrastructure.com',
# 'www.chinahaiyue.com',
# 'www.nh.com.cn',
# 'www.hwatsing.com',
# 'www.kingenta.com',
# 'www.nantex.com.cn',
# 'www.fuda.com',
# 'www.nbtp.com.cn',
# 'www.shahe.cn',
# 'www.yulonggold.com',
# 'www.hnss.net.cn',
# 'www.hifuture.com',
# 'www.sye.com.cn',
# 'www.xjh-sc.com',
# 'www.jyzs.com.cn',
# 'www.szmeizhi.com',
# 'www.htrd.cn',
# 'www.wushang.com.cn',
# 'www.sfjt.com.cn',
# 'www.swpdi.com',
# 'www.qjtourism.com',
# 'www.bjtyz.com',
# 'www.leaguerme.com',
# 'www.kingsemi.com',
# 'www.dongri.com',
# 'www.cimcvehiclesgroup.com',
# 'www.bocichina.com',
# 'www.inzonegroup.cn',
# 'www.bchtpharm.com',
# 'www.unifull.com',
# 'www.jingui-silver.com',
# 'www.nbocc.com',
# 'www.zjzhongda.com',
# 'www.zwhlgroup.com',
# 'www.cebbank.com',
# 'www.zhonghongmedical.com',
# 'www.zoje.com',
# 'www.geovis.com.cn',
# 'www.capg.com.cn',
# 'www.crpcc.com.cn',
# 'www.cs.ecitic.com',
# 'www.nesc.cn',
# 'www.ellingtonpcb.com',
# 'www.sanjing.com.cn',
# 'www.hi-expressway.com',
# 'www.joyware.com',
# 'www.yuneng.com.cn',
# 'jngxfz.com',
# 'www.rtxc.com',
# 'www.jsxq.com',
# 'www.jsgaoke.com',
# 'www.cre8direct.cn',
# 'www.qijing-m.com',
# 'www.yaboo-cn.com',
# 'www.cictmobile.com',
# 'www.cqgt.cn',
# 'san-mu.com',
# 'www.zygs.com',
# 'www.hntz.com.cn',
# 'www.gemac-cn.com',
# 'www.ivo.com.cn',
# 'www.cdbio.cn',
# 'www.qdfood.com',
# 'www.sacredsun.cn',
# 'www.glsc.com.cn',
# 'www.bohai-water.com',
# 'www.nsig.com',
# 'www.ticw.com.cn',
# 'www.jjtz.com',
# 'www.fmsh.com',
# 'www.hirisun.com',
# 'www.bjtkgd.com',
# 'www.gj000096.com',
# 'www.ddmcgroup.cn',
# 'www.chinaxaperi.com',
# 'www.xjxlgf.com.cn',
# 'www.xcsg.cn',
# 'www.omnijoi.com',
# 'www.xakaili.com',
# 'www.tbjijian.com',
# 'www.sztechand.com.cn',
# 'www.xhchem.com',
# 'www.cbmt.com.cn',
# 'www.gsgf.com',
# 'www.hisunplas.com',
# 'www.cimc.com',
# 'www.cqggf.com.cn',
# 'www.afrlaser.com',
# 'www.dezhanhealthcare.com',
# 'www.minfa.com',
# 'www.fjzbgf.com',
# 'www.fjcement.com',
# 'www.hhink.com',
# 'www.hbjnhg.com',
# 'www.baiyang.com',
# 'www.cpttg.com.cn',
# 'www.jxcy.com.cn',
# 'www.bhgmall.com.cn',
# 'www.eastsoft.com.cn',
# 'www.dareway.com.cn',
# 'www.sxsanwei.com',
# 'www.hncde.cn',
# 'www.dlrd.com',
# 'www.dirui.com.cn',
# 'www.dxzq.net',
# 'www.lzbank.com',
# 'www.mdep.com.cn',
# 'www.ahtgtb.com',
# 'www.guanfu.com',
# 'www.jszs-group.com',
# 'www.ebjb.com',
# 'www.sunwill.com.cn',
# 'www.gdep.com.cn',
# 'www.gsfins.com',
# 'www.600083.com',
# 'www.htw.cn',
# 'www.china-boya.com',
# 'www.holsin.cn',
# 'www.fhzy.cn',
# 'www.nyocor.com',
# 'tech.csg.cn',
# 'www.bringspring.com',
# 'www.zh-echem.com',
# 'www.crcchem.com',
# 'www.longgaogf.com',
# 'www.sinotherapeutics.com',
# 'www.cesgroup.com.cn',
# 'www.shdyyy.com.cn',
# 'www.shmetro.com',
# 'www.worldunion.com.cn',
# 'www.jsti.com',
# 'www.szweiye.com',
# 'www.szmtc.com.cn',
# 'www.scjd.cn',
# 'www.tfzq.com',
# 'www.tielingnewcity.com.cn',
# 'www.cset.ac.cn',
# 'www.sunnyloantop.cn',
# 'www.vanfund.cn',
# 'www.chinayaxing.com',
# 'www.wrcb.com.cn',
# 'www.600168.com.cn',
# 'www.xacbank.com',
# 'www.goldwind.com.cn',
# 'www.vvgroup.com',
# 'www.roadrover.cn',
# 'www.tellus.cn',
# 'www.sdses.com',
# 'www.ytsc.cn',
# 'www.tande.cn',
# 'www.600773sh.com',
# 'www.lixinner.com',
# 'www.bankofchangsha.com',
# 'www.zjcbcm.com',
# 'www.zhefuet.com',
# 'www.ywgf.cn',
# 'www.zftc.net',
# 'www.zjjgf.com',
# 'www.zhangzidao.com',
# 'www.e-tongcheng.com',
# 'www.conbapharm.com',
# '10.150.48.9',
# 'www.izpec.com',
# 'www.cicc.com',
# 'www.chinastock.com.cn',
# 'www.znhi.com.cn',
# 'www.jiuhuashan.cc',
# 'www.winallseed.com',
# 'ahhui-expressway.net',
# 'www.bossco.cc',
# 'www.gzzjsy.com',
# 'www.hongtastock.com',
# 'www.dazhitech.com',
# '000509.ipo.hk',
# 'www.qxzs.com',
# 'www.lzminbai.com.cn',
# 'www.lzzhuangyuan.com',
# 'www.mosopower.com',
# 'www.nmgxhfxjt.com',
# 'www.sunseagroup.com',
# 'www.rzpcl.com',
# 'www.kaiwenedu.com',
# 'www.avicuas.com',
# 'www.jinsenforestry.com',
# 'www.gpedcl.com',
# 'www.njuae.cn',
# 'www.originwater.com',
# 'www.motic-electric.com',
# 'www.huatongreli.com',
# 'www.dfzq.com.cn',
# 'www.td300321.com',
# 'www.ahjnhg.com',
# 'www.xtc-xny.com',
# 'www.qdtnp.com',
# 'www.sgsbgroup.com',
# 'www.xhmedia.com',
# 'www.sh-sfc.com',
# 'www.shtdcy.com',
# 'www.fd-zj.com',
# 'www.wkyy.com',
# 'www.sdlomon.com',
# 'www.newssc.org',
# 'cs.xunyou.com',
# 'www.whghjt.com',
# ' www.xjxbmy.com',
# 'www.swhygh.com',
# 'www.geron-china.com',
# 'www.yb-zy.com',
# 'www.zjwh.com.cn',
# 'www.tangde.com.cn',
# 'www.crscl.com.cn',
# 'www.insigma.com.cn',
# 'www.cs-grkj.com',
# 'www.psbc.com',
# 'www.csgholding.com',
# 'www.e-chinalife.com',
# 'bank.ecitic.com',
# 'www.cqlummy.com',
# 'www.dlg-expo.com',
# 'www.aadri.com',
# 'www.ghkg000529.com',
# 'www.hzbank.com.cn',
# 'www.hangjintechnology.com',
# 'www.hljjt.com',
# 'www.nxz.com.cn',
# 'www.csrcbank.com',
# 'www.jscnnet.com',
# 'www.jsbchina.cn',
# 'www.zjrcbank.com',
# 'www.hrflanges.com',
# 'www.lander.com.cn',
# 'www.xmgw.com.cn',
# 'www.sdcbcm.com',
# 'www.foundersc.com',
# 'www.sunvim.com',
# 'www.guangyuyuan.com',
# 'www.ghzq.com.cn',
# 'www.hljcbgf.com',
# 'www.crmicro.com',
# 'www.hy-online.com',
# 'www.jiawei.com',
# 'www.saintycorp.com',
# 'www.jrjkg.com.cn',
# 'www.dinghantech.com',
# 'www.semifg.com',
# 'www.srcb.com',
# 'www.tongyuheavy.com',
# 'www.chinalihu.com',
# 'www.amec-inc.com',
# 'www.zoomlion.com',
# 'www.hlxl.com',
# 'www.bbcm.com.cn',
# 'www.ntfan.com',
# 'www.molonggroup.com',
# 'www.bosuntools.com',
# 'www.s10000.com',
# 'www.gtja.com',
# 'www.omhgroup.com',
# 'www.hxb.com.cn',
# 'www.saimo.cn',
# 'www.zjfzgroup.com',
# 'www.longking.com.cn',
# 'www.informrack.com',
# 'www.bosc.cn',
# 'www.cecm.com.cn',
# 'www.zmee.com.cn',
# ' www.ccnew.com ',
# 'www.szclou.com',
# 'www.chengduair.com',
# 'www.zts.com.cn',
# 'www.cofcoet.com',
# 'www.zhengyee.com',
# 'www.bd-zg.com',
# 'www.rainbowco.com.cn',
# 'www.bjcx.cn',
# 'www.xbcy.nx.cn',
# 'www.cylico.com',
# 'www.yundingkeji.cn',
# 'www.gbdz.net',
# 'www.th600281.cn',
# 'www.pdjs.com.cn',
# 'www.xjxfkj.com',
# 'www.zfsycf.com.cn',
# 'www.600228.net',
# 'www.sxbctv.com',
# 'www.sd-wit.com',
# 'www.hzrdjt.com',
# 'www.guofeng.com',
# 'www.jdee.com.cn',
# 'www.hfgf.cn',
# 'www.foton.com.cn',
# 'www.dongjiang.com.cn',
# 'zhujiangpijiu.tmall.com',
# 'www.jin-fu.cn',
# 'www.yanzhoucoal.com.cn',
# 'www.wxm.com',
# 'www.cggc.cn',
# 'www.chinagci.com',
# 'www.jx-blackcat.com',
# 'www.tfny.com',
# 'www.kkcc.com.cn',
# 'www.hengshuilaobaigan.net',
# 'www.smsc.sh.cn',
# 'www.hbkangxin.com.cn',
# 'www.000899.com',
# 'www.rundamedical.com',
# 'www.fjec.com.cn',
# 'www.luckyfilm.com.cn',
# 'www.ansteel.com.cn',
# 'www.rendongholdings.com',
# 'www.toppcb.com',
# 'www.reht.com',
# 'www.portshanghai.com.cn',
# 'scxcdl.com',
# 'www.96335.com',
# 'www.bbmg.com.cn',
# 'www.dahaobj.com',
# 'www.hayao.com',
# 'www.moon-tech.com',
# '210.5.145.194',
# 'www.weldatlantic.com',
# 'www.tvzone.cn',
# 'www.cutc.com.cn',
# 'www.china-htdl.com',
# 'www.jinshiyuan.com.cn',
# 'www.jiangong.com.cn',
# 'www.hnair.net',
# 'www.sxjh.com.cn',
# 'www.gzport.com',
# 'pearlriveryq.tmall.com',
# 'www.hdbp.com',
# 'mecc.sinosteel.com',
# 'www.newworld-china.com',
# 'www.hebgtgf.com',
# 'www.xiangyu.cn',
# 'www.conch.cn',
# 'www.cygs.com',
# 'www.swsc.com.cn',
# 'www.hangyang.com',
# 'www.helichina.com',
# 'www.gybys.com.cn',
# 'www.jlyy1999.com',
# 'www.honglitronic.com',
# 'www.grgbanking.com',
# 'www.xzmbgf.com',
# 'www.yilida.com',
# 'www.westsecu.com',
# 'www.tefafuwu.com',
# 'www.sinopack.com.cn',
# 'www.jsleasing.cn',
# 'www.Gsysgf.com',
# 'www.baoti.com',
# 'www.citicguoaninfo.com',
# 'www.scte.com.cn',
# 'www.whchem.com',
# 'www.scmc-xa.com',
# 'cn.changhong.com',
# 'www.angelyeast.com',
# 'www.hbklgroup.cn',
# 'www.scimee.com',
# 'www.desaysv.com',
# 'www.zthj.com',
# 'www.shaaas.com',
# '218.22.147.82',
# 'www.jlsg.com.cn',
# 'www.iflytek.com',
# 'www.yuedainvest.com',
# 'www.chengzhi.com.cn',
# 'www.jxgtyjy.com',
# 'www.bhcc.cn',
# 'www.gssok.com',
# 'www.whzb.com',
# 'www.citichmc.com',
# 'www.artall.com.cn',
# 'www.china-ftz.com',
# 'www.jiuzhoutech.com.cn',
# 'www.desaybattery.com',
# 'www.zhenye.com',
# 'www.jinjianghotels.sh.cn',
# 'www.fawer.com.cn',
# 'www.chenmingpaper.com',
# 'www.start.com.cn',
# 'www.jhgf.com.cn',
# 'www.xztianlu.com',
# 'www.jljgdq.cn',
# 'www.300188.cn',
# 'www.slkg1949.com',
# 'www.gujing.com',
# 'www.jsjsyh.com',
# 'www.baiyunairport.com',
# 'www.gzr.com.cn',
# 'www.wnq.com.cn',
# 'www.xiamenairport.com.cn',
# 'www.ymhg.com.cn',
# 'www.tjkjsy.com.cn',
# 'www.jlucdi.com',
# 'www.hxfz.com.cn',
# 'www.naura.com',
# 'www.unistrong.com',
# 'www.people.cn',
# 'www.wenergy.cn',
# 'www.navinfo.com',
# 'www.hnfzgf.com',
# 'www.shibeiht.com',
# 'www.abchina.com',
# 'www.gmdi.cn',
# 'www.boc.cn',
# 'ny.csg.cn',
# 'www.chinacnd.com',
# 'www.lxct.cn',
# 'www.hengyun.com.cn',
# 'www.qingshanpaper.com',
# 'www.e-cbest.com',
# 'www.cnyeic.com',
# 'www.cqcy.com',
# 'www.chinawuyi.com.cn',
# 'www.first-panel.com',
# 'www.snxhchem.com',
# 'www.ebscn.com',
# 'www.china-hbp.com',
# 'www.xmklm.com.cn',
# 'www.gsrc.com',
# 'www.gsgczx.cn',
# 'www.cpepgc.com',
# 'www.hwgf757.com',
# 'www.moutaichina.com',
# 'www.fspg.com.cn',
# 'www.bez.com.cn',
# 'www.inspur.com',
# 'www.inesa-it.com',
# 'www.kingyork.biz',
# 'www.chinahitech.com.cn',
# 'www.sanyou-chem.com.cn',
# 'www.jdcmoly.com',
# 'www.lzgf.cn',
# 'www.lshec.com',
# 'www.pub.citic.com',
# 'www.bjca.cn',
# 'www.motimo.com',
# 'www.bluestar-adisseo.com',
# 'www.zgpgc.com',
# 'www.shanghaidragon.com.cn',
# 'www.itg.com.cn',
# 'www.sc-aaa.com',
# 'www.nnsugar.com',
# 'www.600064.com',
# 'www.luxichemical.com',
# 'www.zzce.com.cn',
# 'www.hbshkj.cn',
# 'www.liugong.com',
# 'www.xjpharma.com',
# 'www.xdtz.net',
# 'www.tsingtao.com.cn',
# 'www.cdgxfz.com',
# 'www.shanghai-electric.com',
# 'www.wxtj.com',
# 'www.xinye-tex.com',
# 'www.chinasec.cn',
# 'gufen.luckyfilm.com.cn',
# 'www.szwg.com',
# 'www.hnboyun.com.cn',
# 'www.szmicrogate.com',
# 'www.i-scip.com',
# 'www.yatai.com',
# 'www.ljz.com.cn',
# 'www.hucd.cn',
# 'www.sxzq.com',
# 'www.qdhuaren.com',
# 'www.siti.com.cn',
# 'www.scrbc.com.cn',
# 'www.flzc.com',
# 'www.jx9394.com',
# 'www.cdgas.com',
# 'www.xingfagroup.com',
# 'www.tjsjgf.com.cn',
# 'www.san-huan.com.cn',
# 'www.cqgasgis.com',
# 'www.baoguang.com.cn',
# 'www.lishengpharma.com',
# 'www.arcplus.com.cn',
# 'www.binhaienergy.com',
# 'www.sdjlky.com',
# 'www.zthx.com',
# 'www.lshfz.com.cn',
# 'www.jingnengpower.com',
# 'www.ppm.cn',
# 'www.shenhuo.com',
# 'www.silverbasis.com',
# 'www.chinawindey.com',
# 'www.jialing.com.cn',
# 'www.catec-ltd.cn',
# 'www.jzjt.com',
# 'www.chalkistomato.com',
# 'www.huatian-hotel.com',
# 'www.xhzy.com',
# 'www.shantui.com',
# 'www.chinahuamao.net',
# 'www.sugon.com',
# 'www.mogao.com',
# 'www.valin.cn',
# 'www.ahhymd.com.cn',
# 'www.jei.com.cn',
# 'www.bjhhny.com',
# 'www.liuguo.com',
# 'www.easy-visible.com',
# 'www.xemc.com.cn',
# 'www.jinbei.com.cn',
# 'www.gzgsgf.com.cn',
# 'www.600373.com.cn',
# 'www.dhseed.com',
# 'www.up-china.com',
# 'www.ght-china.com',
# 'www.c-wmm.com',
# 'www.712.cn',
# 'www.czdh.chemchina.com',
# 'www.dgholdings.cn',
# 'www.ykplc.com',
# 'www.cecep.cn',
# 'www.njtc.com.cn',
# 'www.qfcgroup.com',
# 'www.ymnygf.com',
# 'www.zncmjt.com',
# 'www.jianfeng.com.cn',
# 'www.zzepc.com.cn',
# 'www.chengda.com.cn',
# 'www.cindare.com',
# 'www.slpharm.com.cn',
# 'www.ycne.com.cn',
# 'www.brightdairy.com',
# 'www.meichen.cc',
# 'www.jxcgc.com',
# 'www.sanbian.cn',
# 'www.sino-platinum.com.cn',
# 'www.copote.com',
# 'www.gzjiulian.com',
# 'www.tfxingfujia.com',
# 'www.hg-oa.com',
# 'www.beijingnorthstar.com',
# 'www.nfzje.com',
# 'www.hakim.com.cn',
# 'www.hqbeer.com',
# 'www.cnpubc.com',
# 'www.shengheholding.com',
# 'www.pku-hc.com',
# 'www.sanxia.com',
# 'www.zzbank.cn',
# 'www.casit.com.cn',
# 'www.shpdjq.com',
# 'www.xntsgs.com',
# 'www.zjorient.com',
# 'www.capitalwater.cn',
# 'www.mfspchina.com',
# 'www.elecspn.com',
# 'www.glasstex.cn',
# 'www.cpic.com.cn',
# 'www.pingan.cn',
# 'www.tlys.cn',
# 'www.hgbdzhyl.com',
# 'www.stec.net',
# 'www.lpht.com.cn',
# 'www.ezjzy.com',
# 'www.sdec.com.cn',
# 'www.szap.com',
# 'www.cqphar.com',
# 'www.szgas.com.cn',
# 'facs.com.cn',
# 'www.gac.com.cn',
# 'www.changshantex.com',
# 'www.newwf.com',
# 'www.000607.cn',
# 'www.ctsec.com',
# 'www.ecsponline.com',
# 'www.dynagreen.com.cn',
# 'www.grandblue.cn',
# 'www.daqintielu.com',
# 'www.portqhd.com',
# 'www.sanyuan.com.cn',
# 'www.sypglass.com',
# 'www.hx168.com.cn',
# 'www.athub.com',
# 'www.ygsoft.com',
# 'www.chinaddn.com',
# 'yqmy.ymjt.com.cn',
# 'www.jiebai.com',
# 'www.zjjiaoke.com',
# 'www.htsec.com',
# 'www.guilintravel.com',
# 'www.htsc.com.cn',
# 'www.cnooc.com.cn',
# 'www.spacesat.com.cn',
# 'www.hua-yi.cn',
# 'www.starlake.com.cn',
# 'www.xj-tianye.com',
# 'www.boe.com',
# 'www.longjianlq.com',
# 'www.shenhuachina.com',
# 'www.sinovatio.com',
# 'www.chinayanghe.com',
# 'www.wz-zhongheng.com',
# 'www.hbyl.cn',
# 'www.tayho.com.cn',
# 'www.castech.com',
# 'www.foundertech.com',
# 'www.hua-ying.com',
# 'www.csig158.com',
# 'www.leadmanbio.com',
# 'www.highly.cc',
# 'www.kjtbao.com',
# 'www.hrtn.com.cn',
# 'www.jsjnsw.com',
# 'www.chinazhjt.com.cn',
# 'www.xindeco.com',
# 'www.intmedic.com',
# 'pvc.conch.cn',
# 'www.cjtz.cn',
# 'www.fengle.com.cn',
# 'www.shoukaigufen.com',
# 'www.aucma.com',
# 'www.shxcoal.com',
# 'www.soyea.com.cn',
# 'www.ccrq.com.cn',
# 'www.hac.com.cn',
# 'www.ncfc.cn',
# 'www.xinsteel.com.cn',
# 'www.energas.cn',
# 'www.tcsw.com.cn',
# 'www.cchbds.com.cn',
# 'www.gimc.cn',
# 'www.thvow.com',
# 'www.sdecl.com.cn',
# 'www.600689.com',
# 'www.gdgzrb.com',
# 'demo.cge.com.cn',
# 'www.smtcl.com',
# 'www.topnewinfo.cn',
# 'www.phoenix.com.cn',
# 'www.szwuye.com.cn',
# 'www.hitech-develop.com',
# 'www.ntjtc.com',
# 'hxdq.hisense.com',
# 'www.meiling.com',
# 'www.fenjiu.com.cn',
# 'www.lggf.com.cn',
# 'www.myorbita.net',
# 'wrgold.cn',
# 'www.tqcc.cn',
# 'www.citymedia.cn',
# 'www.sndnt.com',
# 'www.grandbuy.com.cn',
# 'www.chalieco.com.cn',
# 'www.bucid.com',
# 'www.xjqscc.com',
# 'www.tjcep.com',
# 'www.scacc.com',
# 'www.sinotruk.com',
# 'www.hgtech.com.cn',
# 'www.fenghua-advanced.com',
# 'www.nantian.com.cn',
# 'www.hisense.com',
# 'www.gztyre.com',
# 'www.opg.cn',
# 'www.yggf.com.cn',
# 'www.bjev.com.cn',
# 'www.hjjs.com',
# 'www.topway.com.cn',
# 'www.sxjgkg.com',
# 'www.shaanxigas.com',
# 'holitech.net',
# 'www.gngf.cn',
# 'www.nonfemet.com',
# 'www.yunnanbaiyao.com.cn',
# 'www.bxlq.com',
# 'www.gepiced.com',
# 'www.yuegui.cn',
# 'www.bbwport.cn',
# 'www.yyth.com.cn',
# 'www.yuexiu-finance.com',
# 'www.wuliangye.com.cn',
# 'www.petrochina.com.cn',
# 'www.ahtrq.com',
# 'www.cxtc.com',
# 'www.lubeichem.com',
# 'www.sgmg.com.cn',
# 'www.600796.com',
# 'www.zzdc.com.cn',
# 'www.jsgxgf.com',
# 'www.yyxc0819.com',
# 'www.tedastock.com',
# 'www.changchai.com',
# 'www.nj-public.com',
# 'gf.tongrentang.com',
# 'www.htchuav.com',
# 'www.000793.com',
# 'www.cnzgc.com',
# 'www.hnhlc.com',
# 'www.xiagong.com',
# 'www.qingdao-port.com',
# 'www.hngoldcorp.com',
# 'www.zhongtong.com',
# 'www.hghngroup.com',
# 'www.dggf.cn',
# 'www.cecl.com.cn',
# 'www.nexhome.cn',
# 'www.sh600649.com',
# 'www.chinasthc.com',
# 'www.tagen.cn',
# 'www.yimingroup.com',
# 'www.cqrcb.com',
# 'www.dongfangelec.com',
# 'www.sinodmc.com',
# 'www.shenergy.net.cn',
# 'www.xjjtjt.com',
# 'www.jzz.cn',
# 'www.yngreen.com',
# 'www.tik.com.cn',
# 'www.dhidcw.com',
# 'www.fjhxhb.com',
# 'www.xinhuanet.com',
# 'www.sidlgroup.com',
# 'www.zjgold.com',
# 'www.taiji.com',
# 'www.cqsxsl.com',
# 'www.bthhotels.com',
# 'www.picc.com',
# 'www.999.com.cn',
# 'home.cofcotunhe.com',
# 'www.gzlnholdings.com',
# 'tgbx.tisco.com.cn',
# 'www.xinzhu.com',
# 'www.zjhengshun.com',
# 'www.yunneidongli.com',
# 'www.weichaihm.com',
# 'acbc.com.cn',
# 'www.shidaiwanheng.com',
# 'www.wxrope.com',
# 'bldmo.blemall.com',
# 'www.hisunpharm.com',
# 'www.sz-expressway.com',
# 'www.chasesun.cn',
# 'www.bgctv.com.cn',
# 'www.c-wst.com',
# 'www.whty.com.cn',
# 'www.fnorient.com',
# 'www.chinagwe.com',
# 'www.yantangmilk.com',
# 'www.600608.net',
# 'www.lhpharma.com',
# 'www.shanghaipower.com',
# 'www.stocke.com.cn',
# 'www.inspur.com',
# 'www.600757.com.cn',
# 'www.chinatelling.com',
# 'www.benefo.tj.cn',
# 'www.chinaadvance.com',
# 'www.sopo.com.cn',
# 'www.xyzq.com.cn',
# 'www.butone.com',
# 'www.fudanfuhua.com',
# 'www.jjmy.cn',
# 'www.szibr.com',
# 'www.hirub.cn',
# 'www.ntjhzy.com',
# 'www.600269.cn',
# 'www.ciwen.com.cn',
# 'www.cytsonline.com',
# 'www.infinova.com.cn',
# 'www.gtiggm.com',
# 'www.scfrkj.cn',
# 'www.nupmg.com',
# 'www.cecep.cn',
# 'www.globalprinting.cn',
# 'www.winshare.com.cn',
# 'www.jlcfc.com',
# 'www.westmining.com',
# 'www.0507.com.cn',
# 'www.taloph.com',
# 'www.zhongxinp.com',
# 'www.guosen.com.cn',
# 'www.fsgas.com',
# 'www.sns-china.com',
# 'www.leaguer.com.cn',
# 'www.daangene.com',
# 'www.jsac.com.cn',
# 'www.xnskg.cn',
# 'www.cogogl.com.hk',
# 'www.sdlqgf.com',
# 'www.000551.cn',
# 'www.zgdygf.com',
# 'www.wwgf.com.cn',
# 'www.jac.com.cn',
# 'www.xsmd.cc',
# 'www.hbctgs.com',
# 'www.jsne.com.cn',
# 'www.camc.cc',
# 'www.grgtest.com',
# 'www.ghtchina.com',
# 'www.shkdchem.com',
# 'www.eascs.com',
# 'www.e-ande.com',
# 'www.qdzzzc.com',
# 'www.china-ceco.com',
# 'www.shjb600838.com',
# 'www.greedc.com',
# 'www.ahccjg.com.cn',
# 'www.ged.com.cn',
# 'www.cwjt.com',
# 'www.cssd.com.cn',
# 'www.sggf.com.cn',
# 'www.chinatypical.com',
# 'www.bhpiston.com',
# 'www.blower.cn',
# 'www.baose.com',
# 'www.guhan.com',
# 'www.pmta.com.cn',
# 'www.ytl.com.cn',
# 'www.dfzk.com',
# 'www.sxbychem.com',
# 'www.prolto.com',
# 'www.jmjj.com',
# 'www.siasun.com',
# 'www.zhzb.cecep.cn',
# 'www.sanju.cn',
# 'www.shaoneng.com.cn,www.sn0601.com',
# 'www.ldjt.com.cn',
# 'www.weifu.com.cn',
# 'www.nfcb.com.cn',
# 'www.600617.com.cn',
# 'www.ankai.com',
# 'www.thunis.com',
# 'www.asiastarbus.com',
# 'www.qjwater.com',
# 'www.chinaemd.com',
# 'www.hl-hengsheng.com',
# 'www.cnqjyy.com',
# 'www.cndl155.com',
# 'www.600822sh.com',
# 'www.600463.com.cn',
# 'www.aceg.com.cn',
# 'www.sdgi.com.cn',
# 'www.000803.com',
# 'xgjx.xcmg.com',
# 'www.nafine.com',
# 'www.yanjing.com.cn',
# 'www.tjsemi.com',
# 'www.hbiszy.com',
# 'www.hacl.cn',
# 'www.orientscape.com',
# 'www.hnsc.cc',
# 'www.tus-est.com',
# 'www.press-mart.com',
# 'www.scmeif.com',
# 'www.cisri-gaona.com.cn',
# 'www.fengguang.com',
# 'www.guangjipharm.com',
# 'www.oie.com.cn',
# 'www.bhferry.com',
# 'www.bnmc.com',
# 'www.changyu.com.cn',
# 'www.gdg.com.cn',
# 'www.huapengglass.com',
# 'www.eal-ceair.com',
# 'www.shenma.com',
# 'www.ihandy.cn',
# 'www.gxlcwater.com',
# 'www.gfx.com.cn',
# 'www.cncqsw.com',
# 'www.yinhai.com',
# 'www.pjgf.cn',
# 'www.magang.com.cn',
# 'www.magang.com.hk',
# 'www.wjwlg.com',
# 'www.chinacdi.com',
# 'www.95559.com.cn',
# 'www.jxcc.com',
# 'www.vanke.com',
# 'www.ccb.com',
# 'www.sinomach-he.cn',
# 'www.newchinalife.com',
# 'www.cofco.com',
# 'www.bxsteel.com',
# 'www.xntg.com',
# 'www.qinchuan.com',
# 'www.000860.com',
# 'www.tymqh.com',
# 'www.lzlj.com',
# 'www.jinfengwine.com',
# 'www.luanhn.com',
# 'www.njyy.com',
# 'www.hzsteel.com',
# 'www.nhbiogroup.com',
# 'www.lzhg.cn',
# 'www.chinacrt.com',
# 'www.sunasia.com',
# 'www.snbc.cn',
# 'www.ybty.com',
# 'www.gqy.com.cn',
# 'www.snm.gd.cn',
# 'www.shkg.com.cn',
# 'www.ssgf.com.cn',
# 'www.qrcb.com.cn',
# 'www.hengfengpaper.com',
# 'www.sec.com.cn',
# 'www.jishimedia.com',
# 'www.pharmglass.com',
# 'www.zyddcm.com',
# 'www.600844.com',
# 'www.zte.com.cn',
# 'www.600780.com.cn',
# 'www.huajinct.com',
# 'www.feidaep.com',
# 'www.citicsteel.com',
# 'www.dtmy.com.cn',
# 'www.king1.com.cn',
# 'www.qhyhgf.com',
# 'www.gsafety.com',
# 'www.yonglin.com',
# 'www.jznygf.com',
# 'www.gd-goworld.com',
# 'www.cnsjy.com',
# 'www.shanghaimaling.com',
# 'www.hbcoal.com',
# 'www.duzhepmc.com',
# 'www.hbyh.cn',
# 'www.nationstar.com',
# 'www.jtport.com.cn',
# 'www.smgjny.com',
# 'www.600633.cn',
# 'www.roadmaint.com',
# 'www.token-ito.com',
# 'www.doublestar.com.cn',
# 'www.yasheng.com.cn',
# 'www.yapp.com',
# 'www.eurasiagroup.com.cn',
# 'www.shchinafortune.com',
# 'www.gyzq.com',
# 'www.blackpeony.com',
# 'nb.nnbh.cn',
# 'www.skiad.com.cn',
# 'www.lonkey.com.cn',
# 'www.hazq.com',
# 'www.ncpc.cn',
# 'www.zmd.com.cn',
# 'www.tyhi.com.cn',
# 'www.lierchem.com',
# 'www.sh-shenda.com',
# 'www.yantian-port.com',
# 'www.palm-la.com',
# 'www.simei.cc',
# 'www.shinva.net',
# 'www.mgtv.com',
# 'www.chinesekk.com',
# 'www.suntien.com',
# 'www.shenzhou-gaotie.com',
# 'www.nbmc.com.cn',
# 'www.luyin.cn',
# 'www.sclth.com',
# 'www.shaangu.com',
# 'www.chinalanhua.com',
# 'www.huakongseg.com.cn',
# 'www.chinafsl.com',
# 'www.sunward.com.cn',
# 'www.lpxdgf.cn',
# 'www.gdsdej.com',
# 'www.jtlfans.com',
# 'www.guangrigf.com',
# 'www.zmj.com',
# 'www.shanghaizhongyida.com',
# 'www.wasu.com',
# 'www.jymdgs.com',
# 'www.ls.com.cn',
# 'www.langold.com.cn',
# 'ylgf.chinalco.com.cn',
# 'www.ht-saae.com',
# 'www.cgnmc.com',
# 'www.highhope.com',
# 'www.segcl.com.cn',
# 'www.nbport.com.cn',
# 'www.aysteel.com.cn',
# 'www.tianjin-port.com',
# 'www.zhangzepower.com',
# 'www.xjtrry.com',
# 'www.chinafirstpencil.com',
# 'www.dlport.cn',
# 'www.cccgroup.com.cn',
# 'www.hunan-huasheng.com',
# 'www.furielec.com',
# 'www.hndayou.com.cn',
# 'www.gosinoic.com',
# 'www.shuangtafood.com',
# 'www.hnnlmb.com',
# 'www.wfj.com.cn',
# 'www.ccht.jl.cn',
# 'www.0898hq.com',
# 'www.netposa.com',
# 'www.doublecoinholdings.com',
# 'www.baoganggf.com',
# 'www.jingchenggf.com.cn',
# 'www.hasco-group.com',
# 'www.shaoxingwine.com.cn',
# 'www.sinosoft.com.cn',
# 'www.b-raymedia.com',
# 'www.zpug.net',
# 'www.fenghuo.cn',
# 'www.bocd.com.cn',
# 'www.jisco.cn',
# 'www.masonled.com',
# 'www.qingxin.com.cn',
# 'www.elht.com',
# 'www.fczy.com',
# 'www.jinlinghotel.com',
# 'www.jxhcsy.com',
# 'www.andty.com',
# 'www.pinggao.com',
# 'www.lsrfzy.com',
# 'www.yhwins.com',
# 'www.chxz.com',
# 'www.laibao.com.cn',
# 'www.jianghang.com',
# 'www.quanjude.com.cn',
# 'www.dldc.com.cn',
# 'www.bailu.com',
# 'www.jsexpressway.com',
# 'www.lkpc.com',
# 'www.liusteel.com',
# 'www.gzgdwl.com',
# 'www.xinsai.com.cn',
# 'www.jdsn.com.cn',
# 'www.cdxrec.com',
# 'www.acdi.ah.cn',
# 'www.lmz.com.cn',
# 'www.cnhuafas.com',
# 'www.hfbh.com.cn',
# 'www.cceg.cn',
# 'www.xjyilite.com',
# 'www.sdhjgf.com.cn',
# 'www.zzpzh.com',
# 'www.xnkf.com',
# 'www.china-meili.com',
# 'www.tyjd.cc',
# 'www.thtf.com.cn',
# 'www.fdm.com.cn',
# 'www.sun-create.com',
# 'www.cgws.com',
# 'www.fsg.com.cn',
# 'www.600683.com',
# 'www.sinopharmholding.com',
# 'www.zhixindianqi.com',
# 'www.lypower.com',
# 'www.yypaper.com',
# 'www.tiantanbio.com',
# 'www.shyndec.com',
# 'www.tpv-tech.com',
# 'www.raycuslaser.com',
# 'www.torchcn.com',
# 'www.czst.com',
# 'www.fawjiefang.com.cn',
# 'www.cmie.csic.com.cn',
# 'zbhj.norincogroup.com.cn',
# 'www.yinglitechem.com',
# 'www.wsdl.com.cn',
# 'www.hlbn.cc',
# 'www.cydl.com.cn',
# 'www.polycn.com',
# 'www.qm.cn',
# 'www.khjt.com.cn',
# 'www.tianhong.cn',
# 'www.griam.cn',
# 'www.dec-ltd.cn',
# 'www.cccgreg.com',
# 'www.cofc.com.cn',
# 'www.segroup.cn',
# 'www.xjgt.com',
# 'www.avicem.com',
# 'www.aaec.com.cn',
# 'www.aeolustyre.com',
# 'bhgf.norincogroup.com.cn',
# 'www.gzqydl.cn',
# 'www.hmavic.com',
# 'www.meheco.com',
# 'www.aisino.com',
# 'www.jinxiaxle.com',
# 'www.accelink.com',
# 'www.chalco.com.cn',
# 'www.wutos.com',
# 'www.casic-addsino.com',
# 'www.huajinchem.com',
# 'www.jemlc.com',
# 'www.sdiczl.com',
# 'www.zdydep.com',
# 'comec.cssc.net.cn',
# 'www.camsl.com',
# 'www.cmsk1979.com',
# 'www.002163.com',
# 'www.trp.com.cn',
# 'www.chinatelecom-h.com',
# 'tech.coscoshipping.com',
# 'hold.coscoshipping.com',
# 'www.guotone.com',
# 'www.cmreltd.com',
# 'www.dcpc.com',
# 'www.dfmg.com.cn',
# 'www.hikvision.com',
# 'www.hpi.com.cn',
# 'www.atmcn.com',
# 'www.triumphltd.cn',
# 'www.hnlcj.cn',
# 'www.phenixoptics.com.cn',
# 'www.ehualu.com',
# 'www.jihua-mall.com',
# 'www.cdt-gxi.com',
# 'www.gyfz000537.com',
# 'www.bygt.com.cn',
# 'www.baosight.com',
# 'www.qlssn.com',
# 'www.otic.com.cn',
# 'www.sinopec.com',
# 'www.huake.com',
# 'www.cecsec.cn',
# 'www.fiytagroup.com',
# 'ynxygf.com',
# 'www.fiberhome.com',
# 'www.lida-oe.com',
# 'www.chinanhl.com',
# 'www.chinasufa.com.cn',
# 'www.besttoneh.com',
# 'www.hdenergy.com',
# 'www.belling.com.cn',
# 'www.baosteelpackaging.com',
# 'www.konkamobile.com',
# 'www.nmhdwz.com',
# 'www.eastcom.com',
# 'www.hbny.com.cn',
# 'www.datang.com',
# 'www.sdic.com.cn',
# 'www.scfast.cn',
# 'www.changan.com.cn',
# 'www.zhglb.com',
# 'www.nxyxny.com.cn',
# 'www.poly.com.cn',
# 'www.chinajinmao.cn',
# 'www.panda.cn',
# 'www.bfdh.com.cn',
# 'www.faway.com',
# 'yjjt.norincogroup.com.cn',
# 'www.camce.com.cn',
# 'www.cnpccapital.cn',
# 'www.spichebei.com',
# 'www.minfinance.com.cn',
# 'www.gzghgf.com',
# 'www.nmgpzny.com',
# 'www.greatwall.cn',
# 'www.baoshengcable.com',
# 'www.csrgc.com.cn',
# 'www.ccccltd.cn',
# 'www.jmc.com.cn',
# 'www.ccyd.com.cn',
# 'www.sinoma-ec.cn',
# 'www.powerchina.cn',
# 'www.szcwh.com',
# 'www.twbb.com.cn',
# 'www.crec.cn',
# 'www.cosl.com.cn',
# 'www.ctgdutyfree.com.cn',
# 'www.gzhtdq.com.cn',
# 'www.sinomach-pi.cn',
# 'www.lingyun.com.cn',
# 'www.grandjoy.com',
# 'www.ycig.com',
# 'www.sychem.com',
# 'www.nfc.com.cn',
# 'www.scc.com.cn',
# 'www.cncec.com.cn',
# 'www.ty-magnet.com',
# 'www.taiji.com.cn',
# 'www.aircraft_co.avic.com',
# 'www.sinoma.com.cn',
# 'www.600316.com.cn',
# 'www.crhic.cn',
# 'www.ceovu.com',
# 'www.csair.com',
# 'www.cncm.com',
# 'www.xdect.com.cn',
# 'www.adama.com',
# 'ssc.sinopec.com',
# 'csscholdings.cssc.net.cn',
# 'www.csic446.com',
# 'www.spc.com.cn',
# 'www.cmschina.com',
# 'www.sinomatech.com',
# 'www.cypc.com.cn',
# 'www.sgss.com.cn',
# 'www.avichina.com',
# 'www.cnecc.com',
# 'www.css.com.cn',
# 'www.caeri.com.cn',
# 'www.tianma.cn',
# 'www.gohigh.com.cn',
# 'znfzy.cnadc.com.cn',
# 'www.bjruitai.com',
# 'www.sinoma-tianshan.cn',
# 'www.ceair.com',
# 'www.chinaunicom.com.hk',
# 'www.cdt-re.com',
# 'www.glaruntech.com',
# 'www.kaifa.cn',
# 'www.clypg.com.cn',
# 'www.baosteel.com',
# 'www.aerocom.cn',
# 'www.hdpi.com.cn',
# 'cwcg.cscec.com',
# 'www.minlist.com.cn',
# 'www.jnhi.com',
# 'www.cac-citc.com',
# 'www.dtpower.com',
# 'www.jwgf.com',
# 'www.chinahaihua.com',
# 'www.westone.com.cn',
# 'www.detc.com.cn',
# 'www.eastcompeace.com',
# 'www.sinochemintl.com',
# 'www.hypower.com.cn',
# 'www.crcce.com.cn',
# 'www.cahic.com',
# 'www.travelsky.net',
# 'www.cdc.com.cn',
# 'www.cnbmltd.com',
# 'www.mjsdgs.com',
# 'www.jonhon.cn',
# 'www.polyculture.com.cn',
# 'www.airchina.com',
# 'www.hfdl.aecc.com',
# 'www.cgnnt.com.cn',
# 'www.cmenergyshipping.com',
# 'www.cei1958.com',
# 'www.sac-china.com',
# 'www.northeo.com,www.sicong.com',
# 'www.pgvt.cn',
# 'www.shecc.com',
# 'www.tdtec.com',
# 'www.600795.com.cn',
# 'www.bnbm.com.cn',
# 'www.yangnongchem.com',
# 'www.tjfaw.com',
# 'www.aviconics.com.cn',
# 'www.sumec.com',
# 'www.lanpec.com',
# 'www.xjgc.sgcc.com.cn',
# 'www.spicjl.com',
# 'www.cnoocengineering.com',
# 'cpec.cnpc.com.cn',
# 'www.cgnp.com.cn',
# 'www.aritime.com',
# 'www.cmstd.com.cn',
# 'www.guanhao.com',
# 'www.tec.crrczic.cc',
# 'energy.coscoshipping.com',
# 'development.coscoshipping.com',
# 'www.cnnp.com.cn',
# 'www.crcc.cn',
# 'www.jushi.com',
# 'www.cfhi.com',
# 'www.ctsfreight.com',
# 'www.norinco-intl.com',
# 'www.cmpo1914.com',
# 'www.easpring.com',
# 'www.aviccapital.com',
# 'www.mxdl.com.cn',
# 'www.crrcgc.cc',
# 'www.szaccord.com.cn',
# 'www.naritech.cn',
# 'www.hhi.com.cn',
# 'www.daae.com.cn',
# 'www.jiuguijiu000799.com',
# 'www.4008874005.com',
# 'www.zssw.com',
# 'www.lantaicn.com',
# 'www.crhms.cn',
# 'www.china-csicpower.com.cn',
# 'www.avic-sac.com',
# 'www.sinopecgroup.com',
# 'www.ascf.com.cn',
# 'www.haisum.com',
# 'cenertech.cnooc.com.cn',
# 'www.aerosun.cn',
# 'www.xinxing-pipes.com',
# 'cn.zpmc.com',
# 'www.sdtny.copm',
# 'www.dfac.com',
# 'www.as-hitech.com',
# 'avicopter.avic.com',
# 'flepc.com.cn',
# 'www.kraussmaffei.ltd',
# 'www.hhkj.chemchina.com',
# 'www.bgrimmtec.com',
# 'www.yunnan-copper.com',
# 'www.chinacoalenergy.com',
# 'www.zemic.com.cn',
# 'www.octholding.com',
# 'www.sedind.com',
# 'www.mintungsten.com',
# 'www.hbjir.com',
# 'www.nmghlhltmy.com',
# 'www.sdiccapital.com',
# 'www.chnau99999.com',
# 'www.tontec.cn',
# 'www.chinasatcom.com',
# 'www.chinaecec.com',
# 'www.sdicpower.com',
# 'www.mccchina.com',
# 'www.sinotrans.com',
# 'www.ynlygf.com',
# 'www.complant-ltd.com',
# 'www.hdjse.com.cn',
# 'www.crsc.cn',
# 'www.csicl.com.cn',
# 'www.chgf.com.cn',
# 'www.first-tractor.com.cn',
# 'www.ctc.ac.cn',
# 'www.cmexpressway.com',
# 'www.dongeejiao.com',
# 'www.crhc-culture.com',
# 'www.tyen.com.cn',
# 'www.chinabluechem.com.cn',
# 'www.ceec.net.cn',
# 'www.ncschina.com',
# 'www.coscol.com.cn',
# 'www.sinomach-auto.com',
# 'www.cscec.com',
# 'www.icbc.com.cn,www.icbc-ltd.com',
# 'www.zjky.cn',
# 'www.wzgroup.cn',
# 'www.gree.com.cn',
# 'www.saicmotor.com',
# 'www.scg.com.cn',
# 'www.weichaipower.com',
# 'www.spdb.com.cn',
# 'www.sphchina.com',
# 'www.cmbchina.com',
# 'www.chinaunicom-a.com']
#
# all_data = []
# for url in urls:
# sql2=sql1.replace("[url]",url)
# cursor.execute(sql2)
# result_data = cursor.fetchall()
# for row in tqdm(result_data):
# try:
# rd = {'id': row[0],
# '编码': row[1],
# '网站名称': row[2],
# '栏目名称': row[3],
# '栏目地址': row[4]
# }
# all_data.append(rd)
# except:
# print("查询失败!!")
#
# df_out = pd.DataFrame(data=all_data)
# df_out.to_excel('导出数据.xlsx', engine='xlsxwriter', index=False)
# -*- coding: utf-8 -*-
import pymysql
import pandas as pd
from tqdm import tqdm
import xlsxwriter
import openpyxl
from urllib.parse import urlparse
def pipeiName(qiyedatas):
    """Split enterprises by whether ``info_source`` already has a matching row.

    Each enterprise is first matched by name with
    ``web_site_name like '%<name>%'``.  When the name is missing or empty,
    the query fails, or the query returns no rows, the enterprise is handed
    to ``pipeiURL`` and its verdict (found / not found) is used instead.

    Uses the module-level ``cursor`` for all queries.

    Args:
        qiyedatas: iterable of dicts with at least the keys ``'name'`` and
            ``'url'`` (the latter is read by ``pipeiURL``).

    Side effects:
        Prints progress and both result lists, then writes
        ``n企业情况在平台中有数据2.xlsx`` (matched) and
        ``n企业情况在平台中没有数据2.xlsx`` (unmatched) to the working directory.
    """
    # The name is passed as a bound parameter, so quotes or other SQL
    # metacharacters in the spreadsheet cannot break or alter the statement.
    # LIKE wildcards (% and _) inside the name are still interpreted by MySQL.
    sql = ("select id, info_source_code, web_site_name, site_name , site_uri "
           "from info_source WHERE web_site_name like %s ")
    cont = 0  # number of enterprises matched by name so far
    qynot = []
    qyin = []

    def merge_url_match(qy):
        # Fall back to URL matching and fold its verdict into the result lists.
        uqynot, uqyin = pipeiURL(qy)
        if uqynot:
            qynot.append(uqynot[0])
        if uqyin:
            qyin.append(uqyin[0])

    for qy in qiyedatas:
        name = qy['name']
        if name is None or '' == name:
            merge_url_match(qy)
            continue
        try:
            # A non-string name raises TypeError on concatenation and is
            # deliberately routed to the URL fallback, like any query failure.
            cursor.execute(sql, ('%' + name + '%',))
        except Exception as e:
            # Best-effort: report the failure instead of hiding it, then
            # try the URL match.
            print(f'name query failed for {name!r}: {e}')
            merge_url_match(qy)
            continue
        result_data = cursor.fetchall()
        if not result_data:
            merge_url_match(qy)
            continue
        cont += 1
        print(cont)
        qyin.append(qy)

    print(qyin)
    df_in = pd.DataFrame(data=qyin)
    df_in.to_excel('n企业情况在平台中有数据2.xlsx', engine='xlsxwriter', index=False)
    print(qynot)
    df_out = pd.DataFrame(data=qynot)
    df_out.to_excel('n企业情况在平台中没有数据2.xlsx', engine='xlsxwriter', index=False)
def pipeiURL(qy):
    """Check whether ``info_source`` has a row whose ``site_uri`` contains the enterprise's host.

    The host is the ``netloc`` part of ``qy['url']`` as parsed by
    ``urllib.parse.urlparse``.  Uses the module-level ``cursor``.

    Args:
        qy: dict with at least the key ``'url'``.

    Returns:
        A tuple ``(uqynot, uqyin)`` of lists; exactly one of them contains
        ``qy``: ``uqyin`` when at least one row matched, ``uqynot`` when the
        URL is missing, is not a string, cannot be parsed, has no network
        location, or no row matched.
    """
    uqynot = []
    uqyin = []
    url = qy['url']
    # The host is passed as a bound parameter instead of being spliced into
    # the SQL text, so unusual characters cannot break or alter the statement.
    sql = ("select id, info_source_code, web_site_name, site_name , site_uri "
           "from info_source WHERE site_uri like %s ")
    if not isinstance(url, str):
        # None, numbers, bytes, ... - nothing usable to match on.
        uqynot.append(qy)
        return uqynot, uqyin
    try:
        domain = urlparse(url).netloc
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. an unbalanced '[').
        domain = ''
    if not domain:
        # NOTE(review): a scheme-less value such as 'www.example.com' has an
        # empty netloc and therefore lands here - confirm this is intended.
        uqynot.append(qy)
        return uqynot, uqyin
    cursor.execute(sql, ('%' + domain + '%',))
    result_data = cursor.fetchall()
    if result_data:
        uqyin.append(qy)
    else:
        uqynot.append(qy)
    return uqynot, uqyin
if __name__ == '__main__':
    # Load the enterprise list from the active sheet of name.xlsx.
    # Every row is read (including any header row); the first three columns
    # are stored under the keys 'yname', 'name' and 'url'.
    workbook = openpyxl.load_workbook('name.xlsx')
    worksheet = workbook.active
    qiyedatas = [
        {'yname': row[0], 'name': row[1], 'url': row[2]}
        for row in worksheet.iter_rows(values_only=True)
    ]

    # SECURITY NOTE(review): the database host and root password are
    # hard-coded here; they should be moved to configuration or environment
    # variables and the password rotated.
    conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
                           charset='utf8')
    # `cursor` stays a module-level name because pipeiName/pipeiURL read it
    # as a global.
    cursor = conn.cursor()
    try:
        pipeiName(qiyedatas)
    finally:
        # Always release the cursor and connection, even when matching fails.
        cursor.close()
        conn.close()
# -*- coding: utf-8 -*-
import pymysql
import pandas as pd
from tqdm import tqdm
import xlsxwriter
import openpyxl
from urllib.parse import urlparse
# Load the enterprise list from the active sheet of name.xlsx.
# Every row is read (including any header row); the first three columns are
# stored under the keys 'yname', 'name' and 'url'.
workbook = openpyxl.load_workbook('name.xlsx')
worksheet = workbook.active
qiyedatas = [
    {'yname': row[0], 'name': row[1], 'url': row[2]}
    for row in worksheet.iter_rows(values_only=True)
]

# SECURITY NOTE(review): the database host and root password are hard-coded
# here; they should be moved to configuration or environment variables and
# the password rotated.
conn = pymysql.Connect(host='114.116.44.11', port=3306, user='root', passwd='f7s0&7qqtK', db='clb_project',
                       charset='utf8')
cursor = conn.cursor()
# The name is passed as a bound parameter, so quotes or other SQL
# metacharacters in the spreadsheet cannot break or alter the statement.
sql1 = ("select id, info_source_code, web_site_name, site_name , site_uri "
        "from info_source WHERE web_site_name like %s ")
cont = 0  # number of enterprises with at least one matching row so far
qynot = []  # enterprises with no matching info_source row
qyin = []   # one record per matching info_source row
try:
    for qy in qiyedatas:
        name = qy['name']
        if name is None:
            qynot.append(qy)
            continue
        if '' == name:
            # Empty names are skipped entirely (reported in neither file).
            continue
        try:
            # A non-string name raises TypeError on concatenation and is
            # reported below like any other query failure.
            cursor.execute(sql1, ('%' + name + '%',))
        except Exception:
            # f-string instead of '+' so that a non-string name cannot make
            # the error report itself raise TypeError and abort the run.
            print(f'异常企业:{name}')
            continue
        result_data = cursor.fetchall()
        if not result_data:
            qynot.append(qy)
            continue
        cont += 1
        print(cont)
        for row2 in tqdm(result_data):
            qyin.append({'id': row2[0],
                         '编码': row2[1],
                         '网站名称': row2[2],
                         '栏目名称': row2[3],
                         '栏目地址': row2[4],
                         '企业名称': qy['name']
                         })
finally:
    # Always release the cursor and connection, even when a query fails.
    cursor.close()
    conn.close()

# Matched rows and unmatched enterprises go to separate workbooks.
df_out = pd.DataFrame(data=qyin)
df_out.to_excel('n企业情况在平台中有数据.xlsx', engine='xlsxwriter', index=False)
df_out = pd.DataFrame(data=qynot)
df_out.to_excel('n企业情况在平台中没有数据.xlsx', engine='xlsxwriter', index=False)
# for row in tqdm(result_data):
#
# urls=['www.shtsp.com',
# 'www.onlyedu.com',
# 'www.shanghai-electric.com',
# 'www.skmic.sh.cn',
# 'www.szby.cn',
# 'www.xatourism.com',
# 'www.netac.com.cn',
# 'www.improve-medical.com',
# 'www.shanghaiyatong.com',
# 'www.nsrd.com.cn',
# 'www.400mn.com',
# 'www.efort.com.cn',
# 'www.jbdc.com.cn',
# 'www.wanma-cable.cn',
# 'www.lugangwool.com',
# 'www.cohc.citic',
# 'www.yafco.com',
# 'www.yccable.cn',
# 'www.ctv-media.com.cn',
# 'www.dehong.com.cn',
# 'www.tong-feng.com',
# 'www.bankgy.cn',
# 'www.viti.net.cn',
# 'www.hcsemitek.com',
# 'www.hrdq.cn',
# 'www.jlgsgl.com',
# 'www.cachet.com.cn',
# 'www.alcha.com',
# 'nj-port.com',
# 'www.qt300061.com',
# 'www.rrlgou.com',
# 'www.ronglian.com',
# 'www.oumasoft.com',
# 'www.cseg.cn',
# 'www.fjgs.com.cn',
# 'www.cr-jh.cn',
# 'www.tckg.cn',
# 'htk.hrbrail.cn',
# 'www.hnainfrastructure.com',
# 'www.chinahaiyue.com',
# 'www.nh.com.cn',
# 'www.hwatsing.com',
# 'www.kingenta.com',
# 'www.nantex.com.cn',
# 'www.fuda.com',
# 'www.nbtp.com.cn',
# 'www.shahe.cn',
# 'www.yulonggold.com',
# 'www.hnss.net.cn',
# 'www.hifuture.com',
# 'www.sye.com.cn',
# 'www.xjh-sc.com',
# 'www.jyzs.com.cn',
# 'www.szmeizhi.com',
# 'www.htrd.cn',
# 'www.wushang.com.cn',
# 'www.sfjt.com.cn',
# 'www.swpdi.com',
# 'www.qjtourism.com',
# 'www.bjtyz.com',
# 'www.leaguerme.com',
# 'www.kingsemi.com',
# 'www.dongri.com',
# 'www.cimcvehiclesgroup.com',
# 'www.bocichina.com',
# 'www.inzonegroup.cn',
# 'www.bchtpharm.com',
# 'www.unifull.com',
# 'www.jingui-silver.com',
# 'www.nbocc.com',
# 'www.zjzhongda.com',
# 'www.zwhlgroup.com',
# 'www.cebbank.com',
# 'www.zhonghongmedical.com',
# 'www.zoje.com',
# 'www.geovis.com.cn',
# 'www.capg.com.cn',
# 'www.crpcc.com.cn',
# 'www.cs.ecitic.com',
# 'www.nesc.cn',
# 'www.ellingtonpcb.com',
# 'www.sanjing.com.cn',
# 'www.hi-expressway.com',
# 'www.joyware.com',
# 'www.yuneng.com.cn',
# 'jngxfz.com',
# 'www.rtxc.com',
# 'www.jsxq.com',
# 'www.jsgaoke.com',
# 'www.cre8direct.cn',
# 'www.qijing-m.com',
# 'www.yaboo-cn.com',
# 'www.cictmobile.com',
# 'www.cqgt.cn',
# 'san-mu.com',
# 'www.zygs.com',
# 'www.hntz.com.cn',
# 'www.gemac-cn.com',
# 'www.ivo.com.cn',
# 'www.cdbio.cn',
# 'www.qdfood.com',
# 'www.sacredsun.cn',
# 'www.glsc.com.cn',
# 'www.bohai-water.com',
# 'www.nsig.com',
# 'www.ticw.com.cn',
# 'www.jjtz.com',
# 'www.fmsh.com',
# 'www.hirisun.com',
# 'www.bjtkgd.com',
# 'www.gj000096.com',
# 'www.ddmcgroup.cn',
# 'www.chinaxaperi.com',
# 'www.xjxlgf.com.cn',
# 'www.xcsg.cn',
# 'www.omnijoi.com',
# 'www.xakaili.com',
# 'www.tbjijian.com',
# 'www.sztechand.com.cn',
# 'www.xhchem.com',
# 'www.cbmt.com.cn',
# 'www.gsgf.com',
# 'www.hisunplas.com',
# 'www.cimc.com',
# 'www.cqggf.com.cn',
# 'www.afrlaser.com',
# 'www.dezhanhealthcare.com',
# 'www.minfa.com',
# 'www.fjzbgf.com',
# 'www.fjcement.com',
# 'www.hhink.com',
# 'www.hbjnhg.com',
# 'www.baiyang.com',
# 'www.cpttg.com.cn',
# 'www.jxcy.com.cn',
# 'www.bhgmall.com.cn',
# 'www.eastsoft.com.cn',
# 'www.dareway.com.cn',
# 'www.sxsanwei.com',
# 'www.hncde.cn',
# 'www.dlrd.com',
# 'www.dirui.com.cn',
# 'www.dxzq.net',
# 'www.lzbank.com',
# 'www.mdep.com.cn',
# 'www.ahtgtb.com',
# 'www.guanfu.com',
# 'www.jszs-group.com',
# 'www.ebjb.com',
# 'www.sunwill.com.cn',
# 'www.gdep.com.cn',
# 'www.gsfins.com',
# 'www.600083.com',
# 'www.htw.cn',
# 'www.china-boya.com',
# 'www.holsin.cn',
# 'www.fhzy.cn',
# 'www.nyocor.com',
# 'tech.csg.cn',
# 'www.bringspring.com',
# 'www.zh-echem.com',
# 'www.crcchem.com',
# 'www.longgaogf.com',
# 'www.sinotherapeutics.com',
# 'www.cesgroup.com.cn',
# 'www.shdyyy.com.cn',
# 'www.shmetro.com',
# 'www.worldunion.com.cn',
# 'www.jsti.com',
# 'www.szweiye.com',
# 'www.szmtc.com.cn',
# 'www.scjd.cn',
# 'www.tfzq.com',
# 'www.tielingnewcity.com.cn',
# 'www.cset.ac.cn',
# 'www.sunnyloantop.cn',
# 'www.vanfund.cn',
# 'www.chinayaxing.com',
# 'www.wrcb.com.cn',
# 'www.600168.com.cn',
# 'www.xacbank.com',
# 'www.goldwind.com.cn',
# 'www.vvgroup.com',
# 'www.roadrover.cn',
# 'www.tellus.cn',
# 'www.sdses.com',
# 'www.ytsc.cn',
# 'www.tande.cn',
# 'www.600773sh.com',
# 'www.lixinner.com',
# 'www.bankofchangsha.com',
# 'www.zjcbcm.com',
# 'www.zhefuet.com',
# 'www.ywgf.cn',
# 'www.zftc.net',
# 'www.zjjgf.com',
# 'www.zhangzidao.com',
# 'www.e-tongcheng.com',
# 'www.conbapharm.com',
# '10.150.48.9',
# 'www.izpec.com',
# 'www.cicc.com',
# 'www.chinastock.com.cn',
# 'www.znhi.com.cn',
# 'www.jiuhuashan.cc',
# 'www.winallseed.com',
# 'ahhui-expressway.net',
# 'www.bossco.cc',
# 'www.gzzjsy.com',
# 'www.hongtastock.com',
# 'www.dazhitech.com',
# '000509.ipo.hk',
# 'www.qxzs.com',
# 'www.lzminbai.com.cn',
# 'www.lzzhuangyuan.com',
# 'www.mosopower.com',
# 'www.nmgxhfxjt.com',
# 'www.sunseagroup.com',
# 'www.rzpcl.com',
# 'www.kaiwenedu.com',
# 'www.avicuas.com',
# 'www.jinsenforestry.com',
# 'www.gpedcl.com',
# 'www.njuae.cn',
# 'www.originwater.com',
# 'www.motic-electric.com',
# 'www.huatongreli.com',
# 'www.dfzq.com.cn',
# 'www.td300321.com',
# 'www.ahjnhg.com',
# 'www.xtc-xny.com',
# 'www.qdtnp.com',
# 'www.sgsbgroup.com',
# 'www.xhmedia.com',
# 'www.sh-sfc.com',
# 'www.shtdcy.com',
# 'www.fd-zj.com',
# 'www.wkyy.com',
# 'www.sdlomon.com',
# 'www.newssc.org',
# 'cs.xunyou.com',
# 'www.whghjt.com',
# ' www.xjxbmy.com',
# 'www.swhygh.com',
# 'www.geron-china.com',
# 'www.yb-zy.com',
# 'www.zjwh.com.cn',
# 'www.tangde.com.cn',
# 'www.crscl.com.cn',
# 'www.insigma.com.cn',
# 'www.cs-grkj.com',
# 'www.psbc.com',
# 'www.csgholding.com',
# 'www.e-chinalife.com',
# 'bank.ecitic.com',
# 'www.cqlummy.com',
# 'www.dlg-expo.com',
# 'www.aadri.com',
# 'www.ghkg000529.com',
# 'www.hzbank.com.cn',
# 'www.hangjintechnology.com',
# 'www.hljjt.com',
# 'www.nxz.com.cn',
# 'www.csrcbank.com',
# 'www.jscnnet.com',
# 'www.jsbchina.cn',
# 'www.zjrcbank.com',
# 'www.hrflanges.com',
# 'www.lander.com.cn',
# 'www.xmgw.com.cn',
# 'www.sdcbcm.com',
# 'www.foundersc.com',
# 'www.sunvim.com',
# 'www.guangyuyuan.com',
# 'www.ghzq.com.cn',
# 'www.hljcbgf.com',
# 'www.crmicro.com',
# 'www.hy-online.com',
# 'www.jiawei.com',
# 'www.saintycorp.com',
# 'www.jrjkg.com.cn',
# 'www.dinghantech.com',
# 'www.semifg.com',
# 'www.srcb.com',
# 'www.tongyuheavy.com',
# 'www.chinalihu.com',
# 'www.amec-inc.com',
# 'www.zoomlion.com',
# 'www.hlxl.com',
# 'www.bbcm.com.cn',
# 'www.ntfan.com',
# 'www.molonggroup.com',
# 'www.bosuntools.com',
# 'www.s10000.com',
# 'www.gtja.com',
# 'www.omhgroup.com',
# 'www.hxb.com.cn',
# 'www.saimo.cn',
# 'www.zjfzgroup.com',
# 'www.longking.com.cn',
# 'www.informrack.com',
# 'www.bosc.cn',
# 'www.cecm.com.cn',
# 'www.zmee.com.cn',
# ' www.ccnew.com ',
# 'www.szclou.com',
# 'www.chengduair.com',
# 'www.zts.com.cn',
# 'www.cofcoet.com',
# 'www.zhengyee.com',
# 'www.bd-zg.com',
# 'www.rainbowco.com.cn',
# 'www.bjcx.cn',
# 'www.xbcy.nx.cn',
# 'www.cylico.com',
# 'www.yundingkeji.cn',
# 'www.gbdz.net',
# 'www.th600281.cn',
# 'www.pdjs.com.cn',
# 'www.xjxfkj.com',
# 'www.zfsycf.com.cn',
# 'www.600228.net',
# 'www.sxbctv.com',
# 'www.sd-wit.com',
# 'www.hzrdjt.com',
# 'www.guofeng.com',
# 'www.jdee.com.cn',
# 'www.hfgf.cn',
# 'www.foton.com.cn',
# 'www.dongjiang.com.cn',
# 'zhujiangpijiu.tmall.com',
# 'www.jin-fu.cn',
# 'www.yanzhoucoal.com.cn',
# 'www.wxm.com',
# 'www.cggc.cn',
# 'www.chinagci.com',
# 'www.jx-blackcat.com',
# 'www.tfny.com',
# 'www.kkcc.com.cn',
# 'www.hengshuilaobaigan.net',
# 'www.smsc.sh.cn',
# 'www.hbkangxin.com.cn',
# 'www.000899.com',
# 'www.rundamedical.com',
# 'www.fjec.com.cn',
# 'www.luckyfilm.com.cn',
# 'www.ansteel.com.cn',
# 'www.rendongholdings.com',
# 'www.toppcb.com',
# 'www.reht.com',
# 'www.portshanghai.com.cn',
# 'scxcdl.com',
# 'www.96335.com',
# 'www.bbmg.com.cn',
# 'www.dahaobj.com',
# 'www.hayao.com',
# 'www.moon-tech.com',
# '210.5.145.194',
# 'www.weldatlantic.com',
# 'www.tvzone.cn',
# 'www.cutc.com.cn',
# 'www.china-htdl.com',
# 'www.jinshiyuan.com.cn',
# 'www.jiangong.com.cn',
# 'www.hnair.net',
# 'www.sxjh.com.cn',
# 'www.gzport.com',
# 'pearlriveryq.tmall.com',
# 'www.hdbp.com',
# 'mecc.sinosteel.com',
# 'www.newworld-china.com',
# 'www.hebgtgf.com',
# 'www.xiangyu.cn',
# 'www.conch.cn',
# 'www.cygs.com',
# 'www.swsc.com.cn',
# 'www.hangyang.com',
# 'www.helichina.com',
# 'www.gybys.com.cn',
# 'www.jlyy1999.com',
# 'www.honglitronic.com',
# 'www.grgbanking.com',
# 'www.xzmbgf.com',
# 'www.yilida.com',
# 'www.westsecu.com',
# 'www.tefafuwu.com',
# 'www.sinopack.com.cn',
# 'www.jsleasing.cn',
# 'www.Gsysgf.com',
# 'www.baoti.com',
# 'www.citicguoaninfo.com',
# 'www.scte.com.cn',
# 'www.whchem.com',
# 'www.scmc-xa.com',
# 'cn.changhong.com',
# 'www.angelyeast.com',
# 'www.hbklgroup.cn',
# 'www.scimee.com',
# 'www.desaysv.com',
# 'www.zthj.com',
# 'www.shaaas.com',
# '218.22.147.82',
# 'www.jlsg.com.cn',
# 'www.iflytek.com',
# 'www.yuedainvest.com',
# 'www.chengzhi.com.cn',
# 'www.jxgtyjy.com',
# 'www.bhcc.cn',
# 'www.gssok.com',
# 'www.whzb.com',
# 'www.citichmc.com',
# 'www.artall.com.cn',
# 'www.china-ftz.com',
# 'www.jiuzhoutech.com.cn',
# 'www.desaybattery.com',
# 'www.zhenye.com',
# 'www.jinjianghotels.sh.cn',
# 'www.fawer.com.cn',
# 'www.chenmingpaper.com',
# 'www.start.com.cn',
# 'www.jhgf.com.cn',
# 'www.xztianlu.com',
# 'www.jljgdq.cn',
# 'www.300188.cn',
# 'www.slkg1949.com',
# 'www.gujing.com',
# 'www.jsjsyh.com',
# 'www.baiyunairport.com',
# 'www.gzr.com.cn',
# 'www.wnq.com.cn',
# 'www.xiamenairport.com.cn',
# 'www.ymhg.com.cn',
# 'www.tjkjsy.com.cn',
# 'www.jlucdi.com',
# 'www.hxfz.com.cn',
# 'www.naura.com',
# 'www.unistrong.com',
# 'www.people.cn',
# 'www.wenergy.cn',
# 'www.navinfo.com',
# 'www.hnfzgf.com',
# 'www.shibeiht.com',
# 'www.abchina.com',
# 'www.gmdi.cn',
# 'www.boc.cn',
# 'ny.csg.cn',
# 'www.chinacnd.com',
# 'www.lxct.cn',
# 'www.hengyun.com.cn',
# 'www.qingshanpaper.com',
# 'www.e-cbest.com',
# 'www.cnyeic.com',
# 'www.cqcy.com',
# 'www.chinawuyi.com.cn',
# 'www.first-panel.com',
# 'www.snxhchem.com',
# 'www.ebscn.com',
# 'www.china-hbp.com',
# 'www.xmklm.com.cn',
# 'www.gsrc.com',
# 'www.gsgczx.cn',
# 'www.cpepgc.com',
# 'www.hwgf757.com',
# 'www.moutaichina.com',
# 'www.fspg.com.cn',
# 'www.bez.com.cn',
# 'www.inspur.com',
# 'www.inesa-it.com',
# 'www.kingyork.biz',
# 'www.chinahitech.com.cn',
# 'www.sanyou-chem.com.cn',
# 'www.jdcmoly.com',
# 'www.lzgf.cn',
# 'www.lshec.com',
# 'www.pub.citic.com',
# 'www.bjca.cn',
# 'www.motimo.com',
# 'www.bluestar-adisseo.com',
# 'www.zgpgc.com',
# 'www.shanghaidragon.com.cn',
# 'www.itg.com.cn',
# 'www.sc-aaa.com',
# 'www.nnsugar.com',
# 'www.600064.com',
# 'www.luxichemical.com',
# 'www.zzce.com.cn',
# 'www.hbshkj.cn',
# 'www.liugong.com',
# 'www.xjpharma.com',
# 'www.xdtz.net',
# 'www.tsingtao.com.cn',
# 'www.cdgxfz.com',
# 'www.shanghai-electric.com',
# 'www.wxtj.com',
# 'www.xinye-tex.com',
# 'www.chinasec.cn',
# 'gufen.luckyfilm.com.cn',
# 'www.szwg.com',
# 'www.hnboyun.com.cn',
# 'www.szmicrogate.com',
# 'www.i-scip.com',
# 'www.yatai.com',
# 'www.ljz.com.cn',
# 'www.hucd.cn',
# 'www.sxzq.com',
# 'www.qdhuaren.com',
# 'www.siti.com.cn',
# 'www.scrbc.com.cn',
# 'www.flzc.com',
# 'www.jx9394.com',
# 'www.cdgas.com',
# 'www.xingfagroup.com',
# 'www.tjsjgf.com.cn',
# 'www.san-huan.com.cn',
# 'www.cqgasgis.com',
# 'www.baoguang.com.cn',
# 'www.lishengpharma.com',
# 'www.arcplus.com.cn',
# 'www.binhaienergy.com',
# 'www.sdjlky.com',
# 'www.zthx.com',
# 'www.lshfz.com.cn',
# 'www.jingnengpower.com',
# 'www.ppm.cn',
# 'www.shenhuo.com',
# 'www.silverbasis.com',
# 'www.chinawindey.com',
# 'www.jialing.com.cn',
# 'www.catec-ltd.cn',
# 'www.jzjt.com',
# 'www.chalkistomato.com',
# 'www.huatian-hotel.com',
# 'www.xhzy.com',
# 'www.shantui.com',
# 'www.chinahuamao.net',
# 'www.sugon.com',
# 'www.mogao.com',
# 'www.valin.cn',
# 'www.ahhymd.com.cn',
# 'www.jei.com.cn',
# 'www.bjhhny.com',
# 'www.liuguo.com',
# 'www.easy-visible.com',
# 'www.xemc.com.cn',
# 'www.jinbei.com.cn',
# 'www.gzgsgf.com.cn',
# 'www.600373.com.cn',
# 'www.dhseed.com',
# 'www.up-china.com',
# 'www.ght-china.com',
# 'www.c-wmm.com',
# 'www.712.cn',
# 'www.czdh.chemchina.com',
# 'www.dgholdings.cn',
# 'www.ykplc.com',
# 'www.cecep.cn',
# 'www.njtc.com.cn',
# 'www.qfcgroup.com',
# 'www.ymnygf.com',
# 'www.zncmjt.com',
# 'www.jianfeng.com.cn',
# 'www.zzepc.com.cn',
# 'www.chengda.com.cn',
# 'www.cindare.com',
# 'www.slpharm.com.cn',
# 'www.ycne.com.cn',
# 'www.brightdairy.com',
# 'www.meichen.cc',
# 'www.jxcgc.com',
# 'www.sanbian.cn',
# 'www.sino-platinum.com.cn',
# 'www.copote.com',
# 'www.gzjiulian.com',
# 'www.tfxingfujia.com',
# 'www.hg-oa.com',
# 'www.beijingnorthstar.com',
# 'www.nfzje.com',
# 'www.hakim.com.cn',
# 'www.hqbeer.com',
# 'www.cnpubc.com',
# 'www.shengheholding.com',
# 'www.pku-hc.com',
# 'www.sanxia.com',
# 'www.zzbank.cn',
# 'www.casit.com.cn',
# 'www.shpdjq.com',
# 'www.xntsgs.com',
# 'www.zjorient.com',
# 'www.capitalwater.cn',
# 'www.mfspchina.com',
# 'www.elecspn.com',
# 'www.glasstex.cn',
# 'www.cpic.com.cn',
# 'www.pingan.cn',
# 'www.tlys.cn',
# 'www.hgbdzhyl.com',
# 'www.stec.net',
# 'www.lpht.com.cn',
# 'www.ezjzy.com',
# 'www.sdec.com.cn',
# 'www.szap.com',
# 'www.cqphar.com',
# 'www.szgas.com.cn',
# 'facs.com.cn',
# 'www.gac.com.cn',
# 'www.changshantex.com',
# 'www.newwf.com',
# 'www.000607.cn',
# 'www.ctsec.com',
# 'www.ecsponline.com',
# 'www.dynagreen.com.cn',
# 'www.grandblue.cn',
# 'www.daqintielu.com',
# 'www.portqhd.com',
# 'www.sanyuan.com.cn',
# 'www.sypglass.com',
# 'www.hx168.com.cn',
# 'www.athub.com',
# 'www.ygsoft.com',
# 'www.chinaddn.com',
# 'yqmy.ymjt.com.cn',
# 'www.jiebai.com',
# 'www.zjjiaoke.com',
# 'www.htsec.com',
# 'www.guilintravel.com',
# 'www.htsc.com.cn',
# 'www.cnooc.com.cn',
# 'www.spacesat.com.cn',
# 'www.hua-yi.cn',
# 'www.starlake.com.cn',
# 'www.xj-tianye.com',
# 'www.boe.com',
# 'www.longjianlq.com',
# 'www.shenhuachina.com',
# 'www.sinovatio.com',
# 'www.chinayanghe.com',
# 'www.wz-zhongheng.com',
# 'www.hbyl.cn',
# 'www.tayho.com.cn',
# 'www.castech.com',
# 'www.foundertech.com',
# 'www.hua-ying.com',
# 'www.csig158.com',
# 'www.leadmanbio.com',
# 'www.highly.cc',
# 'www.kjtbao.com',
# 'www.hrtn.com.cn',
# 'www.jsjnsw.com',
# 'www.chinazhjt.com.cn',
# 'www.xindeco.com',
# 'www.intmedic.com',
# 'pvc.conch.cn',
# 'www.cjtz.cn',
# 'www.fengle.com.cn',
# 'www.shoukaigufen.com',
# 'www.aucma.com',
# 'www.shxcoal.com',
# 'www.soyea.com.cn',
# 'www.ccrq.com.cn',
# 'www.hac.com.cn',
# 'www.ncfc.cn',
# 'www.xinsteel.com.cn',
# 'www.energas.cn',
# 'www.tcsw.com.cn',
# 'www.cchbds.com.cn',
# 'www.gimc.cn',
# 'www.thvow.com',
# 'www.sdecl.com.cn',
# 'www.600689.com',
# 'www.gdgzrb.com',
# 'demo.cge.com.cn',
# 'www.smtcl.com',
# 'www.topnewinfo.cn',
# 'www.phoenix.com.cn',
# 'www.szwuye.com.cn',
# 'www.hitech-develop.com',
# 'www.ntjtc.com',
# 'hxdq.hisense.com',
# 'www.meiling.com',
# 'www.fenjiu.com.cn',
# 'www.lggf.com.cn',
# 'www.myorbita.net',
# 'wrgold.cn',
# 'www.tqcc.cn',
# 'www.citymedia.cn',
# 'www.sndnt.com',
# 'www.grandbuy.com.cn',
# 'www.chalieco.com.cn',
# 'www.bucid.com',
# 'www.xjqscc.com',
# 'www.tjcep.com',
# 'www.scacc.com',
# 'www.sinotruk.com',
# 'www.hgtech.com.cn',
# 'www.fenghua-advanced.com',
# 'www.nantian.com.cn',
# 'www.hisense.com',
# 'www.gztyre.com',
# 'www.opg.cn',
# 'www.yggf.com.cn',
# 'www.bjev.com.cn',
# 'www.hjjs.com',
# 'www.topway.com.cn',
# 'www.sxjgkg.com',
# 'www.shaanxigas.com',
# 'holitech.net',
# 'www.gngf.cn',
# 'www.nonfemet.com',
# 'www.yunnanbaiyao.com.cn',
# 'www.bxlq.com',
# 'www.gepiced.com',
# 'www.yuegui.cn',
# 'www.bbwport.cn',
# 'www.yyth.com.cn',
# 'www.yuexiu-finance.com',
# 'www.wuliangye.com.cn',
# 'www.petrochina.com.cn',
# 'www.ahtrq.com',
# 'www.cxtc.com',
# 'www.lubeichem.com',
# 'www.sgmg.com.cn',
# 'www.600796.com',
# 'www.zzdc.com.cn',
# 'www.jsgxgf.com',
# 'www.yyxc0819.com',
# 'www.tedastock.com',
# 'www.changchai.com',
# 'www.nj-public.com',
# 'gf.tongrentang.com',
# 'www.htchuav.com',
# 'www.000793.com',
# 'www.cnzgc.com',
# 'www.hnhlc.com',
# 'www.xiagong.com',
# 'www.qingdao-port.com',
# 'www.hngoldcorp.com',
# 'www.zhongtong.com',
# 'www.hghngroup.com',
# 'www.dggf.cn',
# 'www.cecl.com.cn',
# 'www.nexhome.cn',
# 'www.sh600649.com',
# 'www.chinasthc.com',
# 'www.tagen.cn',
# 'www.yimingroup.com',
# 'www.cqrcb.com',
# 'www.dongfangelec.com',
# 'www.sinodmc.com',
# 'www.shenergy.net.cn',
# 'www.xjjtjt.com',
# 'www.jzz.cn',
# 'www.yngreen.com',
# 'www.tik.com.cn',
# 'www.dhidcw.com',
# 'www.fjhxhb.com',
# 'www.xinhuanet.com',
# 'www.sidlgroup.com',
# 'www.zjgold.com',
# 'www.taiji.com',
# 'www.cqsxsl.com',
# 'www.bthhotels.com',
# 'www.picc.com',
# 'www.999.com.cn',
# 'home.cofcotunhe.com',
# 'www.gzlnholdings.com',
# 'tgbx.tisco.com.cn',
# 'www.xinzhu.com',
# 'www.zjhengshun.com',
# 'www.yunneidongli.com',
# 'www.weichaihm.com',
# 'acbc.com.cn',
# 'www.shidaiwanheng.com',
# 'www.wxrope.com',
# 'bldmo.blemall.com',
# 'www.hisunpharm.com',
# 'www.sz-expressway.com',
# 'www.chasesun.cn',
# 'www.bgctv.com.cn',
# 'www.c-wst.com',
# 'www.whty.com.cn',
# 'www.fnorient.com',
# 'www.chinagwe.com',
# 'www.yantangmilk.com',
# 'www.600608.net',
# 'www.lhpharma.com',
# 'www.shanghaipower.com',
# 'www.stocke.com.cn',
# 'www.inspur.com',
# 'www.600757.com.cn',
# 'www.chinatelling.com',
# 'www.benefo.tj.cn',
# 'www.chinaadvance.com',
# 'www.sopo.com.cn',
# 'www.xyzq.com.cn',
# 'www.butone.com',
# 'www.fudanfuhua.com',
# 'www.jjmy.cn',
# 'www.szibr.com',
# 'www.hirub.cn',
# 'www.ntjhzy.com',
# 'www.600269.cn',
# 'www.ciwen.com.cn',
# 'www.cytsonline.com',
# 'www.infinova.com.cn',
# 'www.gtiggm.com',
# 'www.scfrkj.cn',
# 'www.nupmg.com',
# 'www.cecep.cn',
# 'www.globalprinting.cn',
# 'www.winshare.com.cn',
# 'www.jlcfc.com',
# 'www.westmining.com',
# 'www.0507.com.cn',
# 'www.taloph.com',
# 'www.zhongxinp.com',
# 'www.guosen.com.cn',
# 'www.fsgas.com',
# 'www.sns-china.com',
# 'www.leaguer.com.cn',
# 'www.daangene.com',
# 'www.jsac.com.cn',
# 'www.xnskg.cn',
# 'www.cogogl.com.hk',
# 'www.sdlqgf.com',
# 'www.000551.cn',
# 'www.zgdygf.com',
# 'www.wwgf.com.cn',
# 'www.jac.com.cn',
# 'www.xsmd.cc',
# 'www.hbctgs.com',
# 'www.jsne.com.cn',
# 'www.camc.cc',
# 'www.grgtest.com',
# 'www.ghtchina.com',
# 'www.shkdchem.com',
# 'www.eascs.com',
# 'www.e-ande.com',
# 'www.qdzzzc.com',
# 'www.china-ceco.com',
# 'www.shjb600838.com',
# 'www.greedc.com',
# 'www.ahccjg.com.cn',
# 'www.ged.com.cn',
# 'www.cwjt.com',
# 'www.cssd.com.cn',
# 'www.sggf.com.cn',
# 'www.chinatypical.com',
# 'www.bhpiston.com',
# 'www.blower.cn',
# 'www.baose.com',
# 'www.guhan.com',
# 'www.pmta.com.cn',
# 'www.ytl.com.cn',
# 'www.dfzk.com',
# 'www.sxbychem.com',
# 'www.prolto.com',
# 'www.jmjj.com',
# 'www.siasun.com',
# 'www.zhzb.cecep.cn',
# 'www.sanju.cn',
# 'www.shaoneng.com.cn,www.sn0601.com',
# 'www.ldjt.com.cn',
# 'www.weifu.com.cn',
# 'www.nfcb.com.cn',
# 'www.600617.com.cn',
# 'www.ankai.com',
# 'www.thunis.com',
# 'www.asiastarbus.com',
# 'www.qjwater.com',
# 'www.chinaemd.com',
# 'www.hl-hengsheng.com',
# 'www.cnqjyy.com',
# 'www.cndl155.com',
# 'www.600822sh.com',
# 'www.600463.com.cn',
# 'www.aceg.com.cn',
# 'www.sdgi.com.cn',
# 'www.000803.com',
# 'xgjx.xcmg.com',
# 'www.nafine.com',
# 'www.yanjing.com.cn',
# 'www.tjsemi.com',
# 'www.hbiszy.com',
# 'www.hacl.cn',
# 'www.orientscape.com',
# 'www.hnsc.cc',
# 'www.tus-est.com',
# 'www.press-mart.com',
# 'www.scmeif.com',
# 'www.cisri-gaona.com.cn',
# 'www.fengguang.com',
# 'www.guangjipharm.com',
# 'www.oie.com.cn',
# 'www.bhferry.com',
# 'www.bnmc.com',
# 'www.changyu.com.cn',
# 'www.gdg.com.cn',
# 'www.huapengglass.com',
# 'www.eal-ceair.com',
# 'www.shenma.com',
# 'www.ihandy.cn',
# 'www.gxlcwater.com',
# 'www.gfx.com.cn',
# 'www.cncqsw.com',
# 'www.yinhai.com',
# 'www.pjgf.cn',
# 'www.magang.com.cn',
# 'www.magang.com.hk',
# 'www.wjwlg.com',
# 'www.chinacdi.com',
# 'www.95559.com.cn',
# 'www.jxcc.com',
# 'www.vanke.com',
# 'www.ccb.com',
# 'www.sinomach-he.cn',
# 'www.newchinalife.com',
# 'www.cofco.com',
# 'www.bxsteel.com',
# 'www.xntg.com',
# 'www.qinchuan.com',
# 'www.000860.com',
# 'www.tymqh.com',
# 'www.lzlj.com',
# 'www.jinfengwine.com',
# 'www.luanhn.com',
# 'www.njyy.com',
# 'www.hzsteel.com',
# 'www.nhbiogroup.com',
# 'www.lzhg.cn',
# 'www.chinacrt.com',
# 'www.sunasia.com',
# 'www.snbc.cn',
# 'www.ybty.com',
# 'www.gqy.com.cn',
# 'www.snm.gd.cn',
# 'www.shkg.com.cn',
# 'www.ssgf.com.cn',
# 'www.qrcb.com.cn',
# 'www.hengfengpaper.com',
# 'www.sec.com.cn',
# 'www.jishimedia.com',
# 'www.pharmglass.com',
# 'www.zyddcm.com',
# 'www.600844.com',
# 'www.zte.com.cn',
# 'www.600780.com.cn',
# 'www.huajinct.com',
# 'www.feidaep.com',
# 'www.citicsteel.com',
# 'www.dtmy.com.cn',
# 'www.king1.com.cn',
# 'www.qhyhgf.com',
# 'www.gsafety.com',
# 'www.yonglin.com',
# 'www.jznygf.com',
# 'www.gd-goworld.com',
# 'www.cnsjy.com',
# 'www.shanghaimaling.com',
# 'www.hbcoal.com',
# 'www.duzhepmc.com',
# 'www.hbyh.cn',
# 'www.nationstar.com',
# 'www.jtport.com.cn',
# 'www.smgjny.com',
# 'www.600633.cn',
# 'www.roadmaint.com',
# 'www.token-ito.com',
# 'www.doublestar.com.cn',
# 'www.yasheng.com.cn',
# 'www.yapp.com',
# 'www.eurasiagroup.com.cn',
# 'www.shchinafortune.com',
# 'www.gyzq.com',
# 'www.blackpeony.com',
# 'nb.nnbh.cn',
# 'www.skiad.com.cn',
# 'www.lonkey.com.cn',
# 'www.hazq.com',
# 'www.ncpc.cn',
# 'www.zmd.com.cn',
# 'www.tyhi.com.cn',
# 'www.lierchem.com',
# 'www.sh-shenda.com',
# 'www.yantian-port.com',
# 'www.palm-la.com',
# 'www.simei.cc',
# 'www.shinva.net',
# 'www.mgtv.com',
# 'www.chinesekk.com',
# 'www.suntien.com',
# 'www.shenzhou-gaotie.com',
# 'www.nbmc.com.cn',
# 'www.luyin.cn',
# 'www.sclth.com',
# 'www.shaangu.com',
# 'www.chinalanhua.com',
# 'www.huakongseg.com.cn',
# 'www.chinafsl.com',
# 'www.sunward.com.cn',
# 'www.lpxdgf.cn',
# 'www.gdsdej.com',
# 'www.jtlfans.com',
# 'www.guangrigf.com',
# 'www.zmj.com',
# 'www.shanghaizhongyida.com',
# 'www.wasu.com',
# 'www.jymdgs.com',
# 'www.ls.com.cn',
# 'www.langold.com.cn',
# 'ylgf.chinalco.com.cn',
# 'www.ht-saae.com',
# 'www.cgnmc.com',
# 'www.highhope.com',
# 'www.segcl.com.cn',
# 'www.nbport.com.cn',
# 'www.aysteel.com.cn',
# 'www.tianjin-port.com',
# 'www.zhangzepower.com',
# 'www.xjtrry.com',
# 'www.chinafirstpencil.com',
# 'www.dlport.cn',
# 'www.cccgroup.com.cn',
# 'www.hunan-huasheng.com',
# 'www.furielec.com',
# 'www.hndayou.com.cn',
# 'www.gosinoic.com',
# 'www.shuangtafood.com',
# 'www.hnnlmb.com',
# 'www.wfj.com.cn',
# 'www.ccht.jl.cn',
# 'www.0898hq.com',
# 'www.netposa.com',
# 'www.doublecoinholdings.com',
# 'www.baoganggf.com',
# 'www.jingchenggf.com.cn',
# 'www.hasco-group.com',
# 'www.shaoxingwine.com.cn',
# 'www.sinosoft.com.cn',
# 'www.b-raymedia.com',
# 'www.zpug.net',
# 'www.fenghuo.cn',
# 'www.bocd.com.cn',
# 'www.jisco.cn',
# 'www.masonled.com',
# 'www.qingxin.com.cn',
# 'www.elht.com',
# 'www.fczy.com',
# 'www.jinlinghotel.com',
# 'www.jxhcsy.com',
# 'www.andty.com',
# 'www.pinggao.com',
# 'www.lsrfzy.com',
# 'www.yhwins.com',
# 'www.chxz.com',
# 'www.laibao.com.cn',
# 'www.jianghang.com',
# 'www.quanjude.com.cn',
# 'www.dldc.com.cn',
# 'www.bailu.com',
# 'www.jsexpressway.com',
# 'www.lkpc.com',
# 'www.liusteel.com',
# 'www.gzgdwl.com',
# 'www.xinsai.com.cn',
# 'www.jdsn.com.cn',
# 'www.cdxrec.com',
# 'www.acdi.ah.cn',
# 'www.lmz.com.cn',
# 'www.cnhuafas.com',
# 'www.hfbh.com.cn',
# 'www.cceg.cn',
# 'www.xjyilite.com',
# 'www.sdhjgf.com.cn',
# 'www.zzpzh.com',
# 'www.xnkf.com',
# 'www.china-meili.com',
# 'www.tyjd.cc',
# 'www.thtf.com.cn',
# 'www.fdm.com.cn',
# 'www.sun-create.com',
# 'www.cgws.com',
# 'www.fsg.com.cn',
# 'www.600683.com',
# 'www.sinopharmholding.com',
# 'www.zhixindianqi.com',
# 'www.lypower.com',
# 'www.yypaper.com',
# 'www.tiantanbio.com',
# 'www.shyndec.com',
# 'www.tpv-tech.com',
# 'www.raycuslaser.com',
# 'www.torchcn.com',
# 'www.czst.com',
# 'www.fawjiefang.com.cn',
# 'www.cmie.csic.com.cn',
# 'zbhj.norincogroup.com.cn',
# 'www.yinglitechem.com',
# 'www.wsdl.com.cn',
# 'www.hlbn.cc',
# 'www.cydl.com.cn',
# 'www.polycn.com',
# 'www.qm.cn',
# 'www.khjt.com.cn',
# 'www.tianhong.cn',
# 'www.griam.cn',
# 'www.dec-ltd.cn',
# 'www.cccgreg.com',
# 'www.cofc.com.cn',
# 'www.segroup.cn',
# 'www.xjgt.com',
# 'www.avicem.com',
# 'www.aaec.com.cn',
# 'www.aeolustyre.com',
# 'bhgf.norincogroup.com.cn',
# 'www.gzqydl.cn',
# 'www.hmavic.com',
# 'www.meheco.com',
# 'www.aisino.com',
# 'www.jinxiaxle.com',
# 'www.accelink.com',
# 'www.chalco.com.cn',
# 'www.wutos.com',
# 'www.casic-addsino.com',
# 'www.huajinchem.com',
# 'www.jemlc.com',
# 'www.sdiczl.com',
# 'www.zdydep.com',
# 'comec.cssc.net.cn',
# 'www.camsl.com',
# 'www.cmsk1979.com',
# 'www.002163.com',
# 'www.trp.com.cn',
# 'www.chinatelecom-h.com',
# 'tech.coscoshipping.com',
# 'hold.coscoshipping.com',
# 'www.guotone.com',
# 'www.cmreltd.com',
# 'www.dcpc.com',
# 'www.dfmg.com.cn',
# 'www.hikvision.com',
# 'www.hpi.com.cn',
# 'www.atmcn.com',
# 'www.triumphltd.cn',
# 'www.hnlcj.cn',
# 'www.phenixoptics.com.cn',
# 'www.ehualu.com',
# 'www.jihua-mall.com',
# 'www.cdt-gxi.com',
# 'www.gyfz000537.com',
# 'www.bygt.com.cn',
# 'www.baosight.com',
# 'www.qlssn.com',
# 'www.otic.com.cn',
# 'www.sinopec.com',
# 'www.huake.com',
# 'www.cecsec.cn',
# 'www.fiytagroup.com',
# 'ynxygf.com',
# 'www.fiberhome.com',
# 'www.lida-oe.com',
# 'www.chinanhl.com',
# 'www.chinasufa.com.cn',
# 'www.besttoneh.com',
# 'www.hdenergy.com',
# 'www.belling.com.cn',
# 'www.baosteelpackaging.com',
# 'www.konkamobile.com',
# 'www.nmhdwz.com',
# 'www.eastcom.com',
# 'www.hbny.com.cn',
# 'www.datang.com',
# 'www.sdic.com.cn',
# 'www.scfast.cn',
# 'www.changan.com.cn',
# 'www.zhglb.com',
# 'www.nxyxny.com.cn',
# 'www.poly.com.cn',
# 'www.chinajinmao.cn',
# 'www.panda.cn',
# 'www.bfdh.com.cn',
# 'www.faway.com',
# 'yjjt.norincogroup.com.cn',
# 'www.camce.com.cn',
# 'www.cnpccapital.cn',
# 'www.spichebei.com',
# 'www.minfinance.com.cn',
# 'www.gzghgf.com',
# 'www.nmgpzny.com',
# 'www.greatwall.cn',
# 'www.baoshengcable.com',
# 'www.csrgc.com.cn',
# 'www.ccccltd.cn',
# 'www.jmc.com.cn',
# 'www.ccyd.com.cn',
# 'www.sinoma-ec.cn',
# 'www.powerchina.cn',
# 'www.szcwh.com',
# 'www.twbb.com.cn',
# 'www.crec.cn',
# 'www.cosl.com.cn',
# 'www.ctgdutyfree.com.cn',
# 'www.gzhtdq.com.cn',
# 'www.sinomach-pi.cn',
# 'www.lingyun.com.cn',
# 'www.grandjoy.com',
# 'www.ycig.com',
# 'www.sychem.com',
# 'www.nfc.com.cn',
# 'www.scc.com.cn',
# 'www.cncec.com.cn',
# 'www.ty-magnet.com',
# 'www.taiji.com.cn',
# 'www.aircraft_co.avic.com',
# 'www.sinoma.com.cn',
# 'www.600316.com.cn',
# 'www.crhic.cn',
# 'www.ceovu.com',
# 'www.csair.com',
# 'www.cncm.com',
# 'www.xdect.com.cn',
# 'www.adama.com',
# 'ssc.sinopec.com',
# 'csscholdings.cssc.net.cn',
# 'www.csic446.com',
# 'www.spc.com.cn',
# 'www.cmschina.com',
# 'www.sinomatech.com',
# 'www.cypc.com.cn',
# 'www.sgss.com.cn',
# 'www.avichina.com',
# 'www.cnecc.com',
# 'www.css.com.cn',
# 'www.caeri.com.cn',
# 'www.tianma.cn',
# 'www.gohigh.com.cn',
# 'znfzy.cnadc.com.cn',
# 'www.bjruitai.com',
# 'www.sinoma-tianshan.cn',
# 'www.ceair.com',
# 'www.chinaunicom.com.hk',
# 'www.cdt-re.com',
# 'www.glaruntech.com',
# 'www.kaifa.cn',
# 'www.clypg.com.cn',
# 'www.baosteel.com',
# 'www.aerocom.cn',
# 'www.hdpi.com.cn',
# 'cwcg.cscec.com',
# 'www.minlist.com.cn',
# 'www.jnhi.com',
# 'www.cac-citc.com',
# 'www.dtpower.com',
# 'www.jwgf.com',
# 'www.chinahaihua.com',
# 'www.westone.com.cn',
# 'www.detc.com.cn',
# 'www.eastcompeace.com',
# 'www.sinochemintl.com',
# 'www.hypower.com.cn',
# 'www.crcce.com.cn',
# 'www.cahic.com',
# 'www.travelsky.net',
# 'www.cdc.com.cn',
# 'www.cnbmltd.com',
# 'www.mjsdgs.com',
# 'www.jonhon.cn',
# 'www.polyculture.com.cn',
# 'www.airchina.com',
# 'www.hfdl.aecc.com',
# 'www.cgnnt.com.cn',
# 'www.cmenergyshipping.com',
# 'www.cei1958.com',
# 'www.sac-china.com',
# 'www.northeo.com,www.sicong.com',
# 'www.pgvt.cn',
# 'www.shecc.com',
# 'www.tdtec.com',
# 'www.600795.com.cn',
# 'www.bnbm.com.cn',
# 'www.yangnongchem.com',
# 'www.tjfaw.com',
# 'www.aviconics.com.cn',
# 'www.sumec.com',
# 'www.lanpec.com',
# 'www.xjgc.sgcc.com.cn',
# 'www.spicjl.com',
# 'www.cnoocengineering.com',
# 'cpec.cnpc.com.cn',
# 'www.cgnp.com.cn',
# 'www.aritime.com',
# 'www.cmstd.com.cn',
# 'www.guanhao.com',
# 'www.tec.crrczic.cc',
# 'energy.coscoshipping.com',
# 'development.coscoshipping.com',
# 'www.cnnp.com.cn',
# 'www.crcc.cn',
# 'www.jushi.com',
# 'www.cfhi.com',
# 'www.ctsfreight.com',
# 'www.norinco-intl.com',
# 'www.cmpo1914.com',
# 'www.easpring.com',
# 'www.aviccapital.com',
# 'www.mxdl.com.cn',
# 'www.crrcgc.cc',
# 'www.szaccord.com.cn',
# 'www.naritech.cn',
# 'www.hhi.com.cn',
# 'www.daae.com.cn',
# 'www.jiuguijiu000799.com',
# 'www.4008874005.com',
# 'www.zssw.com',
# 'www.lantaicn.com',
# 'www.crhms.cn',
# 'www.china-csicpower.com.cn',
# 'www.avic-sac.com',
# 'www.sinopecgroup.com',
# 'www.ascf.com.cn',
# 'www.haisum.com',
# 'cenertech.cnooc.com.cn',
# 'www.aerosun.cn',
# 'www.xinxing-pipes.com',
# 'cn.zpmc.com',
# 'www.sdtny.copm',
# 'www.dfac.com',
# 'www.as-hitech.com',
# 'avicopter.avic.com',
# 'flepc.com.cn',
# 'www.kraussmaffei.ltd',
# 'www.hhkj.chemchina.com',
# 'www.bgrimmtec.com',
# 'www.yunnan-copper.com',
# 'www.chinacoalenergy.com',
# 'www.zemic.com.cn',
# 'www.octholding.com',
# 'www.sedind.com',
# 'www.mintungsten.com',
# 'www.hbjir.com',
# 'www.nmghlhltmy.com',
# 'www.sdiccapital.com',
# 'www.chnau99999.com',
# 'www.tontec.cn',
# 'www.chinasatcom.com',
# 'www.chinaecec.com',
# 'www.sdicpower.com',
# 'www.mccchina.com',
# 'www.sinotrans.com',
# 'www.ynlygf.com',
# 'www.complant-ltd.com',
# 'www.hdjse.com.cn',
# 'www.crsc.cn',
# 'www.csicl.com.cn',
# 'www.chgf.com.cn',
# 'www.first-tractor.com.cn',
# 'www.ctc.ac.cn',
# 'www.cmexpressway.com',
# 'www.dongeejiao.com',
# 'www.crhc-culture.com',
# 'www.tyen.com.cn',
# 'www.chinabluechem.com.cn',
# 'www.ceec.net.cn',
# 'www.ncschina.com',
# 'www.coscol.com.cn',
# 'www.sinomach-auto.com',
# 'www.cscec.com',
# 'www.icbc.com.cn,www.icbc-ltd.com',
# 'www.zjky.cn',
# 'www.wzgroup.cn',
# 'www.gree.com.cn',
# 'www.saicmotor.com',
# 'www.scg.com.cn',
# 'www.weichaipower.com',
# 'www.spdb.com.cn',
# 'www.sphchina.com',
# 'www.cmbchina.com',
# 'www.chinaunicom-a.com']
#
# all_data = []
# for url in urls:
# sql2=sql1.replace("[url]",url)
# cursor.execute(sql2)
# result_data = cursor.fetchall()
# for row in tqdm(result_data):
# try:
# rd = {'id': row[0],
# '编码': row[1],
# '网站名称': row[2],
# '栏目名称': row[3],
# '栏目地址': row[4]
# }
# all_data.append(rd)
# except:
# print("查询失败!!")
#
# df_out = pd.DataFrame(data=all_data)
# df_out.to_excel('导出数据.xlsx', engine='xlsxwriter', index=False)
# -*- coding: utf-8 -*-
import time
import urllib
import requests
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import wget
from openpyxl import Workbook
import pandas as pd
def createDriver(
    chrome_driver=r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe',
    chrome_binary=r'D:\crawler\baidu_crawler\tool\Google\Chrome\Application\chrome.exe',
):
    """Start a Chrome WebDriver session and return it.

    Args:
        chrome_driver: Path to the chromedriver executable.
        chrome_binary: Path to the Chrome browser executable to launch.

    Returns:
        The ``webdriver.Chrome`` instance. The caller owns it and should
        call ``quit()`` when done.
    """
    service = Service(chrome_driver)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = chrome_binary
    # To route traffic through a proxy, enable:
    # proxy = "127.0.0.1:8080"  # proxy host and port
    # chrome_options.add_argument('--proxy-server=http://' + proxy)

    # ``options=`` replaces the deprecated ``chrome_options=`` keyword, which
    # newer Selenium 4 releases no longer accept.
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver
def _safe_filename(title, max_length=150):
    """Turn a page title into a string that is safe to use as a file name."""
    # Characters that Windows forbids in file names are replaced by "_".
    cleaned = title.translate(str.maketrans({ch: '_' for ch in '\\/:*?"<>|'}))
    # Collapse newlines/tabs/repeated spaces that may appear in scraped text.
    cleaned = ' '.join(cleaned.split())
    # Keep the name short so the full path stays within OS limits.
    cleaned = cleaned[:max_length].rstrip(' .')
    return cleaned or 'untitled'


def listPage(pdf_dir="D:/cis/"):
    """Crawl the BIS "Russia" smart-search result pages and record each hit.

    Six result pages (20 results per page) are loaded. For each result the
    detail page is fetched with ``detail()``, its PDF (when one is linked)
    is downloaded with ``download_file()``, and one row is appended to the
    Excel file with ``writerToExcel()``.

    Args:
        pdf_dir: Directory that receives the downloaded PDF files. It is
            created when missing.
    """
    import os  # local import: only this function needs it

    os.makedirs(pdf_dir, exist_ok=True)
    driver = createDriver()
    try:
        for page in range(0, 6):
            start = page * 20
            list_url = f'https://www.bis.doc.gov/index.php/smart-search?searchword=Russia&searchphrase=all&start={start}'
            driver.get(list_url)
            # Make every link absolute before extracting the result entries.
            soup = paserUrl(driver.page_source, list_url)
            doc = pq(str(soup.prettify()))
            titles = doc('dl[class="search-results"]>dt')
            dates = doc('dl[class="search-results"]>dd[class="result-created"]')
            # zip() pairs each title with its date and cannot run past the
            # shorter list if the page is malformed.
            for tt, dd in zip(titles, dates):
                ttdoc = pq(tt)
                dddoc = pq(dd)
                title = ttdoc('a').text()
                date = dddoc('dd[class="result-created"]').text()
                detail_url = ttdoc('a').attr('href')
                pdfurl, content = detail(driver, detail_url)
                if pdfurl:
                    # Raw titles may contain characters that are illegal in
                    # file names, so sanitize before building the path.
                    pdfpath = os.path.join(pdf_dir, _safe_filename(title) + ".pdf")
                    download_file(pdfurl, pdfpath)
                else:
                    pdfpath = ''
                detailmsg = {
                    "title": title,
                    "date": date,
                    "url": detail_url,
                    "content": content,
                    "pdfurl": pdfurl,
                    "pdfpath": pdfpath,
                }
                # Written one record at a time so that already-collected rows
                # are on disk if a later page fails.
                writerToExcel([detailmsg])
    finally:
        # Always release the browser process, even when the crawl fails.
        driver.quit()
def detail(driver, url):
    """Load a detail page and extract its PDF link and body text.

    The page is loaded through ``driver`` with up to five attempts.

    Args:
        driver: WebDriver-like object providing ``get(url)`` and
            ``page_source``.
        url: Address of the detail page.

    Returns:
        A ``(pdfurl, content)`` tuple of strings. Either element is ``''``
        when it cannot be extracted; both are ``''`` when the page could not
        be loaded at all.
    """
    html = ''
    for _ in range(5):
        try:
            driver.get(url)
            # Fixed pause after navigation, presumably to let the page finish
            # loading before the source is read.
            time.sleep(3)
            # Make links absolute so the extracted PDF href is directly usable.
            soup = paserUrl(driver.page_source, url)
            html = str(soup.prettify())
        except Exception as e:
            print(e)
            html = ''
        if html:
            break

    if not html:
        # Every attempt failed: there is nothing to parse, and handing an
        # empty document to the parser would raise.
        return '', ''

    docc = pq(html.encode('utf-8'))
    try:
        # attr() yields None when the link is absent; normalise to ''.
        pdfurl = docc('div[class="docman_download"]>a').attr('href') or ''
    except Exception:
        pdfurl = ''
    try:
        content = docc('div[class="item-page"]').text()
    except Exception:
        content = ''
    return pdfurl, content
# Append records to the Excel file.
def writerToExcel(detailList, path=None):
    """Append ``detailList`` as new rows to an existing ``.xlsx`` file.

    Args:
        detailList: List of dicts, one per row; the keys become the columns.
        path: Workbook to update. When omitted, the module-level
            ``filename`` (set in the ``__main__`` section) is used.

    The file must already exist; it is read completely, extended and
    written back.
    """
    target = filename if path is None else path
    # Read the rows that are already stored.
    existing_data = pd.read_excel(target)
    new_data = pd.DataFrame(data=detailList)
    if existing_data.empty:
        # A freshly created workbook has no rows and no columns.
        combined_data = new_data
    else:
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported way to add rows.
        combined_data = pd.concat([existing_data, new_data], ignore_index=True)
    combined_data.to_excel(target, index=False)
def download_file(url, save_path):
    """Download ``url`` to ``save_path`` with up to five attempts.

    The request goes through the local proxy at 127.0.0.1:1080 with
    browser-like headers and certificate verification disabled.

    Args:
        url: Address of the file to download.
        save_path: Destination file path.

    Returns:
        ``save_path``. It is returned even when all attempts failed; in
        that case the URL is printed and no file is written by this call.
    """
    # Built once: neither the headers nor the proxy change between attempts.
    header = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh;q=0.9',
        'Cache-Control':'no-cache',
        'Connection':'keep-alive',
        'Cookie':'b099bcecf0be876536bb9d4826b25ba8=e2horegllcbddejsiveijp7of0; cookiesession1=678A3E12247313FBD6F74569925F4EFD; _ga=GA1.1.840765784.1693040959; __cf_bm=4DTZeDEU67Xjr5nt9OsbE1g1UTdVuOGdQlhj4KD5U2I-1693190695-0-AW81rfvAFUnclDkFVJYqD8+RWrC8FngMzW0dJ+bVHA+JwmPUVpc9/ogA0jhXrKLFYWun2BoK0R/hqWgGZAw/I1Y=; referrer_site=https%3A%2F%2Fwww.bis.doc.gov%2Findex.php%2Fsmart-search%3Fsearchword%3DRussia%26searchphrase%3Dall; csrf_token=a0d03e256a36d037708a809220564f407dee78bc; _ga_TPRT7QB30Y=GS1.1.1693190696.4.1.1693190720.0.0.0',
        'Host':'www.bis.doc.gov',
        'Pragma':'no-cache',
        'Referer':'https://www.bis.doc.gov/index.php/documents/product-guidance/3300-russia-medical-related-license-application-guidance-fpd-final-incorp-occ-and-3f-cmts-clean-071323',
        'Sec-Fetch-Dest':'document',
        'Sec-Fetch-Mode':'navigate',
        'Sec-Fetch-Site':'same-origin',
        'Sec-Fetch-User':'?1',
        'Upgrade-Insecure-Requests':'1',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'sec-ch-ua':'"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile':'?0',
        'sec-ch-ua-platform':'"Windows"'
    }
    proxy = {'https': 'http://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}

    for _ in range(5):
        try:
            response = requests.get(url, proxies=proxy, headers=header, verify=False, timeout=10)
            # Treat 4xx/5xx answers as failures so an error page is never
            # stored under the target file name.
            response.raise_for_status()
            with open(save_path, 'wb') as file:
                file.write(response.content)
            break
        except Exception as e:
            time.sleep(5)
            print(e)
    else:
        # All attempts failed: report the URL that could not be fetched.
        print(url)
    return save_path
def download_file3(url, save_path):
    """Download ``url`` to ``save_path`` using the ``wget`` package.

    NOTE(review): this function previously built a browser-like header dict
    and a ``urllib.request.Request`` from it, but never used either one;
    ``wget.download`` is called with the bare URL only. That dead code has
    been removed, so the request still goes out without custom headers,
    exactly as before. Use ``download_file`` when custom headers, cookies
    or a proxy are needed.

    Args:
        url: Address of the file to download.
        save_path: Destination file path.
    """
    wget.download(url, save_path)
# Convert the relative addresses inside an HTML document to absolute ones.
def paserUrl(html, listurl):
    """Parse ``html`` and resolve link targets against ``listurl``.

    Every ``<a>`` and ``<img>`` tag is visited. If the tag has an ``href``
    attribute that attribute is made absolute; otherwise, if it has a
    ``src`` attribute, that one is made absolute.

    Args:
        html: HTML markup to parse.
        listurl: Base URL used to resolve relative addresses.

    Returns:
        The ``BeautifulSoup`` document with the rewritten attributes.
    """
    document = BeautifulSoup(html, 'html.parser')
    for tag in document.find_all(['a', 'img']):
        # Only the first attribute present is rewritten; 'href' wins over 'src'.
        attr_name = next((name for name in ('href', 'src') if name in tag.attrs), None)
        if attr_name is not None:
            tag[attr_name] = urljoin(listurl, tag[attr_name])
    return document
if __name__ == '__main__':
    # Create an empty workbook on disk before crawling. ``filename`` is a
    # module-level name that writerToExcel() reads when it appends rows.
    workbook = Workbook()
    filename = 'cis.xlsx'
    workbook.save(filename)

    listPage()

    # Manual check of a single detail page:
    # driver=createDriver()
    # url='https://www.bis.doc.gov/index.php/policy-guidance/deemed-exports/deemed-exports-faqs/faq/116-what-areas-are-considered-russia-for-purposes-of-these-sanctions'
    # detail(driver,url)
...@@ -30,7 +30,7 @@ cursor = baseCore.cursor ...@@ -30,7 +30,7 @@ cursor = baseCore.cursor
def job_2(): def job_2():
log.info('----开始采集---俄罗斯国家杂志----') log.info('----开始采集---俄罗斯国家杂志----')
path = 'D:chrome/chromedriver.exe' path = r'C:\Users\WIN10\DataspellProjects\crawlerProjectDemo\tmpcrawler\cmd100\chromedriver.exe'
driverContent = baseCore.buildDriver(path, headless=False) driverContent = baseCore.buildDriver(path, headless=False)
url = 'http://publication.pravo.gov.ru/documents/block/president' url = 'http://publication.pravo.gov.ru/documents/block/president'
req = requests.get(url,headers) req = requests.get(url,headers)
...@@ -45,4 +45,3 @@ def job_2(): ...@@ -45,4 +45,3 @@ def job_2():
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论