#!/usr/bin/python
# -*- coding: UTF-8 -*-
# @date: 2020/2/27 16:31
# @name: Fofa_Spider
# @author:Mke2fs
"""Scrape FOFA search-result pages for web URLs and append them to a text file.

``init`` walks the result pages of one FOFA query, ``getdata`` downloads a page
and extracts the URLs it links to, and ``write2file`` appends them to a text
file.  ``IRank`` is an optional step (disabled by default) that looks up each
site on aizhan.com through headless Chrome and keeps the sites whose Baidu
weight is greater than zero.
"""
import base64
import random
import re
import time
import urllib
from threading import Thread
from urllib.parse import quote

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from tld import get_tld

Host = 'https://fofa.so/'

# The session cookie has to be refreshed before every run; updating it is all
# that is needed to make the script work again.
# NOTE(review): a session token is hard-coded in source control here; consider
# loading it from the environment or a local config file instead.
data = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Accept-Encoding': 'gzip, deflate',
    '_fofapro_ars_session': '3a96d0e7e6caf3b25d06723f682ef807',
    'Connection': 'close',
    'Upgrade-Insecure-Requests': '1',
    'If-None-Match': 'W/"fc89a1c5bc61e3b8e515db61cef74ac0',
    'Cache-Control': 'max-age=0',
}

# Request headers for rank.chinaz.com.  Not referenced by any function in this
# file; kept so external users of the module keep working.
zhanzhang_headers = {
    'Host': 'rank.chinaz.com',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': (
        'BDTUJIAID=febc82b216a29e116730505bc1e471a9; '
        'inputbox_urls=%5b%22passivcashincome.com%22%2c%22feifeizuida.com%22%5d; '
        'UM_distinctid=16e63892b4e3b1-031b6053dcfc9f-7711b3e-100200-16e63892b4fa8a; '
        'Hm_lvt_aecc9715b0f5d5f7f34fba48a3c511d6=1579746706; '
        'CNZZDATA433095=cnzz_eid%3D297046501-1578041490-null%26ntime%3D1583974744; '
        'CNZZDATA5082706=cnzz_eid%3D902178444-1578044637-null%26ntime%3D1583975389; '
        'qHistory=aHR0cDovL3Rvb2wuY2hpbmF6LmNvbV/nq5nplb/lt6Xlhbd8aHR0cDovL3JhbmsuY2hpbmF6LmNvbV/nmb7luqbmnYPph43mn6Xor6J8aHR0cDovL3Rvb2wuY2hpbmF6LmNvbS90b29scy9lc2NhcGUuYXNweF9Fc2NhcGXliqDlr4Yv6Kej5a+GfGh0dHA6Ly93aG9pcy5jaGluYXouY29tL3JldmVyc2UrV2hvaXPlj43mn6V8aHR0cDovL3dob2lzLmNoaW5hei5jb20vK1dob2lz5p+l6K+i'
    ),
    'Connection': 'close',
}

cookies = {'_fofapro_ars_session': '3a96d0e7e6caf3b25d06723f682ef807'}

# Anchor tags that open in a new tab; the capture is greedy, so it can run
# past the closing quote of the href up to the last `">` on the same line.
_LINK_PATTERN = re.compile('<a target="_blank" href="(.*)">')
# Matches web (http/https) URLs only.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)
# Alternative pattern for matching bare IPv4 addresses instead of URLs:
# r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"
# The Baidu weight is read from the file name of the rank badge image.
_BAIDU_RANK_PATTERN = re.compile(r'<img src="//statics.aizhan.com/images/br/(.*?).png')
_SCHEME_PATTERN = re.compile(r'^https?://')
# Links starting with this prefix are dropped from the results.
_IGNORED_PREFIX = 'http://beian.miit.gov.cn'


def _extract_urls(html_text):
    """Return the web URLs found in the matching anchor tags of *html_text*.

    URLs repeated inside a single captured anchor are reported once; distinct
    URLs are returned as separate entries, in order of appearance.
    """
    urls = []
    for captured in _LINK_PATTERN.findall(html_text):
        seen = set()
        for url in _URL_PATTERN.findall(captured):
            if url not in seen:
                seen.add(url)
                urls.append(url)
    return urls


def getdata(Host, timeout=30):
    """Download one result page and return the list of URLs it links to.

    Args:
        Host: full URL of the result page to fetch.
        timeout: seconds to wait for the server before giving up.

    Returns:
        A list of URL strings (possibly empty).

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    response = requests.get(Host, headers=data, cookies=cookies, timeout=timeout)
    # Undecodable bytes are replaced so that one bad byte cannot abort the run.
    return _extract_urls(response.content.decode('utf-8', errors='replace'))


def init(search_content, pages, start_page=3):
    """Crawl the result pages of a query and append the found URLs to a file.

    Args:
        search_content: the query string; it is base64-encoded into the URL.
        pages: last page number to fetch (inclusive).
        start_page: first page number to fetch (inclusive).
    """
    encoded = base64.b64encode(search_content.encode("utf-8")).decode("utf-8")
    # base64 output may contain '+', '/' and '=', which must be percent-encoded
    # to survive inside a query string.
    qbase64 = quote(encoded, safe='')
    for page in range(start_page, pages + 1):  # page range control
        Hosts = Host + 'result?qbase64=' + qbase64 + '&page=' + str(page)
        print(Hosts, '剩余查询次数' + str(500 - page))
        try:
            found = getdata(Hosts)
        except requests.RequestException as exc:
            # Best effort: report the failed page and keep crawling.
            print('[-] request failed for page {0}: {1}'.format(page, exc))
            found = []
        time.sleep(random.randint(1, 3))  # random pause between requests
        sites = [url for url in found if url and not url.startswith(_IGNORED_PREFIX)]
        print(sites)
        # The weight-lookup step is switched off for now:
        # IRank(sites)
        write2file(sites)


def write2file(sites, path='Coremail-2020.txt'):
    """Append every non-empty entry of *sites* to *path*, one per line.

    Args:
        sites: iterable of URL strings.
        path: output file, opened in append mode with UTF-8 encoding.
    """
    lines = [site + '\n' for site in sites if site]
    if not lines:
        return
    with open(path, 'a', encoding='utf-8') as out:
        out.writelines(lines)


def _strip_scheme(url):
    """Return *url* without a leading http(s):// prefix or trailing slashes."""
    return _SCHEME_PATTERN.sub('', url.strip()).rstrip('/')


def _read_baidu_rank(driver):
    """Return the Baidu weight shown on the loaded page, or None if absent.

    If the badge is not in the page source yet, wait 1-3 seconds and look once
    more before giving up.
    """
    found = _BAIDU_RANK_PATTERN.findall(driver.page_source)
    if not found:
        time.sleep(random.randint(1, 3))
        found = _BAIDU_RANK_PATTERN.findall(driver.page_source)
    return found[0] if found else None


def IRank(sub, output_path='iRank_Thinkcmf.txt'):
    """Look up the Baidu weight of each site and record those above zero.

    Args:
        sub: iterable of site URLs (a trailing newline on an entry is ignored).
        output_path: file that sites with a weight > 0 are appended to.
    """
    print('[+] 正在后台打开谷歌浏览器...')
    chrome_option = Options()
    # Do not load images, to speed up page loads.
    chrome_option.add_argument('blink-settings=imagesEnabled=false')
    # Run without a visible window; on Linux systems without a display Chrome
    # fails to start unless this flag is set.
    chrome_option.add_argument('--headless')
    # Turn off console logging, which is too noisy.
    chrome_option.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(options=chrome_option)
    try:
        # NOTE(review): Selenium takes this timeout in seconds, so 5000 is
        # roughly 83 minutes; confirm this was not meant as milliseconds.
        driver.set_page_load_timeout(5000)
        print('[+] 正在查询中,请稍等 ~')
        num = 0
        for line in sub:
            site = _strip_scheme(line.strip('\n'))
            if not site:
                continue
            try:
                driver.get('https://www.aizhan.com/seo/{domain}'.format(domain=site))
                baidurank = _read_baidu_rank(driver)
                if baidurank is None:
                    print('[-] no Baidu weight found for ' + site)
                    continue
                num = num + 1
                print("[+] 正在查询第"+str(num)+"条"+" 百度权重:"+str(baidurank)+" url: "+site)
                if int(baidurank) > 0:
                    with open(output_path, 'a', encoding='utf-8') as out:
                        out.write(site + '\n')
            except Exception as exc:
                # Best effort: one failing site must not stop the whole batch,
                # but the failure is reported instead of being hidden.
                print('[-] lookup failed for {0}: {1}'.format(site, exc))
    finally:
        # quit() ends the whole browser session, even after an error.
        driver.quit()


if __name__ == "__main__":
    rule = 'app="Coremail-邮件系统" && host=".com"'  # query to run
    p2 = 999  # last result page to fetch
    init(rule, p2)

# Rule library (example queries):
#   Struts2:
#     app="struts2" && country="CN" && host=".com"
#   app="ThinkPHP" && region="Shanghai" && host=".com"
#   app="thinkcmf" && region="Zhejiang" && host=".com"
#   app="Coremail-邮件系统" && country="CN" && host=".com"
#   app="Coremail-邮件系统" && host=".com"