网上有不少文章介绍代理IP的验证方法。
import urllib.request
# Address ("host:port") of the proxy under test; also used to label the output.
thisIP = "120.77.249.46:8080"

# Route plain-HTTP requests issued through urllib.request via the proxy.
proxy = urllib.request.ProxyHandler({"http": "http://" + thisIP})
opener = urllib.request.build_opener(proxy)
urllib.request.install_opener(opener)

try:
    # The request itself is inside the try block so that a timeout or refused
    # connection is reported as an invalid proxy instead of a traceback.
    with urllib.request.urlopen('http://www.baidu.com', timeout=2) as response:
        data = response.read().decode('utf-8', 'ignore')
except Exception:
    # Deliberately broad (best-effort check), but unlike a bare ``except`` it
    # still lets KeyboardInterrupt / SystemExit through.
    print(thisIP + ':无效!!!')
else:
    # Heuristic: a body longer than 5000 characters is treated as success.
    if len(data) > 5000:
        print(thisIP + ':可用')
    else:
        print(thisIP + ':无效')
经测试,发现存在以下问题:
即使代理无效,请求有时也会返回一个网页(但不是百度),且其内容长度大于5000,因此该方法会误判,存在bug。
import telnetlib
# ``ip`` and ``port`` must be defined by the surrounding code before this runs.
# NOTE: telnetlib was deprecated in Python 3.11 and removed in Python 3.13.
try:
    # Telnet objects are context managers; leaving the block closes the
    # socket, which the bare constructor call would otherwise leak.
    with telnetlib.Telnet(ip, port, timeout=2):
        pass
except OSError:
    # Connection refused, unreachable host and timeouts are all OSError.
    print("代理IP无效!")
else:
    print("代理IP有效!")
经测试,发现存在以下问题:
虽然某些代理可以用telnet测试通过,但实际上仍然无效。有兴趣的可在Windows终端中测试。
说明:利用 http://icanhazip.com/ 返回的IP进行校验,如果返回的是代理池中的IP,说明代理有效,否则说明代理实际无效。
import random

import requests

# Pool of candidate proxies, each written as "host:port".
IPAgents = [
    "118.190.95.35:9001",
]

# NOTE(review): intended to allow up to 3 retries; confirm that the installed
# requests version actually honours this module attribute.
requests.adapters.DEFAULT_RETRIES = 3

IP = random.choice(IPAgents)
thisProxy = "http://" + IP

try:
    res = requests.get(url="http://icanhazip.com/", timeout=8, proxies={"http": thisProxy})
except Exception:
    # Best-effort check: any failure while talking through the proxy means the
    # proxy is unusable. KeyboardInterrupt / SystemExit still propagate.
    print("代理IP无效!")
else:
    # The response body is used as the caller's address, with surrounding
    # whitespace stripped. It has to be compared with the host part of the
    # proxy, not with the full "http://host:port" proxy URL.
    proxyIP = res.text.strip()
    if proxyIP == IP.split(":")[0]:
        print("代理IP:'" + proxyIP + "'有效!")
    else:
        print("代理IP无效!")
# -*- coding:utf-8 -*-
'''
Create time:
author:
Function:
Check http proxy
'''
import time,urllib, urllib3,urllib.request,random, requests
class CheckProxy():
    """Check whether HTTP proxies are usable.

    Two strategies are offered:

    * ``check_proxy_main1`` fetches well-known pages through an authenticated
      proxy using ``urllib.request``.
    * ``check_proxy_main2`` asks http://icanhazip.com/ for the caller's IP
      through the proxy and compares the answer with the proxy host.
    """

    def __init__(self):
        # Browser-like request headers sent by check_proxy_main1.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-language': 'zh-CN,zh;q=0.9',
            'accept-encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
        }
        # Pages fetched by check_proxy_main1 (foreign=0 / foreign=1).
        self.urls_inland = ['https://www.baidu.com']
        self.urls_foreign = ["http://www.google.com"]
        # NOTE(review): proxy credentials are hard-coded in source; consider
        # loading them from configuration or the environment instead.
        self.proxy_info = {"user": "dm", "pass": "innodealing"}
        # Timeout, in seconds, applied to every request.
        self.timeout = 5

    def get_data(self, url, headers=None):
        """Open *url* with urllib and report the outcome without raising.

        Args:
            url: Address to fetch.
            headers: Optional mapping of request headers.

        Returns:
            ``(url, response, 1)`` on success, where the caller is responsible
            for closing ``response``; ``(url, exception, 0)`` on any failure.
        """
        try:
            # ``headers or {}`` replaces the former mutable ``{}`` default.
            req = urllib.request.Request(url, headers=headers or {})
            ret = urllib.request.urlopen(req, timeout=self.timeout)
            return url, ret, 1
        except Exception as e:
            return url, e, 0

    def check_proxy_main1(self, proxy, foreign=0):
        """Fetch the configured test pages through *proxy* and print results.

        Args:
            proxy: Proxy address written as ``host:port``; the credentials
                from ``self.proxy_info`` are prepended.
            foreign: Truthy to test ``self.urls_foreign``, otherwise
                ``self.urls_inland`` is tested.
        """
        urls = self.urls_foreign if foreign else self.urls_inland
        print("Starting check proxy = %s..." % proxy)

        # Register the proxy for both schemes: with only an "http" entry the
        # https:// test URL would be fetched directly, bypassing the proxy.
        proxy_url = "http://%(user)s:%(pass)s@" % self.proxy_info + proxy
        proxy_support = urllib.request.ProxyHandler({"http": proxy_url, "https": proxy_url})
        opener = urllib.request.build_opener(proxy_support)
        urllib.request.install_opener(opener)

        for url in urls:
            url, ret, flag = self.get_data(url, headers=self.headers)
            if not flag:
                print(" Check %s is ERROR: GET error [ detail: %s ]." % (url, ret))
                continue
            # Close the response once the status has been inspected.
            with ret:
                if 200 <= ret.status < 400:
                    print(" Check %s is OK." % url)
                else:
                    print(" Check %s is ERROR: http_code error." % url)

    def check_proxy_main2(self, ip):
        """Validate a proxy by comparing icanhazip's answer with its host.

        Args:
            ip: Proxy address written as ``host:port``.

        Returns:
            1 if the stripped response body equals the host part of *ip*,
            otherwise 0 (including when the request fails).
        """
        # NOTE(review): intended to allow up to 3 retries; confirm that the
        # installed requests version honours this module attribute.
        requests.adapters.DEFAULT_RETRIES = 3
        try:
            proxies = {
                "http": "http://%s" % ip,
                "https": "https://%s" % ip,
            }
            url = "http://icanhazip.com/"
            response = requests.get(url, timeout=self.timeout, proxies=proxies)
            proxy_ip = response.text.strip()
            print(proxy_ip)
            if proxy_ip == ip.split(":")[0]:
                print("代理IP:%s 有效!" % ip)
                return 1
            print("%s 代理IP无效!" % ip)
            return 0
        except Exception:
            # Best-effort: any failure counts as an invalid proxy, while
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print("error %s 代理IP无效!" % ip)
            return 0
if __name__ == '__main__':
    # Proxies to verify, each written as "host:port".
    ips = [
        # '47.110.65.99:3100',    # Aliyun IP (2019-03-22)
        # '47.110.225.239:3100',  # Aliyun IP (2019-03-22)
        # '47.110.75.231:3100',   # Aliyun IP (2019-03-22)
        '106.57.23.132:5412',
    ]
    try:
        checker = CheckProxy()
        for address in ips:
            # Alternative check: checker.check_proxy_main1(address, foreign=0)
            checker.check_proxy_main2(address)
    except Exception:
        import traceback
        # Print the full traceback text to stdout rather than letting the
        # exception terminate the script.
        print(traceback.format_exc())