网上有不少文章介绍代理IP的验证方法,例如下面这种:
import urllib.request

# Proxy under test, written as "host:port".
thisIP = "120.77.249.46:8080"

# Install a global opener so that every later urlopen() call sends plain-HTTP
# requests through the proxy.
proxy = urllib.request.ProxyHandler({"http": "http://" + thisIP})
opener = urllib.request.build_opener(proxy)
urllib.request.install_opener(opener)

try:
    # The request is the step that fails for a dead proxy, so it has to sit
    # inside the try block; the with-statement closes the response.
    with urllib.request.urlopen('http://www.baidu.com', timeout=2) as response:
        data = response.read().decode('utf-8', 'ignore')
    # Heuristic: a body longer than 5000 characters is taken to be the real page.
    if len(data) > 5000:
        print(thisIP + ':可用')
    else:
        print(thisIP + ':无效')
except Exception:
    print(thisIP + ':无效!!!')
经测试,发现存在以下问题:
即使代理无效,请求也可能返回一个网页(并非百度页面),且其内容长度大于5000,因此会被误判为可用,存在bug。
# NOTE: telnetlib was deprecated in Python 3.11 and removed in 3.13 (PEP 594).
import telnetlib

# Proxy endpoint to probe; replace with the address under test.
ip = "120.77.249.46"
port = 8080

try:
    # This only proves that the TCP port accepts a connection, so close the
    # connection straight away instead of leaking the socket.
    telnetlib.Telnet(ip, port, timeout=2).close()
    print("代理IP有效!")
except OSError:
    # Refused connections, timeouts and name-resolution failures are all OSError.
    print("代理IP无效!")
经测试,发现存在以下问题:
虽然某些代理可以通过telnet测试,但实际上仍然无效。有兴趣的可以在Windows终端中测试。
说明:利用 http://icanhazip.com/ 返回的IP进行校验:如果返回的是代理池中的IP,说明代理有效;否则说明代理实际无效。
import random

import requests

# Pool of candidate proxies, each written as "host:port".
IPAgents = [
    "118.190.95.35:9001",
]

try:
    # NOTE(review): HTTPAdapter appears to bind its max_retries default when
    # requests is imported, so this assignment may have no effect - confirm.
    requests.adapters.DEFAULT_RETRIES = 3
    IP = random.choice(IPAgents)
    thisProxy = "http://" + IP
    res = requests.get(
        url="http://icanhazip.com/",
        timeout=8,
        proxies={"http": thisProxy},
    )
    # The service replies with the IP address it sees (no scheme, no port),
    # so strip surrounding whitespace and compare against the proxy host only.
    proxyIP = res.text.strip()
    if proxyIP == IP.split(":")[0]:
        print("代理IP:'" + proxyIP + "'有效!")
    else:
        print("代理IP无效!")
except Exception:
    print("代理IP无效!")
# -*- coding:utf-8 -*-
'''
Create time:
author:
Function:
Check http proxy
'''
import random
import time
import urllib
import urllib.request

import requests
import urllib3
class CheckProxy:
    """Check whether HTTP proxies actually relay traffic.

    Two strategies are offered:

    * ``check_proxy_main1`` fetches known URLs through an authenticated proxy
      with ``urllib`` and reports whether each request succeeded.
    * ``check_proxy_main2`` asks icanhazip.com which IP address it sees and
      compares the answer with the proxy's own host.
    """

    def __init__(self):
        # Browser-like headers sent with every request made by check_proxy_main1.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-language': 'zh-CN,zh;q=0.9',
            'accept-encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
        }
        # Probe targets used when foreign is falsy / truthy respectively.
        self.urls_inland = ['https://www.baidu.com']
        self.urls_foreign = ["http://www.google.com"]
        # SECURITY: proxy credentials are hard-coded here; prefer loading them
        # from configuration or the environment before publishing this file.
        self.proxy_info = {"user": "dm", "pass": "innodealing"}
        # Per-request timeout in seconds.
        self.timeout = 5

    def get_data(self, url, headers=None):
        """Request ``url`` through ``urllib.request.urlopen``.

        Args:
            url: Address to request.
            headers: Optional mapping of request headers.

        Returns:
            ``(url, response, 1)`` on success; the caller owns the open
            response and should close it. ``(url, exception, 0)`` when the
            request could not be built or sent.
        """
        try:
            req = urllib.request.Request(url, headers=headers or {})
            ret = urllib.request.urlopen(req, timeout=self.timeout)
            return url, ret, 1
        except Exception as e:
            return url, e, 0

    def _proxy_urls(self, proxy):
        """Return the scheme -> proxy URL mapping for ``proxy`` ("host:port")."""
        proxy_url = "http://%(user)s:%(pass)s@" % self.proxy_info + proxy
        # Register the proxy for both schemes; with only "http" registered,
        # https:// targets would be fetched directly and prove nothing.
        return {"http": proxy_url, "https": proxy_url}

    def check_proxy_main1(self, proxy, foreign=0):
        """Fetch each probe URL through ``proxy`` and print the outcome.

        Args:
            proxy: Proxy address as "host:port"; the credentials stored in
                ``self.proxy_info`` are added to it.
            foreign: Truthy to probe ``self.urls_foreign``, otherwise
                ``self.urls_inland`` is used.
        """
        urls = self.urls_foreign if foreign else self.urls_inland
        print("Starting check proxy = %s..." % proxy)

        # The opener does not depend on the URL, so install it once up front.
        proxy_support = urllib.request.ProxyHandler(self._proxy_urls(proxy))
        opener = urllib.request.build_opener(proxy_support)
        urllib.request.install_opener(opener)

        for url in urls:
            url, ret, flag = self.get_data(url, headers=self.headers)
            if not flag:
                print(" Check %s is ERROR: GET error [ detail: %s ]." % (url, ret))
                continue
            try:
                if ret.code:
                    print(" Check %s is OK." % url)
                else:
                    print(" Check %s is ERROR: http_code error." % url)
            finally:
                # Release the connection that get_data left open.
                ret.close()

    def check_proxy_main2(self, ip):
        """Validate a proxy by comparing it with the IP icanhazip.com reports.

        Args:
            ip: Proxy address as "host:port".

        Returns:
            1 when the reported IP equals the proxy host, otherwise 0
            (including when the request fails).
        """
        # NOTE(review): HTTPAdapter appears to bind its max_retries default
        # when requests is imported, so this may have no effect - confirm.
        requests.adapters.DEFAULT_RETRIES = 3
        try:
            proxies = {
                "http": "http://%s" % ip,
                "https": "https://%s" % ip,
            }
            url = "http://icanhazip.com/"
            response = requests.get(url, timeout=self.timeout, proxies=proxies)
            proxy_ip = response.text.strip()
            print(proxy_ip)
            if proxy_ip == ip.split(":")[0]:
                print("代理IP:%s 有效!" % ip)
                return 1
            print("%s 代理IP无效!" % ip)
            return 0
        except Exception:
            print("error %s 代理IP无效!" % ip)
            return 0
if __name__ == '__main__':
    import traceback

    # Proxies to validate, each written as "host:port".
    ips = [
        # '47.110.65.99:3100',    # Alibaba IP (2019-03-22)
        # '47.110.225.239:3100',  # Alibaba IP (2019-03-22)
        # '47.110.75.231:3100',   # Alibaba IP (2019-03-22)
        '106.57.23.132:5412',
    ]
    try:
        CP = CheckProxy()
        for ip in ips:
            # Alternative check: CP.check_proxy_main1(ip, foreign=0)
            CP.check_proxy_main2(ip)
    except Exception:
        # Top-level boundary: print the full traceback instead of crashing.
        print(traceback.format_exc())