实例:(使用IP代理池,去分页下载爱思官网的铃声)
#utils.py工具模块中
import re
import random
# Shared constants for the ringtone crawler.

# Captures a ringtone title: the alt text that follows height="28" and
# precedes the title attribute in the listing page HTML.
RE_MP3_STEMPLE = re.compile(r'height="28" alt="(.*?)" title')

# Captures the complete download URL stored in a data-mp3 attribute.
# The dot is escaped and ".mp3" sits inside the group, so the captured
# value is a usable URL that still carries its file extension.
RE_MP3_URL = re.compile(r'data-mp3="(.*?\.mp3)"')

# Request headers sent with every HTTP request (desktop Chrome User-Agent).
HEADER = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
    ),
}

# Console status messages (user-facing text, printed as-is).
Request_is_ok = ' 请求成功...'
Response_is_ok = '响应成功...'
WORK = ' 解析完毕,现在开始下载歌曲...'

# Initial value for a download counter.
NUMBERS = 1
def IP_list():
    """Return one randomly chosen proxy entry from the built-in pool.

    Returns:
        dict: A single-item mapping from the key ``'https://'`` to a proxy
        address string.
    """
    IP = [
        {'https://': '112.14.47.6:52024'},
        # The ':' separator was missing on this entry, which made it a
        # one-element set instead of a dict like its neighbours.
        {'https://': '171.35.175.112:9999'},
        # NOTE(review): this address has no port, unlike the others - confirm.
        {'https://': '113.195.157.253'},
        {'https://': '123.55.98.129:9999'},
        {'https://': '123.55.98.129:8080'},
    ]
    # random.choice adapts to the pool size; no hard-coded upper index.
    return random.choice(IP)
#urlib3.py主项目中
import re
from random import randint
from time import sleep
import requests
from pachong.utils import RE_MP3_STEMPLE,HEADER,Request_is_ok,RE_MP3_URL,WORK
# Proxy pool: one single-item dict per proxy, keyed by 'https://'.
IP = [
    {'https://': address}
    for address in (
        '112.14.47.6:52024',
        '171.35.175.112:9999',
        '113.195.157.253',
        '123.55.98.129:9999',
        '123.55.98.129:8080',
    )
]
def _safe_filename(title):
    """Return *title* with characters that are invalid in file names replaced by '_'."""
    return re.sub(r'[\\/:*?"<>|]', '_', title)


def MP3_NAME_URL() -> list:
    """Crawl listing pages 1-500 and save every ringtone found under ./ll/.

    For each page the ringtone titles and URLs are extracted with
    RE_MP3_STEMPLE and RE_MP3_URL, paired in order, and each ringtone is
    fetched and written to ``./ll/<title>.mp3``. Pages or ringtones whose
    request fails are skipped. If the ``./ll`` folder does not exist the
    whole crawl stops.

    Returns:
        list: ``(name, url)`` tuples, one per ringtone written to disk.
    """
    NUMBERS = 1
    downloaded = []
    for v in range(1, 501):
        PROJECT_URL = f'https://www.i4.cn/ring_4_5_{v}.html'
        try:
            # A timeout keeps one stalled page from hanging the whole crawl.
            page_response = requests.get(url=PROJECT_URL, headers=HEADER, timeout=30)
        except requests.RequestException:
            print('请求失败,请尝试稍后在操作...')
            continue
        if page_response.status_code != 200:
            print('请求失败,请尝试稍后在操作...')
            continue

        html = page_response.text
        print(f' 第{v}页{Request_is_ok}请稍后...')
        sleep(3)
        names = re.findall(RE_MP3_STEMPLE, html)
        urls = re.findall(RE_MP3_URL, html)
        print(f' 第{v}页{WORK}请稍后...')
        sleep(3)

        # zip stops at the shorter list, so a page with mismatched counts
        # cannot raise IndexError.
        for name, url in zip(names, urls):
            IPNOW = IP[randint(0, len(IP) - 1)]
            print(f" 使用{IPNOW.get('https://')}来下载!")
            print(f" URL:{url}", '\n' + f' NAME:{name}' + '\n' + '-' * 130)
            # NOTE(review): requests appears to look proxies up by scheme
            # (e.g. 'https') or scheme://host, so a mapping keyed 'https://'
            # is probably never applied - confirm against the requests docs.
            try:
                song = requests.get(url=url, headers=HEADER, proxies=IPNOW, timeout=30)
            except requests.RequestException:
                print('请求失败,请尝试稍后在操作...')
                continue
            if song.status_code != 200:
                # Do not save an error page under an .mp3 name.
                print('请求失败,请尝试稍后在操作...')
                continue

            try:
                with open(f'./ll/{_safe_filename(name)}.mp3', 'wb') as f:
                    f.write(song.content)
            except FileNotFoundError:
                print('找不到根路径ll文件夹,即将终止下载...')
                # Stop the whole crawl, not just this page: every later
                # write would fail the same way.
                return downloaded

            downloaded.append((name, url))
            sleep(0.2)
            NUMBERS += 1
            print(f' 开始下载第{NUMBERS}首...')
    return downloaded
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    MP3_NAME_URL()