# Site: https://pic.netbian.com/  — we scrape https://pic.netbian.com/4kmeinv/
# Download every thumbnail image from the first gallery page into ./zhaopian.
import os

import requests
from lxml import etree

if __name__ == "__main__":
    # Make sure the output directory exists before downloading anything.
    if not os.path.exists('zhaopian'):
        os.mkdir('zhaopian')

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }
    url = 'https://pic.netbian.com/4kmeinv/'

    response = requests.get(url=url, headers=headers, timeout=10)
    # FIX: the site serves GBK but requests guesses iso-8859-1. Decode the
    # page once here instead of round-tripping every extracted name through
    # name.encode('iso-8859-1').decode('gbk') as the original did.
    response.encoding = 'gbk'

    # Parse the HTML for data extraction.
    tree = etree.HTML(response.text)

    li_list = tree.xpath('//div[@class="slist"]/ul/li')
    for li in li_list:
        # Thumbnail src is site-relative (/uploads/...) — build an absolute URL.
        tupian_src = 'https://pic.netbian.com' + li.xpath('./a/img/@src')[0]
        name = li.xpath('./a/b/text()')[0] + '.jpg'

        data = requests.get(url=tupian_src, headers=headers, timeout=10).content
        # Destination path inside the output directory.
        path = os.path.join('zhaopian', name)

        with open(path, 'wb') as fp:
            fp.write(data)
            print("下载成功!!!")
-
import os

import requests
from lxml import etree

# Scrape page 1 of the gallery: collect every thumbnail URL and title, then
# download each image into ./you_knew_about_picture.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 SLBrowser/7.0.0.5211 SLBChan/25"
}

url = "https://pic.netbian.com/4kmeinv/"

girl_data = requests.get(url=url, headers=header, timeout=10).text
# The site serves GBK but requests decodes it as iso-8859-1; re-decode to
# repair the mojibake (the .decode('gbk') step is what fixes the titles).
girl_data = girl_data.encode("iso-8859-1").decode('gbk')
# Parse the page for XPath extraction.
girl_etree = etree.HTML(girl_data)

# Thumbnail srcs are site-relative (/uploads/...); @alt holds the title.
picture_loc = girl_etree.xpath("//ul[@class='clearfix']/li/a/img/@src")
picture_name_list = girl_etree.xpath("//ul[@class='clearfix']/li/a/img/@alt")

# Create the output folder on first run.
if not os.path.exists("you_knew_about_picture"):
    os.mkdir("./you_knew_about_picture")

# zip pairs each URL with its title — no manual index bookkeeping needed.
for each_loc, each_name in zip(picture_loc, picture_name_list):
    # BUG FIX: the base previously ended with '/' while each_loc starts with
    # '/', producing 'pic.netbian.com//uploads/...'. Join without the
    # duplicate slash.
    new_loc = "https://pic.netbian.com" + each_loc
    # Fetch the raw image bytes.
    each_picture_data = requests.get(new_loc, headers=header, timeout=10).content
    # Full path = folder + title + extension.
    each_picture_name = "you_knew_about_picture/" + each_name + ".jpg"
    # Context manager guarantees the file is closed even if write fails.
    with open(each_picture_name, mode="wb") as fp:
        fp.write(each_picture_data)
    # Progress message per finished image.
    print(each_picture_name.split("/")[-1] + " have been over")
-
# Multi-page scraping. Page 1:  https://pic.netbian.com/4kmeinv/
#                     page 2:  https://pic.netbian.com/4kmeinv/index_2.html
#                     page 3:  https://pic.netbian.com/4kmeinv/index_3.html
import requests
from lxml import etree
import os

# Scrape pages 2-4 into ./zhao.
if __name__ == "__main__":
    if not os.path.exists('zhao'):
        os.mkdir('zhao')
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }

    for i in range(2, 5):
        # Pages >= 2 follow the index_N.html pattern.
        url = f'https://pic.netbian.com/4kmeinv/index_{i}.html'
        response = requests.get(url=url, headers=headers, timeout=10)
        # FIX: decode the GBK page once up front; the per-name
        # iso-8859-1 -> gbk round-trip of the original is then unnecessary.
        response.encoding = 'gbk'

        tree = etree.HTML(response.text)
        li_list = tree.xpath('//*[@id="main"]/div[3]/ul/li')
        for li in li_list:
            # Absolute thumbnail URL from the site-relative src.
            src_url = 'https://pic.netbian.com' + li.xpath('./a/img/@src')[0]
            name = li.xpath('./a/b/text()')[0] + '.jpg'
            data = requests.get(url=src_url, headers=headers, timeout=10).content
            path = os.path.join('zhao', name)

            with open(path, 'wb') as fp:
                fp.write(data)
                print('下载成功!!!')
-
-
-
# 别人代码 — someone else's version: the multi-page scraper below
import os
import requests
from lxml import etree

header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 SLBrowser/7.0.0.5211 SLBChan/25"
}

picture_loc = []        # collected thumbnail URLs (site-relative)
picture_name_list = []  # collected picture titles

# Pages 2-3 (adjust the range as needed). Page 1 cannot be included as-is:
# its URL lacks the index_N suffix — add an if-branch for it if wanted.
for page in range(2, 4):
    # The URL must be rebuilt inside the loop, once per page number.
    url = f"https://pic.netbian.com/4kmeinv/index_{page}.html"
    girl_data = requests.get(url=url, headers=header, timeout=10).text
    # The site serves GBK but requests decodes it as iso-8859-1;
    # re-decode to repair the mojibake in the titles.
    girl_data = girl_data.encode("iso-8859-1").decode('gbk')
    girl_etree = etree.HTML(girl_data)
    # Accumulate this page's thumbnail URLs and titles.
    picture_loc.extend(girl_etree.xpath("//ul[@class='clearfix']/li/a/img/@src"))
    picture_name_list.extend(girl_etree.xpath("//ul[@class='clearfix']/li/a/b/text()"))

if not os.path.exists("you_knew_about_picture"):
    os.mkdir("./you_knew_about_picture")

count = 0  # number of images downloaded (replaces the manual 'a' counter)
for i, each_loc in enumerate(picture_loc):
    # BUG FIX: each_loc already starts with '/', so the base must not end
    # with '/' (the original produced 'pic.netbian.com//uploads/...').
    new_loc = "https://pic.netbian.com" + each_loc

    each_picture_data = requests.get(new_loc, headers=header, timeout=10).content

    # File name: "<index> . <title>.jpg" inside the output folder.
    each_picture_name = "you_knew_about_picture/" + str(i) + " . " + picture_name_list[i] + ".jpg"

    # Context manager guarantees the handle is closed even on write errors.
    with open(each_picture_name, mode="wb") as fp:
        fp.write(each_picture_data)

    print(each_picture_name.split("/")[-1] + " have been over")
    count += 1
print(count)
-