Python 爬虫之抓取 500 彩票网大乐透走势图
- import requests
- from bs4 import BeautifulSoup
- import json
- results = []  # Module-level accumulator: find_tops() appends one dict per parsed table row to this list.
-
# Seconds to wait for the chart server before giving up; without a timeout
# requests.get() can block indefinitely on an unresponsive host.
_REQUEST_TIMEOUT = 10

# Maps the first CSS class of a chart cell to the colour tag stored in the
# output. Any class not listed here yields an empty colour string.
_CELL_COLORS = {
    'yl01': 'gray1',
    'yl02': 'gray2',
    'chartBall01': 'red',
    'chartBall02': 'blue',
}


def _cell_color(td):
    """Return the colour tag for a chart cell, or '' if it has no known class."""
    # A <td> without a class attribute gives None, and an empty attribute
    # gives an empty list; both would crash a bare ``[0]`` lookup.
    classes = td.get('class') or []
    if not classes:
        return ''
    return _CELL_COLORS.get(classes[0], '')


def find_tops(url):
    """Download the trend-chart page at *url* and append its rows to ``results``.

    The rows of the ``#chartsTable`` table are split by position into two
    groups. Each processed row becomes a dict of the form
    ``{'title': str, 'content': [{'color': str, 'number': str}, ...]}`` and is
    appended to the module-level ``results`` list: first the rows taken from
    ``[-6:-2]``, then the rows taken from ``[2:-15]``.

    Args:
        url: Address of the chart page to fetch.

    Returns:
        None. The output is delivered by mutating ``results``.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within ``_REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.encoding = 'utf-8'

    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.select("#chartsTable tr")

    # NOTE(review): these slices hard-code the page layout. Presumably the
    # first two rows are headers, rows[2:-15] are the per-draw rows and
    # rows[-6:-2] are the summary/statistics rows -- confirm against the
    # live page if the markup changes.
    issue_rows = rows[2:-15]
    stat_rows = rows[-6:-2]

    # Statistics rows: the cell carrying an ``align`` attribute holds the row
    # title; every other cell is stored as a number with no colour.
    for row in stat_rows:
        record = {'title': '', 'content': []}
        for td in row.select('td'):
            text = td.get_text().strip()
            if td.get('align') is not None:
                record['title'] = text
            else:
                record['content'].append({'color': '', 'number': text})
        results.append(record)

    # Draw rows: cells with a ``colspan`` attribute are skipped, the
    # ``align`` cell is the title, and the remaining cells are coloured
    # according to their CSS class.
    for row in issue_rows:
        record = {'title': '', 'content': []}
        for td in row.select('td'):
            if td.get('colspan') is not None:
                continue
            text = td.get_text().strip()
            if td.get('align') is not None:
                record['title'] = text
            else:
                record['content'].append(
                    {'color': _cell_color(td), 'number': text}
                )
        results.append(record)
-
-
-
-
# Chart endpoint to scrape.
# NOTE(review): ``expect=50`` presumably selects how many recent draws the
# page returns -- confirm against the site.
url = (
    'http://datachart.500.com/dlt/zoushi/newinc/'
    'jbzs_foreback.php?expect=50'
)

# Fill the module-level ``results`` list, then print it as a JSON document.
find_tops(url)
serialized = json.dumps(results)
print(serialized)
-
至此,已经拿到大乐透最近 50 期的走势图数据,可以将其入库,供自己的系统使用。