Python 爬虫之抓取 500 彩票网大乐透走势图
import requests
from bs4 import BeautifulSoup
import json
# Module-level accumulator: find_tops() appends one {'title', 'content'} dict
# per parsed table row here, and the script dumps the whole list as JSON.
results: list = []
# CSS class of a chart cell -> color label stored in the output.
_CELL_COLORS = {
    'yl01': 'gray1',
    'yl02': 'gray2',
    'chartBall01': 'red',
    'chartBall02': 'blue',
}


def _cell_color(td):
    """Return the color label for a cell, or '' if its class is unknown or missing."""
    classes = td.get('class') or []
    if not classes:
        return ''
    return _CELL_COLORS.get(classes[0], '')


def _parse_row(row, is_issue_row):
    """Convert one ``<tr>`` into a ``{'title': str, 'content': [cell, ...]}`` dict.

    For issue rows, cells carrying a ``colspan`` attribute are skipped and each
    remaining cell gets a color derived from its CSS class. For statistics rows
    every cell is kept and the color is always ''.
    """
    data = {'title': '', 'content': []}
    for td in row.select('td'):
        if is_issue_row and td.get('colspan') is not None:
            continue
        if td.get('align') is not None:
            # The cell that has an ``align`` attribute is used as the row title.
            data['title'] = td.get_text().strip()
        else:
            data['content'].append({
                'color': _cell_color(td) if is_issue_row else '',
                'number': td.get_text().strip(),
            })
    return data


def find_tops(url, timeout=10):
    """Fetch the trend-chart page at *url* and append its rows to ``results``.

    The rows of ``#chartsTable`` are split by fixed offsets: ``rows[-6:-2]``
    are treated as statistics rows and ``rows[2:-15]`` as issue rows. These
    offsets are tied to the page layout the original code was written
    against -- NOTE(review): re-check them if the site changes its markup.
    Statistics rows are appended first, then issue rows.

    Args:
        url: Address of the chart page to download.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get``. Without it a stalled connection blocks forever.

    Returns:
        None. Parsed rows are appended to the module-level ``results`` list.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, instead of silently parsing an error page.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    wb_data = requests.get(url, timeout=timeout)
    wb_data.raise_for_status()
    wb_data.encoding = 'utf-8'
    soup = BeautifulSoup(wb_data.text, "html.parser")

    alls = soup.select("#chartsTable tr")
    issues = alls[2:-15]
    tongjis = alls[-6:-2]

    for each in tongjis:
        results.append(_parse_row(each, is_issue_row=False))
    for each in issues:
        results.append(_parse_row(each, is_issue_row=True))
# Trend-chart endpoint to scrape; `expect=50` presumably selects how many of
# the most recent draws are included (TODO confirm against the site).
url = 'http://datachart.500.com/dlt/zoushi/newinc/jbzs_foreback.php?expect=50'

# The call fills the module-level `results` list as a side effect.
find_tops(url=url)

# Emit everything that was collected as one JSON document on stdout.
print(json.dumps(results))
至此,我们已经拿到大乐透最近 50 期的走势图数据,可以将其入库,供自己的系统使用。