对全球疫苗接种数据进行抓取并可视化,数据来自腾讯新闻的 API 接口:https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData
该接口返回的就是 JSON 数据,处理起来很简单:借助 json 模块的 loads 方法把 JSON 字符串转换为字典,方便我们取值。代码如下:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @software : PyCharm
# @file : 全球疫苗接种数据.py
# @Time : 2021/6/7 8:19
import requests
import json
import csv
import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud
import jieba
import numpy
import PIL.Image as Image
class Yimiao(object):
    """Scraper for the worldwide vaccination dataset served by the Tencent news API.

    Parsed records are written through the module-level ``csv_writer`` (which
    the script entry point creates before calling :meth:`run`) and echoed to
    stdout.
    """

    def __init__(self):
        # Endpoint that returns the vaccination data as JSON.
        self.url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData'
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        }

    def get_data(self):
        """POST to ``self.url`` with a 3 second timeout and return the response."""
        response = requests.post(url=self.url, headers=self.headers, timeout=3)
        return response

    def parse_data(self, response):
        """Decode the JSON body, export every record and return the parsed rows.

        Args:
            response: Object whose ``text`` attribute holds the JSON document.

        Returns:
            A list with one dict per record, keyed ``country``, ``all`` (the
            total vaccination count as a string), ``date`` and ``type_c``.

        Raises:
            KeyError: If the payload lacks ``data`` / ``VaccineSituationData``
                or a record lacks one of the expected fields.
            NameError: If the module-level ``csv_writer`` has not been created.
        """
        payload = json.loads(response.text)
        records = payload['data']['VaccineSituationData']
        temp = []
        for record in records:
            country = record['country']
            date = record['date']
            type_c = record['vaccinations']
            # Named ``total`` rather than ``all`` so the builtin is not shadowed.
            total = str(record['total_vaccinations'])
            per_hundred = str(record['total_vaccinations_per_hundred'])
            temp.append({"country": country, 'all': total, 'date': date, 'type_c': type_c})
            csv_writer.writerow([country, date, type_c, total, per_hundred])
            print(f'国家:{country} 日期:{date} 接种类型:{type_c} 累计接种/亿剂:{total} 每百人/亿剂:{per_hundred}')
        # The original built this list but dropped it, leaving callers with None.
        return temp

    def run(self):
        """Fetch the dataset and export it (CSV rows plus console output)."""
        response = self.get_data()
        self.parse_data(response)
if __name__ == '__main__':
    # Append mode keeps rows from earlier runs. The context manager makes sure
    # the file is flushed and closed even if the scrape raises.
    with open('全球疫苗接种数据.csv', 'a', newline='', encoding="gb18030") as f:
        # ``csv_writer`` must stay a module-level name: Yimiao.parse_data
        # writes its rows through it.
        csv_writer = csv.writer(f)
        # In append mode the stream starts at end-of-file, so position 0 means
        # the file is empty; write the header only then to avoid repeating it
        # in the middle of the data on later runs.
        if f.tell() == 0:
            csv_writer.writerow(["国家", "日期", "疫苗种类","累计接种/亿剂","每百人/亿剂"])
        yimiao = Yimiao()
        yimiao.run()
这样数据就提取出来了,控制台会逐条打印每个国家的接种信息。
写入 CSV 文件(可用 Excel 打开)的效果图:
只拿到数据还不够,我们还要对数据做可视化分析。除词云外,这里用三种图表来展示:折线图、柱状图和饼图,代码如下:
# 折线图
def matplot_plot(self, temp):
    """Render the per-country vaccination totals as a line chart.

    Each record in ``temp`` supplies a ``country`` label and an ``all`` count;
    the count is converted with ``int`` and divided by 100000000 before being
    plotted. The figure is saved to ``./1.png`` and then shown.

    Args:
        temp: Iterable of dicts with ``country`` and ``all`` keys.
    """
    series = [(row['country'], int(row['all']) / 100000000) for row in temp]
    labels = [name for name, _ in series]
    doses = [amount for _, amount in series]
    positions = list(range(len(labels)))

    plt.figure(figsize=(20, 8), dpi=100)
    plt.rcParams.update({
        'font.sans-serif': 'SimHei',  # font used for the Chinese labels
        'axes.unicode_minus': False,  # keep minus signs rendering correctly
    })
    plt.plot(positions, doses)
    plt.xticks(positions, labels, fontsize=12)
    # Candidate y ticks run from 0.0 up to (not including) 10 in 0.1 steps;
    # only every third one is drawn.
    plt.yticks(np.arange(0, 10, 0.1)[::3], fontsize=12)
    plt.xlabel("国家", fontsize=24)
    plt.ylabel("接种/亿剂", fontsize=24)
    plt.title("全球疫苗接种可视图", fontsize=32)
    plt.xlim(-0.5, 20)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.savefig("./1.png")
    plt.show()
# 柱状图
def matplot_bar(self, temp):
    """Draw the per-country vaccination totals as a bar chart.

    Each record's ``all`` count is converted with ``int`` and divided by
    100000000. The x axis is limited to the first 20 bars. The figure is
    saved to ``./2.png`` and then shown.

    Args:
        temp: Iterable of dicts with ``country`` and ``all`` keys.
    """
    shown = 20  # number of leading records that get a tick label
    country_list = []
    totals = []
    for record in temp:
        country_list.append(record['country'])
        totals.append(int(record['all']) / 100000000)
    positions = list(range(len(country_list)))

    plt.figure(figsize=(20, 8), dpi=100)
    plt.rcParams['font.sans-serif'] = 'SimHei'
    plt.rcParams['axes.unicode_minus'] = False  # keep minus signs rendering correctly
    plt.bar(positions, totals, width=0.9,
            color=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'b'])
    # Candidate y ticks run from 0.0 up to (not including) 10 in 0.1 steps;
    # every second one is drawn.
    y_tick = np.arange(0, 10, 0.1)
    # Slice positions and labels together: newer matplotlib raises
    # "The number of FixedLocator locations ... does not match the number of
    # ticklabels" when their lengths differ, which happened here as soon as
    # there were more than 20 records.
    plt.xticks(positions[:shown], country_list[:shown], fontsize=12)
    plt.yticks(y_tick[::2])
    plt.grid(linestyle="--", alpha=0.5)
    plt.title("全球疫苗接种可视图", fontsize=25)
    plt.xlim(-0.5, 19.5)
    plt.xlabel("国家", fontsize=28)
    plt.ylabel("接种/亿剂", fontsize=25)
    plt.savefig('./2.png')
    plt.show()
# 饼图
def matplot_pie(self, temp):
    """Draw the first 20 per-country vaccination totals as a pie chart.

    Each record's ``all`` count is converted with ``int`` and divided by
    100000000. The figure is saved to ``./3.png`` and then shown.

    Args:
        temp: Iterable of dicts with ``country`` and ``all`` keys.
    """
    shown = 20  # number of leading records drawn as wedges
    countries_list = []
    totals = []
    for record in temp:
        countries_list.append(record['country'])
        totals.append(int(record['all']) / 100000000)

    color = ['#66CCCC', '#CCFF66', '#FF99CC', '#FF9999', '#FFCC99', '#666699', '#FF9900', '#993366', '#66CCCC',
             '#666699', '#CCFF00']
    plt.figure(figsize=(20, 8), dpi=100)
    labels = countries_list[:shown]
    sizes = totals[:shown]
    plt.rcParams['font.sans-serif'] = 'SimHei'
    plt.rcParams['axes.unicode_minus'] = False  # keep minus signs rendering correctly
    explode = (0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.2, 0.25, 0, 0.1, 0.2, 0.3, 0.2, 0)
    # plt.pie requires ``explode`` to be exactly as long as the data, so trim
    # the 20 preset offsets when fewer than 20 records are available.
    patches, l_text, p_text = plt.pie(sizes, labels=labels, autopct="%1.2f%%",
                                      colors=color, explode=explode[:len(sizes)])
    for label_text in l_text:
        label_text.set_size(15)
    for pct_text in p_text:
        pct_text.set_size(10)
    plt.grid()
    plt.legend(loc=1, ncol=2)
    plt.title("全球疫苗接种可视图", fontsize=28)
    plt.savefig("./3.png")
    plt.show()
整体代码:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import json
import csv
import matplotlib.pyplot as plt
import numpy as np
import numpy
import PIL.Image as Image
class Yimiao(object):
    """Scrape the worldwide vaccination dataset from the Tencent news API and chart it.

    Parsed records are written through the module-level ``csv_writer`` (which
    the script entry point creates before calling :meth:`run`), echoed to
    stdout, and then drawn as a line chart, a bar chart and a pie chart.
    """

    def __init__(self):
        # Endpoint that returns the vaccination data as JSON.
        self.url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData'
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        }
        # Number of records parsed so far.
        self.count = 0

    def get_data(self):
        """POST to ``self.url`` with a 3 second timeout and return the response."""
        response = requests.post(url=self.url, headers=self.headers, timeout=3)
        return response

    def parse_data(self, response):
        """Decode the JSON body, export every record and return the parsed rows.

        Args:
            response: Object whose ``text`` attribute holds the JSON document.

        Returns:
            A list with one dict per record, keyed ``country``, ``all`` (the
            total vaccination count as a string), ``date`` and ``type_c``.

        Raises:
            KeyError: If the payload lacks ``data`` / ``VaccineSituationData``
                or a record lacks one of the expected fields.
            NameError: If the module-level ``csv_writer`` has not been created.
        """
        payload = json.loads(response.text)
        records = payload['data']['VaccineSituationData']
        temp = []
        for record in records:
            self.count += 1
            country = record['country']
            date = record['date']
            type_c = record['vaccinations']
            # Named ``total`` rather than ``all`` so the builtin is not shadowed.
            total = str(record['total_vaccinations'])
            per_hundred = str(record['total_vaccinations_per_hundred'])
            temp.append({"country": country, 'all': total, 'date': date, 'type_c': type_c})
            csv_writer.writerow([country, date, type_c, total, per_hundred])
            print(f'国家:{country} 日期:{date} 接种类型:{type_c} 累计接种/亿剂:{total} 每百人/亿剂:{per_hundred}')
        return temp

    @staticmethod
    def _series(temp):
        """Split parsed rows into country names and totals divided by 100000000."""
        names = []
        totals = []
        for row in temp:
            names.append(row['country'])
            totals.append(int(row['all']) / 100000000)
        return names, totals

    @staticmethod
    def _new_figure():
        """Open a 20x8 inch figure and apply the font settings shared by all charts."""
        plt.figure(figsize=(20, 8), dpi=100)
        plt.rcParams['font.sans-serif'] = 'SimHei'
        plt.rcParams['axes.unicode_minus'] = False  # keep minus signs rendering correctly

    # Line chart
    def matplot_plot(self, temp):
        """Draw the totals as a line chart, save it to ``./1.png`` and show it.

        Args:
            temp: Rows as returned by :meth:`parse_data`.
        """
        names, totals = self._series(temp)
        positions = list(range(len(names)))
        self._new_figure()
        plt.plot(positions, totals)
        # Candidate y ticks run from 0.0 up to (not including) 10 in 0.1
        # steps; every third one is drawn.
        y_tick = np.arange(0, 10, 0.1)
        plt.xticks(positions, names, fontsize=12)
        plt.yticks(y_tick[::3], fontsize=12)
        plt.xlabel("国家", fontsize=24)
        plt.ylabel("接种/亿剂", fontsize=24)
        plt.title("全球疫苗接种可视图", fontsize=32)
        plt.xlim(-0.5, 20)
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig("./1.png")
        plt.show()

    # Bar chart
    def matplot_bar(self, temp):
        """Draw the totals as a bar chart, save it to ``./2.png`` and show it.

        The x axis is limited to the first 20 bars.

        Args:
            temp: Rows as returned by :meth:`parse_data`.
        """
        shown = 20  # number of leading records that get a tick label
        names, totals = self._series(temp)
        positions = list(range(len(names)))
        self._new_figure()
        plt.bar(positions, totals, width=0.9,
                color=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'b'])
        # Candidate y ticks run from 0.0 up to (not including) 10 in 0.1
        # steps; every second one is drawn.
        y_tick = np.arange(0, 10, 0.1)
        # Slice positions and labels together: newer matplotlib raises
        # "The number of FixedLocator locations ... does not match the number
        # of ticklabels" when their lengths differ, which happened here as
        # soon as there were more than 20 records.
        plt.xticks(positions[:shown], names[:shown], fontsize=12)
        plt.yticks(y_tick[::2])
        plt.grid(linestyle="--", alpha=0.5)
        plt.title("全球疫苗接种可视图", fontsize=25)
        plt.xlim(-0.5, 19.5)
        plt.xlabel("国家", fontsize=28)
        plt.ylabel("接种/亿剂", fontsize=25)
        plt.savefig('./2.png')
        plt.show()

    # Pie chart
    def matplot_pie(self, temp):
        """Draw the first 20 totals as a pie chart, save it to ``./3.png`` and show it.

        Args:
            temp: Rows as returned by :meth:`parse_data`.
        """
        shown = 20  # number of leading records drawn as wedges
        names, totals = self._series(temp)
        color = ['#66CCCC', '#CCFF66', '#FF99CC', '#FF9999', '#FFCC99', '#666699', '#FF9900', '#993366', '#66CCCC',
                 '#666699', '#CCFF00']
        self._new_figure()
        labels = names[:shown]
        sizes = totals[:shown]
        explode = (0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.2, 0.25, 0, 0.1, 0.2, 0.3, 0.2, 0)
        # plt.pie requires ``explode`` to be exactly as long as the data, so
        # trim the 20 preset offsets when fewer than 20 records are available.
        patches, l_text, p_text = plt.pie(sizes, labels=labels, autopct="%1.2f%%",
                                          colors=color, explode=explode[:len(sizes)])
        for label_text in l_text:
            label_text.set_size(15)
        for pct_text in p_text:
            pct_text.set_size(10)
        plt.grid()
        plt.legend(loc=1, ncol=2)
        plt.title("全球疫苗接种可视图", fontsize=28)
        plt.savefig("./3.png")
        plt.show()

    def run(self):
        """Fetch the dataset, export it, then draw the three charts in turn."""
        response = self.get_data()
        temp = self.parse_data(response)
        # NOTE: a word-cloud step (``self.Chinese_jieba(temp)``) was disabled
        # here; no such method is defined in this class.
        self.matplot_plot(temp)
        self.matplot_bar(temp)
        self.matplot_pie(temp)
if __name__ == '__main__':
    # Append mode keeps rows from earlier runs. The context manager makes sure
    # the file is flushed and closed even if scraping or plotting raises.
    with open('全球疫苗接种数据.csv', 'a', newline='', encoding="gb18030") as f:
        # ``csv_writer`` must stay a module-level name: Yimiao.parse_data
        # writes its rows through it.
        csv_writer = csv.writer(f)
        # In append mode the stream starts at end-of-file, so position 0 means
        # the file is empty; write the header only then to avoid repeating it
        # in the middle of the data on later runs.
        if f.tell() == 0:
            csv_writer.writerow(["国家", "日期", "疫苗种类","累计接种/亿剂","每百人/亿剂"])
        yimiao = Yimiao()
        yimiao.run()
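如果报错:ValueError: The number of FixedLocator locations (13), usually from a call to set_ticks, does not match the number of ticklabels (…),
原因:较新版本的 matplotlib 会检查刻度位置与刻度标签的数量是否一致,例如 plt.xticks(x, country_list[:20]) 中 x 的长度与标签个数不同时就会触发该错误。
解决方案:让两者长度一致(例如 plt.xticks(x[:20], country_list[:20]));也可以临时降级 matplotlib 作为权宜之计:pip install -i https://pypi.douban.com/simple matplotlib==3.0.3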