本文抓取全球疫苗接种数据并进行可视化,数据来自腾讯新闻的 API 接口:https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData
该接口返回的就是 JSON 数据,处理起来很简单:借助 json 模块的 loads 方法把 JSON 字符串转成字典,方便按键取值,代码如下:
- # !/usr/bin python3
- # encoding : utf-8 -*-
- # @software : PyCharm
- # @file : 全球疫苗接种数据.py
- # @Time : 2021/6/7 8:19
-
- import requests
- import json
- import csv
- import matplotlib.pyplot as plt
- import numpy as np
- from wordcloud import WordCloud
- import jieba
- import numpy
- import PIL.Image as Image
-
-
class Yimiao(object):
    """Download Tencent's global vaccination figures, print them and export them to CSV."""

    def __init__(self):
        self.url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        }

    def get_data(self):
        """POST to the API endpoint and return the ``requests`` response.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = requests.post(url=self.url, headers=self.headers, timeout=3)
        # Fail here with a clear HTTP error instead of later inside json.loads.
        response.raise_for_status()
        return response

    def parse_data(self, response, writer=None):
        """Parse the API response, print every record and write it as a CSV row.

        Args:
            response: object whose ``text`` attribute holds the JSON payload;
                the records are read from ``['data']['VaccineSituationData']``.
            writer: optional ``csv.writer``-like object. When omitted, a
                module-level ``csv_writer`` is used if the running script
                defined one; otherwise no CSV rows are written.

        Returns:
            A list with one dict per record, using the keys ``country``,
            ``all`` (cumulative total as a string), ``date`` and ``type_c``.
        """
        if writer is None:
            # Backward compatibility: the script entry point publishes its
            # writer as the module-level name csv_writer.
            writer = globals().get('csv_writer')

        records = json.loads(response.text)['data']['VaccineSituationData']
        temp = []
        for record in records:
            country = record['country']
            date = record['date']
            type_c = record['vaccinations']
            total = str(record['total_vaccinations'])
            per_hundred = str(record['total_vaccinations_per_hundred'])
            temp.append({"country": country, 'all': total, 'date': date, 'type_c': type_c})
            if writer is not None:
                writer.writerow([country, date, type_c, total, per_hundred])
            # NOTE(review): the labels say "亿剂", but the values are printed
            # exactly as the API delivered them (no scaling happens here).
            print(f'国家:{country} 日期:{date} 接种类型:{type_c} 累计接种/亿剂:{total} 每百人/亿剂:{per_hundred}')
        return temp

    def run(self):
        """Fetch the data, parse/export it and return the parsed records."""
        response = self.get_data()
        return self.parse_data(response)
-
-
if __name__ == '__main__':
    import os

    csv_path = '全球疫苗接种数据.csv'
    # The file is opened in append mode, so rows from earlier runs are kept;
    # only a new or empty file still needs the header row.
    need_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    # The with-block guarantees the file is flushed and closed, even on errors.
    with open(csv_path, 'a', newline='', encoding="gb18030") as f:
        # Must stay a module-level name: Yimiao.parse_data looks up csv_writer.
        csv_writer = csv.writer(f)
        if need_header:
            csv_writer.writerow(["国家", "日期", "疫苗种类","累计接种/亿剂","每百人/亿剂"])
        yimiao = Yimiao()
        yimiao.run()
-
-
这样数据就提取出来了,控制台打印效果如下:
写入 CSV 文件(可用 Excel 打开)的效果图:
只拿到数据意义不大,还需要对数据做可视化分析。下面给出三种展示方式:折线图、柱状图和饼图(词云展示未包含在下面的代码中),代码如下:
- # 折线图
def matplot_plot(self, temp):
    """Draw a line chart of cumulative vaccinations per record and save it as ./1.png.

    Args:
        temp: list of dicts, each with a 'country' key and an 'all' key that
            holds the cumulative dose count as an integer string.
    """
    country_list = [item['country'] for item in temp]
    # Raw dose counts are converted to units of 1e8 doses, matching the y-axis label.
    y = [int(item['all']) / 100000000 for item in temp]
    x = list(range(len(country_list)))

    plt.figure(figsize=(20, 8), dpi=100)
    plt.rcParams['font.sans-serif'] = 'SimHei'  # font used for the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayable

    plt.plot(x, y)
    # Candidate ticks 0.0 .. 9.9 in steps of 0.1; every third one is shown below.
    y_tick = np.arange(0, 10, 0.1)

    plt.xticks(x, country_list, fontsize=12)
    plt.yticks(y_tick[::3], fontsize=12)

    plt.xlabel("国家", fontsize=24)
    plt.ylabel("接种/亿剂", fontsize=24)
    plt.title("全球疫苗接种可视图", fontsize=32)
    # Only the leading records stay visible; the rest lies outside the x-range.
    plt.xlim(-0.5, 20)

    plt.grid(True, linestyle='--', alpha=0.5)
    plt.savefig("./1.png")
    plt.show()
-
- # 柱状图
def matplot_bar(self, temp):
    """Draw a bar chart of the first 20 records and save it as ./2.png.

    Args:
        temp: list of dicts, each with a 'country' key and an 'all' key that
            holds the cumulative dose count as an integer string.
    """
    # Slice the data itself, not just the labels: plt.xticks needs exactly one
    # label per tick position, otherwise newer matplotlib raises ValueError.
    shown = temp[:20]
    country_list = [item['country'] for item in shown]
    # Raw dose counts are converted to units of 1e8 doses, matching the y-axis label.
    y = [int(item['all']) / 100000000 for item in shown]
    x = range(len(country_list))

    plt.figure(figsize=(20, 8), dpi=100)
    plt.rcParams['font.sans-serif'] = 'SimHei'  # font used for the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayable
    plt.bar(x, y, width=0.9, color=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'b'])
    # Candidate ticks 0.0 .. 9.9 in steps of 0.1; every second one is shown below.
    y_tick = np.arange(0, 10, 0.1)
    plt.xticks(x, country_list, fontsize=12)
    plt.yticks(y_tick[::2])
    plt.grid(linestyle="--", alpha=0.5)
    plt.title("全球疫苗接种可视图", fontsize=25)
    plt.xlim(-0.5, 19.5)
    plt.xlabel("国家", fontsize=28)
    plt.ylabel("接种/亿剂", fontsize=25)
    plt.savefig('./2.png')
    plt.show()
-
- # 饼图
def matplot_pie(self, temp):
    """Draw a pie chart of the first 20 records and save it as ./3.png.

    Args:
        temp: list of dicts, each with a 'country' key and an 'all' key that
            holds the cumulative dose count as an integer string.
    """
    shown = temp[:20]
    labels = [item['country'] for item in shown]
    # Raw dose counts are converted to units of 1e8 doses.
    size = [int(item['all']) / 100000000 for item in shown]

    color = ['#66CCCC', '#CCFF66', '#FF99CC', '#FF9999', '#FFCC99', '#666699', '#FF9900', '#993366', '#66CCCC',
             '#666699', '#CCFF00']
    plt.figure(figsize=(20, 8), dpi=100)
    plt.rcParams['font.sans-serif'] = 'SimHei'  # font used for the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayable
    # Offsets for up to 20 wedges; plt.pie requires one offset per wedge, so
    # trim the tuple when fewer than 20 records are available.
    explode = (0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.2, 0.25, 0, 0.1, 0.2, 0.3, 0.2, 0)[:len(size)]
    patches, l_text, p_text = plt.pie(size, labels=labels, autopct="%1.2f%%",
                                      colors=color, explode=explode)
    for label_text in l_text:
        label_text.set_size(15)
    for pct_text in p_text:
        pct_text.set_size(10)
    plt.grid()
    plt.legend(loc=1, ncol=2)
    plt.title("全球疫苗接种可视图", fontsize=28)
    plt.savefig("./3.png")
    plt.show()
-
-
整体代码:
- # !/usr/bin python3
- # encoding : utf-8 -*-
- import requests
- import json
- import csv
- import matplotlib.pyplot as plt
- import numpy as np
- import numpy
- import PIL.Image as Image
-
-
class Yimiao(object):
    """Fetch Tencent's global vaccination data, export it to CSV and chart it."""

    def __init__(self):
        self.url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        }
        self.count = 0  # number of records parsed so far

    def get_data(self):
        """POST to the API endpoint and return the ``requests`` response.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = requests.post(url=self.url, headers=self.headers, timeout=3)
        # Fail here with a clear HTTP error instead of later inside json.loads.
        response.raise_for_status()
        return response

    def parse_data(self, response, writer=None):
        """Parse the API response, print every record and write it as a CSV row.

        Args:
            response: object whose ``text`` attribute holds the JSON payload;
                the records are read from ``['data']['VaccineSituationData']``.
            writer: optional ``csv.writer``-like object. When omitted, a
                module-level ``csv_writer`` is used if the running script
                defined one; otherwise no CSV rows are written.

        Returns:
            A list with one dict per record, using the keys ``country``,
            ``all`` (cumulative total as a string), ``date`` and ``type_c``.
        """
        if writer is None:
            # Backward compatibility: the script entry point publishes its
            # writer as the module-level name csv_writer.
            writer = globals().get('csv_writer')

        records = json.loads(response.text)['data']['VaccineSituationData']
        temp = []
        for record in records:
            self.count += 1
            country = record['country']
            date = record['date']
            type_c = record['vaccinations']
            total = str(record['total_vaccinations'])
            per_hundred = str(record['total_vaccinations_per_hundred'])
            temp.append({"country": country, 'all': total, 'date': date, 'type_c': type_c})
            if writer is not None:
                writer.writerow([country, date, type_c, total, per_hundred])
            # NOTE(review): the labels say "亿剂", but the values are printed
            # exactly as the API delivered them (no scaling happens here).
            print(f'国家:{country} 日期:{date} 接种类型:{type_c} 累计接种/亿剂:{total} 每百人/亿剂:{per_hundred}')
        return temp

    @staticmethod
    def _chart_series(temp, limit=None):
        """Return (countries, totals in 1e8 doses) for the first *limit* records (all if None)."""
        shown = temp if limit is None else temp[:limit]
        countries = [item['country'] for item in shown]
        totals = [int(item['all']) / 100000000 for item in shown]
        return countries, totals

    @staticmethod
    def _use_chinese_font():
        """Configure matplotlib so the Chinese labels and the minus sign render."""
        plt.rcParams['font.sans-serif'] = 'SimHei'
        plt.rcParams['axes.unicode_minus'] = False

    # Line chart
    def matplot_plot(self, temp):
        """Draw a line chart of cumulative vaccinations per record and save it as ./1.png."""
        country_list, y = self._chart_series(temp)
        x = list(range(len(country_list)))
        plt.figure(figsize=(20, 8), dpi=100)
        self._use_chinese_font()

        plt.plot(x, y)
        # Candidate ticks 0.0 .. 9.9 in steps of 0.1; every third one is shown below.
        y_tick = np.arange(0, 10, 0.1)

        plt.xticks(x, country_list, fontsize=12)
        plt.yticks(y_tick[::3], fontsize=12)

        plt.xlabel("国家", fontsize=24)
        plt.ylabel("接种/亿剂", fontsize=24)
        plt.title("全球疫苗接种可视图", fontsize=32)
        # Only the leading records stay visible; the rest lies outside the x-range.
        plt.xlim(-0.5, 20)

        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig("./1.png")
        plt.show()

    # Bar chart
    def matplot_bar(self, temp):
        """Draw a bar chart of the first 20 records and save it as ./2.png."""
        # Data and labels are limited together: plt.xticks needs exactly one
        # label per tick position, otherwise newer matplotlib raises ValueError.
        country_list, y = self._chart_series(temp, limit=20)
        x = range(len(country_list))
        plt.figure(figsize=(20, 8), dpi=100)
        self._use_chinese_font()
        plt.bar(x, y, width=0.9, color=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'b'])
        # Candidate ticks 0.0 .. 9.9 in steps of 0.1; every second one is shown below.
        y_tick = np.arange(0, 10, 0.1)
        plt.xticks(x, country_list, fontsize=12)
        plt.yticks(y_tick[::2])
        plt.grid(linestyle="--", alpha=0.5)
        plt.title("全球疫苗接种可视图", fontsize=25)
        plt.xlim(-0.5, 19.5)
        plt.xlabel("国家", fontsize=28)
        plt.ylabel("接种/亿剂", fontsize=25)
        plt.savefig('./2.png')
        plt.show()

    # Pie chart
    def matplot_pie(self, temp):
        """Draw a pie chart of the first 20 records and save it as ./3.png."""
        labels, size = self._chart_series(temp, limit=20)

        color = ['#66CCCC', '#CCFF66', '#FF99CC', '#FF9999', '#FFCC99', '#666699', '#FF9900', '#993366', '#66CCCC',
                 '#666699', '#CCFF00']
        plt.figure(figsize=(20, 8), dpi=100)
        self._use_chinese_font()
        # Offsets for up to 20 wedges; plt.pie requires one offset per wedge,
        # so trim the tuple when fewer than 20 records are available.
        explode = (0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.2, 0.25, 0, 0.1, 0.2, 0.3, 0.2, 0)[:len(size)]
        patches, l_text, p_text = plt.pie(size, labels=labels, autopct="%1.2f%%",
                                          colors=color, explode=explode)
        for label_text in l_text:
            label_text.set_size(15)
        for pct_text in p_text:
            pct_text.set_size(10)
        plt.grid()
        plt.legend(loc=1, ncol=2)
        plt.title("全球疫苗接种可视图", fontsize=28)
        plt.savefig("./3.png")
        plt.show()

    def run(self):
        """Fetch and parse the data, draw all three charts and return the parsed records."""
        response = self.get_data()
        temp = self.parse_data(response)
        self.matplot_plot(temp)
        self.matplot_bar(temp)
        self.matplot_pie(temp)
        return temp
-
-
if __name__ == '__main__':
    import os

    csv_path = '全球疫苗接种数据.csv'
    # The file is opened in append mode, so rows from earlier runs are kept;
    # only a new or empty file still needs the header row.
    need_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    # The with-block guarantees the file is flushed and closed, even on errors.
    with open(csv_path, 'a', newline='', encoding="gb18030") as f:
        # Must stay a module-level name: Yimiao.parse_data looks up csv_writer.
        csv_writer = csv.writer(f)
        if need_header:
            csv_writer.writerow(["国家", "日期", "疫苗种类","累计接种/亿剂","每百人/亿剂"])
        yimiao = Yimiao()
        yimiao.run()
-
-
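如果报错:ValueError: The number of FixedLocator locations (13), usually from a call to set_ticks, does not match the number of ticklabels (…),原因是传给 plt.xticks 的刻度位置数量与标签数量不一致(例如位置用了全部国家,而标签只取了前 20 个)。
解决方案:让刻度位置和标签的数量保持一致(对两者做相同的切片);也可以临时降级 matplotlib 绕过该检查:pip install -i https://pypi.douban.com/simple matplotlib==3.0.3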