您当前的位置:首页 > 计算机 > 编程开发 > Python

爬取全球疫苗接种信息可视化分析

时间:03-29来源:作者:点击数:

对全球疫苗接种数据进行抓取并做可视化分析。数据来自腾讯提供的 API 接口:https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData

在这里插入图片描述

实际上就是json数据,这个就很简单了,我们就借助json模块的loads方法,将json字符串转为字典方便我们取值,代码如下:

# !/usr/bin python3                                 
# encoding    : utf-8 -*-                                                          
# @software   : PyCharm      
# @file       :   全球疫苗接种数据.py
# @Time       :   2021/6/7 8:19

import requests
import json
import csv
import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud
import jieba
import numpy
import PIL.Image as Image


class Yimiao(object):
    """Download the vaccination data set from the Tencent API and export it as CSV rows."""

    def __init__(self):
        # Endpoint that returns the "VaccineSituationData" module as JSON.
        self.url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData'
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        }

    def get_data(self):
        """POST to ``self.url`` and return the ``requests`` response object.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status, so
                that an error page is never handed to the JSON parser.
        """
        response = requests.post(url=self.url, headers=self.headers, timeout=3)
        response.raise_for_status()
        return response

    def parse_data(self, response, writer=None):
        """Turn the API response into a list of per-country records.

        Every record is also printed and, when a CSV writer is available,
        written out as one row.

        Args:
            response: object whose ``text`` attribute holds the JSON payload.
            writer: optional object with a ``writerow`` method. When omitted,
                the module-level ``csv_writer`` is used if the script defined
                one; otherwise no CSV row is written.

        Returns:
            A list of dicts with the keys ``country``, ``all`` (total
            vaccinations as a string), ``date`` and ``type_c``.
        """
        payload = json.loads(response.text)
        records = payload['data']['VaccineSituationData']
        if writer is None:
            # Fall back to the writer the script's __main__ section creates.
            writer = globals().get('csv_writer')

        rows = []
        for record in records:
            country = record['country']
            date = record['date']
            type_c = record['vaccinations']
            total = str(record['total_vaccinations'])
            per_hundred = str(record['total_vaccinations_per_hundred'])
            rows.append({"country": country, 'all': total, 'date': date, 'type_c': type_c})
            if writer is not None:
                writer.writerow([country, date, type_c, total, per_hundred])
            print(f'国家:{country}  日期:{date}  接种类型:{type_c}  累计接种/亿剂:{total}  每百人/亿剂:{per_hundred}')
        # The original forgot this return, which left run() holding None.
        return rows

    def run(self):
        """Fetch the data, parse it and return the parsed records."""
        response = self.get_data()
        return self.parse_data(response)


if __name__ == '__main__':
    # Append mode keeps rows written by earlier runs. The context manager makes
    # sure the file is flushed and closed even if fetching or parsing fails.
    with open('全球疫苗接种数据.csv', 'a', newline='', encoding="gb18030") as f:
        # csv_writer stays a module-level name because Yimiao.parse_data looks it up globally.
        csv_writer = csv.writer(f)
        # In append mode the stream starts at end-of-file, so position 0 means the
        # file is new or empty; write the header only then to avoid repeating it.
        if f.tell() == 0:
            csv_writer.writerow(["国家", "日期", "疫苗种类","累计接种/亿剂","每百人/亿剂"])
        yimiao = Yimiao()
        yimiao.run()

这样的话数据我们就提取出来了,控制台打印

在这里插入图片描述

写入 CSV 文件后用 Excel 打开的效果图:

在这里插入图片描述

只拿到数据还不够,我们还要对数据做可视化分析。下面用三种图表来展示:折线统计图、柱状图和饼图(词云部分本文未给出代码),代码展示如下:

 # 折线图
    def matplot_plot(self, temp):
        """Draw a line chart of total vaccinations per country.

        Args:
            temp: iterable of dicts with a ``country`` key and an ``all`` key
                holding the total number of doses as an integer string.

        The figure is saved to ``./1.png`` and then shown.
        """
        unit = 100000000  # totals are plotted in units of 1e8 doses
        pairs = [(item['country'], int(item['all']) / unit) for item in temp]
        names = [name for name, _ in pairs]
        doses = [dose for _, dose in pairs]
        positions = list(range(len(names)))

        plt.figure(figsize=(20, 8), dpi=100)
        # Use the SimHei font so the Chinese labels render, and keep the
        # minus sign displayable with that font.
        plt.rcParams['font.sans-serif'] = 'SimHei'
        plt.rcParams['axes.unicode_minus'] = False

        plt.plot(positions, doses)

        # Candidate y ticks run from 0 up to (not including) 10 in steps of 0.1;
        # every third one is actually shown.
        candidate_ticks = np.arange(0, 10, 0.1)
        plt.xticks(positions, names, fontsize=12)
        plt.yticks(candidate_ticks[::3], fontsize=12)

        plt.xlabel("国家", fontsize=24)
        plt.ylabel("接种/亿剂", fontsize=24)
        plt.title("全球疫苗接种可视图", fontsize=32)
        # Restrict the visible x range to roughly the first 20 entries.
        plt.xlim(-0.5, 20)

        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig("./1.png")
        plt.show()

    # 柱状图
    def matplot_bar(self, temp):
        """Draw a bar chart of total vaccinations for the first 20 countries.

        Args:
            temp: iterable of dicts with a ``country`` key and an ``all`` key
                holding the total number of doses as an integer string.

        The figure is saved to ``./2.png`` and then shown.
        """
        top_n = 20  # the x axis is limited to 20 bars below
        country_list = []
        y = []
        for te in temp:
            country_list.append(te['country'])
            # Convert the raw dose count to units of 1e8 doses.
            y.append(int(te['all']) / 100000000)

        # Truncate names and values together. The original passed every tick
        # position but only 20 labels to plt.xticks, and matplotlib raises
        # ValueError when the number of locations and labels differ.
        country_list = country_list[:top_n]
        y = y[:top_n]
        x = range(len(country_list))

        plt.figure(figsize=(20, 8), dpi=100)
        # Use the SimHei font so the Chinese labels render, and keep the
        # minus sign displayable with that font.
        plt.rcParams['font.sans-serif'] = 'SimHei'
        plt.rcParams['axes.unicode_minus'] = False
        plt.bar(x, y, width=0.9, color=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'b'])
        # Candidate y ticks run from 0 up to (not including) 10 in steps of 0.1;
        # every second one is shown.
        y_tick = np.arange(0, 10, 0.1)
        plt.xticks(x, country_list, fontsize=12)
        plt.yticks(y_tick[::2])
        plt.grid(linestyle="--", alpha=0.5)
        plt.title("全球疫苗接种可视图", fontsize=25)
        plt.xlim(-0.5, 19.5)
        plt.xlabel("国家", fontsize=28)
        plt.ylabel("接种/亿剂", fontsize=25)
        plt.savefig('./2.png')
        plt.show()

    # 饼图
    def matplot_pie(self, temp):
        """Draw a pie chart of total vaccinations for the first 20 countries.

        Args:
            temp: iterable of dicts with a ``country`` key and an ``all`` key
                holding the total number of doses as an integer string.

        The figure is saved to ``./3.png`` and then shown.
        """
        countries_list = []
        size = []
        for te in temp:
            countries_list.append(te['country'])
            # Convert the raw dose count to units of 1e8 doses.
            size.append(int(te['all']) / 100000000)

        color = ['#66CCCC', '#CCFF66', '#FF99CC', '#FF9999', '#FFCC99', '#666699', '#FF9900', '#993366', '#66CCCC',
                 '#666699', '#CCFF00']
        plt.figure(figsize=(20, 8), dpi=100)
        labels = countries_list[:20]
        values = size[:20]
        # Use the SimHei font so the Chinese labels render, and keep the
        # minus sign displayable with that font.
        plt.rcParams['font.sans-serif'] = 'SimHei'
        plt.rcParams['axes.unicode_minus'] = False
        explode = (0.2, 0, 0, 0, 0, 0,0,0,0,0,0,0.1,0.2,0.25,0,0.1,0.2,0.3,0.2,0)
        # plt.pie requires one explode offset per wedge, so trim the 20 preset
        # offsets when fewer than 20 records are available.
        explode = explode[:len(values)]
        patches, l_text, p_text = plt.pie(values, labels=labels, autopct="%1.2f%%",
                                           colors=color, explode=explode)
        # Enlarge the wedge labels and shrink the percentage texts.
        for label_text in l_text:
            label_text.set_size(15)
        for pct_text in p_text:
            pct_text.set_size(10)
        plt.grid()
        plt.legend(loc=1, ncol=2)
        plt.title("全球疫苗接种可视图", fontsize=28)
        plt.savefig("./3.png")
        plt.show()

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

整体代码:

# !/usr/bin python3
# encoding    : utf-8 -*-
import requests
import json
import csv
import matplotlib.pyplot as plt
import numpy as np
import numpy
import PIL.Image as Image


class Yimiao(object):
    """Download the vaccination data set from the Tencent API, export it and chart it."""

    # Number of leading records shown in the bar and pie charts.
    _TOP_N = 20

    def __init__(self):
        # Endpoint that returns the "VaccineSituationData" module as JSON.
        self.url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=VaccineSituationData'
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        }
        # Running count of records seen by parse_data.
        self.count = 0

    def get_data(self):
        """POST to ``self.url`` and return the ``requests`` response object.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status, so
                that an error page is never handed to the JSON parser.
        """
        response = requests.post(url=self.url, headers=self.headers, timeout=3)
        response.raise_for_status()
        return response

    def parse_data(self, response, writer=None):
        """Turn the API response into a list of per-country records.

        Every record is also printed, counted in ``self.count`` and, when a
        CSV writer is available, written out as one row.

        Args:
            response: object whose ``text`` attribute holds the JSON payload.
            writer: optional object with a ``writerow`` method. When omitted,
                the module-level ``csv_writer`` is used if the script defined
                one; otherwise no CSV row is written.

        Returns:
            A list of dicts with the keys ``country``, ``all`` (total
            vaccinations as a string), ``date`` and ``type_c``.
        """
        payload = json.loads(response.text)
        records = payload['data']['VaccineSituationData']
        if writer is None:
            # Fall back to the writer the script's __main__ section creates.
            writer = globals().get('csv_writer')

        rows = []
        for record in records:
            self.count += 1
            country = record['country']
            date = record['date']
            type_c = record['vaccinations']
            total = str(record['total_vaccinations'])
            per_hundred = str(record['total_vaccinations_per_hundred'])
            rows.append({"country": country, 'all': total, 'date': date, 'type_c': type_c})
            if writer is not None:
                writer.writerow([country, date, type_c, total, per_hundred])
            print(f'国家:{country}  日期:{date}  接种类型:{type_c}  累计接种/亿剂:{total}  每百人/亿剂:{per_hundred}')
        return rows

    @staticmethod
    def _country_totals(temp):
        """Return parallel lists of country names and totals in units of 1e8 doses."""
        countries = []
        totals = []
        for entry in temp:
            countries.append(entry['country'])
            totals.append(int(entry['all']) / 100000000)
        return countries, totals

    @staticmethod
    def _use_chinese_font():
        """Select the SimHei font for Chinese labels and keep the minus sign displayable."""
        plt.rcParams['font.sans-serif'] = 'SimHei'
        plt.rcParams['axes.unicode_minus'] = False

    def matplot_plot(self, temp):
        """Draw a line chart of total vaccinations per country and save it to ``./1.png``.

        Args:
            temp: records as returned by ``parse_data``.
        """
        country_list, y = self._country_totals(temp)
        x = list(range(len(country_list)))
        plt.figure(figsize=(20, 8), dpi=100)
        self._use_chinese_font()

        plt.plot(x, y)
        # Candidate y ticks run from 0 up to (not including) 10 in steps of 0.1;
        # every third one is shown.
        y_tick = np.arange(0, 10, 0.1)

        plt.xticks(x, country_list, fontsize=12)
        plt.yticks(y_tick[::3], fontsize=12)

        plt.xlabel("国家", fontsize=24)
        plt.ylabel("接种/亿剂", fontsize=24)
        plt.title("全球疫苗接种可视图", fontsize=32)
        # Restrict the visible x range to roughly the first 20 entries.
        plt.xlim(-0.5, 20)

        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig("./1.png")
        plt.show()

    def matplot_bar(self, temp):
        """Draw a bar chart of the first 20 countries and save it to ``./2.png``.

        Args:
            temp: records as returned by ``parse_data``.
        """
        country_list, y = self._country_totals(temp)
        # Truncate names and values together. The original passed every tick
        # position but only 20 labels to plt.xticks, and matplotlib raises
        # ValueError when the number of locations and labels differ.
        country_list = country_list[:self._TOP_N]
        y = y[:self._TOP_N]
        x = range(len(country_list))

        plt.figure(figsize=(20, 8), dpi=100)
        self._use_chinese_font()
        plt.bar(x, y, width=0.9, color=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'b'])
        # Candidate y ticks run from 0 up to (not including) 10 in steps of 0.1;
        # every second one is shown.
        y_tick = np.arange(0, 10, 0.1)
        plt.xticks(x, country_list, fontsize=12)
        plt.yticks(y_tick[::2])
        plt.grid(linestyle="--", alpha=0.5)
        plt.title("全球疫苗接种可视图", fontsize=25)
        plt.xlim(-0.5, 19.5)
        plt.xlabel("国家", fontsize=28)
        plt.ylabel("接种/亿剂", fontsize=25)
        plt.savefig('./2.png')
        plt.show()

    def matplot_pie(self, temp):
        """Draw a pie chart of the first 20 countries and save it to ``./3.png``.

        Args:
            temp: records as returned by ``parse_data``.
        """
        countries_list, size = self._country_totals(temp)

        color = ['#66CCCC', '#CCFF66', '#FF99CC', '#FF9999', '#FFCC99', '#666699', '#FF9900', '#993366', '#66CCCC',
                 '#666699', '#CCFF00']
        plt.figure(figsize=(20, 8), dpi=100)
        labels = countries_list[:self._TOP_N]
        values = size[:self._TOP_N]
        self._use_chinese_font()
        explode = (0.2, 0, 0, 0, 0, 0,0,0,0,0,0,0.1,0.2,0.25,0,0.1,0.2,0.3,0.2,0)
        # plt.pie requires one explode offset per wedge, so trim the 20 preset
        # offsets when fewer than 20 records are available.
        explode = explode[:len(values)]
        patches, l_text, p_text = plt.pie(values, labels=labels, autopct="%1.2f%%",
                                           colors=color, explode=explode)
        # Enlarge the wedge labels and shrink the percentage texts.
        for label_text in l_text:
            label_text.set_size(15)
        for pct_text in p_text:
            pct_text.set_size(10)
        plt.grid()
        plt.legend(loc=1, ncol=2)
        plt.title("全球疫苗接种可视图", fontsize=28)
        plt.savefig("./3.png")
        plt.show()

    def run(self):
        """Fetch and parse the data, then render the line, bar and pie charts."""
        response = self.get_data()
        temp = self.parse_data(response)
        self.matplot_plot(temp)
        self.matplot_bar(temp)
        self.matplot_pie(temp)


if __name__ == '__main__':
    # Append mode keeps rows written by earlier runs. The context manager makes
    # sure the file is flushed and closed even if fetching, parsing or plotting fails.
    with open('全球疫苗接种数据.csv', 'a', newline='', encoding="gb18030") as f:
        # csv_writer stays a module-level name because Yimiao.parse_data looks it up globally.
        csv_writer = csv.writer(f)
        # In append mode the stream starts at end-of-file, so position 0 means the
        # file is new or empty; write the header only then to avoid repeating it.
        if f.tell() == 0:
            csv_writer.writerow(["国家", "日期", "疫苗种类","累计接种/亿剂","每百人/亿剂"])
        yimiao = Yimiao()
        yimiao.run()

如果报错:ValueError: The number of FixedLocator locations (13), usually from a call to set_ticks, does not ma…,

解决方案:pip install -i https://pypi.douban.com/simple matplotlib==3.0.3(即降级 matplotlib;更稳妥的做法是保证传给 plt.xticks 的刻度位置与刻度标签数量一致)

方便获取更多学习、工作、生活信息请关注本站微信公众号城东书院 微信服务号城东书院 微信订阅号
推荐内容
相关内容
栏目更新
栏目热门