1
0
Fork 0
rki-impfparser/plot.py

551 lines
20 KiB
Python
Raw Normal View History

#!/usr/bin/python
# vim: set fileencoding=utf-8 :
2021-01-15 00:16:40 +01:00
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import re
import requests as req
import locale
import os.path
import shutil
from matplotlib.dates import date2num
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
site_folder = 'site/'
data_folder = 'data/'
2021-01-15 00:16:40 +01:00
einwohner_deutschland = 83190556
herd_immunity = 0.7
2021-01-15 00:16:40 +01:00
today = datetime.date.today()
print_today = today.isoformat()
filename_now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
2021-01-15 00:16:40 +01:00
# DIN A4 Plots
plt.rcParams["figure.figsize"] = [11.69, 8.27]
# Download
data_filename = '{}/{}_Impfquotenmonitoring.xlsx'.format(data_folder, filename_now)
r = req.get('https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile')
with open(data_filename, 'wb') as outfile:
outfile.write(r.content)
#data_filename = 'data/20210118151908_Impfquotenmonitoring.xlsx'
rki_file = pd.read_excel(data_filename, sheet_name=None, engine='openpyxl')
raw_data = rki_file['Impfungen_proTag']
2021-01-15 00:16:40 +01:00
impfungen = raw_data[:-1].dropna(subset=['Datum'])#.fillna(0)
2021-01-15 00:16:40 +01:00
dates = impfungen['Datum']
start_of_reporting_date = dates.iloc[0].date()
2021-01-15 00:16:40 +01:00
def calculate_vaccination_data(data):
cumulative = np.cumsum(data)
total = int(np.sum(data))
total_percentage = float(total) / einwohner_deutschland * 100
to_be_vaccinated = einwohner_deutschland - total
last_date = dates.iloc[-1].date()
start_of_vaccination_index = (data != 0).argmax(axis=0)
start_of_vaccination_date = dates[start_of_vaccination_index].date()
days_since_start_of_vaccination = (last_date - start_of_vaccination_date).days
days_since_start_of_reporting = (last_date - start_of_reporting_date).days
mean_all_time = np.mean(data[start_of_vaccination_index:])
mean_seven_days = np.mean(data[-7:])
def extrapolate(rate, to_be_vaccinated):
days_extrapolated = int(np.ceil(to_be_vaccinated / rate))
extrapolated_dates = np.array([dates[0] + datetime.timedelta(days=i) for i in range(days_extrapolated)])
2021-01-15 00:16:40 +01:00
date_done = extrapolated_dates[-1]
date_herd_immunity = extrapolated_dates[int(np.ceil(days_extrapolated * herd_immunity))]
extrapolated_vaccinations = total + rate * range(-days_since_start_of_reporting, days_extrapolated - days_since_start_of_reporting)
return {
'rate': rate,
'rate_int': int(np.round(rate)),
'days_extrapolated': days_extrapolated,
'dates': extrapolated_dates,
'date_done': date_done,
'date_done_str': date_done.strftime('%d. %B %Y'),
'date_herd_immunity': date_herd_immunity,
'date_herd_immunity_str': date_herd_immunity.strftime('%d. %B %Y'),
'extrapolated_vaccinations': extrapolated_vaccinations
}
extrapolation_mean_all_time = extrapolate(mean_all_time, to_be_vaccinated)
extrapolation_last_rate = extrapolate(data.iloc[-1], to_be_vaccinated)
extrapolation_mean_seven_days = extrapolate(mean_seven_days, to_be_vaccinated)
mean_vaccination_rates_daily = np.round(cumulative / range(1, len(cumulative) + 1))
return {
'daily': data,
'cumulative': cumulative,
'total': total,
'total_percentage': total_percentage,
'to_be_vaccinated': to_be_vaccinated,
'last_date': last_date,
'last_date_str': last_date.strftime('%d. %B %Y'),
'days_since_start': days_since_start_of_vaccination + 1, # Shift from zero to one-based-index
'start_of_vaccination_date': start_of_vaccination_date,
'start_of_vaccination_date_str': start_of_vaccination_date.strftime('%d. %B %Y'),
'extrapolation_mean_all_time': extrapolation_mean_all_time,
'extrapolation_last_rate': extrapolation_last_rate,
'extrapolation_mean_seven_days': extrapolation_mean_seven_days,
'mean_vaccination_rates_daily': mean_vaccination_rates_daily
}
data_first_vaccination = calculate_vaccination_data(impfungen['Erstimpfung'])
data_second_vaccination = calculate_vaccination_data(impfungen['Zweitimpfung'])
2021-01-15 00:16:40 +01:00
# Stand aus Daten auslesen
#stand = dates.iloc[-1]
#print_stand = stand.isoformat()
# Stand aus offiziellen Angaben auslesen
stand = rki_file['Erläuterung'].iloc[1][0]
2021-01-15 00:16:40 +01:00
stand_regex = re.compile('^Datenstand: (\d\d.\d\d.\d\d\d\d, \d\d:\d\d) Uhr$')
m = stand_regex.match(stand)
stand_date = datetime.datetime.strptime(m.groups()[0], '%d.%m.%Y, %H:%M')
print_stand = stand_date.isoformat()
filename_stand = stand_date.strftime("%Y%m%d%H%M%S")
'''
# Infos der einzelnen Länder
details_sheet_name = (set(rki_file.keys()) - {'Erläuterung', 'Impfungen_proTag'}).pop()
details_sheet = rki_file[details_sheet_name]
regionalcodes = details_sheet['RS'].iloc[0:17]
land_names = details_sheet['Bundesland'].iloc[0:17]
total_vaccinations_by_land = details_sheet['Impfungen kumulativ'].iloc[0:17]
vaccination_per_mille_by_land = details_sheet['Impfungen pro 1.000 Einwohner'].iloc[0:17]
vaccination_reason_age_by_land = details_sheet['Indikation nach Alter*'].iloc[0:17]
vaccination_reason_job_by_land = details_sheet['Berufliche Indikation*'].iloc[0:17]
vaccination_reason_medical_by_land = details_sheet['Medizinische Indikation*'].iloc[0:17]
vaccination_reason_oldhome_by_land = details_sheet['Pflegeheim-bewohnerIn*'].iloc[0:17]
details_per_land = {}
details_per_land_formatted = {}
# Regionalcodes der Länder zu Abkürzung und Name (Plus gesamt)
laendernamen = [
('SH', 'Schleswig-Holstein'),
('HH', 'Hamburg'),
('NI', 'Niedersachsen'),
('HB', 'Bremen'),
('NW', 'Nordrhein-Westfalen'),
('HE', 'Hessen'),
('RP', 'Rheinland-Pfalz'),
('BW', 'Baden-Württemberg'),
('BY', 'Bayern'),
('SL', 'Saarland'),
('BE', 'Berlin'),
('BB', 'Brandenburg'),
('MV', 'Mecklenburg-Vorpommern'),
('SN', 'Sachsen'),
('ST', 'Sachsen-Anhalt'),
('TH', 'Thüringen'),
('𝚺', 'Gesamt')
]
def row_to_details(i):
regionalcode = regionalcodes[i] if i != 16 else 16
print(laendernamen[regionalcode])
shortname, name = laendernamen[regionalcode]
return {
'name': name,
'shortname': shortname,
'total_vaccinations': int(total_vaccinations_by_land[i]),
'total_vaccinations_percentage': vaccination_per_mille_by_land[i] / 10,
'vaccination_reason_age': int(vaccination_reason_age_by_land[i]),
'vaccination_reason_age_percentage': np.round(vaccination_reason_age_by_land[i] / total_vaccinations_by_land[i] * 100),
'vaccination_reason_job': int(vaccination_reason_job_by_land[i]),
'vaccination_reason_job_percentage': np.round(vaccination_reason_job_by_land[i] / total_vaccinations_by_land[i] * 100),
'vaccination_reason_medical': int(vaccination_reason_medical_by_land[i]),
'vaccination_reason_medical_percentage': np.round(vaccination_reason_medical_by_land[i] / total_vaccinations_by_land[i] * 100),
'vaccination_reason_oldhome': int(vaccination_reason_oldhome_by_land[i]),
'vaccination_reason_oldhome_percentage': np.round(vaccination_reason_oldhome_by_land[i] / total_vaccinations_by_land[i] * 100),
}
def row_to_details_formatted(i):
regionalcode = regionalcodes[i] if i != 16 else 16
print(laendernamen[regionalcode])
shortname, name = laendernamen[regionalcode]
return {
'name': name,
'shortname': shortname,
'total_vaccinations': '{:n}'.format(int(total_vaccinations_by_land[i])).replace('.', ''),
'total_vaccinations_percentage': '{:.3n}'.format(np.round(vaccination_per_mille_by_land[i] / 10, 2)),
'vaccination_reason_age': '{:n}'.format(int(vaccination_reason_age_by_land[i])).replace('.', ''),
'vaccination_reason_age_percentage': '{:n}'.format(np.round(vaccination_reason_age_by_land[i] / total_vaccinations_by_land[i] * 100)),
'vaccination_reason_job': '{:n}'.format(int(vaccination_reason_job_by_land[i])).replace('.', ''),
'vaccination_reason_job_percentage': '{:n}'.format(np.round(vaccination_reason_job_by_land[i] / total_vaccinations_by_land[i] * 100)),
'vaccination_reason_medical': '{:n}'.format(int(vaccination_reason_medical_by_land[i])).replace('.', ''),
'vaccination_reason_medical_percentage': '{:n}'.format(np.round(vaccination_reason_medical_by_land[i] / total_vaccinations_by_land[i] * 100)),
'vaccination_reason_oldhome': '{:n}'.format(int(vaccination_reason_oldhome_by_land[i])).replace('.', ''),
'vaccination_reason_oldhome_percentage': '{:n}'.format(np.round(vaccination_reason_oldhome_by_land[i] / total_vaccinations_by_land[i] * 100))
}
for i in range(len(land_names) - 1):
details_per_land[land_names[i]] = row_to_details(i)
details_per_land_formatted[land_names[i]] = row_to_details_formatted(i)
details_total = row_to_details(16)
details_total_formatted = row_to_details_formatted(16)
'''
archive_folder = site_folder + 'archive/' + filename_stand
if os.path.isdir(archive_folder):
print('Archive folder {} already exists'.format(archive_folder))
else:
os.mkdir(archive_folder)
2021-01-15 00:16:40 +01:00
def plot_extrapolation_portion(percentage):
print_percentage = int(percentage * 100)
archive_plot_filename = '{}/extrapolated_to_{}_percent'.format(archive_folder, print_percentage)
latest_plot_filename = '{}/extrapolated_to_{}_percent'.format(site_folder, print_percentage)
if os.path.isfile(archive_plot_filename + '.pdf'):
print('Plot {} already exists'.format(archive_plot_filename))
return
2021-01-15 00:16:40 +01:00
fig, ax = plt.subplots(1)
plt.title(
'Tägliche Impfrate (Erst- und Zweitimpfung), kumulierte Impfungen und lineare Extrapolation bis {:n} % der Bevölkerung Deutschlands\n'
'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'
'Erstimpfungen: {:n} ({:n} %), Durchschnittliche Impfrate: {:n} Impfungen/Tag (läuft seit {:n} Tagen)\n'
'Zweitimpfungen: {:n} ({:n} %), Durchschnittliche Impfrate: {:n} Impfungen/Tag (läuft seit {:n} Tagen)'.format(
print_percentage,
print_stand, print_today,
data_first_vaccination['total'], np.round(data_first_vaccination['total_percentage'], 2), data_first_vaccination['extrapolation_mean_all_time']['rate'], data_first_vaccination['days_since_start'],
data_second_vaccination['total'], np.round(data_second_vaccination['total_percentage'], 2), data_second_vaccination['extrapolation_mean_all_time']['rate'], data_second_vaccination['days_since_start']
)
2021-01-15 00:16:40 +01:00
)
ax2 = ax.twinx()
ax.bar(dates, data_first_vaccination['daily'], label='Tägliche Erstimpfungen', color='blue')
ax.bar(dates, data_second_vaccination['daily'], label='Tägliche Zweitimpfungen', color='lightblue')
ax.plot(dates, data_first_vaccination['mean_vaccination_rates_daily'], color='violet', label='Durchschnittliche Erstimpfrate\nbis zu diesem Tag (inkl.)')
ax.plot(dates, data_second_vaccination['mean_vaccination_rates_daily'], color='magenta', label='Durchschnittliche Zweitimpfrate\nbis zu diesem Tag (inkl.)')
2021-01-15 00:16:40 +01:00
ax2.set_ylim([0, einwohner_deutschland * percentage])
ax2.set_xlim(xmax=dates[0] + datetime.timedelta(days=percentage * data_first_vaccination['extrapolation_mean_all_time']['days_extrapolated']))
2021-01-15 00:16:40 +01:00
ax2.grid(True)
ax2.plot(dates, data_first_vaccination['cumulative'], color='red', label='Kumulierte Erstimpfungen')
ax2.plot(dates, data_second_vaccination['cumulative'], color='indianred', label='Kumulierte Zweitimpfungen')
ax2.plot(data_first_vaccination['extrapolation_mean_all_time']['dates'], data_first_vaccination['extrapolation_mean_all_time']['extrapolated_vaccinations'], color='orange', label='Extrap. kumulierte Erstimpfungen (Øgesamt)\n{:n} Impfungen/Tag'.format(data_first_vaccination['extrapolation_mean_all_time']['rate_int']))
ax2.plot(data_first_vaccination['extrapolation_mean_seven_days']['dates'], data_first_vaccination['extrapolation_mean_seven_days']['extrapolated_vaccinations'], color='goldenrod', label='Extrap. kumulierte Erstimpfungen (Ø7 Tage)\n{:n} Impfungen/Tag'.format(data_first_vaccination['extrapolation_mean_seven_days']['rate_int']))
ax2.plot()
ax2.plot(data_second_vaccination['extrapolation_mean_all_time']['dates'], data_second_vaccination['extrapolation_mean_all_time']['extrapolated_vaccinations'], color='orange', label='Extrap. kumulierte Zweitimpfungen (Øgesamt)\n{:n} Impfungen/Tag'.format(data_second_vaccination['extrapolation_mean_all_time']['rate_int']))
ax2.plot(data_second_vaccination['extrapolation_mean_seven_days']['dates'], data_second_vaccination['extrapolation_mean_seven_days']['extrapolated_vaccinations'], color='goldenrod', label='Extrap. kumulierte Zweitimpfungen (Ø7 Tage)\n{:n} Impfungen/Tag'.format(data_second_vaccination['extrapolation_mean_seven_days']['rate_int']))
2021-01-15 00:16:40 +01:00
#ax2.plot()
ax.legend(loc='upper left')
ax.get_yaxis().get_major_formatter().set_scientific(False)
ax.set_xlabel('Datum')
ax.set_ylabel('Tägliche Impfungen')
ax2.legend(loc='lower right')
2021-01-15 00:16:40 +01:00
ax2.get_yaxis().get_major_formatter().set_scientific(False)
# Estimated percentage for herd immunity
#ax2.axline((0, einwohner_deutschland * 0.7), slope=0, color='green')
2021-01-15 00:16:40 +01:00
ax2.set_ylabel('Kumulierte Impfungen')
plt.savefig(archive_plot_filename + '.pdf')
plt.savefig(archive_plot_filename + '.png')
plt.savefig(latest_plot_filename + '.pdf')
plt.savefig(latest_plot_filename + '.png')
2021-01-15 00:16:40 +01:00
plt.close()
print('Created plot {} as pdf and png'.format(archive_plot_filename))
2021-01-15 00:16:40 +01:00
plot_extrapolation_portion(0.1)
#plot_extrapolation_portion(0.7)
2021-01-15 00:16:40 +01:00
plot_extrapolation_portion(1.0)
def plot_vaccination_bar_graph_total_time():
archive_plot_filename = '{}/vaccination_bar_graph_total_time'.format(archive_folder)
latest_plot_filename = '{}/vaccination_bar_graph_total_time'.format(site_folder)
if os.path.isfile(archive_plot_filename + '.pdf'):
print('Plot {} already exists'.format(archive_plot_filename))
return
fig, ax = plt.subplots(1)
plt.title(
'Tägliche Impfrate (Erst- und Zweitimpfung übereinander)\n'
'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
print_stand, print_today
)
)
ax.grid()
ax.bar(dates, data_first_vaccination['daily'], label='Tägliche Erstimpfungen', color='blue')
ax.bar(dates, data_second_vaccination['daily'], label='Tägliche Zweitimpfungen', color='lightblue', bottom=data_first_vaccination['daily'])
ax.set_ylim([0, np.max(data_first_vaccination['daily']) + np.max(data_second_vaccination['daily'])])
ax.legend(loc='upper left')
ax.get_yaxis().get_major_formatter().set_scientific(False)
ax.set_xlabel('Datum')
ax.set_ylabel('Tägliche Impfungen')
plt.savefig(archive_plot_filename + '.pdf')
plt.savefig(archive_plot_filename + '.png')
plt.savefig(latest_plot_filename + '.pdf')
plt.savefig(latest_plot_filename + '.png')
plt.close()
print('Created plot {} as pdf and png'.format(archive_plot_filename))
plot_vaccination_bar_graph_total_time()
def plot_vaccination_bar_graph_total_time_two_bars():
archive_plot_filename = '{}/vaccination_bar_graph_total_time_two_bars'.format(archive_folder)
latest_plot_filename = '{}/vaccination_bar_graph_total_time_two_bars'.format(site_folder)
if os.path.isfile(archive_plot_filename + '.pdf'):
print('Plot {} already exists'.format(archive_plot_filename))
return
fig, ax = plt.subplots(1)
plt.title(
'Tägliche Impfrate (Erst- und Zweitimpfung nebeneinander)\n'
'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
print_stand, print_today
)
)
ax.grid()
date_numbers = date2num(dates)
ax.bar(date_numbers - 0.2, data_first_vaccination['daily'], width=0.4, label='Tägliche Erstimpfungen', color='blue')
ax.bar(date_numbers + 0.2, data_second_vaccination['daily'], width=0.4, label='Tägliche Zweitimpfungen', color='lightblue')
ax.set_ylim([0, np.max(data_first_vaccination['daily']) + np.max(data_second_vaccination['daily'])])
ax.legend(loc='upper left')
ax.xaxis_date()
ax.get_yaxis().get_major_formatter().set_scientific(False)
ax.set_xlabel('Datum')
ax.set_ylabel('Tägliche Impfungen')
plt.savefig(archive_plot_filename + '.pdf')
plt.savefig(archive_plot_filename + '.png')
plt.savefig(latest_plot_filename + '.pdf')
plt.savefig(latest_plot_filename + '.png')
plt.close()
print('Created plot {} as pdf and png'.format(archive_plot_filename))
plot_vaccination_bar_graph_total_time_two_bars()
def plot_vaccination_bar_graph_compare_both_vaccinations():
archive_plot_filename = '{}/vaccination_bar_graph_compare_both_vaccinations'.format(archive_folder)
latest_plot_filename = '{}/vaccination_bar_graph_compare_both_vaccinations'.format(site_folder)
if os.path.isfile(archive_plot_filename + '.pdf'):
print('Plot {} already exists'.format(archive_plot_filename))
return
fig, ax = plt.subplots(1)
plt.title(
'Tägliche Impfrate (Erst- und Zweitimpfung um 21 Tage versetzt)\n'
'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
print_stand, print_today
)
)
ax.grid()
date_numbers_first = date2num(dates + datetime.timedelta(days=21))
date_numbers_second = date2num(dates)
ax.bar(date_numbers_first - 0.2, data_first_vaccination['daily'], width=0.4, label='Tägliche Erstimpfungen', color='blue')
ax.bar(date_numbers_second + 0.2, data_second_vaccination['daily'], width=0.4, label='Tägliche Zweitimpfungen', color='lightblue')
ax.set_ylim([0, np.max([np.max(data_first_vaccination['daily']), np.max(data_second_vaccination['daily'])])])
ax.legend(loc='upper left')
ax.xaxis_date()
ax.get_yaxis().get_major_formatter().set_scientific(False)
ax.set_xlabel('Datum')
ax.set_ylabel('Tägliche Impfungen')
plt.savefig(archive_plot_filename + '.pdf')
plt.savefig(archive_plot_filename + '.png')
plt.savefig(latest_plot_filename + '.pdf')
plt.savefig(latest_plot_filename + '.png')
plt.close()
print('Created plot {} as pdf and png'.format(archive_plot_filename))
plot_vaccination_bar_graph_compare_both_vaccinations()
def plot_cumulative_two_vaccinations():
archive_plot_filename = '{}/cumulative_two_vaccinations'.format(archive_folder)
latest_plot_filename = '{}/cumulative_two_vaccinations'.format(site_folder)
if os.path.isfile(archive_plot_filename + '.pdf'):
print('Plot {} already exists'.format(archive_plot_filename))
return
fig, ax = plt.subplots(1)
plt.title(
'Kumulative Impfrate (Erst- und Zweitimpfung)\n'
'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
print_stand, print_today
)
)
ax.grid()
first_vaccinations_cumulative = data_first_vaccination['cumulative']
second_vaccinations_cumulative = data_second_vaccination['cumulative']
ax.fill_between(dates, first_vaccinations_cumulative, label='Erstimpfungen', color='blue')
ax.fill_between(dates, second_vaccinations_cumulative, label='Zweitimpfungen', color='lightblue')
ax.set_ylim([0, first_vaccinations_cumulative.iloc[-1]])
ax.legend(loc='upper left')
ax.xaxis_date()
ax.get_yaxis().get_major_formatter().set_scientific(False)
ax.set_xlabel('Datum')
ax.set_ylabel('Tägliche Impfungen')
plt.savefig(archive_plot_filename + '.pdf')
plt.savefig(archive_plot_filename + '.png')
plt.savefig(latest_plot_filename + '.pdf')
plt.savefig(latest_plot_filename + '.png')
plt.close()
print('Created plot {} as pdf and png'.format(archive_plot_filename))
plot_cumulative_two_vaccinations()
def render_dashboard():
dashboard_filename = 'site/index.xhtml'
dashboard_archive_filename = 'site/archive/{}/index.xhtml'.format(filename_stand)
stylesheet_filename = 'site/rki-dashboard.css'
stylesheet_archive_filename = 'site/archive/{}/rki-dashboard.css'.format(filename_stand)
if os.path.isfile(dashboard_archive_filename):
print('Dashboard {} already exists'.format(dashboard_archive_filename))
return
from jinja2 import Template, Environment, FileSystemLoader, select_autoescape
env = Environment(
loader=FileSystemLoader('./'),
autoescape=select_autoescape(['html', 'xml', 'xhtml'])
)
german_text_date_format = '%d. %B %Y'
df = german_text_date_format
german_text_datetime_format = '%d. %B %Y, %H:%M:%S Uhr'
dtf = german_text_datetime_format
latest_dashboard_filename = site_folder + 'index.xhtml'
archive_dashboard_filename = archive_folder
template = env.get_template('dashboard_template.xhtml')
template.stream(
stand = stand_date.strftime(dtf),
filename_stand = filename_stand,
einwohner_deutschland = '{:n}'.format(einwohner_deutschland).replace('.', ''),
herd_immunity = '{:n}'.format(int(herd_immunity * 100)),
data_first_vaccination = data_first_vaccination,
data_second_vaccination = data_second_vaccination,
#details_per_land = dict(sorted(details_per_land_formatted.items(), key=lambda item: item[0])),
#details_total = details_total_formatted
).dump('site/index.xhtml')
shutil.copyfile(dashboard_filename, dashboard_archive_filename)
shutil.copyfile(stylesheet_filename, stylesheet_archive_filename)
print('Created dashboard')
render_dashboard()