1
0
Fork 0

fix: Refactored parts of the script into functions

This commit is contained in:
Benedikt Bastin 2021-04-29 13:50:48 +02:00
parent e13e8f1df6
commit 0e0ad8bc0f
1 changed file with 118 additions and 109 deletions

227
plot.py
View File

@ -52,149 +52,158 @@ plt.rcParams["figure.figsize"] = [11.69, 8.27]
# Download
data_filename = '{}/{}_Impfquotenmonitoring.xlsx'.format(data_folder, filename_now)
def download_rki(filename_prefix, timeout=60):
    """Download the RKI "Impfquotenmonitoring" workbook and save it to disk.

    The target path is built from the module-level ``data_folder`` (defined
    outside this function) and ``filename_prefix``.

    Args:
        filename_prefix: Prefix put in front of ``_Impfquotenmonitoring.xlsx``
            to form the file name.
        timeout: Timeout in seconds handed to the HTTP request, so a stalled
            connection cannot block the script forever.

    Returns:
        The path of the file that was written.

    Raises:
        An HTTP error from ``raise_for_status()`` when the server answers with
        an error status (assumes ``req`` is the ``requests`` module, imported
        outside this view -- TODO confirm).
    """
    data_filename = '{}/{}_Impfquotenmonitoring.xlsx'.format(data_folder, filename_prefix)

    r = req.get(
        'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile',
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of storing an error page as .xlsx,
    # which would only blow up later when the workbook is parsed.
    r.raise_for_status()

    with open(data_filename, 'wb') as outfile:
        outfile.write(r.content)

    #data_filename = 'data/20210118151908_Impfquotenmonitoring.xlsx'
    return data_filename
rki_file = pd.read_excel(data_filename, sheet_name=None, engine='openpyxl')
# Download the RKI workbook; download_rki returns the path it was saved to.
data_filename = download_rki(filename_now)
raw_data = rki_file['Impfungen_proTag']
def parse_rki(filename):
    """Parse the RKI workbook and compute statistics for both vaccination doses.

    Reads every sheet of the Excel file, takes the daily figures from the
    ``Impfungen_proTag`` sheet and the official data-status line from the
    ``Erläuterung`` sheet.

    The nested helpers read several module-level names that are defined
    outside this function: ``einwohner_deutschland``, ``herd_immunity``,
    ``days_until_target``, ``today`` and ``Week``.

    Args:
        filename: Path of the Excel workbook to read.

    Returns:
        A 6-tuple ``(dates, start_of_reporting_date, data_first_vaccination,
        data_second_vaccination, stand_date, print_stand)``. The two ``data_*``
        entries are the dictionaries built by ``calculate_vaccination_data``;
        ``print_stand`` is ``stand_date`` in ISO format.

    Raises:
        KeyError: If a required sheet is missing, if none of the known column
            names for a dose is present, or if there is no weekly total for
            the week before the current one.
        ValueError: If the data-status line does not have the expected
            ``Datenstand: dd.mm.yyyy, h:mm Uhr`` form.
    """
    rki_file = pd.read_excel(filename, sheet_name=None, engine='openpyxl')

    raw_data = rki_file['Impfungen_proTag']

    # Drop the last sheet row, rows without a date, and fill gaps with 0.
    impfungen = raw_data[:-1].dropna(subset=['Datum']).fillna(0)

    impfungen.drop(impfungen.tail(1).index, inplace=True)  # remove Gesamt row

    dates = impfungen['Datum']

    start_of_reporting_date = dates.iloc[0].date()

    def calculate_vaccination_data(data):
        """Build the statistics dictionary for one series of daily counts."""
        total = int(np.sum(data))
        total_percentage = float(total) / einwohner_deutschland * 100

        to_be_vaccinated = einwohner_deutschland - total

        last_date = dates.iloc[-1].date()
        # Position of the first non-zero day of this series.
        start_of_vaccination_index = (data != 0).argmax(axis=0)
        # argmax yields a position, so look the date up positionally; a plain
        # ``dates[...]`` would treat it as an index label.
        start_of_vaccination_date = dates.iloc[start_of_vaccination_index].date()
        days_since_start_of_vaccination = (last_date - start_of_vaccination_date).days
        days_since_start_of_reporting = (last_date - start_of_reporting_date).days

        valid_data = data[start_of_vaccination_index:]

        # Pad with NaN for the reporting days before this series started.
        padding = [math.nan] * (days_since_start_of_reporting - days_since_start_of_vaccination)
        cumulative = np.concatenate((padding, np.cumsum(valid_data)))

        mean_all_time = np.mean(valid_data)
        mean_seven_days = np.mean(data[-7:])

        # Sum the daily counts per calendar week.
        vaccinations_by_week = {}
        for day, count in zip(dates, data):
            week = Week.withdate(day)
            vaccinations_by_week[week] = vaccinations_by_week.get(week, 0) + count

        def extrapolate(rate, to_be_vaccinated):
            """Project the series forward at a constant daily ``rate``."""
            # NOTE(review): a rate of 0 makes these divisions degenerate --
            # confirm whether callers can pass a zero rate.
            days_extrapolated = int(np.ceil(to_be_vaccinated / rate))
            days_extrapolated_herd_immunity = int(np.ceil((einwohner_deutschland * herd_immunity - total) / rate))

            weeks_extrapolated = int(np.ceil(days_extrapolated / 7))
            weeks_extrapolated_herd_immunity = int(np.ceil(days_extrapolated_herd_immunity / 7))

            date_done = today + datetime.timedelta(days_extrapolated)
            date_herd_immunity = today + datetime.timedelta(days_extrapolated_herd_immunity)

            # np.arange keeps this working even if ``rate`` is a plain float.
            day_offsets = np.arange(-days_since_start_of_reporting, days_extrapolated - days_since_start_of_reporting)
            extrapolated_vaccinations = total + rate * day_offsets

            return {
                'rate': rate,
                'rate_int': int(np.round(rate)),
                'days_extrapolated': days_extrapolated,
                'days_extrapolated_herd_immunity': days_extrapolated_herd_immunity,
                'weeks_extrapolated': weeks_extrapolated,
                'weeks_extrapolated_herd_immunity': weeks_extrapolated_herd_immunity,
                'date_done': date_done,
                'date_done_str': date_done.strftime('%d. %B %Y'),
                'date_herd_immunity': date_herd_immunity,
                'date_herd_immunity_str': date_herd_immunity.strftime('%d. %B %Y'),
                'extrapolated_vaccinations': extrapolated_vaccinations
            }

        last_day = data.iloc[-1]

        extrapolation_mean_all_time = extrapolate(mean_all_time, to_be_vaccinated)
        extrapolation_last_rate = extrapolate(last_day, to_be_vaccinated)
        extrapolation_mean_seven_days = extrapolate(mean_seven_days, to_be_vaccinated)

        mean_vaccination_rates_daily = np.round(cumulative / np.arange(1, len(cumulative) + 1))
        vaccination_rates_daily_rolling_average = data.rolling(7).mean()

        vaccinations_missing_until_target = einwohner_deutschland * herd_immunity - total
        vaccination_rate_needed_for_target = vaccinations_missing_until_target / days_until_target
        vaccination_rate_needed_for_target_percentage = mean_all_time / vaccination_rate_needed_for_target * 100

        # Total of the week before the current one; raises KeyError if the
        # data contains no entry for that week.
        last_week = vaccinations_by_week[Week.thisweek() - 1]

        return {
            'daily': data,
            'cumulative': cumulative,
            'total': total,
            'total_percentage': total_percentage,
            'to_be_vaccinated': to_be_vaccinated,
            'last_date': last_date,
            'last_date_str': last_date.strftime('%d. %B %Y'),
            'days_since_start': days_since_start_of_vaccination + 1, # Shift from zero to one-based-index
            'start_of_vaccination_date': start_of_vaccination_date,
            'start_of_vaccination_date_str': start_of_vaccination_date.strftime('%d. %B %Y'),
            'vaccinations_by_week': vaccinations_by_week,
            'extrapolation_mean_all_time': extrapolation_mean_all_time,
            'extrapolation_last_rate': extrapolation_last_rate,
            'extrapolation_mean_seven_days': extrapolation_mean_seven_days,
            'mean_vaccination_rates_daily': mean_vaccination_rates_daily,
            'vaccination_rates_daily_rolling_average': vaccination_rates_daily_rolling_average,
            'vaccinations_missing_until_target': int(np.floor(vaccinations_missing_until_target)),
            'vaccination_rate_needed_for_target': int(np.floor(vaccination_rate_needed_for_target)),
            'vaccination_rate_needed_for_target_percentage': vaccination_rate_needed_for_target_percentage,
            'vaccinations_last_day': last_day,
            'vaccinations_last_day_percentage': last_day / einwohner_deutschland * 100,
            'vaccinations_last_day_vaccination_percentage': last_day / total * 100,
            'vaccinations_last_week': last_week,
            'vaccinations_last_week_percentage': last_week / einwohner_deutschland * 100,
            'vaccinations_last_week_vaccination_percentage': last_week / total * 100
        }

    def _first_present_column(candidates):
        """Return the first column of ``impfungen`` named in ``candidates``."""
        for column in candidates:
            if column in impfungen:
                return impfungen[column]
        raise KeyError('None of the columns {} found in sheet Impfungen_proTag'.format(candidates))

    # The column has been published under several names; try each in turn.
    raw_first_vaccinations = _first_present_column(
        ('Erstimpfung', 'Einmal geimpft', 'Begonnene Impfserie'))
    raw_second_vaccinations = _first_present_column(
        ('Zweitimpfung', 'Vollständig geimpft'))

    data_first_vaccination = calculate_vaccination_data(raw_first_vaccinations)
    data_second_vaccination = calculate_vaccination_data(raw_second_vaccinations)

    # Alternative: read the data status from the data itself
    #stand = dates.iloc[-1]
    #print_stand = stand.isoformat()

    # Read the data status from the official statement (second row, first
    # column of the 'Erläuterung' sheet).
    stand = rki_file['Erläuterung'].iloc[1, 0]

    # Raw string so the \d escapes reach the regex engine untouched; the dots
    # are escaped because they are literal date separators.
    stand_regex = re.compile(r'^Datenstand: (\d\d\.\d\d\.\d\d\d\d, \d?\d:\d\d) Uhr$')
    m = stand_regex.match(stand)
    if m is None:
        raise ValueError('Unexpected data status line: {!r}'.format(stand))
    stand_date = datetime.datetime.strptime(m.group(1), '%d.%m.%Y, %H:%M')
    print_stand = stand_date.isoformat()

    return dates, start_of_reporting_date, data_first_vaccination, data_second_vaccination, stand_date, print_stand
# Parse the downloaded workbook and unpack the six values parse_rki returns.
dates, start_of_reporting_date, data_first_vaccination, data_second_vaccination, stand_date, print_stand = parse_rki(filename=data_filename)
# Data-status timestamp formatted as YYYYmmddHHMMSS.
filename_stand = stand_date.strftime("%Y%m%d%H%M%S")