From 0e0ad8bc0f2b18de2828e2e6433a584698533ca0 Mon Sep 17 00:00:00 2001 From: Benedikt Bastin Date: Thu, 29 Apr 2021 13:50:48 +0200 Subject: [PATCH] fix: Refactored parts of the script into functions --- plot.py | 227 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 118 insertions(+), 109 deletions(-) diff --git a/plot.py b/plot.py index ca17b76..b1634c0 100644 --- a/plot.py +++ b/plot.py @@ -52,149 +52,158 @@ plt.rcParams["figure.figsize"] = [11.69, 8.27] # Download -data_filename = '{}/{}_Impfquotenmonitoring.xlsx'.format(data_folder, filename_now) +def download_rki(filename_prefix): + data_filename = '{}/{}_Impfquotenmonitoring.xlsx'.format(data_folder, filename_prefix) -r = req.get('https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile') + r = req.get('https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile') -with open(data_filename, 'wb') as outfile: - outfile.write(r.content) + with open(data_filename, 'wb') as outfile: + outfile.write(r.content) -#data_filename = 'data/20210118151908_Impfquotenmonitoring.xlsx' + return data_filename -rki_file = pd.read_excel(data_filename, sheet_name=None, engine='openpyxl') +data_filename = download_rki(filename_now) -raw_data = rki_file['Impfungen_proTag'] +def parse_rki(filename): -impfungen = raw_data[:-1].dropna(subset=['Datum']).fillna(0) + rki_file = pd.read_excel(filename, sheet_name=None, engine='openpyxl') -impfungen.drop(impfungen.tail(1).index,inplace=True) # remove Gesamt row + raw_data = rki_file['Impfungen_proTag'] -dates = impfungen['Datum'] + impfungen = raw_data[:-1].dropna(subset=['Datum']).fillna(0) -start_of_reporting_date = dates.iloc[0].date() + impfungen.drop(impfungen.tail(1).index,inplace=True) # remove Gesamt row -def calculate_vaccination_data(data): + dates = impfungen['Datum'] - total = int(np.sum(data)) - total_percentage = float(total) / einwohner_deutschland * 100 + start_of_reporting_date = dates.iloc[0].date() - to_be_vaccinated = einwohner_deutschland - total + def calculate_vaccination_data(data): - last_date = dates.iloc[-1].date() - start_of_vaccination_index = (data != 0).argmax(axis=0) - start_of_vaccination_date = dates[start_of_vaccination_index].date() - days_since_start_of_vaccination = (last_date - start_of_vaccination_date).days - days_since_start_of_reporting = (last_date - start_of_reporting_date).days + total = int(np.sum(data)) + total_percentage = float(total) / einwohner_deutschland * 100 - valid_data = data[start_of_vaccination_index:] + to_be_vaccinated = einwohner_deutschland - total - cumulative = np.concatenate(([math.nan] * (days_since_start_of_reporting - days_since_start_of_vaccination), np.cumsum(valid_data))) + last_date = dates.iloc[-1].date() + start_of_vaccination_index = (data != 0).argmax(axis=0) + start_of_vaccination_date = dates[start_of_vaccination_index].date() + days_since_start_of_vaccination = (last_date - start_of_vaccination_date).days + days_since_start_of_reporting = (last_date - start_of_reporting_date).days - mean_all_time = np.mean(valid_data) - mean_seven_days = np.mean(data[-7:]) + valid_data = data[start_of_vaccination_index:] - vaccinations_by_week_map = map(lambda x: (Week.withdate(x[0]), x[1]), zip(dates, data)) + cumulative = np.concatenate(([math.nan] * (days_since_start_of_reporting - days_since_start_of_vaccination), np.cumsum(valid_data))) - vaccinations_by_week = {} + mean_all_time = np.mean(valid_data) + mean_seven_days = np.mean(data[-7:]) - for w, v in vaccinations_by_week_map: - if w in vaccinations_by_week: - vaccinations_by_week[w] = vaccinations_by_week[w] + v - else: - vaccinations_by_week[w] = v + vaccinations_by_week_map = map(lambda x: (Week.withdate(x[0]), x[1]), zip(dates, data)) - def extrapolate(rate, to_be_vaccinated): - days_extrapolated = int(np.ceil(to_be_vaccinated / rate)) - days_extrapolated_herd_immunity = int(np.ceil((einwohner_deutschland * herd_immunity - total) / rate)) + vaccinations_by_week = {} - weeks_extrapolated = int(np.ceil(days_extrapolated / 7)) - weeks_extrapolated_herd_immunity = int(np.ceil(days_extrapolated_herd_immunity / 7)) + for w, v in vaccinations_by_week_map: + if w in vaccinations_by_week: + vaccinations_by_week[w] = vaccinations_by_week[w] + v + else: + vaccinations_by_week[w] = v - date_done = today + datetime.timedelta(days_extrapolated) - date_herd_immunity = today + datetime.timedelta(days_extrapolated_herd_immunity) + def extrapolate(rate, to_be_vaccinated): + days_extrapolated = int(np.ceil(to_be_vaccinated / rate)) + days_extrapolated_herd_immunity = int(np.ceil((einwohner_deutschland * herd_immunity - total) / rate)) - extrapolated_vaccinations = total + rate * range(-days_since_start_of_reporting, days_extrapolated - days_since_start_of_reporting) + weeks_extrapolated = int(np.ceil(days_extrapolated / 7)) + weeks_extrapolated_herd_immunity = int(np.ceil(days_extrapolated_herd_immunity / 7)) + + date_done = today + datetime.timedelta(days_extrapolated) + date_herd_immunity = today + datetime.timedelta(days_extrapolated_herd_immunity) + + extrapolated_vaccinations = total + rate * range(-days_since_start_of_reporting, days_extrapolated - days_since_start_of_reporting) + + return { + 'rate': rate, + 'rate_int': int(np.round(rate)), + 'days_extrapolated': days_extrapolated, + 'days_extrapolated_herd_immunity': days_extrapolated_herd_immunity, + 'weeks_extrapolated': weeks_extrapolated, + 'weeks_extrapolated_herd_immunity': weeks_extrapolated_herd_immunity, + 'date_done': date_done, + 'date_done_str': date_done.strftime('%d. %B %Y'), + 'date_herd_immunity': date_herd_immunity, + 'date_herd_immunity_str': date_herd_immunity.strftime('%d. %B %Y'), + 'extrapolated_vaccinations': extrapolated_vaccinations + } + + + extrapolation_mean_all_time = extrapolate(mean_all_time, to_be_vaccinated) + extrapolation_last_rate = extrapolate(data.iloc[-1], to_be_vaccinated) + extrapolation_mean_seven_days = extrapolate(mean_seven_days, to_be_vaccinated) + + mean_vaccination_rates_daily = np.round(cumulative / range(1, len(cumulative) + 1)) + vaccination_rates_daily_rolling_average = data.rolling(7).mean() + + vaccinations_missing_until_target = einwohner_deutschland * herd_immunity - total + vaccination_rate_needed_for_target = vaccinations_missing_until_target / days_until_target + vaccination_rate_needed_for_target_percentage = mean_all_time / vaccination_rate_needed_for_target * 100 return { - 'rate': rate, - 'rate_int': int(np.round(rate)), - 'days_extrapolated': days_extrapolated, - 'days_extrapolated_herd_immunity': days_extrapolated_herd_immunity, - 'weeks_extrapolated': weeks_extrapolated, - 'weeks_extrapolated_herd_immunity': weeks_extrapolated_herd_immunity, - 'date_done': date_done, - 'date_done_str': date_done.strftime('%d. %B %Y'), - 'date_herd_immunity': date_herd_immunity, - 'date_herd_immunity_str': date_herd_immunity.strftime('%d. %B %Y'), - 'extrapolated_vaccinations': extrapolated_vaccinations + 'daily': data, + 'cumulative': cumulative, + 'total': total, + 'total_percentage': total_percentage, + 'to_be_vaccinated': to_be_vaccinated, + 'last_date': last_date, + 'last_date_str': last_date.strftime('%d. %B %Y'), + 'days_since_start': days_since_start_of_vaccination + 1, # Shift from zero to one-based-index + 'start_of_vaccination_date': start_of_vaccination_date, + 'start_of_vaccination_date_str': start_of_vaccination_date.strftime('%d. %B %Y'), + 'vaccinations_by_week': vaccinations_by_week, + 'extrapolation_mean_all_time': extrapolation_mean_all_time, + 'extrapolation_last_rate': extrapolation_last_rate, + 'extrapolation_mean_seven_days': extrapolation_mean_seven_days, + 'mean_vaccination_rates_daily': mean_vaccination_rates_daily, + 'vaccination_rates_daily_rolling_average': vaccination_rates_daily_rolling_average, + 'vaccinations_missing_until_target': int(np.floor(vaccinations_missing_until_target)), + 'vaccination_rate_needed_for_target': int(np.floor(vaccination_rate_needed_for_target)), + 'vaccination_rate_needed_for_target_percentage': vaccination_rate_needed_for_target_percentage, + 'vaccinations_last_day': data.iloc[-1], + 'vaccinations_last_day_percentage': data.iloc[-1] / einwohner_deutschland * 100, + 'vaccinations_last_day_vaccination_percentage': data.iloc[-1] / total * 100, + 'vaccinations_last_week': vaccinations_by_week[Week.thisweek() - 1], + 'vaccinations_last_week_percentage': vaccinations_by_week[Week.thisweek() - 1] / einwohner_deutschland * 100, + 'vaccinations_last_week_vaccination_percentage': vaccinations_by_week[Week.thisweek() - 1] / total * 100 } + if 'Erstimpfung' in impfungen: + raw_first_vaccinations = impfungen['Erstimpfung'] + elif 'Einmal geimpft' in impfungen: + raw_first_vaccinations = impfungen['Einmal geimpft'] + elif 'Begonnene Impfserie' in impfungen: + raw_first_vaccinations = impfungen['Begonnene Impfserie'] - extrapolation_mean_all_time = extrapolate(mean_all_time, to_be_vaccinated) - extrapolation_last_rate = extrapolate(data.iloc[-1], to_be_vaccinated) - extrapolation_mean_seven_days = extrapolate(mean_seven_days, to_be_vaccinated) + if 'Zweitimpfung' in impfungen: + raw_second_vaccinations = impfungen['Zweitimpfung'] + elif 'Vollständig geimpft' in impfungen: + raw_second_vaccinations = impfungen['Vollständig geimpft'] - mean_vaccination_rates_daily = np.round(cumulative / range(1, len(cumulative) + 1)) - vaccination_rates_daily_rolling_average = data.rolling(7).mean() + data_first_vaccination = calculate_vaccination_data(raw_first_vaccinations) + data_second_vaccination = calculate_vaccination_data(raw_second_vaccinations) - vaccinations_missing_until_target = einwohner_deutschland * herd_immunity - total - vaccination_rate_needed_for_target = vaccinations_missing_until_target / days_until_target - vaccination_rate_needed_for_target_percentage = mean_all_time / vaccination_rate_needed_for_target * 100 + # Stand aus Daten auslesen + #stand = dates.iloc[-1] + #print_stand = stand.isoformat() - return { - 'daily': data, - 'cumulative': cumulative, - 'total': total, - 'total_percentage': total_percentage, - 'to_be_vaccinated': to_be_vaccinated, - 'last_date': last_date, - 'last_date_str': last_date.strftime('%d. %B %Y'), - 'days_since_start': days_since_start_of_vaccination + 1, # Shift from zero to one-based-index - 'start_of_vaccination_date': start_of_vaccination_date, - 'start_of_vaccination_date_str': start_of_vaccination_date.strftime('%d. %B %Y'), - 'vaccinations_by_week': vaccinations_by_week, - 'extrapolation_mean_all_time': extrapolation_mean_all_time, - 'extrapolation_last_rate': extrapolation_last_rate, - 'extrapolation_mean_seven_days': extrapolation_mean_seven_days, - 'mean_vaccination_rates_daily': mean_vaccination_rates_daily, - 'vaccination_rates_daily_rolling_average': vaccination_rates_daily_rolling_average, - 'vaccinations_missing_until_target': int(np.floor(vaccinations_missing_until_target)), - 'vaccination_rate_needed_for_target': int(np.floor(vaccination_rate_needed_for_target)), - 'vaccination_rate_needed_for_target_percentage': vaccination_rate_needed_for_target_percentage, - 'vaccinations_last_day': data.iloc[-1], - 'vaccinations_last_day_percentage': data.iloc[-1] / einwohner_deutschland * 100, - 'vaccinations_last_day_vaccination_percentage': data.iloc[-1] / total * 100, - 'vaccinations_last_week': vaccinations_by_week[Week.thisweek() - 1], - 'vaccinations_last_week_percentage': vaccinations_by_week[Week.thisweek() - 1] / einwohner_deutschland * 100, - 'vaccinations_last_week_vaccination_percentage': vaccinations_by_week[Week.thisweek() - 1] / total * 100 - } + # Stand aus offiziellen Angaben auslesen + stand = rki_file['Erläuterung'].iloc[1][0] -if 'Erstimpfung' in impfungen: - raw_first_vaccinations = impfungen['Erstimpfung'] -elif 'Einmal geimpft' in impfungen: - raw_first_vaccinations = impfungen['Einmal geimpft'] -elif 'Begonnene Impfserie' in impfungen: - raw_first_vaccinations = impfungen['Begonnene Impfserie'] + stand_regex = re.compile('^Datenstand: (\d\d.\d\d.\d\d\d\d, \d?\d:\d\d) Uhr$') + m = stand_regex.match(stand) + stand_date = datetime.datetime.strptime(m.groups()[0], '%d.%m.%Y, %H:%M') + print_stand = stand_date.isoformat() -if 'Zweitimpfung' in impfungen: - raw_second_vaccinations = impfungen['Zweitimpfung'] -elif 'Vollständig geimpft' in impfungen: - raw_second_vaccinations = impfungen['Vollständig geimpft'] + return dates, start_of_reporting_date, data_first_vaccination, data_second_vaccination, stand_date, print_stand -data_first_vaccination = calculate_vaccination_data(raw_first_vaccinations) -data_second_vaccination = calculate_vaccination_data(raw_second_vaccinations) - -# Stand aus Daten auslesen -#stand = dates.iloc[-1] -#print_stand = stand.isoformat() - -# Stand aus offiziellen Angaben auslesen -stand = rki_file['Erläuterung'].iloc[1][0] - -stand_regex = re.compile('^Datenstand: (\d\d.\d\d.\d\d\d\d, \d?\d:\d\d) Uhr$') -m = stand_regex.match(stand) -stand_date = datetime.datetime.strptime(m.groups()[0], '%d.%m.%Y, %H:%M') -print_stand = stand_date.isoformat() +dates, start_of_reporting_date, data_first_vaccination, data_second_vaccination, stand_date, print_stand = parse_rki(filename=data_filename) filename_stand = stand_date.strftime("%Y%m%d%H%M%S")