fix: Refactored parts of the script into functions
This commit is contained in:
parent
e13e8f1df6
commit
0e0ad8bc0f
1 changed files with 118 additions and 109 deletions
227
plot.py
227
plot.py
|
@ -52,149 +52,158 @@ plt.rcParams["figure.figsize"] = [11.69, 8.27]
|
|||
|
||||
# Download
|
||||
|
||||
data_filename = '{}/{}_Impfquotenmonitoring.xlsx'.format(data_folder, filename_now)
|
||||
def download_rki(filename_prefix):
|
||||
data_filename = '{}/{}_Impfquotenmonitoring.xlsx'.format(data_folder, filename_prefix)
|
||||
|
||||
r = req.get('https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile')
|
||||
r = req.get('https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile')
|
||||
|
||||
with open(data_filename, 'wb') as outfile:
|
||||
outfile.write(r.content)
|
||||
with open(data_filename, 'wb') as outfile:
|
||||
outfile.write(r.content)
|
||||
|
||||
#data_filename = 'data/20210118151908_Impfquotenmonitoring.xlsx'
|
||||
return data_filename
|
||||
|
||||
rki_file = pd.read_excel(data_filename, sheet_name=None, engine='openpyxl')
|
||||
data_filename = download_rki(filename_now)
|
||||
|
||||
raw_data = rki_file['Impfungen_proTag']
|
||||
def parse_rki(filename):
|
||||
|
||||
impfungen = raw_data[:-1].dropna(subset=['Datum']).fillna(0)
|
||||
rki_file = pd.read_excel(filename, sheet_name=None, engine='openpyxl')
|
||||
|
||||
impfungen.drop(impfungen.tail(1).index,inplace=True) # remove Gesamt row
|
||||
raw_data = rki_file['Impfungen_proTag']
|
||||
|
||||
dates = impfungen['Datum']
|
||||
impfungen = raw_data[:-1].dropna(subset=['Datum']).fillna(0)
|
||||
|
||||
start_of_reporting_date = dates.iloc[0].date()
|
||||
impfungen.drop(impfungen.tail(1).index,inplace=True) # remove Gesamt row
|
||||
|
||||
def calculate_vaccination_data(data):
|
||||
dates = impfungen['Datum']
|
||||
|
||||
total = int(np.sum(data))
|
||||
total_percentage = float(total) / einwohner_deutschland * 100
|
||||
start_of_reporting_date = dates.iloc[0].date()
|
||||
|
||||
to_be_vaccinated = einwohner_deutschland - total
|
||||
def calculate_vaccination_data(data):
|
||||
|
||||
last_date = dates.iloc[-1].date()
|
||||
start_of_vaccination_index = (data != 0).argmax(axis=0)
|
||||
start_of_vaccination_date = dates[start_of_vaccination_index].date()
|
||||
days_since_start_of_vaccination = (last_date - start_of_vaccination_date).days
|
||||
days_since_start_of_reporting = (last_date - start_of_reporting_date).days
|
||||
total = int(np.sum(data))
|
||||
total_percentage = float(total) / einwohner_deutschland * 100
|
||||
|
||||
valid_data = data[start_of_vaccination_index:]
|
||||
to_be_vaccinated = einwohner_deutschland - total
|
||||
|
||||
cumulative = np.concatenate(([math.nan] * (days_since_start_of_reporting - days_since_start_of_vaccination), np.cumsum(valid_data)))
|
||||
last_date = dates.iloc[-1].date()
|
||||
start_of_vaccination_index = (data != 0).argmax(axis=0)
|
||||
start_of_vaccination_date = dates[start_of_vaccination_index].date()
|
||||
days_since_start_of_vaccination = (last_date - start_of_vaccination_date).days
|
||||
days_since_start_of_reporting = (last_date - start_of_reporting_date).days
|
||||
|
||||
mean_all_time = np.mean(valid_data)
|
||||
mean_seven_days = np.mean(data[-7:])
|
||||
valid_data = data[start_of_vaccination_index:]
|
||||
|
||||
vaccinations_by_week_map = map(lambda x: (Week.withdate(x[0]), x[1]), zip(dates, data))
|
||||
cumulative = np.concatenate(([math.nan] * (days_since_start_of_reporting - days_since_start_of_vaccination), np.cumsum(valid_data)))
|
||||
|
||||
vaccinations_by_week = {}
|
||||
mean_all_time = np.mean(valid_data)
|
||||
mean_seven_days = np.mean(data[-7:])
|
||||
|
||||
for w, v in vaccinations_by_week_map:
|
||||
if w in vaccinations_by_week:
|
||||
vaccinations_by_week[w] = vaccinations_by_week[w] + v
|
||||
else:
|
||||
vaccinations_by_week[w] = v
|
||||
vaccinations_by_week_map = map(lambda x: (Week.withdate(x[0]), x[1]), zip(dates, data))
|
||||
|
||||
def extrapolate(rate, to_be_vaccinated):
|
||||
days_extrapolated = int(np.ceil(to_be_vaccinated / rate))
|
||||
days_extrapolated_herd_immunity = int(np.ceil((einwohner_deutschland * herd_immunity - total) / rate))
|
||||
vaccinations_by_week = {}
|
||||
|
||||
weeks_extrapolated = int(np.ceil(days_extrapolated / 7))
|
||||
weeks_extrapolated_herd_immunity = int(np.ceil(days_extrapolated_herd_immunity / 7))
|
||||
for w, v in vaccinations_by_week_map:
|
||||
if w in vaccinations_by_week:
|
||||
vaccinations_by_week[w] = vaccinations_by_week[w] + v
|
||||
else:
|
||||
vaccinations_by_week[w] = v
|
||||
|
||||
date_done = today + datetime.timedelta(days_extrapolated)
|
||||
date_herd_immunity = today + datetime.timedelta(days_extrapolated_herd_immunity)
|
||||
def extrapolate(rate, to_be_vaccinated):
|
||||
days_extrapolated = int(np.ceil(to_be_vaccinated / rate))
|
||||
days_extrapolated_herd_immunity = int(np.ceil((einwohner_deutschland * herd_immunity - total) / rate))
|
||||
|
||||
extrapolated_vaccinations = total + rate * range(-days_since_start_of_reporting, days_extrapolated - days_since_start_of_reporting)
|
||||
weeks_extrapolated = int(np.ceil(days_extrapolated / 7))
|
||||
weeks_extrapolated_herd_immunity = int(np.ceil(days_extrapolated_herd_immunity / 7))
|
||||
|
||||
date_done = today + datetime.timedelta(days_extrapolated)
|
||||
date_herd_immunity = today + datetime.timedelta(days_extrapolated_herd_immunity)
|
||||
|
||||
extrapolated_vaccinations = total + rate * range(-days_since_start_of_reporting, days_extrapolated - days_since_start_of_reporting)
|
||||
|
||||
return {
|
||||
'rate': rate,
|
||||
'rate_int': int(np.round(rate)),
|
||||
'days_extrapolated': days_extrapolated,
|
||||
'days_extrapolated_herd_immunity': days_extrapolated_herd_immunity,
|
||||
'weeks_extrapolated': weeks_extrapolated,
|
||||
'weeks_extrapolated_herd_immunity': weeks_extrapolated_herd_immunity,
|
||||
'date_done': date_done,
|
||||
'date_done_str': date_done.strftime('%d. %B %Y'),
|
||||
'date_herd_immunity': date_herd_immunity,
|
||||
'date_herd_immunity_str': date_herd_immunity.strftime('%d. %B %Y'),
|
||||
'extrapolated_vaccinations': extrapolated_vaccinations
|
||||
}
|
||||
|
||||
|
||||
extrapolation_mean_all_time = extrapolate(mean_all_time, to_be_vaccinated)
|
||||
extrapolation_last_rate = extrapolate(data.iloc[-1], to_be_vaccinated)
|
||||
extrapolation_mean_seven_days = extrapolate(mean_seven_days, to_be_vaccinated)
|
||||
|
||||
mean_vaccination_rates_daily = np.round(cumulative / range(1, len(cumulative) + 1))
|
||||
vaccination_rates_daily_rolling_average = data.rolling(7).mean()
|
||||
|
||||
vaccinations_missing_until_target = einwohner_deutschland * herd_immunity - total
|
||||
vaccination_rate_needed_for_target = vaccinations_missing_until_target / days_until_target
|
||||
vaccination_rate_needed_for_target_percentage = mean_all_time / vaccination_rate_needed_for_target * 100
|
||||
|
||||
return {
|
||||
'rate': rate,
|
||||
'rate_int': int(np.round(rate)),
|
||||
'days_extrapolated': days_extrapolated,
|
||||
'days_extrapolated_herd_immunity': days_extrapolated_herd_immunity,
|
||||
'weeks_extrapolated': weeks_extrapolated,
|
||||
'weeks_extrapolated_herd_immunity': weeks_extrapolated_herd_immunity,
|
||||
'date_done': date_done,
|
||||
'date_done_str': date_done.strftime('%d. %B %Y'),
|
||||
'date_herd_immunity': date_herd_immunity,
|
||||
'date_herd_immunity_str': date_herd_immunity.strftime('%d. %B %Y'),
|
||||
'extrapolated_vaccinations': extrapolated_vaccinations
|
||||
'daily': data,
|
||||
'cumulative': cumulative,
|
||||
'total': total,
|
||||
'total_percentage': total_percentage,
|
||||
'to_be_vaccinated': to_be_vaccinated,
|
||||
'last_date': last_date,
|
||||
'last_date_str': last_date.strftime('%d. %B %Y'),
|
||||
'days_since_start': days_since_start_of_vaccination + 1, # Shift from zero to one-based-index
|
||||
'start_of_vaccination_date': start_of_vaccination_date,
|
||||
'start_of_vaccination_date_str': start_of_vaccination_date.strftime('%d. %B %Y'),
|
||||
'vaccinations_by_week': vaccinations_by_week,
|
||||
'extrapolation_mean_all_time': extrapolation_mean_all_time,
|
||||
'extrapolation_last_rate': extrapolation_last_rate,
|
||||
'extrapolation_mean_seven_days': extrapolation_mean_seven_days,
|
||||
'mean_vaccination_rates_daily': mean_vaccination_rates_daily,
|
||||
'vaccination_rates_daily_rolling_average': vaccination_rates_daily_rolling_average,
|
||||
'vaccinations_missing_until_target': int(np.floor(vaccinations_missing_until_target)),
|
||||
'vaccination_rate_needed_for_target': int(np.floor(vaccination_rate_needed_for_target)),
|
||||
'vaccination_rate_needed_for_target_percentage': vaccination_rate_needed_for_target_percentage,
|
||||
'vaccinations_last_day': data.iloc[-1],
|
||||
'vaccinations_last_day_percentage': data.iloc[-1] / einwohner_deutschland * 100,
|
||||
'vaccinations_last_day_vaccination_percentage': data.iloc[-1] / total * 100,
|
||||
'vaccinations_last_week': vaccinations_by_week[Week.thisweek() - 1],
|
||||
'vaccinations_last_week_percentage': vaccinations_by_week[Week.thisweek() - 1] / einwohner_deutschland * 100,
|
||||
'vaccinations_last_week_vaccination_percentage': vaccinations_by_week[Week.thisweek() - 1] / total * 100
|
||||
}
|
||||
|
||||
if 'Erstimpfung' in impfungen:
|
||||
raw_first_vaccinations = impfungen['Erstimpfung']
|
||||
elif 'Einmal geimpft' in impfungen:
|
||||
raw_first_vaccinations = impfungen['Einmal geimpft']
|
||||
elif 'Begonnene Impfserie' in impfungen:
|
||||
raw_first_vaccinations = impfungen['Begonnene Impfserie']
|
||||
|
||||
extrapolation_mean_all_time = extrapolate(mean_all_time, to_be_vaccinated)
|
||||
extrapolation_last_rate = extrapolate(data.iloc[-1], to_be_vaccinated)
|
||||
extrapolation_mean_seven_days = extrapolate(mean_seven_days, to_be_vaccinated)
|
||||
if 'Zweitimpfung' in impfungen:
|
||||
raw_second_vaccinations = impfungen['Zweitimpfung']
|
||||
elif 'Vollständig geimpft' in impfungen:
|
||||
raw_second_vaccinations = impfungen['Vollständig geimpft']
|
||||
|
||||
mean_vaccination_rates_daily = np.round(cumulative / range(1, len(cumulative) + 1))
|
||||
vaccination_rates_daily_rolling_average = data.rolling(7).mean()
|
||||
data_first_vaccination = calculate_vaccination_data(raw_first_vaccinations)
|
||||
data_second_vaccination = calculate_vaccination_data(raw_second_vaccinations)
|
||||
|
||||
vaccinations_missing_until_target = einwohner_deutschland * herd_immunity - total
|
||||
vaccination_rate_needed_for_target = vaccinations_missing_until_target / days_until_target
|
||||
vaccination_rate_needed_for_target_percentage = mean_all_time / vaccination_rate_needed_for_target * 100
|
||||
# Stand aus Daten auslesen
|
||||
#stand = dates.iloc[-1]
|
||||
#print_stand = stand.isoformat()
|
||||
|
||||
return {
|
||||
'daily': data,
|
||||
'cumulative': cumulative,
|
||||
'total': total,
|
||||
'total_percentage': total_percentage,
|
||||
'to_be_vaccinated': to_be_vaccinated,
|
||||
'last_date': last_date,
|
||||
'last_date_str': last_date.strftime('%d. %B %Y'),
|
||||
'days_since_start': days_since_start_of_vaccination + 1, # Shift from zero to one-based-index
|
||||
'start_of_vaccination_date': start_of_vaccination_date,
|
||||
'start_of_vaccination_date_str': start_of_vaccination_date.strftime('%d. %B %Y'),
|
||||
'vaccinations_by_week': vaccinations_by_week,
|
||||
'extrapolation_mean_all_time': extrapolation_mean_all_time,
|
||||
'extrapolation_last_rate': extrapolation_last_rate,
|
||||
'extrapolation_mean_seven_days': extrapolation_mean_seven_days,
|
||||
'mean_vaccination_rates_daily': mean_vaccination_rates_daily,
|
||||
'vaccination_rates_daily_rolling_average': vaccination_rates_daily_rolling_average,
|
||||
'vaccinations_missing_until_target': int(np.floor(vaccinations_missing_until_target)),
|
||||
'vaccination_rate_needed_for_target': int(np.floor(vaccination_rate_needed_for_target)),
|
||||
'vaccination_rate_needed_for_target_percentage': vaccination_rate_needed_for_target_percentage,
|
||||
'vaccinations_last_day': data.iloc[-1],
|
||||
'vaccinations_last_day_percentage': data.iloc[-1] / einwohner_deutschland * 100,
|
||||
'vaccinations_last_day_vaccination_percentage': data.iloc[-1] / total * 100,
|
||||
'vaccinations_last_week': vaccinations_by_week[Week.thisweek() - 1],
|
||||
'vaccinations_last_week_percentage': vaccinations_by_week[Week.thisweek() - 1] / einwohner_deutschland * 100,
|
||||
'vaccinations_last_week_vaccination_percentage': vaccinations_by_week[Week.thisweek() - 1] / total * 100
|
||||
}
|
||||
# Stand aus offiziellen Angaben auslesen
|
||||
stand = rki_file['Erläuterung'].iloc[1][0]
|
||||
|
||||
if 'Erstimpfung' in impfungen:
|
||||
raw_first_vaccinations = impfungen['Erstimpfung']
|
||||
elif 'Einmal geimpft' in impfungen:
|
||||
raw_first_vaccinations = impfungen['Einmal geimpft']
|
||||
elif 'Begonnene Impfserie' in impfungen:
|
||||
raw_first_vaccinations = impfungen['Begonnene Impfserie']
|
||||
stand_regex = re.compile('^Datenstand: (\d\d.\d\d.\d\d\d\d, \d?\d:\d\d) Uhr$')
|
||||
m = stand_regex.match(stand)
|
||||
stand_date = datetime.datetime.strptime(m.groups()[0], '%d.%m.%Y, %H:%M')
|
||||
print_stand = stand_date.isoformat()
|
||||
|
||||
if 'Zweitimpfung' in impfungen:
|
||||
raw_second_vaccinations = impfungen['Zweitimpfung']
|
||||
elif 'Vollständig geimpft' in impfungen:
|
||||
raw_second_vaccinations = impfungen['Vollständig geimpft']
|
||||
return dates, start_of_reporting_date, data_first_vaccination, data_second_vaccination, stand_date, print_stand
|
||||
|
||||
data_first_vaccination = calculate_vaccination_data(raw_first_vaccinations)
|
||||
data_second_vaccination = calculate_vaccination_data(raw_second_vaccinations)
|
||||
|
||||
# Stand aus Daten auslesen
|
||||
#stand = dates.iloc[-1]
|
||||
#print_stand = stand.isoformat()
|
||||
|
||||
# Stand aus offiziellen Angaben auslesen
|
||||
stand = rki_file['Erläuterung'].iloc[1][0]
|
||||
|
||||
stand_regex = re.compile('^Datenstand: (\d\d.\d\d.\d\d\d\d, \d?\d:\d\d) Uhr$')
|
||||
m = stand_regex.match(stand)
|
||||
stand_date = datetime.datetime.strptime(m.groups()[0], '%d.%m.%Y, %H:%M')
|
||||
print_stand = stand_date.isoformat()
|
||||
dates, start_of_reporting_date, data_first_vaccination, data_second_vaccination, stand_date, print_stand = parse_rki(filename=data_filename)
|
||||
|
||||
filename_stand = stand_date.strftime("%Y%m%d%H%M%S")
|
||||
|
||||
|
|
Loading…
Reference in a new issue