diff options
author | Ta180m | 2020-04-29 14:56:39 -0500 |
---|---|---|
committer | GitHub | 2020-04-29 14:56:39 -0500 |
commit | 9835124f9d8036bdccf540b62b11ef28ca6b6ea6 (patch) | |
tree | 57f97e28288183ab71b8416e54bf3e11f49e9a85 | |
parent | fc580716cb59d26de8a36184c0290951c59df820 (diff) |
Create solver2.py
-rw-r--r-- | solver2.py | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/solver2.py b/solver2.py new file mode 100644 index 0000000..84924ca --- /dev/null +++ b/solver2.py @@ -0,0 +1,296 @@ +from datetime import datetime, timedelta + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from scipy.integrate import solve_ivp +from scipy.optimize import minimize +import argparse +import os + +# TODO - Parse arguments for different model options +parser = argparse.ArgumentParser() + +parser.add_argument('--mode', '-m', dest = 'mode', help = 'change the mode of the model (SIR, Linear, ESIR, SEIR); default: SIR', default = 'SIR', choices = ['SIR', 'Linear', 'ESIR', 'SEIR']) +parser.add_argument('--data', '-d', dest = 'include_data', help = 'change the type of data to present in the graph (Actual, S, I, R, E); default: Actual S I R', nargs = '*', default = ['Actual', 'S', 'I', 'R'], choices = ['Actual', 'S', 'I', 'R', 'E']) +parser.add_argument('--folder', '-f', dest = 'folder', default = 'data', help = 'the folder in which to find the data files; defaults to looking in the data folder') +parser.add_argument('--disease', '-D', dest = 'disease', default = 'COVID-19', help = 'the disease to model; defaults to COVID-19') +parser.add_argument('--out', '-o', dest = 'out', default = None, help = 'the name of the graph and csv files; defaults to the name of the disease') +parser.add_argument('--start', '-s', dest = 'start', default = '1/22/20', help = 'the date where the data starts (defaults to the start date of COVID-19 (1/22/20))') +parser.add_argument('--end', '-e', dest = 'end', default = None, help = 'the date where the data stops (defaults to whereever the input data ends)') +parser.add_argument('--incubation', '-i', dest = 'incubation_period', default = None, help = 'the incubation period of the disease (only applicable if using SIRE model; ignored otherwise); none by default') +parser.add_argument('--predict', '-p', dest = 'prediction_range', default = None, help = 'the number of days to predict the course of the disease (defaults to None, meaning the model will not predict beyond the given data)') +args = parser.parse_args() + +S_0 = 13500/13501 +I_0 = 1/13501 +R_0 = 0 # Both are equal to 0/13501 +E_0 = 0 # Both are equal to 0/13501 + +# Running a model for 3.27 million population is quite hard, so here we've reduced the population to 13.5 thousand people, and modified +# the actual stats to match +correction_factor = 13502/3270000 if args.disease == 'COVID-19' else 1 + +class Learner(object): + def __init__(self, country): + self.country = country + + def load_confirmed(self, country): + """ + Load confirmed cases + """ + df = pd.read_csv(f'{args.folder}/{args.disease}-Confirmed.csv') + country_df = df[df['Country/Region'] == country] + + if args.end != None: + confirmed_sums = np.sum([reg.loc[args.start:args.end].values for reg in country_df.iloc], axis = 0) + else: + confirmed_sums = np.sum([reg.loc[args.start:].values for reg in country_df.iloc], axis = 0) + + if args.end != None: + new_data = pd.DataFrame(confirmed_sums, country_df.iloc[0].loc[args.start:args.end].index.tolist()) + else: + new_data = pd.DataFrame(confirmed_sums, country_df.iloc[0].loc[args.start:].index.tolist()) + + return new_data + + + def load_recovered(self, country): + """ + Load recovered cases + """ + df = pd.read_csv(f'{args.folder}/{args.disease}-Recovered.csv') + country_df = df[df['Country/Region'] == country] + + if args.end != None: + out = country_df.iloc[0].loc[args.start:args.end] + else: + out = country_df.iloc[0].loc[args.start:] + + return out + + def load_exposed(self, country): + """ + Load data for exposed persons + """ + df = pd.read_csv(f'{args.folder}/{args.disease}-Exposed.csv') + country_df = df[df['Country/Region'] == country] + + if args.end != None: + out = country_df.iloc[0].loc[args.start:args.end] + else: + out = country_df.iloc[0].loc[args.start:] + + return out + + + def extend_index(self, index, new_size): + values = index.values + current = datetime.strptime(index[-1], '%m/%d/%y') + while len(values) < new_size: + current = current + timedelta(days=1) + values = np.append(values, datetime.strftime(current, '%m/%d/%y')) + return values + + def predict(self, data, beta = None, gamma = None, mu = None, sigma = None): + """ + Predict how the number of people in each compartment can be changed through time toward the future. + The model is formulated with the given beta and gamma (or others). + Returns the "solved" system of initial value problems, to be "graded" by the loss function + """ + new_index = self.extend_index(data.index, args.prediction_range if args.prediction_range != None else len(data.index)) + size = len(new_index) + def model(t, y): + S = y[0] + I = y[1] + R = y[2] + + if args.mode == 'SEIR': + E = y[3] + + if args.mode == 'Linear': + return [-beta * S, beta * S - gamma * I, gamma * I] + elif args.mode == 'SIR': + return [-beta * S * I, beta * S * I - gamma * I, gamma * I] + elif args.mode == 'ESIR': + if mu != None: + return [mu - beta * S * I - mu * S, beta * S * I - gamma * I - mu * I, gamma * I - mu * R] + else: + raise Exception('Expected mu for ESIR model') + elif args.mode == 'SEIR': + if mu != None and sigma != None: + return [mu - beta * S * I - mu * S, beta * S * I - sigma * E - mu * E, sigma * E - gamma * I - mu * I, gamma * I - mu * R] + elif mu == None: + raise Exception('Expected mu for SEIR model') + elif sigma == None: + raise Exception('Expected sigma for SEIR model') + + extended_actual = np.concatenate((data.values.flatten(), [0] * (size - len(data.values)))) + + if args.mode == 'SEIR': + result = solve_ivp(model, [0, size], [S_0,I_0,R_0, E_0], t_eval=np.arange(0, size, 1)) + else: + result = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True) + + return new_index, extended_actual, result + + def train(self): + """ + Run the optimization to estimate the beta and gamma fitting the given confirmed cases. + """ + confirmed_data = self.load_confirmed(self.country) + recovered_data = self.load_recovered(self.country) + + if not os.path.isdir('out'): + os.mkdir('out') + + if args.mode == 'Linear': + optimal = minimize( + loss_linear, + [0.001, 0.001], + args=(confirmed_data, recovered_data), + method='L-BFGS-B', + bounds=[(0.00000001, 0.4), (0.00000001, 0.4)] + ) + beta, gamma = optimal.x + print(f'Beta: {beta}, Gamma: {gamma}, R0: {beta/gamma}') + new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma) + print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}') + df = compose_df(prediction, extended_actual, correction_factor, new_index) + with open(f'out/{args.disease}-data.csv', 'w+') as file: + file.write(f'Beta: {beta}\nGamma: {gamma}\nR0: {beta/gamma}') + elif args.mode == 'SIR': + optimal = minimize( + loss_sir, + [0.001, 0.001], + args=(confirmed_data, recovered_data), + method='L-BFGS-B', + bounds=[(0.00000001, 0.4), (0.00000001, 0.4)] + ) + beta, gamma = optimal.x + print(f'Beta: {beta}, Gamma: {gamma}, R0: {beta/gamma}') + new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma) + print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}') + df = compose_df(prediction, extended_actual, correction_factor, new_index) + with open(f'out/{args.disease}-data.csv', 'w+') as file: + file.write(f'Beta: {beta}\nGamma: {gamma}\nR0: {beta/gamma}') + elif args.mode == 'ESIR': + optimal = minimize( + loss_esir, + [0.001, 0.001, 0.001], + args=(confirmed_data, recovered_data), + method='L-BFGS-B', + bounds=[(0.00000001, 0.4), (0.00000001, 0.4), (0.00000001, 0.4)] + ) + beta, gamma, mu = optimal.x + print(f'Beta: {beta}, Gamma: {gamma}, Mu: {mu} R0: {beta/(gamma + mu)}') + new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma, mu = mu) + print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}') + df = compose_df(prediction, extended_actual, correction_factor, new_index) + with open(f'out/{args.disease}-data.csv', 'w+') as file: + file.write(f'Beta: {beta}\nGamma: {gamma}\nMu: {mu}\nR0: {beta/(gamma + mu)}') + elif args.mode == 'SEIR': + exposed_data = self.load_exposed(self.country) + + optimal = minimize( + loss_seir, + [0.001, 0.001], + args=(confirmed_data, recovered_data, exposed_data), + method='L-BFGS-B', + bounds=[(0.00000001, 0.4), (0.00000001, 0.4), (0.00000001, 0.4), (0.00000001, 0.4)] + ) + beta, gamma, mu, sigma = optimal.x + print(f'Beta: {beta}, Gamma: {gamma}, Mu: {mu}, Sigma: {sigma} R0: {(beta * sigma)/((mu + gamma) * (mu + sigma))}') + new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma, mu = mu) + print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}') + df = compose_df(prediction, extended_actual, correction_factor, new_index) + with open(f'out/{args.disease}-data.csv', 'w+') as file: + file.write(f'Beta: {beta}\nGamma: {gamma}\nMu: {mu}\nSigma: {sigma}\nR0: {(beta * sigma)/((mu + gamma) * (mu + sigma))}') + fig, ax = plt.subplots(figsize=(15, 10)) + ax.set_title(f'{args.disease} cases over time ({args.mode} Model)') + df.plot(ax=ax) + fig.savefig(f"{args.out if args.out != None else args.disease}.png") + df.to_csv(f'out/{args.disease}-prediction.csv') + +def filter_zeroes(arr): + out = np.array(arr) + for index in range(len(out)): + if out[index] == 0: + out[index] = None + return out + +def compose_df(prediction, actual, correction_factor, index): + df_dict = {} + + for data in args.include_data: + if data == 'Actual': + df_dict['Actual'] = filter_zeroes(actual * correction_factor) + elif data == 'S': + df_dict['S'] = prediction.y[0] * 13500 + elif data == 'I': + df_dict['I'] = prediction.y[1] * 13500 + elif data == 'R': + df_dict['R'] = prediction.y[2] * 13500 + elif data == 'E': + df_dict['E'] = prediction.y[3] * 13500 + + return pd.DataFrame(df_dict, index=index) + +# Loss Functions - used to "train" the model + +def loss_linear(point, confirmed, recovered): + size = len(confirmed) + beta, gamma = point + def model(t, y): + S = y[0] + I = y[1] + R = y[2] + return [-beta * S, beta * S - gamma * I, gamma * I] + solution = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True) + sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2)) + sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2)) + return sol_inf * 0.5 + sol_rec * 0.5 + +def loss_sir(point, confirmed, recovered): + size = len(confirmed) + beta, gamma = point + def model(t, y): + S = y[0] + I = y[1] + R = y[2] + return [-beta * S * I, beta * S * I - gamma * I, gamma * I] + solution = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True) + sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2)) + sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2)) + return sol_inf * 0.5 + sol_rec * 0.5 + +def loss_esir(point, confirmed, recovered): + size = len(confirmed) + beta, gamma, mu = point + def model(t, y): + S = y[0] + I = y[1] + R = y[2] + return [mu - beta * S * I - mu * S, beta * S * I - gamma * I - mu * I, gamma * I - mu * R] + solution = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True) + sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2)) + sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2)) + return sol_inf * 0.5 + sol_rec * 0.5 + +def loss_seir(point, confirmed, recovered, exposed): + size = len(confirmed) + beta, gamma, mu, sigma = point + def model(t, y): + S = y[0] + I = y[1] + R = y[2] + E = y[3] + return [mu - beta * S * I - mu * S, beta * S * I - sigma * E - mu * E, sigma * E * I - gamma * I - mu * I, gamma * I - mu * R] + solution = solve_ivp(model, [0, size], [S_0,E_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True) + sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2)) + sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2)) + sol_exp = np.sqrt(np.mean((solution.y[3] - (exposed.values * correction_factor/13500))**2)) + return sol_inf/3 + sol_rec/3 + sol_exp/3 + +my_learner = Learner('US') +my_learner.train() |