Create solver2.py

author: Ta180m 2020-04-29 14:56:39 -0500
committer: GitHub 2020-04-29 14:56:39 -0500
commit: 9835124f9d8036bdccf540b62b11ef28ca6b6ea6 (patch)
tree: 57f97e28288183ab71b8416e54bf3e11f49e9a85 /solver2.py
parent: fc580716cb59d26de8a36184c0290951c59df820 (diff)
1 files changed, 296 insertions, 0 deletions
diff --git a/solver2.py b/solver2.py
new file mode 100644
index 0000000..84924ca
--- /dev/null
+++ b/solver2.py
@@ -0,0 +1,296 @@
+from datetime import datetime, timedelta
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from scipy.integrate import solve_ivp
+from scipy.optimize import minimize
+import argparse
+import os
+
+# TODO - Parse arguments for different model options
+parser = argparse.ArgumentParser()
+
+parser.add_argument('--mode', '-m', dest = 'mode', help = 'change the mode of the model (SIR, Linear, ESIR, SEIR); default: SIR', default = 'SIR', choices = ['SIR', 'Linear', 'ESIR', 'SEIR'])
+parser.add_argument('--data', '-d', dest = 'include_data', help = 'change the type of data to present in the graph (Actual, S, I, R, E); default: Actual S I R', nargs = '*', default = ['Actual', 'S', 'I', 'R'], choices = ['Actual', 'S', 'I', 'R', 'E'])
+parser.add_argument('--folder', '-f', dest = 'folder', default = 'data', help = 'the folder in which to find the data files; defaults to looking in the data folder')
+parser.add_argument('--disease', '-D', dest = 'disease', default = 'COVID-19', help = 'the disease to model; defaults to COVID-19')
+parser.add_argument('--out', '-o', dest = 'out', default = None, help = 'the name of the graph and csv files; defaults to the name of the disease')
+parser.add_argument('--start', '-s', dest = 'start', default = '1/22/20', help = 'the date where the data starts (defaults to the start date of COVID-19 (1/22/20))')
+parser.add_argument('--end', '-e', dest = 'end', default = None, help = 'the date where the data stops (defaults to whereever the input data ends)')
+parser.add_argument('--incubation', '-i', dest = 'incubation_period', default = None, help = 'the incubation period of the disease (only applicable if using SIRE model; ignored otherwise); none by default')
+parser.add_argument('--predict', '-p', dest = 'prediction_range', default = None, help = 'the number of days to predict the course of the disease (defaults to None, meaning the model will not predict beyond the given data)')
+args = parser.parse_args()
+
+S_0 = 13500/13501
+I_0 = 1/13501
+R_0 = 0 # Both are equal to 0/13501
+E_0 = 0 # Both are equal to 0/13501
+
+# Running a model for 3.27  million population is quite hard, so here we've reduced the population to 13.5 thousand people, and modified
+# the actual stats to match
+correction_factor = 13502/3270000 if args.disease == 'COVID-19' else 1
+
+class Learner(object):
+	def __init__(self, country):
+		self.country = country
+
+	def load_confirmed(self, country):
+		"""
+			Load confirmed cases
+		"""
+		df = pd.read_csv(f'{args.folder}/{args.disease}-Confirmed.csv')
+		country_df = df[df['Country/Region'] == country]
+
+		if args.end != None:
+			confirmed_sums = np.sum([reg.loc[args.start:args.end].values for reg in country_df.iloc], axis = 0)
+		else:
+			confirmed_sums = np.sum([reg.loc[args.start:].values for reg in country_df.iloc], axis = 0)
+		
+		if args.end != None:
+			new_data = pd.DataFrame(confirmed_sums, country_df.iloc[0].loc[args.start:args.end].index.tolist())
+		else:
+			new_data = pd.DataFrame(confirmed_sums, country_df.iloc[0].loc[args.start:].index.tolist())
+		
+		return new_data
+
+
+	def load_recovered(self, country):
+		"""
+			Load recovered cases
+		"""
+		df = pd.read_csv(f'{args.folder}/{args.disease}-Recovered.csv')
+		country_df = df[df['Country/Region'] == country]
+
+		if args.end != None:
+			out = country_df.iloc[0].loc[args.start:args.end]
+		else:
+			out = country_df.iloc[0].loc[args.start:]
+		
+		return out
+
+	def load_exposed(self, country):
+		"""
+			Load data for exposed persons 
+		"""
+		df = pd.read_csv(f'{args.folder}/{args.disease}-Exposed.csv')
+		country_df = df[df['Country/Region'] == country]
+
+		if args.end != None:
+			out = country_df.iloc[0].loc[args.start:args.end]
+		else:
+			out = country_df.iloc[0].loc[args.start:]
+		
+		return out
+
+
+	def extend_index(self, index, new_size):
+		values = index.values
+		current = datetime.strptime(index[-1], '%m/%d/%y')
+		while len(values) < new_size:
+			current = current + timedelta(days=1)
+			values = np.append(values, datetime.strftime(current, '%m/%d/%y'))
+		return values
+
+	def predict(self, data, beta = None, gamma = None, mu = None, sigma = None):
+		"""
+			Predict how the number of people in each compartment can be changed through time toward the future.
+			The model is formulated with the given beta and gamma (or others).
+			Returns the "solved" system of initial value problems, to be "graded" by the loss function
+		"""
+		new_index = self.extend_index(data.index, args.prediction_range if args.prediction_range != None else len(data.index))
+		size = len(new_index)
+		def model(t, y):
+			S = y[0]
+			I = y[1]
+			R = y[2]
+
+			if args.mode == 'SEIR':
+				E = y[3]
+
+			if args.mode == 'Linear':
+				return [-beta * S, beta * S - gamma * I, gamma * I]
+			elif args.mode == 'SIR':
+				return [-beta * S * I, beta * S * I - gamma * I, gamma * I]
+			elif args.mode == 'ESIR':
+				if mu != None:
+					return [mu - beta * S * I - mu * S, beta * S * I - gamma * I - mu * I, gamma * I - mu * R]
+				else:
+					raise Exception('Expected mu for ESIR model')
+			elif args.mode == 'SEIR':
+				if mu != None and sigma != None:
+					return [mu - beta * S * I - mu * S, beta * S * I - sigma * E - mu * E, sigma * E - gamma * I - mu * I, gamma * I - mu * R]
+				elif mu == None:
+					raise Exception('Expected mu for SEIR model')
+				elif sigma == None:
+					raise Exception('Expected sigma for SEIR model')
+		
+		extended_actual = np.concatenate((data.values.flatten(), [0] * (size - len(data.values))))
+
+		if args.mode == 'SEIR':
+			result = solve_ivp(model, [0, size], [S_0,I_0,R_0, E_0], t_eval=np.arange(0, size, 1))
+		else:
+			result = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True)
+
+		return new_index, extended_actual, result
+
+	def train(self):
+		"""
+			Run the optimization to estimate the beta and gamma fitting the given confirmed cases.
+		"""
+		confirmed_data = self.load_confirmed(self.country)
+		recovered_data = self.load_recovered(self.country)
+
+		if not os.path.isdir('out'):
+			os.mkdir('out')
+
+		if args.mode == 'Linear':
+			optimal = minimize(
+				loss_linear,
+				[0.001, 0.001],
+				args=(confirmed_data, recovered_data),
+				method='L-BFGS-B',
+				bounds=[(0.00000001, 0.4), (0.00000001, 0.4)]
+			)
+			beta, gamma = optimal.x
+			print(f'Beta: {beta}, Gamma: {gamma}, R0: {beta/gamma}')
+			new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma)
+			print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}')
+			df = compose_df(prediction, extended_actual, correction_factor, new_index)
+			with open(f'out/{args.disease}-data.csv', 'w+') as file:
+				file.write(f'Beta: {beta}\nGamma: {gamma}\nR0: {beta/gamma}')
+		elif args.mode == 'SIR':
+			optimal = minimize(
+				loss_sir,
+				[0.001, 0.001],
+				args=(confirmed_data, recovered_data),
+				method='L-BFGS-B',
+				bounds=[(0.00000001, 0.4), (0.00000001, 0.4)]
+			)
+			beta, gamma = optimal.x
+			print(f'Beta: {beta}, Gamma: {gamma}, R0: {beta/gamma}')
+			new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma)
+			print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}')
+			df = compose_df(prediction, extended_actual, correction_factor, new_index)
+			with open(f'out/{args.disease}-data.csv', 'w+') as file:
+				file.write(f'Beta: {beta}\nGamma: {gamma}\nR0: {beta/gamma}')
+		elif args.mode == 'ESIR':
+			optimal = minimize(
+				loss_esir,
+				[0.001, 0.001, 0.001],
+				args=(confirmed_data, recovered_data),
+				method='L-BFGS-B',
+				bounds=[(0.00000001, 0.4), (0.00000001, 0.4), (0.00000001, 0.4)]
+			)
+			beta, gamma, mu = optimal.x
+			print(f'Beta: {beta}, Gamma: {gamma}, Mu: {mu} R0: {beta/(gamma + mu)}')
+			new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma, mu = mu)
+			print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}')
+			df = compose_df(prediction, extended_actual, correction_factor, new_index)
+			with open(f'out/{args.disease}-data.csv', 'w+') as file:
+				file.write(f'Beta: {beta}\nGamma: {gamma}\nMu: {mu}\nR0: {beta/(gamma + mu)}')
+		elif args.mode == 'SEIR':
+			exposed_data = self.load_exposed(self.country)
+
+			optimal = minimize(
+				loss_seir,
+				[0.001, 0.001],
+				args=(confirmed_data, recovered_data, exposed_data),
+				method='L-BFGS-B',
+				bounds=[(0.00000001, 0.4), (0.00000001, 0.4), (0.00000001, 0.4), (0.00000001, 0.4)]
+			)
+			beta, gamma, mu, sigma = optimal.x
+			print(f'Beta: {beta}, Gamma: {gamma}, Mu: {mu}, Sigma: {sigma} R0: {(beta * sigma)/((mu + gamma) * (mu + sigma))}')
+			new_index, extended_actual, prediction = self.predict(confirmed_data, beta = beta, gamma = gamma, mu = mu)
+			print(f'Predicted I: {prediction.y[1][-1] * 13500}, Actual I: {extended_actual[-1] * correction_factor}')
+			df = compose_df(prediction, extended_actual, correction_factor, new_index)
+			with open(f'out/{args.disease}-data.csv', 'w+') as file:
+				file.write(f'Beta: {beta}\nGamma: {gamma}\nMu: {mu}\nSigma: {sigma}\nR0: {(beta * sigma)/((mu + gamma) * (mu + sigma))}')
+		fig, ax = plt.subplots(figsize=(15, 10))
+		ax.set_title(f'{args.disease} cases over time ({args.mode} Model)')
+		df.plot(ax=ax)
+		fig.savefig(f"{args.out if args.out != None else args.disease}.png")
+		df.to_csv(f'out/{args.disease}-prediction.csv')
+
+def filter_zeroes(arr):
+	out = np.array(arr)
+	for index in range(len(out)):
+		if out[index] == 0:
+			out[index] = None
+	return out
+
+def compose_df(prediction, actual, correction_factor, index):
+	df_dict = {}
+
+	for data in args.include_data:
+		if data == 'Actual':
+			df_dict['Actual'] = filter_zeroes(actual * correction_factor)
+		elif data == 'S':
+			df_dict['S'] = prediction.y[0] * 13500
+		elif data == 'I':
+			df_dict['I'] = prediction.y[1] * 13500
+		elif data == 'R':
+			df_dict['R'] = prediction.y[2] * 13500
+		elif data == 'E':
+			df_dict['E'] = prediction.y[3] * 13500
+
+	return pd.DataFrame(df_dict, index=index)
+
+# Loss Functions - used to "train" the model
+
+def loss_linear(point, confirmed, recovered):
+	size = len(confirmed)
+	beta, gamma = point
+	def model(t, y):
+		S = y[0]
+		I = y[1]
+		R = y[2]
+		return [-beta * S, beta * S - gamma * I, gamma * I]
+	solution = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True)
+	sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2))
+	sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2))
+	return sol_inf * 0.5 + sol_rec * 0.5
+
+def loss_sir(point, confirmed, recovered):
+	size = len(confirmed)
+	beta, gamma = point
+	def model(t, y):
+		S = y[0]
+		I = y[1]
+		R = y[2]
+		return [-beta * S * I, beta * S * I - gamma * I, gamma * I]
+	solution = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True)
+	sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2))
+	sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2))
+	return sol_inf * 0.5 + sol_rec * 0.5
+
+def loss_esir(point, confirmed, recovered):
+	size = len(confirmed)
+	beta, gamma, mu = point
+	def model(t, y):
+		S = y[0]
+		I = y[1]
+		R = y[2]
+		return [mu - beta * S * I - mu * S, beta * S * I - gamma * I - mu * I, gamma * I - mu * R]
+	solution = solve_ivp(model, [0, size], [S_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True)
+	sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2))
+	sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2))
+	return sol_inf * 0.5 + sol_rec * 0.5
+
+def loss_seir(point, confirmed, recovered, exposed):
+	size = len(confirmed)
+	beta, gamma, mu, sigma = point
+	def model(t, y):
+		S = y[0]
+		I = y[1]
+		R = y[2]
+		E = y[3]
+		return [mu - beta * S * I - mu * S, beta * S * I - sigma * E - mu * E, sigma * E * I - gamma * I - mu * I, gamma * I - mu * R]
+	solution = solve_ivp(model, [0, size], [S_0,E_0,I_0,R_0], t_eval=np.arange(0, size, 1), vectorized=True)
+	sol_inf = np.sqrt(np.mean((solution.y[1] - (confirmed.values.flatten() * correction_factor/13500))**2))
+	sol_rec = np.sqrt(np.mean((solution.y[2] - (recovered.values * correction_factor/13500))**2))
+	sol_exp = np.sqrt(np.mean((solution.y[3] - (exposed.values * correction_factor/13500))**2))
+	return sol_inf/3 + sol_rec/3 + sol_exp/3
+
+my_learner = Learner('US')
+my_learner.train()
author	Ta180m	2020-04-29 14:56:39 -0500
committer	GitHub	2020-04-29 14:56:39 -0500
commit	9835124f9d8036bdccf540b62b11ef28ca6b6ea6 (patch)
tree	57f97e28288183ab71b8416e54bf3e11f49e9a85 /solver2.py
parent	fc580716cb59d26de8a36184c0290951c59df820 (diff)