From 9fe35fcab06f2a41b373ed59adcf83648e9542e8 Mon Sep 17 00:00:00 2001
From: Axel Nilsson
Date: Sun, 25 Feb 2024 02:35:24 +0100
Subject: [PATCH] WIP: Convert simulator from torch to numpy for increased
 performance.

---
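Notes (kept below the "---" so they stay out of the commit message): the hot
path is transform_to_true in src/abacus/models/garch.py, whose recursion runs
one element at a time, so every iteration pays per-call dispatch overhead
rather than doing vectorized work. That fixed per-op cost is far larger in
torch than in numpy, which is presumably where most of the win comes from;
the math itself is unchanged. A rough, illustrative micro-benchmark of the
per-element overhead (sizes and names are arbitrary, not part of this patch):

    import time

    import numpy as np
    import torch

    n = 100_000
    for label, xs, sqrt in (("torch", torch.rand(n), torch.sqrt),
                            ("numpy", np.random.rand(n), np.sqrt)):
        start = time.time()
        total = 0.0
        for i in range(n):
            total += sqrt(xs[i])  # one tiny op per element, as in transform_to_true
        print(label, time.time() - start)
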
 backtest.py                       |  8 +++++---
 src/abacus/models/garch.py        | 32 ++++++++++++++++++++++----------
 src/abacus/optimizer/optimizer.py |  2 +-
 src/abacus/simulator/simulator.py | 24 ++++++++++++------------
 4 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/backtest.py b/backtest.py
index 48d53de..07d51a6 100644
--- a/backtest.py
+++ b/backtest.py
@@ -29,22 +29,24 @@
 
 # Date range for backtesting.
 start_date = "2020-01-02"
-end_date = "2020-05-02" # "2023-05-31"
+end_date = "2020-10-02" # "2023-05-31"
 date_range = pd.date_range(start=start_date, end=end_date, freq='B')
 solutions = {"2020-01-01": inital_weights}
 times = {}
 
 for date in date_range:
-    t1 = time.time()
     universe.date_today = date
     simulator = Simulator(universe)
+    t0 = time.time()
     simulator.calibrate()
+    print("Calibration", time.time() - t0)
+    t1 = time.time()
     simulator.run_simulation(time_steps=10, number_of_simulations=1000)
+    print("Simulation", time.time() - t1)
     optimizer = MPCMaximumReturn(universe, portfolio, simulator.return_tensor, gamma=2, l1_penalty=0,
                                  l2_penalty=0.02, covariance_matrix=simulator.covariance_matrix)
     optimizer.solve()
     solution = optimizer.solution
-    times[date] = time.time() - t1
 
     portfolio.weights = solution
     solutions[date] = solution
diff --git a/src/abacus/models/garch.py b/src/abacus/models/garch.py
index 0eb5a3b..f7f313f 100644
--- a/src/abacus/models/garch.py
+++ b/src/abacus/models/garch.py
@@ -1,8 +1,11 @@
 # -*- coding: utf-8 -*-
 import logging
+import time
 
 import torch
 import numpy as np
+
+from scipy import stats
 from cytoolz import memoize
 from scipy.optimize import minimize
 from torch.distributions import Normal
@@ -58,20 +61,22 @@ def calibrate(self):
 
-    def transform_to_true(self, uniform_sample: torch.Tensor) -> torch.Tensor:
+    def transform_to_true(self, uniform_sample: np.ndarray) -> np.ndarray:
         self._check_calibration()
         number_of_samples = len(uniform_sample)
-        normals = Normal(0,1).icdf(uniform_sample)
-        simulated_values = torch.zeros(number_of_samples)
-        parameters = torch.tensor(self._solution.x)
+        normals = stats.norm.ppf(uniform_sample)
+        simulated_values = np.empty(shape=number_of_samples)
+        parameters = self._solution.x
 
         mu_corr, mu_ewma = self._intermediary_parameters(parameters=parameters)
-        variance = self._compute_variance(parameters=torch.tensor(self._solution.x))[-1]
+        variance = self._compute_variance(parameters=self._solution.x)[-1]
         squared_return = self._squared_returns[-1]
 
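+        # GARCH recursion: step the conditional variance forward, then scale
+        # each standard normal draw by the conditional volatility.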
         for i in range(number_of_samples):
             variance = self._update_variance(variance, squared_return, mu_corr, mu_ewma)
-            return_ = torch.sqrt(variance) * normals[i]
-            squared_return = torch.square(return_)
+            return_ = np.sqrt(variance) * normals[i]
+            squared_return = np.square(return_)
             simulated_values[i] = return_
 
         return simulated_values
 
@@ -130,7 +132,8 @@ def _compute_inital_variance(self) -> torch.Tensor:
             return torch.square(torch.std(self._data[:INITIAL_VARIANCE_GARCH_OBSERVATIONS]))
         return self._initial_squared_returns
 
-    def _update_variance(self, variance: torch.Tensor, squared_return: torch.Tensor, mu_corr, mu_ewma):
+    def _update_variance(self, variance: torch.Tensor | float, squared_return: torch.Tensor | float,
+                         mu_corr: torch.Tensor | float, mu_ewma: torch.Tensor | float):
         return self._long_run_variance + mu_corr * (mu_ewma * variance + (1 - mu_ewma) * squared_return - self._long_run_variance)
 
     def _sanity_check(self):
@@ -164,7 +167,7 @@ def _compute_variance(self, parameters: torch.Tensor) -> torch.Tensor:
         return variances
 
     @staticmethod
-    def _intermediary_parameters(parameters: torch.Tensor):
+    def _intermediary_parameters(parameters: torch.Tensor | np.ndarray):
         """Computes mu_corr and mu_ewma from z_corr and z_ewma.
 
         Args:
@@ -173,7 +176,13 @@ def _intermediary_parameters(parameters: torch.Tensor):
         Returns:
             _type_: _description_
         """
-        mu = torch.exp(-torch.exp(-parameters))
+        if isinstance(parameters, torch.Tensor):
+            mu = torch.exp(-torch.exp(-parameters))
+        elif isinstance(parameters, np.ndarray):
+            mu = np.exp(-np.exp(-parameters))
+        else:
+            raise TypeError(f"Unsupported parameter type: {type(parameters)}.")
+
         return mu[0], mu[1]
 
     @staticmethod
diff --git a/src/abacus/optimizer/optimizer.py b/src/abacus/optimizer/optimizer.py
index 298c1e8..1a32b65 100644
--- a/src/abacus/optimizer/optimizer.py
+++ b/src/abacus/optimizer/optimizer.py
@@ -153,7 +153,7 @@ def solution(self):
 
     @property
     def _return_expectation_tensor(self):
-        return torch.mean(self._simulation_tensor, dim=2)
+        return np.mean(self._simulation_tensor, axis=2)
 
     @property
     def _assets(self):
diff --git a/src/abacus/simulator/simulator.py b/src/abacus/simulator/simulator.py
index d3eadfa..9bde128 100644
--- a/src/abacus/simulator/simulator.py
+++ b/src/abacus/simulator/simulator.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 
 import time
-import torch
 import numpy as np
 import pyvinecopulib as pv
 
@@ -27,7 +26,7 @@ def __init__(self, universe: Universe):
         self._price_tensor = None
 
     @property
-    def covariance_matrix(self, data_type: DataTypes=DataTypes.LOG_RETURNS) -> torch.Tensor:
+    def covariance_matrix(self, data_type: DataTypes=DataTypes.LOG_RETURNS) -> np.ndarray:
         # TODO: Add explanation of stacking here.
         # TODO: Ensure working under odd data length inputs.
         # TODO: Add input for price through enum.
@@ -40,24 +39,24 @@ def covariance_matrix(self, data_type: DataTypes=DataTypes.LOG_RETURNS) -> torch
             instrument_data = [instrument.art_returns_tensor for instrument in self._instruments]
         else:
             raise NotImplementedError(f"Data type {data_type} not supported to build covariance matrix.")
-        return torch.cov(torch.stack(instrument_data))
+        return np.cov(np.stack(instrument_data))
 
     @property
-    def return_tensor(self) -> torch.Tensor:
+    def return_tensor(self) -> np.ndarray:
         self._check_calibration()
         return self._return_tensor
 
     @property
-    def price_tensor(self) -> torch.Tensor:
+    def price_tensor(self) -> np.ndarray:
         self._check_calibration()
         if self._price_tensor is None:
             return_tensor = self.return_tensor
             inital_prices = self._inital_prices
-            self._price_tensor = inital_prices * torch.exp(torch.cumsum(return_tensor, dim=1))
+            self._price_tensor = inital_prices * np.exp(np.cumsum(return_tensor, axis=1))
         return self._price_tensor
 
     @property
-    def _uniform_samples(self) -> np.array:
+    def _uniform_samples(self) -> np.ndarray:
         # TODO: Compute size of array and fill it vectorized. Requires a consistent number of samples accessible.
         samples = []
         for instrument in self._instruments:
@@ -70,9 +69,9 @@ def _number_of_instruments(self) -> int:
         return len(self._instruments)
 
     @property
-    def _inital_prices(self) -> torch.Tensor:
+    def _inital_prices(self) -> np.ndarray:
         size = (self._number_of_instruments, )
-        intial_prices = torch.empty(size=size)
+        intial_prices = np.empty(shape=size)
         for i, instrument in enumerate(self._instruments):
             intial_prices[i] = instrument.initial_price
         return intial_prices[:, None, None]
@@ -82,17 +81,18 @@ def calibrate(self):
         self._calibrate_copula()
         self._calibrated = True
 
-    def run_simulation(self, time_steps: int, number_of_simulations: int) -> torch.Tensor:
+    def run_simulation(self, time_steps: int, number_of_simulations: int) -> np.ndarray:
         assert isinstance(time_steps, int) and time_steps > 0, "Time step must be a positive integer."
         assert isinstance(number_of_simulations, int) and number_of_simulations > 0, "Number of simulations must be a positive integer."
 
         number_of_instruments = self._number_of_instruments
         size = (number_of_instruments, time_steps, number_of_simulations)
-        simulation_tensor = torch.empty(size=size)
+        simulation_tensor = np.empty(shape=size)
 
         for n in range(number_of_simulations):
             simulations = self._coupla.simulate(time_steps).T
             for i, simulation in enumerate(simulations):
-                simulation_tensor[i,:,n] = self._instruments[i].model.transform_to_true(torch.tensor(simulation))
+                simulation_tensor[i,:,n] = self._instruments[i].model.transform_to_true(simulation)
+        self._return_tensor = simulation_tensor
 
     def _calibrate_instruments(self):