Skip to content

Commit

Permalink
gbrl version 1.0.2 - removed limitations of using only two optimizers
Browse files: browse the repository at this point in the history
  • Loading branch information
benja263 committed Jul 30, 2024
1 parent 8c85648 commit a090978
Show file tree
Hide file tree
Showing 27 changed files with 777 additions and 789 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
include(CMakeDependentOption)
option(USE_CUDA "Build with GPU acceleration" OFF)

if(DEFINED ENV{COVERAGE} AND $ENV{COVERAGE} STREQUAL "1")
if(DEFINED $ENV{COVERAGE} AND $ENV{COVERAGE} STREQUAL "1")
message(STATUS "Coverage build")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ The following results, obtained using the `GBRL_SB3` repository, demonstrate the

## Getting started
### Prerequisites
- Python 3.7 or higher
- Python 3.9 or higher
- LLVM and OpenMP (macOS).

### Installation
Expand Down
8 changes: 4 additions & 4 deletions gbrl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
# https://nvlabs.github.io/gbrl/license.html
#
##############################################################################
__version__ = "1.0.1"
__version__ = "1.0.2"

from .ac_gbrl import (ActorCritic, GaussianActor, ContinuousCritic,
DiscreteCritic, ParametricActor)
from .gbt import GradientBoostingTrees
from .gbrl_cpp import GBRL
from .gbt import GBRL
from .gbrl_cpp import GBRL as GBRL_CPP

cuda_available = GBRL.cuda_available
cuda_available = GBRL_CPP.cuda_available

206 changes: 114 additions & 92 deletions gbrl/ac_gbrl.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion gbrl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
#
##############################################################################
# shrinkage is not implemented yet
VALID_OPTIMIZER_ARGS = ['init_lr', 'scheduler', 'shrinkage', 'algo', 'beta_1', 'beta_2', 'eps', 'T']
VALID_OPTIMIZER_ARGS = ['init_lr', 'scheduler', 'shrinkage', 'algo', 'beta_1', 'beta_2', 'eps', 'T', 'start_idx', 'stop_idx']
APPROVED_OPTIMIZERS = ["Adam", "SGD"]
86 changes: 32 additions & 54 deletions gbrl/gbrl_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,11 @@
import numpy as np
import torch as th

# Define custom dtypes
numerical_dtype = np.dtype('float32')
categorical_dtype = np.dtype('S128')
from .gbrl_cpp import GBRL as GBRL_CPP
from .utils import get_input_dim, get_poly_vectors, process_array, to_numpy, numerical_dtype


from typing import Dict

from .gbrl_cpp import GBRL
from .utils import get_input_dim, get_poly_vectors

def process_array(arr: np.array)-> Tuple[np.array, np.array]:
""" Formats numpy array for C++ GBRL.
"""
if np.issubdtype(arr.dtype, np.floating) or np.issubdtype(arr.dtype, np.integer):
return np.ascontiguousarray(arr, dtype=numerical_dtype), None
else:
fixed_str = np.char.encode(arr.astype(str), 'utf-8').astype(categorical_dtype)
return None, np.ascontiguousarray(fixed_str)

def to_numpy(arr: Union[np.array, th.Tensor]) -> Union[np.array, np.array]:
def features_to_numpy(arr: Union[np.array, th.Tensor]) -> Tuple[np.array, np.array]:
if isinstance(arr, th.Tensor):
arr = arr.detach().cpu().numpy()
return np.ascontiguousarray(arr, dtype=numerical_dtype), None
Expand All @@ -59,7 +44,7 @@ def preprocess_features(arr: Union[np.array, th.Tensor]) -> Tuple[np.array, np.a
Tuple[np.array, np.array]
"""
input_dim = get_input_dim(arr)
num_arr, cat_arr = to_numpy(arr)
num_arr, cat_arr = features_to_numpy(arr)
if num_arr is not None and len(num_arr.shape) == 1:
if input_dim == 1:
num_arr = num_arr[np.newaxis, :]
Expand All @@ -78,21 +63,18 @@ def preprocess_features(arr: Union[np.array, th.Tensor]) -> Tuple[np.array, np.a


class GBTWrapper:
def __init__(self, output_dim: int, policy_dim: int, tree_struct: Dict, optimizer: Union[Dict, List], gbrl_params: Dict, verbose: int = 0, device: str = 'cpu'):
def __init__(self, output_dim: int, tree_struct: Dict, optimizer: Union[Dict, List], gbrl_params: Dict, verbose: int = 0, device: str = 'cpu'):
if 'T' in gbrl_params:
del gbrl_params['T']
self.params = {'output_dim': output_dim, 'policy_dim': policy_dim,
self.params = {'output_dim': output_dim,
'split_score_func': gbrl_params.get('split_score_func', 'Cosine'),
'generator_type': gbrl_params.get('generator_type', 'Quantile'),
'use_control_variates': gbrl_params.get('control_variates', False),
'verbose': verbose, 'device': device, **tree_struct}
self.tree_struct = tree_struct
self.output_dim = output_dim
self.policy_dim = policy_dim

self.device = device
self.optimizer = optimizer if isinstance(optimizer, list) else [optimizer]
self.optimizer = [opt for opt in self.optimizer if opt is not None]
self.optimizer = optimizer if isinstance(optimizer, list) or optimizer is None else [optimizer]
self.student_model = None
self.cpp_model = None
self.iteration = 0
Expand All @@ -101,22 +83,21 @@ def __init__(self, output_dim: int, policy_dim: int, tree_struct: Dict, optimize
self.verbose = verbose
feature_weights = gbrl_params.get('feature_weights', None)
if feature_weights is not None:
feature_weights, _ = to_numpy(feature_weights)
feature_weights = to_numpy(feature_weights)
feature_weights = feature_weights.flatten()
assert np.all(feature_weights >= 0), "feature weights contains non-positive values"
self.feature_weights = feature_weights

def reset(self) -> None:
if self.cpp_model is not None:
policy_lr, value_lr = self.cpp_model.get_scheduler_lrs()
self.optimizer[0]['init_lr'] = policy_lr
if len(self.optimizer) > 1:
self.optimizer[1]['init_lr'] = value_lr
self.cpp_model = GBRL(**self.params)
lrs = self.cpp_model.get_scheduler_lrs()
for i in range(len(self.optimizer)):
self.optimizer[i]['init_lr'] = lrs[i]

self.cpp_model = GBRL_CPP(**self.params)
if self.student_model is not None:
self.optimizer[0]['T'] -= self.total_iterations
if len(self.optimizer) > 1:
self.optimizer[1]['T'] -= self.total_iterations
for i in range(len(self.optimizer)):
self.optimizer[i]['T'] -= self.total_iterations
else:
self.total_iterations = 0
try:
Expand All @@ -126,14 +107,14 @@ def reset(self) -> None:
print(f"Caught an exception in GBRL: {e}")
feature_weights = self.gbrl_params.get('feature_weights', None)
if feature_weights is not None:
feature_weights, _ = to_numpy(feature_weights)
feature_weights = to_numpy(feature_weights)
feature_weights = feature_weights.flatten()
assert np.all(feature_weights >= 0), "feature weights contains non-positive values"
self.feature_weights = feature_weights

def step(self, features: Union[np.array, th.Tensor, Tuple], grads: Union[np.array, th.Tensor]) -> None:
num_features, cat_features = preprocess_features(features)
grads, _ = to_numpy(grads)
grads = to_numpy(grads)
grads = grads.reshape((len(grads), self.params['output_dim']))
input_dim = 0 if num_features is None else num_features.shape[1]
input_dim += 0 if cat_features is None else cat_features.shape[1]
Expand All @@ -147,7 +128,7 @@ def step(self, features: Union[np.array, th.Tensor, Tuple], grads: Union[np.arra

def fit(self, features: Union[np.array, th.Tensor], targets: Union[np.array, th.Tensor], iterations: int, shuffle: bool=True, loss_type: str='MultiRMSE') -> float:
num_features, cat_features = preprocess_features(features)
targets, _ = to_numpy(targets)
targets = to_numpy(targets)
targets = targets.reshape((len(targets), self.params['output_dim'])).astype(numerical_dtype)
input_dim = 0 if num_features is None else num_features.shape[1]
input_dim += 0 if cat_features is None else cat_features.shape[1]
Expand Down Expand Up @@ -178,6 +159,7 @@ def export(self, filename: str, modelname: str = None) -> None:
assert status == 0, "Failed to export model"
except RuntimeError as e:
print(f"Caught an exception in GBRL: {e}")

@classmethod
def load(cls, filename: str) -> "GBTWrapper":
filename = filename.rstrip('.')
Expand All @@ -186,7 +168,7 @@ def load(cls, filename: str) -> "GBTWrapper":
assert os.path.isfile(filename), "filename doesn't exist!"
try:
instance = cls.__new__(cls)
instance.cpp_model = GBRL.load(filename)
instance.cpp_model = GBRL_CPP.load(filename)
metadata = instance.cpp_model.get_metadata()
instance.tree_struct = {'max_depth': metadata['max_depth'],
'min_data_in_leaf': metadata['min_data_in_leaf'],
Expand All @@ -195,7 +177,6 @@ def load(cls, filename: str) -> "GBTWrapper":
'batch_size': metadata['batch_size'],
'grow_policy': metadata['grow_policy']}
instance.params = {'output_dim': metadata['output_dim'],
'policy_dim': metadata['policy_dim'],
'split_score_func': metadata['split_score_func'],
'generator_type': metadata['generator_type'],
'use_control_variates': metadata['use_control_variates'],
Expand All @@ -204,7 +185,6 @@ def load(cls, filename: str) -> "GBTWrapper":
**instance.tree_struct
}
instance.output_dim = metadata['output_dim']
instance.policy_dim = metadata['policy_dim']
instance.verbose = metadata['verbose']
instance.gbrl_params = {'split_score_func': metadata['split_score_func'],
'generator_type': metadata['generator_type'],
Expand Down Expand Up @@ -319,10 +299,10 @@ def predict(self, features: Union[np.array, th.Tensor], start_idx: int=0, stop_i

def distil(self, obs: Union[np.array, th.Tensor], targets: np.array, params: Dict, verbose: int=0) -> Tuple[int, Dict]:
num_obs, cat_obs = preprocess_features(obs)
distil_params = {'output_dim': self.params['output_dim'], 'policy_dim': self.params['output_dim'], 'split_score_func': 'L2',
distil_params = {'output_dim': self.params['output_dim'], 'split_score_func': 'L2',
'generator_type': 'Quantile', 'use_control_variates': False, 'device': self.device,
'max_depth': params.get('distil_max_depth', 6), 'verbose': verbose, 'batch_size': self.params.get('distil_batch_size', 2048)}
self.student_model = GBRL(**distil_params)
self.student_model = GBRL_CPP(**distil_params)
distil_optimizer = {'algo': 'SGD', 'init_lr': params.get('distil_lr', 0.1)}
try:
self.student_model.set_optimizer(**distil_optimizer)
Expand All @@ -348,26 +328,25 @@ def copy(self):
return self.__copy__()

def __copy__(self):
copy_ = GBTWrapper(self.output_dim, self.policy_dim, self.tree_struct.copy(), [opt.copy() if opt is not None else opt for opt in self.optimizer], self.gbrl_params, self.verbose, self.device)
copy_ = GBTWrapper(self.output_dim, self.tree_struct.copy(), [opt.copy() if opt is not None else opt for opt in self.optimizer], self.gbrl_params, self.verbose, self.device)
copy_.iteration = self.iteration
copy_.total_iterations = self.total_iterations
if self.cpp_model is not None:
copy_.cpp_model = GBRL(self.cpp_model)
copy_.cpp_model = GBRL_CPP(self.cpp_model)
if self.student_model is not None:
copy_.student_model = GBRL(self.student_model)
copy_.student_model = GBRL_CPP(self.student_model)
return copy_

class SeparateActorCriticWrapper:
def __init__(self, output_dim: int, tree_struct: Dict, policy_optimizer: Dict, value_optimizer: Dict, gbrl_params: Dict, verbose: int = 0, device: str = 'cpu'):
print('****************************************')
print(f'Separate GBRL Tree with output dim: {output_dim}, tree_struct: {tree_struct} policy_optimizer: {policy_optimizer} value_optimizer: {value_optimizer}')
print('****************************************')
self.policy_model = GBTWrapper(output_dim, output_dim, tree_struct, policy_optimizer, gbrl_params, verbose, device)
self.value_model = GBTWrapper(1, 1, tree_struct, value_optimizer, gbrl_params, verbose, device)
self.policy_model = GBTWrapper(output_dim - 1, tree_struct, policy_optimizer, gbrl_params, verbose, device)
self.value_model = GBTWrapper(1, tree_struct, value_optimizer, gbrl_params, verbose, device)
self.tree_struct = tree_struct
self.total_iterations = 0
self.output_dim = output_dim
self.policy_dim = output_dim
self.policy_optimizer = policy_optimizer
self.value_optimizer = value_optimizer
self.gbrl_params = gbrl_params
Expand Down Expand Up @@ -445,7 +424,6 @@ def load(cls, filename: str) -> "SeparateActorCriticWrapper":
instance.tree_struct = instance.policy_model.tree_struct
instance.total_iterations = instance.policy_model.iteration + instance.value_model.iteration
instance.output_dim = instance.policy_model.output_dim
instance.policy_dim = instance.policy_model.output_dim
instance.policy_optimizer = instance.policy_model.optimizer[0]
instance.value_optimizer = instance.value_model.optimizer[0]
instance.gbrl_params = instance.policy_model.gbrl_params
Expand Down Expand Up @@ -482,19 +460,19 @@ def __copy__(self) -> "SeparateActorCriticWrapper":
class SharedActorCriticWrapper(GBTWrapper):
def __init__(self, output_dim: int, tree_struct: Dict, policy_optimizer: Dict, value_optimizer: Dict=None, gbrl_params: Dict=dict(), verbose: int = 0, device: str = 'cpu'):
print('****************************************')
print(f'Shared GBRL Tree with output dim: {output_dim}, policy_dim: {output_dim - 1}, tree_struct: {tree_struct} policy_optimizer: {policy_optimizer} value_optimizer: {value_optimizer}')
print(f'Shared GBRL Tree with output dim: {output_dim}, tree_struct: {tree_struct} policy_optimizer: {policy_optimizer} value_optimizer: {value_optimizer}')
print('****************************************')
self.value_optimizer = value_optimizer
self.policy_optimizer = policy_optimizer
super().__init__(output_dim, output_dim - 1 if self.value_optimizer else output_dim, tree_struct, policy_optimizer, gbrl_params, verbose, device)
super().__init__(output_dim, tree_struct, policy_optimizer, gbrl_params, verbose, device)

def reset(self) -> None:
if self.cpp_model is not None:
policy_lr, value_lr = self.get_schedule_learning_rates()
self.policy_optimizer['init_lr'] = policy_lr
if self.value_optimizer:
self.value_optimizer['init_lr'] = value_lr
self.cpp_model = GBRL(**self.params)
self.cpp_model = GBRL_CPP(**self.params)
if self.student_model is not None:
self.policy_optimizer['T'] -= self.total_iterations
else:
Expand Down Expand Up @@ -569,9 +547,9 @@ def __copy__(self) -> "SharedActorCriticWrapper":
copy_.iteration = self.iteration
copy_.total_iterations = self.total_iterations
if self.cpp_model is not None:
copy_.model = GBRL(self.cpp_model)
copy_.model = GBRL_CPP(self.cpp_model)
if self.student_model is not None:
copy_.student_model = GBRL(self.student_model)
copy_.student_model = GBRL_CPP(self.student_model)
return copy_


Expand Down
Loading

0 comments on commit a090978

Please sign in to comment.