Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed training continuation #9

Merged
merged 51 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
4da9e27
testing mac
benja263 Sep 30, 2024
d8f9f14
fixing compilation issues with mac
benja263 Sep 30, 2024
b202db9
removing ci/cd filter
benja263 Sep 30, 2024
8c02913
trying to find llvm
benja263 Sep 30, 2024
5eb29bd
testing
benja263 Sep 30, 2024
cba4bec
fixed bug
benja263 Sep 30, 2024
5160753
fixing mac
benja263 Sep 30, 2024
3112ff4
testing
benja263 Sep 30, 2024
0d17662
added debug flags
benja263 Sep 30, 2024
968775c
checking os architecture
benja263 Sep 30, 2024
16bf884
reinstalling cmake
benja263 Sep 30, 2024
8fe85ab
added debug compilation
benja263 Sep 30, 2024
538fe80
trying to fix
benja263 Sep 30, 2024
02f4ee6
testing paths
benja263 Sep 30, 2024
7b77cf1
testing
benja263 Sep 30, 2024
b73a22a
export files
benja263 Sep 30, 2024
cf20e63
reducing verbose
benja263 Sep 30, 2024
4129a1c
debugging
benja263 Sep 30, 2024
c562da5
debugging
benja263 Sep 30, 2024
8c0de58
tests
benja263 Sep 30, 2024
24c680a
fixing
benja263 Sep 30, 2024
1ef513c
no lldb
benja263 Sep 30, 2024
b97447a
adding omp in tests
benja263 Sep 30, 2024
807c59d
test
benja263 Sep 30, 2024
a1eb9b3
testing
benja263 Sep 30, 2024
9076969
debug
benja263 Sep 30, 2024
ccea2a0
brew path
benja263 Sep 30, 2024
f3af850
changes
benja263 Sep 30, 2024
42013dd
testing
benja263 Sep 30, 2024
2b3109e
adding openmp dynamic library
benja263 Sep 30, 2024
d70b707
using gcc
benja263 Sep 30, 2024
cbfcfa0
testing
benja263 Sep 30, 2024
fe8499b
testing
benja263 Sep 30, 2024
7188adb
testing
benja263 Sep 30, 2024
e995e22
testing
benja263 Sep 30, 2024
3f42bec
updating mac
benja263 Sep 30, 2024
73bed75
adding windows test
benja263 Sep 30, 2024
0ae3edc
testing
benja263 Sep 30, 2024
0475666
bla
benja263 Sep 30, 2024
2b49d47
checking
benja263 Sep 30, 2024
a4ff1a5
testing changes
benja263 Sep 30, 2024
33254ea
fixing mac
benja263 Sep 30, 2024
7555cd7
testing on many platforms
benja263 Sep 30, 2024
1a45814
fixing feature weights
benja263 Jan 31, 2025
5d77dc1
Merge branch 'master' into fix_feature_weights
benja263 Jan 31, 2025
de2e1b4
added input dim
benja263 Jan 31, 2025
568b18f
fixed bugs with cuda and specifically added tests for training contin…
benja263 Jan 31, 2025
8d504a8
fixed gbrl wrapper and tests
benja263 Jan 31, 2025
bdd843d
adding macos for unittest
benja263 Jan 31, 2025
6a03a19
fixing tests
benja263 Jan 31, 2025
0c5f4b3
fix mac
benja263 Jan 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:

strategy:
matrix:
os: [ubuntu-latest, windows-latest]
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.10']

steps:
Expand All @@ -36,10 +36,19 @@ jobs:
# sudo apt-get install -y lcov g++ gcc
sudo apt-get install -y g++ gcc

- name: Install dependencies on Windows
if: matrix.os == 'windows-latest'
- name: Install LLVM via Homebrew
if: matrix.os == 'macos-latest'
run: brew install libomp llvm

- name: Configure Environment Variables
if: matrix.os == 'macos-latest'
run: |
python -m pip install --upgrade pip
echo "PATH=$(brew --prefix llvm)/bin:$PATH" >> $GITHUB_ENV
echo "LDFLAGS=-L$(brew --prefix libomp)/lib -L$(brew --prefix llvm)/lib -L$(brew --prefix llvm)/lib/c++ -Wl,-rpath,$(brew --prefix llvm)/lib/c++" >> $GITHUB_ENV
echo "CPPFLAGS=-I$(brew --prefix libomp)/include -I$(brew --prefix llvm)/include" >> $GITHUB_ENV
echo "CC=$(brew --prefix llvm)/bin/clang" >> $GITHUB_ENV
echo "CXX=$(brew --prefix llvm)/bin/clang++" >> $GITHUB_ENV
echo "DYLD_LIBRARY_PATH=$(brew --prefix llvm)/lib:$(brew --prefix libomp)/lib" >> $GITHUB_ENV

- name: Set up MSVC environment
if: matrix.os == 'windows-latest'
Expand Down
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,6 @@ if (USE_CUDA)
endif()
# Platform-specific settings and linking
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")

target_link_libraries(gbrl_cpp PRIVATE OpenMP::OpenMP_CXX)
if (USE_CUDA)
target_link_libraries(gbrl_cpp PRIVATE CUDA::cudart)
Expand Down
367 changes: 366 additions & 1 deletion LICENSES.txt

Large diffs are not rendered by default.

18 changes: 16 additions & 2 deletions gbrl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# https://nvlabs.github.io/gbrl/license.html
#
##############################################################################
__version__ = "1.0.8"
__version__ = "1.0.9"

import importlib.util
import os
Expand Down Expand Up @@ -39,7 +39,21 @@ def load_cpp_module():
spec.loader.exec_module(module)
_loaded_cpp_module = module = module
return module


if platform.system() == "Darwin": # check for .so on Darwin
ext = ".so"
for dir_path in possible_paths:
if os.path.exists(dir_path):
# Scan for files that match the module name and extension
for file_name in os.listdir(dir_path):
if file_name.startswith(module_name) and file_name.endswith(ext):
# Dynamically load the matching shared library
file_path = os.path.join(dir_path, file_name)
spec = importlib.util.spec_from_file_location(module_name, file_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
_loaded_cpp_module = module = module
return module
raise ImportError(f"Could not find {module_name}{ext} in any of the expected locations: {possible_paths}")


Expand Down
38 changes: 26 additions & 12 deletions gbrl/ac_gbrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
class ActorCritic(GBRL):
def __init__(self,
tree_struct: Dict,
input_dim: int,
output_dim: int,
policy_optimizer: Dict,
value_optimizer: Dict= None,
Expand Down Expand Up @@ -56,6 +57,7 @@ def __init__(self,
if value_optimizer is not None:
value_optimizer = setup_optimizer(value_optimizer, prefix='value_')
super().__init__(tree_struct,
input_dim,
output_dim,
None,
gbrl_params,
Expand All @@ -68,11 +70,13 @@ def __init__(self,
self.bias = bias if bias is not None else np.zeros(self.output_dim if shared_tree_struct else self.output_dim - 1, dtype=numerical_dtype)
# init model
if self.shared_tree_struct:
self._model = SharedActorCriticWrapper(self.output_dim, self.tree_struct, self.policy_optimizer, self.value_optimizer, self.gbrl_params, self.verbose, self.device)
self._model = SharedActorCriticWrapper(self.input_dim, self.output_dim, self.tree_struct, self.policy_optimizer, self.value_optimizer, self.gbrl_params, self.verbose, self.device)
self._model.reset()
self._model.set_bias(self.bias)
else:
self._model = SeparateActorCriticWrapper(self.output_dim, self.tree_struct, self.policy_optimizer, self.value_optimizer, self.gbrl_params, self.verbose, self.device)
self._model.reset()
self._model.set_bias(self.bias)
self._model = SeparateActorCriticWrapper(self.input_dim, self.output_dim, self.tree_struct, self.policy_optimizer, self.value_optimizer, self.gbrl_params, self.verbose, self.device)
self._model.reset()
self._model.set_policy_bias(self.bias)
self.policy_grad = None
self.value_grad = None

Expand Down Expand Up @@ -100,6 +104,7 @@ def load_model(cls, load_name: str, device: str) -> "ActorCritic":
instance.bias = instance._model.get_bias()
instance.value_optimizer = instance._model.value_optimizer
instance.policy_optimizer = instance._model.policy_optimizer
instance.input_dim = instance._model.input_dim
instance.output_dim = instance._model.output_dim
instance.verbose = instance._model.verbose
instance.tree_struct = instance._model.tree_struct
Expand Down Expand Up @@ -257,14 +262,15 @@ def copy(self) -> "ActorCritic":

def __copy__(self) -> "ActorCritic":
value_optimizer = None if self.value_optimizer is None else self.value_optimizer.copy()
copy_ = ActorCritic(self.tree_struct.copy(), self.output_dim, self.policy_optimizer.copy(), value_optimizer, self.shared_tree_struct, self.gbrl_params, self.bias, self.verbose, self.device)
copy_ = ActorCritic(self.tree_struct.copy(), self.input_dim, self.output_dim, self.policy_optimizer.copy(), value_optimizer, self.shared_tree_struct, self.gbrl_params, self.bias, self.verbose, self.device)
if self._model is not None:
copy_._model = self._model.copy()
return copy_

class ParametricActor(GBRL):
def __init__(self,
tree_struct: Dict,
input_dim: int,
output_dim: int,
policy_optimizer: Dict,
gbrl_params: Dict=dict(),
Expand Down Expand Up @@ -294,6 +300,7 @@ def __init__(self,
"""
policy_optimizer = setup_optimizer(policy_optimizer, prefix='policy_')
super().__init__(tree_struct,
input_dim,
output_dim,
None,
gbrl_params,
Expand Down Expand Up @@ -341,6 +348,7 @@ def load_model(cls, load_name: str, device: str) -> "ParametricActor":
instance._model = GBTWrapper.load(load_name, device)
instance.bias = instance._model.get_bias()
instance.policy_optimizer = instance._model.optimizer
instance.input_dim = instance._model.input_dim
instance.output_dim = instance._model.output_dim
instance.verbose = instance._model.verbose
instance.tree_struct = instance._model.tree_struct
Expand Down Expand Up @@ -378,14 +386,15 @@ def __call__(self, observations: Union[np.ndarray, th.Tensor], requires_grad : b
return params

def __copy__(self) -> "ParametricActor":
copy_ = ParametricActor(self.tree_struct.copy(), self.output_dim, self.policy_optimizer.copy(), self.gbrl_params, self.bias, self.verbose, self.device)
copy_ = ParametricActor(self.tree_struct.copy(), self.input_dim, self.output_dim, self.policy_optimizer.copy(), self.gbrl_params, self.bias, self.verbose, self.device)
if self._model is not None:
copy_._model = self._model.copy()
return copy_

class GaussianActor(GBRL):
def __init__(self,
tree_struct: Dict,
input_dim: int,
output_dim: int,
mu_optimizer: Dict,
std_optimizer: Dict = None,
Expand Down Expand Up @@ -433,6 +442,7 @@ def __init__(self,
self.log_std_init = log_std_init

super().__init__(tree_struct,
input_dim,
output_dim,
None,
gbrl_params,
Expand All @@ -442,7 +452,7 @@ def __init__(self,

self.policy_dim = policy_dim
# init model
self._model = GBTWrapper(self.output_dim, self.tree_struct, [mu_optimizer, std_optimizer], self.gbrl_params, self.verbose, self.device)
self._model = GBTWrapper(self.input_dim, self.output_dim, self.tree_struct, [mu_optimizer, std_optimizer], self.gbrl_params, self.verbose, self.device)
self._model.reset()
self._model.set_bias(self.bias)

Expand Down Expand Up @@ -513,7 +523,7 @@ def __call__(self, observations: Union[np.ndarray, th.Tensor], requires_grad : b

def __copy__(self) -> "GaussianActor":
std_optimizer = None if self.std_optimizer is None else self.std_optimizer.copy()
copy_ = GaussianActor(self.tree_struct.copy(), self.output_dim, self.mu_optimizer.copy(), std_optimizer, self.gbrl_params, self.bias, self.verbose, self.device)
copy_ = GaussianActor(self.tree_struct.copy(), self.input_dim, self.output_dim, self.mu_optimizer.copy(), std_optimizer, self.gbrl_params, self.bias, self.verbose, self.device)
if self._model is not None:
copy_._model = self._model.copy()
return copy_
Expand All @@ -522,6 +532,7 @@ def __copy__(self) -> "GaussianActor":
class ContinuousCritic(GBRL):
def __init__(self,
tree_struct: Dict,
input_dim: int,
output_dim: int,
weights_optimizer: Dict,
bias_optimizer: Dict = None,
Expand Down Expand Up @@ -564,6 +575,7 @@ def __init__(self,
bias_optimizer = setup_optimizer(bias_optimizer, prefix='bias_')

super().__init__(tree_struct,
input_dim,
output_dim,
None,
gbrl_params,
Expand All @@ -575,7 +587,7 @@ def __init__(self,
self.bias = bias if bias is not None else np.zeros(self.output_dim, dtype=numerical_dtype)
self.target_update_interval = target_update_interval
# init model
self._model = GBTWrapper(self.output_dim, self.tree_struct, [weights_optimizer, bias_optimizer], self.gbrl_params, self.verbose, self.device)
self._model = GBTWrapper(self.input_dim, self.output_dim, self.tree_struct, [weights_optimizer, bias_optimizer], self.gbrl_params, self.verbose, self.device)
self._model.reset()
self._model.set_bias(self.bias)

Expand Down Expand Up @@ -657,7 +669,7 @@ def __call__(self, observations: Union[np.ndarray, th.Tensor], requires_grad: bo
return weights, bias

def __copy__(self) -> "ContinuousCritic":
copy_ = ContinuousCritic(self.tree_struct.copy(), self.output_dim, self.weights_optimizer.copy(), self.bias_optimizer.copy() if isinstance(self.critic_optimizer, dict) else {'weights_optimizer': self.critic_optimizer[0], 'bias_optimizer': self.critic_optimizer[1]}, self.gbrl_params, self.target_update_interval, self.bias, self.verbose, self.device)
copy_ = ContinuousCritic(self.tree_struct.copy(), self.input_dim, self.output_dim, self.weights_optimizer.copy(), self.bias_optimizer.copy() if isinstance(self.critic_optimizer, dict) else {'weights_optimizer': self.critic_optimizer[0], 'bias_optimizer': self.critic_optimizer[1]}, self.gbrl_params, self.target_update_interval, self.bias, self.verbose, self.device)
if self._model is not None:
copy_._model = self._model.copy()
return copy_
Expand All @@ -666,6 +678,7 @@ def __copy__(self) -> "ContinuousCritic":
class DiscreteCritic(GBRL):
def __init__(self,
tree_struct: Dict,
input_dim: int,
output_dim: int,
critic_optimizer: Dict,
gbrl_params: Dict=dict(),
Expand Down Expand Up @@ -695,6 +708,7 @@ def __init__(self,
"""
critic_optimizer = setup_optimizer(critic_optimizer, prefix='critic_')
super().__init__(tree_struct,
input_dim,
output_dim,
None,
gbrl_params,
Expand All @@ -704,7 +718,7 @@ def __init__(self,
self.target_update_interval = target_update_interval
self.bias = bias if bias is not None else np.zeros(self.output_dim, dtype=numerical_dtype)
# init model
self._model = GBTWrapper(self.output_dim, self.tree_struct, self.critic_optimizer, self.gbrl_params, self.verbose, self.device)
self._model = GBTWrapper(self.input_dim, self.output_dim, self.tree_struct, self.critic_optimizer, self.gbrl_params, self.verbose, self.device)
self._model.reset()
self._model.set_bias(self.bias)

Expand Down Expand Up @@ -764,7 +778,7 @@ def predict_target(self, observations: Union[np.ndarray, th.Tensor], tensor: boo
return self._model.predict(observations, requires_grad=False, stop_idx=max(n_trees - self.target_update_interval, 1), tensor=tensor)

def __copy__(self) -> "DiscreteCritic":
copy_ = DiscreteCritic(self.tree_struct.copy(), self.output_dim, self.critic_optimizer.copy(), self.gbrl_params, self.target_update_interval, self.bias, self.verbose, self.device)
copy_ = DiscreteCritic(self.tree_struct.copy(), self.input_dim, self.output_dim, self.critic_optimizer.copy(), self.gbrl_params, self.target_update_interval, self.bias, self.verbose, self.device)
if self._model is not None:
copy_._model = self._model.copy()
return copy_
Expand Down
Loading