18 changes: 12 additions & 6 deletions README.md
@@ -2,11 +2,15 @@

## Overview

The UFGSim package provides a ROS 2 node for running semantic segmentation inference using the RIUNet model.
The UFGSim package provides a ROS 2 node for running semantic segmentation inference using a pre-trained model.

Current models:
- RIUNet
- MVLidarNet

## Features

- **Semantic Segmentation**: The node uses the RIUNet model for semantic segmentation on point cloud data.
- **Semantic Segmentation**: The node runs the selected model (currently RIUNet or MVLidarNet) for semantic segmentation on point cloud data.
- **Customizable Parameters**: You can adjust various parameters such as the field of view, image dimensions, and paths to the model and configuration files.
- **ROS 2 Integration**: The node is fully integrated with ROS 2, making it easy to incorporate into larger robotics systems.

@@ -64,13 +68,16 @@ You can run the node directly using `ros2 run` or via a launch file.
To run the node directly:

```bash
ros2 run ufgsim_pkg riunet_inf --ros-args -p model_path:=/path/to/your/model.ckpt
ros2 run ufgsim_pkg model_sim_inf --ros-args -p model_path:=/path/to/your/model.ckpt
```
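
With a sensor or bag publishing on `/velodyne_points`, you can sanity-check the node's output topics (topic names taken from the node source):

```bash
# The node subscribes to /velodyne_points and publishes its results here
ros2 topic echo /segmentation_model --no-arr       # colored segmentation image
ros2 topic echo /segmentation_pc2_model --no-arr   # point cloud with predicted labels
```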

### Parameters

The following parameters can be set when running the node:

- **model_name**: The name of the model to use for inference. Default is `RIUNet`.
- **in_channels**: The number of input channels. Default is `4`.
- **n_classes**: The number of classes in the data. Default is `13`.
- **fov_up**: The field of view upwards in degrees. Default is `15.0`.
- **fov_down**: The field of view downwards in degrees. Default is `-15.0`.
- **width**: The width of the projection image. Default is `440`.
@@ -87,10 +94,9 @@ You can also use a launch file to set parameters and run the node. This is particularly useful when you need to set several parameters at once.
To run the node using the launch file, execute the following command:

```bash
ros2 launch ufgsim_pkg riunet_inf.launch.py model_path:=/path/to/your/model.ckpt
ros2 launch ufgsim_pkg model_sim_inf.launch.py model_path:=/path/to/your/model.ckpt
```
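
For reference, here is a minimal sketch of what such a launch file could look like; the parameter names match those declared by the node, but the file shipped with the package may differ:

```python
# Hypothetical sketch of model_sim_inf.launch.py, not the packaged file.
# Parameter names and defaults mirror those declared by the node.
from launch import LaunchDescription
from launch.actions import DeclareLaunchArgument
from launch.substitutions import LaunchConfiguration
from launch_ros.actions import Node

def generate_launch_description():
    return LaunchDescription([
        DeclareLaunchArgument('model_path', default_value=''),
        Node(
            package='ufgsim_pkg',
            executable='model_sim_inf',
            name='model_sim',
            parameters=[{
                'model_name': 'RIUNet',
                'fov_up': 15.0,
                'fov_down': -15.0,
                'width': 440,
                'height': 16,
                'model_path': LaunchConfiguration('model_path'),
            }],
        ),
    ])
```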

## License

This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.

3 changes: 2 additions & 1 deletion setup.py
@@ -29,7 +29,8 @@
tests_require=['pytest'],
entry_points={
'console_scripts': [
'riunet_inf = ufgsim_pkg.riunet_inf:main'
'riunet_inf = ufgsim_pkg.riunet_inf:main',
Collaborator:
If you removed the riunet_inf.py file, remember to remove this line as well. Otherwise it will be installed as an executable, even though it does not exist.

'model_sim_inf = ufgsim_pkg.model_sim_inf:main'
],
},
)
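
Per the comment above, a sketch of the corrected `entry_points`, assuming `riunet_inf.py` really was removed in this PR:

```python
entry_points={
    'console_scripts': [
        # 'riunet_inf' dropped together with ufgsim_pkg/riunet_inf.py
        'model_sim_inf = ufgsim_pkg.model_sim_inf:main',
    ],
},
```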
Binary file added ufgsim_pkg/__pycache__/__init__.cpython-310.pyc
Collaborator:
Be careful not to push cache files.

Binary file added ufgsim_pkg/__pycache__/riunet_inf.cpython-310.pyc
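
One way to act on that note, sketched in bash (assumes you are at the repository root):

```bash
# Stop tracking the committed bytecode caches and ignore them from now on
git rm -r --cached ufgsim_pkg/__pycache__
printf '__pycache__/\n*.pyc\n' >> .gitignore
git commit -m "Remove committed __pycache__ and ignore bytecode files"
```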
139 changes: 96 additions & 43 deletions ufgsim_pkg/riunet_inf.py → ufgsim_pkg/model_sim_inf.py
@@ -1,59 +1,70 @@
from colorcloud.UFGsim2024infufg import SemanticSegmentationSimLDM, ProjectionSimTransform, ProjectionSimVizTransform
from colorcloud.biasutti2019riu import SemanticSegmentationTask, RIUNet

from colorcloud.biasutti2019riu import RIUNet
from colorcloud.biasutti2019riu import SemanticSegmentationTask as RIUTask
from colorcloud.chen2020mvlidarnet import MVLidarNet
from colorcloud.chen2020mvlidarnet import SemanticSegmentationTask as MVLidarTask

from colorcloud.behley2019iccv import SphericalProjection
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image, PointCloud2
from cv_bridge import CvBridge
from ros2_numpy.point_cloud2 import pointcloud2_to_array
from numpy.lib.recfunctions import structured_to_unstructured
from ros2_numpy.point_cloud2 import pointcloud2_to_array, array_to_pointcloud2
from numpy.lib.recfunctions import structured_to_unstructured, unstructured_to_structured
import cv2
import numpy as np
import torch
from torch.nn import CrossEntropyLoss
import yaml
import os
from ament_index_python.packages import get_package_share_directory

class RIUNetInferencer(Node):
class SIMModelInferencer(Node):
def __init__(self):
super().__init__('riunet_sim')
super().__init__('model_sim')
# node parameters (overridable via --ros-args or a launch file)
self.declare_parameter('model_name', 'RIUNet')
self.declare_parameter('in_channels', 4)
self.declare_parameter('n_classes', 13)
self.declare_parameter('fov_up', 15.0)
self.declare_parameter('fov_down', -15.0)
self.declare_parameter('width', 440)
self.declare_parameter('height', 16)
self.declare_parameter('yaml_path', os.path.join(get_package_share_directory('ufgsim_pkg'),'config','ufg-sim.yaml'))
self.declare_parameter('model_path', '')


model_name = self.get_parameter('model_name').get_parameter_value().string_value
in_channels = self.get_parameter('in_channels').get_parameter_value().integer_value
n_classes = self.get_parameter('n_classes').get_parameter_value().integer_value
fov_up = self.get_parameter('fov_up').get_parameter_value().double_value
fov_down = self.get_parameter('fov_down').get_parameter_value().double_value
width = self.get_parameter('width').get_parameter_value().integer_value
height = self.get_parameter('height').get_parameter_value().integer_value
yaml_path = self.get_parameter('yaml_path').get_parameter_value().string_value
self.model_path = self.get_parameter('model_path').get_parameter_value().string_value


self.spherical_projection = SphericalProjection(fov_up, fov_down, width, height)

self.projection_transform = ProjectionSimTransform(self.spherical_projection)

self.subscription = self.create_subscription(
PointCloud2,
'/velodyne_points',
self.inference_callback,
10
)
self.publisher = self.create_publisher(Image, '/segmentation_riunet', 10)
self.publisher_stage1 = self.create_publisher(Image, '/segmentation_model', 10)
self.publisher_stage2 = self.create_publisher(PointCloud2, '/segmentation_pc2_model', 10)
self.bridge = CvBridge()
self.model = RIUNet(in_channels=4, hidden_channels=(64, 128, 256, 512), n_classes=13)
self.model, self.task_class = self.load_model(model_name, in_channels, n_classes)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with open(yaml_path, 'r') as file:
metadata = yaml.safe_load(file)

self.learning_map = metadata['learning_map']
max_key = sorted(self.learning_map.keys())[-1]
self.learning_map_np = np.zeros((max_key+1,), dtype=int)
@@ -70,69 +81,111 @@ def __init__(self):
self.color_map_rgb_np = np.zeros((max_key+1,3))
for k,v in self.color_map_bgr.items():
self.color_map_rgb_np[k] = np.array(v[::-1], np.float32)

self.projectionviz_transform = ProjectionSimVizTransform(self.color_map_rgb_np, self.learning_map_inv_np)


def load_model(self, model_name, in_channels, n_classes):
"""
Dynamically load the model class based on the model_name parameter.
"""
if model_name == 'RIUNet':
model = RIUNet(in_channels=in_channels, hidden_channels=(64, 128, 256, 512), n_classes=n_classes)
task_class = RIUTask
elif model_name == 'MVLidarNet':
model = MVLidarNet(in_channels=in_channels, n_classes=n_classes)
task_class = MVLidarTask
else:
raise ValueError(f"Model {model_name} not supported")

return model, task_class

def inference_callback(self, msg):

pointcloud = structured_to_unstructured(pointcloud2_to_array(msg))
pointcloud = pointcloud[:,:3]

item = {
'frame': pointcloud,
'label': None,
'mask': None
}

item = self.projection_transform(item)
item['frame'] = (item['frame'] * 255).astype(np.uint8) # TODO: verify this uint8 scaling


ldm = SemanticSegmentationSimLDM()
ldm.setup('predict')
epoch_steps = len(ldm.predict_dataloader())
n_epochs = 25
model = self.model
loss_fn = CrossEntropyLoss(reduction='none')
viz_tfm = ldm.viz_tfm
total_steps = n_epochs*epoch_steps

loaded_model = SemanticSegmentationTask.load_from_checkpoint(
self.model_path, model=model, loss_fn=loss_fn, viz_tfm=viz_tfm, total_steps=total_steps
)

loaded_model.to(self.device)
loaded_model.eval()

# ldm = SemanticSegmentationSimLDM()
# ldm.setup('predict')
# epoch_steps = len(ldm.predict_dataloader())
# n_epochs = 25
# model = self.model
# semantic_task = self.task_class
# loss_fn = CrossEntropyLoss(reduction='none')
# viz_tfm = ldm.viz_tfm
# total_steps = n_epochs*epoch_steps

# loaded_model = semantic_task.load_from_checkpoint(
# self.model_path, model=model, loss_fn=loss_fn, viz_tfm=viz_tfm, total_steps=total_steps
# )

# loaded_model.to(self.device)
# loaded_model.eval()
# Load the checkpoint and copy its weights into the bare model
checkpoint = torch.load(self.model_path, weights_only=True)
self.model.load_state_dict(checkpoint['state_dict'], strict=False)
self.model.to(self.device)
self.model.eval()

with torch.no_grad():
frame = item['frame']
frame = np.expand_dims(frame, 0)
frame = torch.from_numpy(frame).to(self.device)
frame = frame.permute(0, 3, 1, 2).float() # (N, H, W, C) -> (N, C, H, W)
y_hat = loaded_model(frame).squeeze()
y_hat = self.model(frame).squeeze()
argmax = torch.argmax(y_hat, dim=0)
pred = np.array(argmax.cpu(), dtype=np.uint8)

item['label'] = pred
colored_pred = self.projectionviz_transform(item)

colored_pred_uint8 = colored_pred['label'].astype(np.uint8)

inferred_frame_msg = self.bridge.cv2_to_imgmsg(colored_pred_uint8, encoding='passthrough')
self.publisher.publish(inferred_frame_msg)

self.publisher_stage1.publish(inferred_frame_msg)
print("publicando 1")
# stage 2


frame = item['frame']
frame = np.transpose(frame, (2,0,1))
frame = frame[:3,:,:]
pred_expanded = np.expand_dims(pred, axis=0)
frame_with_pred = np.concatenate((frame, pred_expanded))
frame_with_pred = np.transpose(frame_with_pred, (1,2,0))
#frame_with_pred = frame_with_pred.reshape(-1)

# Need to structure the array for array_to_pointcloud2
dtype = np.dtype([('stage2dtype1', 'i1'), ('stage2dtype2', 'i1'), ('stage2dtype3', 'i1'), ('stage2dtype4', 'i1')])
data_reshaped = frame_with_pred.reshape(-1, 4)
frame_with_pred_structured = unstructured_to_structured(data_reshaped, dtype)
frame_with_pred_structured = frame_with_pred_structured.reshape(16,440) # hardcoded (height, width); matches the default parameters
inferred_point_cloud_msg = array_to_pointcloud2(frame_with_pred_structured)
self.publisher_stage2.publish(inferred_point_cloud_msg)
print("publicando 2")
def main(args=None):
rclpy.init(args=args)
riunet_inferencer = RIUNetInferencer()
model_inferencer = SIMModelInferencer()

try:
rclpy.spin(riunet_inferencer)
rclpy.spin(model_inferencer)
except KeyboardInterrupt:
pass
finally:
riunet_inferencer.destroy_node()
model_inferencer.destroy_node()
rclpy.shutdown()

if __name__ == '__main__':
main()