18 changes: 12 additions & 6 deletions README.md
@@ -2,11 +2,15 @@

## Overview

The UFGSim package provides a ROS 2 node for running semantic segmentation inference using the RIUNet model.
The UFGSim package provides a ROS 2 node for running semantic segmentation inference using a pre-trained model.

Current models:
- RIUNet
- MVLidarNet

## Features

- **Semantic Segmentation**: The node uses the RIUNet model for semantic segmentation on point cloud data.
- **Semantic Segmentation**: The node runs the selected model (currently RIUNet or MVLidarNet) for semantic segmentation on point cloud data.
- **Customizable Parameters**: You can adjust various parameters such as the field of view, image dimensions, and paths to the model and configuration files.
- **ROS 2 Integration**: The node is fully integrated with ROS 2, making it easy to incorporate into larger robotics systems.

@@ -64,13 +68,16 @@ You can run the node directly using `ros2 run` or via a launch file.
To run the node directly:

```bash
ros2 run ufgsim_pkg riunet_inf --ros-args -p model_path:=/path/to/your/model.ckpt
ros2 run ufgsim_pkg model_sim_inf --ros-args -p model_path:=/path/to/your/model.ckpt
```
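
With a sensor or bag publishing on `/velodyne_points`, you can sanity-check the node's output topics (topic names taken from the node source):

```bash
# The node subscribes to /velodyne_points and publishes its results here
ros2 topic echo /segmentation_model --no-arr       # colored segmentation image
ros2 topic echo /segmentation_pc2_model --no-arr   # point cloud with predicted labels
```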

### Parameters

The following parameters can be set when running the node:

- **model_name**: The name of the model to use for inference. Default is `RIUNet`.
- **in_channels**: The number of input channels. Default is `4`.
- **n_classes**: The number of classes in the data. Default is `13`.
- **fov_up**: The field of view upwards in degrees. Default is `15.0`.
- **fov_down**: The field of view downwards in degrees. Default is `-15.0`.
- **width**: The width of the projection image. Default is `440`.
@@ -87,10 +94,9 @@ You can also use a launch file to set parameters and run the node. This is particularly useful when you need to set several parameters at once.
To run the node using the launch file, execute the following command:

```bash
ros2 launch ufgsim_pkg riunet_inf.launch.py model_path:=/path/to/your/model.ckpt
ros2 launch ufgsim_pkg model_sim_inf.launch.py model_path:=/path/to/your/model.ckpt
```
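
For reference, here is a minimal sketch of what such a launch file could look like; the parameter names match those declared by the node, but the file shipped with the package may differ:

```python
# Hypothetical sketch of model_sim_inf.launch.py, not the packaged file.
# Parameter names and defaults mirror those declared by the node.
from launch import LaunchDescription
from launch.actions import DeclareLaunchArgument
from launch.substitutions import LaunchConfiguration
from launch_ros.actions import Node

def generate_launch_description():
    return LaunchDescription([
        DeclareLaunchArgument('model_path', default_value=''),
        Node(
            package='ufgsim_pkg',
            executable='model_sim_inf',
            name='model_sim',
            parameters=[{
                'model_name': 'RIUNet',
                'fov_up': 15.0,
                'fov_down': -15.0,
                'width': 440,
                'height': 16,
                'model_path': LaunchConfiguration('model_path'),
            }],
        ),
    ])
```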

## License

This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.

3 changes: 2 additions & 1 deletion setup.py
@@ -29,7 +29,8 @@
tests_require=['pytest'],
entry_points={
'console_scripts': [
'riunet_inf = ufgsim_pkg.riunet_inf:main'
'riunet_inf = ufgsim_pkg.riunet_inf:main',
Collaborator:
If you removed the riunet_inf.py file, remember to remove this line as well. Otherwise it will be installed as an executable, even though it does not exist.

'model_sim_inf = ufgsim_pkg.model_sim_inf:main'
],
},
)
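
Per the comment above, a sketch of the corrected `entry_points`, assuming `riunet_inf.py` really was removed in this PR:

```python
entry_points={
    'console_scripts': [
        # 'riunet_inf' dropped together with ufgsim_pkg/riunet_inf.py
        'model_sim_inf = ufgsim_pkg.model_sim_inf:main',
    ],
},
```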
Binary file added ufgsim_pkg/__pycache__/__init__.cpython-310.pyc
Collaborator:
Be careful not to push cache files.

Binary file added ufgsim_pkg/__pycache__/riunet_inf.cpython-310.pyc
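
One way to act on that note, sketched in bash (assumes you are at the repository root):

```bash
# Stop tracking the committed bytecode caches and ignore them from now on
git rm -r --cached ufgsim_pkg/__pycache__
printf '__pycache__/\n*.pyc\n' >> .gitignore
git commit -m "Remove committed __pycache__ and ignore bytecode files"
```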
139 changes: 96 additions & 43 deletions ufgsim_pkg/riunet_inf.py → ufgsim_pkg/model_sim_inf.py
@@ -1,59 +1,70 @@
from colorcloud.UFGsim2024infufg import SemanticSegmentationSimLDM, ProjectionSimTransform, ProjectionSimVizTransform
from colorcloud.biasutti2019riu import SemanticSegmentationTask, RIUNet

from colorcloud.biasutti2019riu import RIUNet
from colorcloud.biasutti2019riu import SemanticSegmentationTask as RIUTask
from colorcloud.chen2020mvlidarnet import MVLidarNet
from colorcloud.chen2020mvlidarnet import SemanticSegmentationTask as MVLidarTask

from colorcloud.behley2019iccv import SphericalProjection
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image, PointCloud2
from cv_bridge import CvBridge
from ros2_numpy.point_cloud2 import pointcloud2_to_array
from numpy.lib.recfunctions import structured_to_unstructured
from ros2_numpy.point_cloud2 import pointcloud2_to_array, array_to_pointcloud2
from numpy.lib.recfunctions import structured_to_unstructured, unstructured_to_structured
import cv2
import numpy as np
import torch
from torch.nn import CrossEntropyLoss
import yaml
import os
from ament_index_python.packages import get_package_share_directory

class RIUNetInferencer(Node):
class SIMModelInferencer(Node):
def __init__(self):
super().__init__('riunet_sim')
super().__init__('model_sim')
# node parameters (overridable via --ros-args or a launch file)
self.declare_parameter('model_name', 'RIUNet')
self.declare_parameter('in_channels', 4)
self.declare_parameter('n_classes', 13)
self.declare_parameter('fov_up', 15.0)
self.declare_parameter('fov_down', -15.0)
self.declare_parameter('width', 440)
self.declare_parameter('height', 16)
self.declare_parameter('yaml_path', os.path.join(get_package_share_directory('ufgsim_pkg'),'config','ufg-sim.yaml'))
self.declare_parameter('model_path', '')


model_name = self.get_parameter('model_name').get_parameter_value().string_value
in_channels = self.get_parameter('in_channels').get_parameter_value().integer_value
n_classes = self.get_parameter('n_classes').get_parameter_value().integer_value
fov_up = self.get_parameter('fov_up').get_parameter_value().double_value
fov_down = self.get_parameter('fov_down').get_parameter_value().double_value
width = self.get_parameter('width').get_parameter_value().integer_value
height = self.get_parameter('height').get_parameter_value().integer_value
yaml_path = self.get_parameter('yaml_path').get_parameter_value().string_value
self.model_path = self.get_parameter('model_path').get_parameter_value().string_value


self.spherical_projection = SphericalProjection(fov_up, fov_down, width, height)

self.projection_transform = ProjectionSimTransform(self.spherical_projection)

self.subscription = self.create_subscription(
PointCloud2,
'/velodyne_points',
self.inference_callback,
10
)
self.publisher = self.create_publisher(Image, '/segmentation_riunet', 10)
self.publisher_stage1 = self.create_publisher(Image, '/segmentation_model', 10)
self.publisher_stage2 = self.create_publisher(PointCloud2, '/segmentation_pc2_model', 10)
self.bridge = CvBridge()
self.model = RIUNet(in_channels=4, hidden_channels=(64, 128, 256, 512), n_classes=13)
self.model, self.task_class = self.load_model(model_name, in_channels, n_classes)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with open(yaml_path, 'r') as file:
metadata = yaml.safe_load(file)

self.learning_map = metadata['learning_map']
max_key = sorted(self.learning_map.keys())[-1]
self.learning_map_np = np.zeros((max_key+1,), dtype=int)
@@ -70,69 +81,111 @@ def __init__(self):
self.color_map_rgb_np = np.zeros((max_key+1,3))
for k,v in self.color_map_bgr.items():
self.color_map_rgb_np[k] = np.array(v[::-1], np.float32)

self.projectionviz_transform = ProjectionSimVizTransform(self.color_map_rgb_np, self.learning_map_inv_np)


def load_model(self, model_name, in_channels, n_classes):
"""
Dynamically load the model class based on the model_name parameter.
"""
if model_name == 'RIUNet':
model = RIUNet(in_channels=in_channels, hidden_channels=(64, 128, 256, 512), n_classes=n_classes)
task_class = RIUTask
elif model_name == 'MVLidarNet':
model = MVLidarNet(in_channels=in_channels, n_classes=n_classes)
task_class = MVLidarTask
else:
raise ValueError(f"Model {model_name} not supported")

return model, task_class

def inference_callback(self, msg):

pointcloud = structured_to_unstructured(pointcloud2_to_array(msg))
pointcloud = pointcloud[:,:3]

item = {
'frame': pointcloud,
'label': None,
'mask': None
}

item = self.projection_transform(item)
item['frame'] = (item['frame'] * 255).astype(np.uint8) # TODO: verify this uint8 scaling


ldm = SemanticSegmentationSimLDM()
ldm.setup('predict')
epoch_steps = len(ldm.predict_dataloader())
n_epochs = 25
model = self.model
loss_fn = CrossEntropyLoss(reduction='none')
viz_tfm = ldm.viz_tfm
total_steps = n_epochs*epoch_steps

loaded_model = SemanticSegmentationTask.load_from_checkpoint(
self.model_path, model=model, loss_fn=loss_fn, viz_tfm=viz_tfm, total_steps=total_steps
)

loaded_model.to(self.device)
loaded_model.eval()

# ldm = SemanticSegmentationSimLDM()
# ldm.setup('predict')
# epoch_steps = len(ldm.predict_dataloader())
# n_epochs = 25
# model = self.model
# semantic_task = self.task_class
# loss_fn = CrossEntropyLoss(reduction='none')
# viz_tfm = ldm.viz_tfm
# total_steps = n_epochs*epoch_steps

# loaded_model = semantic_task.load_from_checkpoint(
# self.model_path, model=model, loss_fn=loss_fn, viz_tfm=viz_tfm, total_steps=total_steps
# )

# loaded_model.to(self.device)
# loaded_model.eval()
# Load the checkpoint and copy its weights into the bare model
checkpoint = torch.load(self.model_path, weights_only=True)
self.model.load_state_dict(checkpoint['state_dict'], strict=False)
self.model.to(self.device)
self.model.eval()

with torch.no_grad():
frame = item['frame']
frame = np.expand_dims(frame, 0)
frame = torch.from_numpy(frame).to(self.device)
frame = frame.permute(0, 3, 1, 2).float() # (N, H, W, C) -> (N, C, H, W)
y_hat = loaded_model(frame).squeeze()
y_hat = self.model(frame).squeeze()
argmax = torch.argmax(y_hat, dim=0)
pred = np.array(argmax.cpu(), dtype=np.uint8)

item['label'] = pred
colored_pred = self.projectionviz_transform(item)

colored_pred_uint8 = colored_pred['label'].astype(np.uint8)

inferred_frame_msg = self.bridge.cv2_to_imgmsg(colored_pred_uint8, encoding='passthrough')
self.publisher.publish(inferred_frame_msg)

self.publisher_stage1.publish(inferred_frame_msg)
print("publicando 1")
# stage 2


frame = item['frame']
frame = np.transpose(frame, (2,0,1))
frame = frame[:3,:,:]
pred_expanded = np.expand_dims(pred, axis=0)
frame_with_pred = np.concatenate((frame, pred_expanded))
frame_with_pred = np.transpose(frame_with_pred, (1,2,0))
#frame_with_pred = frame_with_pred.reshape(-1)

# Need to structure the array for array_to_pointcloud2
dtype = np.dtype([('stage2dtype1', 'i1'), ('stage2dtype2', 'i1'), ('stage2dtype3', 'i1'), ('stage2dtype4', 'i1')])
data_reshaped = frame_with_pred.reshape(-1, 4)
frame_with_pred_structured = unstructured_to_structured(data_reshaped, dtype)
frame_with_pred_structured = frame_with_pred_structured.reshape(16,440) # hardcoded (height, width); matches the default parameters
inferred_point_cloud_msg = array_to_pointcloud2(frame_with_pred_structured)
self.publisher_stage2.publish(inferred_point_cloud_msg)
print("publicando 2")
def main(args=None):
rclpy.init(args=args)
riunet_inferencer = RIUNetInferencer()
model_inferencer = SIMModelInferencer()

try:
rclpy.spin(riunet_inferencer)
rclpy.spin(model_inferencer)
except KeyboardInterrupt:
pass
finally:
riunet_inferencer.destroy_node()
model_inferencer.destroy_node()
rclpy.shutdown()

if __name__ == '__main__':
main()