3 changes: 3 additions & 0 deletions perception/auv_yolo_detector/.gitignore
@@ -0,0 +1,3 @@
/images
/bags
/venv
38 changes: 38 additions & 0 deletions perception/auv_yolo_detector/README.md
@@ -0,0 +1,38 @@
# YOLO Detection
## Overview
This package uses YOLO to detect SAM and the buoy independently of each other. It includes two trained models (*.pt* files): one for simulation and one for real-world scenarios.
Two different datasets were used, with ~450 labelled images in total. Access them [here](https://kth-my.sharepoint.com/:f:/g/personal/framir_ug_kth_se/EpHV7UF6nQVIsYwrSBDlYWkBR-Yv08Lia9hxuD-aqrMTJQ?e=cpmczE).
### Training
The sim model was trained on the sim dataset; that sim model was then used as the starting point for training the real model on the real dataset.
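For reference, a minimal sketch of this two-stage procedure with the standard Ultralytics API (base weights, dataset YAML names, and epoch counts here are hypothetical placeholders, not the exact settings used):
```
from ultralytics import YOLO

# Stage 1: train an OBB model on the simulation dataset
model = YOLO("yolov8n-obb.pt")                    # pretrained base weights
model.train(data="sim_dataset.yaml", epochs=100)

# Stage 2: continue from the sim weights on the real dataset
model = YOLO("runs/obb/train/weights/best.pt")
model.train(data="real_dataset.yaml", epochs=100)
```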

## Dependencies (dev versions)
- ROS2 Humble
- Ultralytics: 8.3.160

When installing *Ultralytics*, its dependencies (e.g. torch, opencv) are installed alongside it (see [here](https://docs.ultralytics.com/quickstart/) for more info).
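A quick way to verify the installation, per the Ultralytics quickstart:
```
import ultralytics
ultralytics.checks()
```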

## Launch the YOLO detector
```
ros2 launch auv_yolo_detector yolo_detector_launch.py
```
## New Topics
| Topic | Msg | Description |
| --- | ---| --- |
| /Quadrotor/image_annotated | Image | Image from camera with YOLO annotations (bounding boxes + probabilities) |
| /Quadrotor/pred_position/sam | PointStamped | Predicted SAM position in the map frame |
| /Quadrotor/pred_position/buoy | PointStamped | Predicted buoy position in the map frame |
---
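As a minimal sketch of consuming one of these topics (node and callback names are illustrative, not part of this package):
```
import rclpy
from rclpy.node import Node
from geometry_msgs.msg import PointStamped

class SamPositionListener(Node):
    def __init__(self):
        super().__init__('sam_position_listener')
        self.create_subscription(PointStamped, '/Quadrotor/pred_position/sam',
                                 self.callback, 10)

    def callback(self, msg: PointStamped):
        p = msg.point
        self.get_logger().info(
            f'SAM at ({p.x:.2f}, {p.y:.2f}, {p.z:.2f}) in {msg.header.frame_id}')

rclpy.init()
rclpy.spin(SamPositionListener())
```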

## Future work
- Convert bounding boxes + orientation to full pose estimates (not just position)
- Estimate pose/position correctly in the real scenario (not only in sim)
- Implement some kind of filter, or at least outlier removal (see the sketch below)
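
For the last item, a minimal sketch of a distance-based outlier gate (threshold and history length are hypothetical):
```
import numpy as np

def is_outlier(point, history, max_jump=2.0):
    """Reject a detection further than max_jump metres from
    the mean of the recent detection history."""
    if len(history) < 3:
        return False  # not enough history to judge
    mean = np.mean(np.asarray(history), axis=0)
    return np.linalg.norm(np.asarray(point) - mean) > max_jump
```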

## Outro
Don't forget to build and source the workspace:
```
colcon build --symlink-install --packages-select auv_yolo_detector
source install/setup.sh
```
## Maintainer
Francisco Miranda, framir@kth.se
Empty file.
214 changes: 214 additions & 0 deletions perception/auv_yolo_detector/auv_yolo_detector/auv_yolo_detector.py
@@ -0,0 +1,214 @@
#!/usr/bin/env -S venv/bin/python
import rclpy
from ultralytics import YOLO
from rclpy.node import Node
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge
from image_geometry import PinholeCameraModel
from pathlib import Path
import numpy as np
from geometry_msgs.msg import PointStamped, Pose, PoseStamped
from nav_msgs.msg import Odometry
from tf2_ros import Buffer, TransformListener
import cv2
import tf2_geometry_msgs


class YOLODetector(Node):
    def __init__(self, name='auv_yolo_detector',
                 allow_undeclared_parameters=True,
                 automatically_declare_parameters_from_overrides=True):
        # NOTE: the two flags above are accepted but not forwarded to
        # Node.__init__; all parameters are declared explicitly in get_params()
        super().__init__(name)
        self.get_params()

        # hardcoded camera intrinsics used for back-projection; the CameraInfo
        # subscription below fills self.camera_model, but this K stays fixed
self.K = np.array([[369.5, 0, 320],
[0, 415.69, 240],
[0, 0, 1]])
self.invK = np.linalg.inv(self.K)

# detection filtering params
self.sam_dim = [self.model_params['real_dimensions.sam.width'], self.model_params['real_dimensions.sam.height']]
self.cam_fov = self.model_params['camera.fov']
self.max_horizontal_vel = 1
self.max_vertical_vel = 1
self.prev_detections = None


# pubs, subs and tf2
self.create_subscription(msg_type=Image,
topic=self.model_params['topics.sub.raw_image'],
callback=self.image_callback,
qos_profile=10)
self.create_subscription(msg_type=CameraInfo,
topic=self.model_params['topics.sub.camera_info'],
callback=self.camera_info_callback,
qos_profile=10)
self.create_subscription(
msg_type = Odometry,
topic = self.model_params["topics.sub.drone_position"],
callback = self.drone_position_callback,
qos_profile= 10)
self.annotated_img_pub = self.create_publisher(msg_type=Image,
topic=self.model_params['topics.pub.annotated_image'],
qos_profile=10)
self.sam_position_pub = self.create_publisher(msg_type=PointStamped,
topic = self.model_params['topics.pub.predicted_position.sam'],
qos_profile=10)
self.buoy_position_pub = self.create_publisher(msg_type=PointStamped,
topic = self.model_params['topics.pub.predicted_position.buoy'],
qos_profile=10)
self.tf_buffer = Buffer()
self.tf_listener = TransformListener(self.tf_buffer, self)
self.drone_position = PointStamped()

        # models (yolo and camera); load the weights matching the current mode
        # (best_sim.pt / best_real.pt, shipped with the package)
        self.yolo_model = YOLO(self.model_params['model_path'] + '/best_' + self.model_params['mode'] + '.pt')
self.yolo_model.info()
self.camera_model = PinholeCameraModel()
self.bridge = CvBridge()

        # timer to drive periodic inference; note that create_timer takes a
        # period in seconds, so 'inference_frequency' effectively acts as a period
        self.image = None
        self.classify_timer = self.create_timer(self.model_params['inference_frequency'], self.classify_callback)


def classify_callback(self):
""" Infers with yolo model and publishes objects positions """
if self.image is not None:
cv_image = self.bridge.imgmsg_to_cv2(self.image, desired_encoding='bgr8')
results = self.yolo_model.predict(source = cv_image,
conf = self.model_params['confidence_threshold'],
save = False,
verbose = False)

            # publish detections; obb.xywhr holds oriented boxes in
            # [x_center, y_center, width, height, rotation] format
            num_detections = results[0].obb.xywhr.shape[0]
            self.get_logger().info(f'{num_detections} objects were detected')
            for i in range(num_detections):
                if results[0].obb.cls[i] == 0:  # class 0: SAM (buoy publishing not implemented yet)
self.sam_position_pub.publish(self.pixels2frame(results[0].obb.xywhr[i][0],
results[0].obb.xywhr[i][1],
self.drone_position.point.z,
self.model_params["frames.map"]))
self.get_logger().info('\n')

# publish annotated image as ros msg
im = results[0].plot()
ros_img = self.bridge.cv2_to_imgmsg(im, encoding = 'bgr8')
self.annotated_img_pub.publish(ros_img)

    def pixels2frame(self, u: float, v: float, Z: float, frame: str) -> PointStamped:
        """
        Applies the inverse camera matrix to obtain the object position in the
        camera frame and transforms it to the specified frame.
        """
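        # Pinhole back-projection: K^-1 [u, v, 1]^T is a ray at unit depth;
        # scaling it by Z (here the drone altitude) gives metric camera-frame
        # coordinates.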
camera_coord_norm = np.matmul(self.invK,np.array([u,v,1]))
camera_coord = camera_coord_norm*Z

pos = PointStamped()
pos.header.frame_id = self.model_params["frames.camera"]
pos.header.stamp = self.get_clock().now().to_msg()
pos.point.x = camera_coord[2] # x axis in camera frame is depth
pos.point.y = - camera_coord[0]
pos.point.z = camera_coord[1]

t = self.tf_buffer.lookup_transform(
target_frame = frame,
source_frame = pos.header.frame_id,
time = rclpy.time.Time())
#self.get_logger().info(f'In desired frame, position = {tf2_geometry_msgs.do_transform_point(pos, t).point.x,tf2_geometry_msgs.do_transform_point(pos, t).point.y}')

return tf2_geometry_msgs.do_transform_point(pos, t)

    def remove_outliers(self) -> bool:
        """
        TODO: implement simple outlier removal or a Kalman filter
        """
        if self.prev_detections is None or len(self.prev_detections) < 5:
            return True
        else:
            return False

def drone_position_callback(self, msg: Odometry):
""" Retrieve drone position (currently odometry gives in map_gt)"""
self.drone_position.point = msg.pose.pose.position
self.drone_position.header.stamp = self.get_clock().now().to_msg()
self.drone_position.header.frame_id = msg.header.frame_id

def image_callback(self, msg):
self.image = msg

def camera_info_callback(self, msg):
self.camera_model.fromCameraInfo(msg)

def get_params(self):
# Declare parameters
self.declare_parameter('mode', rclpy.Parameter.Type.STRING)
self.declare_parameter('inference_frequency', rclpy.Parameter.Type.DOUBLE)
self.declare_parameter('confidence_threshold', rclpy.Parameter.Type.DOUBLE)
self.declare_parameter('model_path', rclpy.Parameter.Type.STRING)

self.declare_parameter('camera.fov', rclpy.Parameter.Type.DOUBLE)

self.declare_parameter('real_dimensions.sam.width', rclpy.Parameter.Type.DOUBLE)
self.declare_parameter('real_dimensions.sam.height', rclpy.Parameter.Type.DOUBLE)

for mode in ['real', 'sim']:
self.declare_parameter(f'frames.{mode}.map', rclpy.Parameter.Type.STRING)
self.declare_parameter(f'frames.{mode}.quadrotor_odom', rclpy.Parameter.Type.STRING)
self.declare_parameter(f'frames.{mode}.sam_odom', rclpy.Parameter.Type.STRING)
self.declare_parameter(f'frames.{mode}.camera', rclpy.Parameter.Type.STRING)

self.declare_parameter(f'topics.sub.{mode}.raw_image', rclpy.Parameter.Type.STRING)
self.declare_parameter(f'topics.sub.{mode}.camera_info', rclpy.Parameter.Type.STRING)
self.declare_parameter(f'topics.sub.{mode}.drone_position', rclpy.Parameter.Type.STRING)

self.declare_parameter('topics.pub.annotated_image', rclpy.Parameter.Type.STRING)
self.declare_parameter('topics.pub.predicted_position.sam', rclpy.Parameter.Type.STRING)
self.declare_parameter('topics.pub.predicted_position.buoy', rclpy.Parameter.Type.STRING)

mode = self.get_parameter("mode").value
if mode not in ['real', 'sim']:
self.get_logger().warn(f"Invalid mode '{mode}' specified. Defaulting to 'sim'.")
mode = 'sim'

# retrieve params
self.model_params = {
"mode": self.get_parameter("mode").value,
"inference_frequency": self.get_parameter("inference_frequency").value,
"confidence_threshold": self.get_parameter("confidence_threshold").value,
"model_path": self.get_parameter("model_path").value,

"camera.fov": self.get_parameter("camera.fov").value,
"real_dimensions.sam.width": self.get_parameter("real_dimensions.sam.width").value,
"real_dimensions.sam.height": self.get_parameter("real_dimensions.sam.height").value,

"frames.map": self.get_parameter(f"frames.{mode}.map").value,
"frames.quadrotor_odom": self.get_parameter(f"frames.{mode}.quadrotor_odom").value,
"frames.sam_odom": self.get_parameter(f"frames.{mode}.sam_odom").value,
"frames.camera": self.get_parameter(f"frames.{mode}.camera").value,

"topics.sub.raw_image": self.get_parameter(f"topics.sub.{mode}.raw_image").value,
"topics.sub.camera_info": self.get_parameter(f"topics.sub.{mode}.camera_info").value,
"topics.sub.drone_position": self.get_parameter(f"topics.sub.{mode}.drone_position").value,

"topics.pub.annotated_image": self.get_parameter("topics.pub.annotated_image").value,
"topics.pub.predicted_position.sam": self.get_parameter("topics.pub.predicted_position.sam").value,
"topics.pub.predicted_position.buoy": self.get_parameter("topics.pub.predicted_position.buoy").value,
}


def main():
rclpy.init()
node = YOLODetector()

    try:
        while rclpy.ok():
            rclpy.spin_once(node)
    except KeyboardInterrupt:
        pass



if __name__ == '__main__':
main()
Binary file added perception/auv_yolo_detector/best_real.pt
Binary file not shown.
Binary file added perception/auv_yolo_detector/best_sim.pt
Binary file not shown.
43 changes: 43 additions & 0 deletions perception/auv_yolo_detector/config/params.yaml
@@ -0,0 +1,43 @@
/**/auv_yolo_detector:
ros__parameters:
mode: "sim"
inference_frequency: 1.0
confidence_threshold: 0.4
    model_path: 'None'  # placeholder; overridden by the 'model_path' launch argument

camera:
fov: 82.0

real_dimensions:
sam:
width: 2.0
height: 16.0

frames:
real:
map: "map_gt"
quadrotor_odom: "Quadrotor/odom"
sam_odom: "sam_auv_v1/odom"
camera: "Quadrotor/camera"
sim:
map: "map_gt"
quadrotor_odom: "Quadrotor/odom"
sam_odom: "sam_auv_v1/odom"
camera: "Quadrotor/camera"

topics:
sub:
real:
raw_image: '/Quadrotor/gimbal_camera/image_raw'
camera_info: '/Quadrotor/gimbal_camera/camera_info'
          drone_position: '/Quadrotor/smarc/odom'
sim:
raw_image: /Quadrotor/core/fpcamera/image
camera_info: /Quadrotor/core/fpcamera/cam_info
          drone_position: /Quadrotor/odom_gt  # alternative: /Quadrotor/smarc/odom

pub:
annotated_image: 'image_annotated'
predicted_position:
sam: '/Quadrotor/pred_position/sam'
buoy: '/Quadrotor/pred_position/buoy'
47 changes: 47 additions & 0 deletions perception/auv_yolo_detector/launch/yolo_detector_launch.py
@@ -0,0 +1,47 @@
from launch import LaunchDescription
from launch.actions import DeclareLaunchArgument
from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
from launch_ros.actions import Node
from launch_ros.substitutions import FindPackageShare

def generate_launch_description():

# ---- frequently changed params as launch arguments ...
mode_arg = DeclareLaunchArgument('mode', default_value='sim')
inference_frequency_arg = DeclareLaunchArgument('inference_frequency', default_value='0.5')
    # NOTE: this default path is machine-specific; override 'model_path' when launching
    model_path_arg = DeclareLaunchArgument('model_path',
                default_value = '/home/fm/KTH_Courses/ResearchProject/RProj_GitRepoFork/colcon_ws/src/smarc2/perception/auv_yolo_detector')
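    # Example override at launch time (hypothetical path):
    #   ros2 launch auv_yolo_detector yolo_detector_launch.py mode:=real model_path:=/path/to/auv_yolo_detector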

# ... and as node params
mode = LaunchConfiguration('mode')
inference_frequency = LaunchConfiguration('inference_frequency')
model_path = LaunchConfiguration('model_path')

# ---- rarely changed params from yaml (yaml has every parameter but launch arguments will override)
config_file = PathJoinSubstitution([
FindPackageShare('auv_yolo_detector'),
'config',
'params.yaml'
])

detector_node = Node(
package='auv_yolo_detector',
executable='auv_yolo_detector',
namespace='Quadrotor',
output='screen',
parameters=[
config_file,
{
'mode': mode,
'inference_frequency': inference_frequency,
'model_path': model_path
}
]
)

return LaunchDescription([
mode_arg,
inference_frequency_arg,
model_path_arg,
detector_node
])
18 changes: 18 additions & 0 deletions perception/auv_yolo_detector/package.xml
@@ -0,0 +1,18 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>auv_yolo_detector</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="franmira2003@gmail.com">fm</maintainer>
  <license>TODO: License declaration</license>

  <!-- runtime dependencies used by the node (ultralytics is installed via pip; see README) -->
  <exec_depend>rclpy</exec_depend>
  <exec_depend>sensor_msgs</exec_depend>
  <exec_depend>geometry_msgs</exec_depend>
  <exec_depend>nav_msgs</exec_depend>
  <exec_depend>cv_bridge</exec_depend>
  <exec_depend>image_geometry</exec_depend>
  <exec_depend>tf2_ros</exec_depend>
  <exec_depend>tf2_geometry_msgs</exec_depend>

  <test_depend>ament_copyright</test_depend>
  <test_depend>ament_flake8</test_depend>
  <test_depend>ament_pep257</test_depend>
  <test_depend>python3-pytest</test_depend>

<export>
<build_type>ament_python</build_type>
</export>
</package>
Empty file.
5 changes: 5 additions & 0 deletions perception/auv_yolo_detector/setup.cfg
@@ -0,0 +1,5 @@
[develop]
script_dir=$base/lib/auv_yolo_detector
[install]
install_scripts=$base/lib/auv_yolo_detector
