Skip to content

[Kubectl-plugin] ray session --kill-all #3422

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion kubectl-plugin/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/onsi/ginkgo/v2 v2.22.1
github.com/onsi/gomega v1.36.2
github.com/ray-project/kuberay/ray-operator v0.0.0
github.com/shirou/gopsutil/v4 v4.25.3
github.com/spf13/cobra v1.8.1
github.com/spf13/pflag v1.0.6
github.com/stretchr/testify v1.10.0
Expand All @@ -29,12 +30,14 @@ require (
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/chai2010/gettext-go v1.0.3 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/ebitengine/purego v0.8.2 // indirect
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
github.com/fatih/camelcase v1.0.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-errors/errors v1.5.1 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
Expand All @@ -43,7 +46,7 @@ require (
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
github.com/google/uuid v1.6.0 // indirect
Expand All @@ -53,6 +56,7 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/moby/spdystream v0.5.0 // indirect
Expand All @@ -65,9 +69,13 @@ require (
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/tklauser/go-sysconf v0.3.12 // indirect
github.com/tklauser/numcpus v0.6.1 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/oauth2 v0.26.0 // indirect
golang.org/x/sync v0.11.0 // indirect
Expand Down
25 changes: 23 additions & 2 deletions kubectl-plugin/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 80 additions & 7 deletions kubectl-plugin/pkg/cmd/session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@ package session
import (
"context"
"fmt"
"os"
"os/exec"
"strings"
"sync"
"time"

"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client"
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/completion"
"github.com/shirou/gopsutil/v4/process"
"github.com/spf13/cobra"
"k8s.io/cli-runtime/pkg/genericiooptions"
cmdutil "k8s.io/kubectl/pkg/cmd/util"
"k8s.io/kubectl/pkg/util/templates"

"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client"
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/completion"
)

type appPort struct {
Expand All @@ -30,9 +33,13 @@ type SessionOptions struct {
ResourceName string
namespace string
Verbose bool
killAll bool
}

const reconnectDelay = 3 * time.Second
const (
reconnectDelay = 3 * time.Second
raySessionCommand = "kubectl-ray session"
)

var (
dashboardPort = appPort{
Expand Down Expand Up @@ -68,6 +75,9 @@ var (

# Forward local ports to the Ray cluster used for the RayService resource
kubectl ray session rayservice/my-rayservice

# Kill all existing Ray sessions started by kubectl-ray session command
kubectl ray session --kill-all
`)
)

Expand All @@ -87,8 +97,8 @@ func NewSessionCommand(cmdFactory cmdutil.Factory, streams genericiooptions.IOSt
Long: sessionLong,
Example: sessionExample,
Args: func(cmd *cobra.Command, args []string) error {
if len(args) != 1 {
return cmdutil.UsageErrorf(cmd, "accepts 1 arg, received %d\n%s", len(args), cmd.Use)
if len(args) > 1 {
return cmdutil.UsageErrorf(cmd, "accepts at most 1 arg, received %d\n%s", len(args), cmd.Use)
}
return nil
},
Expand All @@ -102,11 +112,22 @@ func NewSessionCommand(cmdFactory cmdutil.Factory, streams genericiooptions.IOSt
}

cmd.Flags().BoolVarP(&options.Verbose, "verbose", "v", false, "verbose output")

cmd.Flags().BoolVarP(&options.killAll, "kill-all", "", false, "kill all existing Ray sessions started by kubectl-ray session command")
return cmd
}

func (options *SessionOptions) Complete(cmd *cobra.Command, args []string) error {
if options.killAll {
if len(args) > 0 {
return cmdutil.UsageErrorf(cmd, "accepts no args when killAll flag is set")
}
return nil
}

if len(args) == 0 {
return cmdutil.UsageErrorf(cmd, "killAll flag is not set, but no args provided")
}

Comment on lines +120 to +130
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Create another function called Validate and put the validation log there. You can use other files for reference. The order is Complete, Validate, Run.

namespace, err := cmd.Flags().GetString("namespace")
if err != nil {
return fmt.Errorf("failed to get namespace: %w", err)
Expand Down Expand Up @@ -153,6 +174,16 @@ func (options *SessionOptions) Complete(cmd *cobra.Command, args []string) error
}

func (options *SessionOptions) Run(ctx context.Context, factory cmdutil.Factory) error {
if options.killAll {
if options.Verbose {
fmt.Println("killAll flag is set, killing all existing Ray sessions...")
}
if err := killAllRaySessions(ctx, options.Verbose); err != nil {
return fmt.Errorf("failed to kill all ray sessions: %w", err)
}
return nil
}

k8sClient, err := client.NewClient(factory)
if err != nil {
return fmt.Errorf("failed to create client: %w", err)
Expand Down Expand Up @@ -216,3 +247,45 @@ func (options *SessionOptions) Run(ctx context.Context, factory cmdutil.Factory)
wg.Wait()
return nil
}

func killAllRaySessions(ctx context.Context, verbose bool) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test for this function?

procs, err := process.Processes()
if err != nil {
return fmt.Errorf("failed to get processes: %w", err)
}

for _, p := range procs {
cmdline, err := p.CmdlineWithContext(ctx)
if err != nil {
// Skip the process if we can't get its command line. It might be permission issue.
continue
}
if strings.Contains(cmdline, raySessionCommand) && int(p.Pid) != os.Getpid() {
if verbose {
fmt.Printf("Found ray session: %s\n", cmdline)
}
// Since ray session spawn child processes to run the actual commands,
// we need to kill all child processes first.
children, err := p.ChildrenWithContext(ctx)
if err != nil {
return fmt.Errorf("failed to get children of process %d: %w", p.Pid, err)
}
for _, child := range children {
if verbose {
fmt.Printf("Killing subprocess with PID %d\n", child.Pid)
}
if err := child.Kill(); err != nil {
return fmt.Errorf("failed to kill child process %d: %w", child.Pid, err)
}
}
// Then kill the parent process.
if verbose {
fmt.Printf("Killing process with PID %d\n", p.Pid)
}
if err := p.Kill(); err != nil {
return fmt.Errorf("failed to kill process %d: %w", p.Pid, err)
}
}
}
return nil
}
Loading