Skip to content

Commit 9571ca1

Browse files
make mappings more custom
1 parent 4c0f784 commit 9571ca1

File tree

6 files changed

+103
-36
lines changed

6 files changed

+103
-36
lines changed

.drone.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
workspace:
2-
base: /drone/src
3-
path: /usr/local/src/github.com/qnib/doxy
2+
base: /usr/local/
3+
path: src/github.com/qnib/doxy
44

55
pipeline:
66
alpine:

GPU.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,54 @@
22

33
As a Proof-of-Concept how to implement GPU (and InfiniBand) support, `doxy` was extended to allow for injection of payload to the `docker create` call.
44

5+
## CentOS7 box
6+
7+
### Check for CUDA devices
8+
9+
The BOX has 10 Tesla K80 devices (`/dev/nvidia[0-9]`).
10+
11+
```
12+
[root@odin001 ~]# nvidia-smi -L
13+
GPU 0: Tesla K80 (UUID: GPU-4095713a-1f9b-791d-841d-8b35143127d4)
14+
GPU 1: Tesla K80 (UUID: GPU-ab541226-7c4f-ab59-3927-1535f68a3a8f)
15+
GPU 2: Tesla K80 (UUID: GPU-8310202d-1d32-bac5-cc36-2add9e21d9d6)
16+
GPU 3: Tesla K80 (UUID: GPU-cb3d675d-ba3b-5cdb-2331-9534bfd20679)
17+
GPU 4: Tesla K80 (UUID: GPU-8f1511d6-5326-e718-c682-cd2377bbf7cf)
18+
GPU 5: Tesla K80 (UUID: GPU-997c3b02-765c-7cde-daff-f19dadeb6894)
19+
GPU 6: Tesla K80 (UUID: GPU-bb1a7162-859a-4e9c-eeac-150bd35ff767)
20+
GPU 7: Tesla K80 (UUID: GPU-9698fbda-39fd-5f1f-1691-626c7e780f36)
21+
GPU 8: Tesla K80 (UUID: GPU-243c49d3-5dc3-26cc-2f6e-42e2baf6ac93)
22+
GPU 9: Tesla K80 (UUID: GPU-0fc004de-3f1a-030a-3931-33043b895514)
23+
```
24+
25+
### Starting the proxy
26+
```
27+
[root@odin001 ~]# docker run -v /var/run:/var/run/ -ti --rm qnib/doxy:gpu doxy --pattern-key=hpc --debug --proxy-socket=/var/run/hpc.sock --gpu
28+
> execute CMD 'doxy --pattern-key=hpc --debug --proxy-socket=/var/run/hpc.sock --gpu'
29+
2018/02/25 00:07:17 [II] Start Version: 0.2.4
30+
2018/02/25 00:07:17 Error reading patterns file 'open /etc/doxy.pattern: no such file or directory', using hpc patterns
31+
2018/02/25 00:07:17 [doxy] Listening on /var/run/hpc.sock
32+
2018/02/25 00:07:17 Serving proxy on '/var/run/hpc.sock'
33+
[negroni] 2018-02-25T00:07:52Z | 200 | 843.548µs | docker | GET /_ping
34+
Add GPU stuff
35+
New device: /dev/nvidia0:/dev/nvidia0:rwm
36+
New device: /dev/nvidiactl:/dev/nvidiactl:rwm
37+
```
38+
39+
### Running a CUDA image:
40+
41+
Since passing through the log-API calls gave me some headaches, the container is created using the proxy, but started using the proper docker API.
42+
43+
```
44+
$ docker start -ai $(docker -H unix:///var/run/hpc.sock create \
45+
-ti -e SKIP_ENTRYPOINTS=true \
46+
qnib/cplain-cuda nvidia-smi -L)
47+
[II] qnib/init-plain script v0.4.28
48+
> execute CMD 'nvidia-smi -L'
49+
GPU 0: Tesla K80 (UUID: GPU-4095713a-1f9b-791d-841d-8b35143127d4)
50+
```
51+
52+
## AWS p2.xlarge
553
On a `p2.xlarge` instance with CUDA support, `doxy:gpu` was started like that.
654

755
```bash

main.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ var (
6464
Usage: "Overwrite `--user` with given value (if pin-user is set)",
6565
EnvVar: "DOXY_USER",
6666
}
67+
cudaLibPathFlag = cli.StringFlag{
68+
Name: "cuda-lib-path",
69+
Usage: "Path to cuda libraries.",
70+
EnvVar: "DOXY_CUDA_LIB_PATH",
71+
}
6772
deviceFileFlag = cli.StringFlag{
6873
Name: "device-file",
6974
Value: proxy.DEVICE_FILE,
@@ -86,6 +91,8 @@ func EvalOptions(cfg *config.Config) (po []proxy.ProxyOption) {
8691
pinUser, _ := cfg.String("user")
8792
pinUserBool, _ := cfg.Bool("pin-user")
8893
po = append(po, proxy.WithPinUser(pinUserBool, pinUser))
94+
cudalibPath, _ := cfg.String("cuda-lib-path")
95+
po = append(po, proxy.WithCudaLibPath(cudalibPath))
8996
return
9097
}
9198

@@ -115,7 +122,7 @@ func EvalDevicesOpts(cfg *config.Config) (proxy.ProxyOption) {
115122
defer reader.Close()
116123
devices := []string{}
117124
if err != nil {
118-
return proxy.WithDevMappings(proxy.DEVICES)
125+
return proxy.WithDevMappings([]string{})
119126
}
120127
devices, err = proxy.ReadLineFile(reader)
121128
if err != nil {
@@ -158,6 +165,7 @@ func main() {
158165
bindAddFlag,
159166
pinUserBool,
160167
pinUserFlag,
168+
cudaLibPathFlag,
161169
}
162170
app.Action = RunApp
163171
app.Run(os.Args)

proxy/main.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,14 @@ var (
3030
}
3131
HPC_PAT = []string{
3232
`^/(v\d\.\d+/)?containers(/\w+)?/(json|stats|top|logs|wait|create|start|run|kill)$`,
33-
`^/(v\d\.\d+/)?images(/\w+)?/(json|pull)$`,
33+
`^/(v\d\.\d+/)?images(/\w+)?/(json|pull|create)$`,
3434
`^/(v\d\.\d+/)?info$`,
3535
`^/(v\d\.\d+/)?images/(pull|create)$`,
3636
`^/(v\d\.\d+/)?version$`,
3737
"^/_ping$",
3838
}
39-
DEVICES = []string{
39+
GPUS = []string{
4040
"/dev/nvidia0:/dev/nvidia0:rwm",
41-
"/dev/nvidia-uvm:/dev/nvidia-uvm:rwm",
4241
"/dev/nvidiactl:/dev/nvidiactl:rwm",
4342
}
4443
PATTERNS = map[string][]string{
@@ -49,10 +48,10 @@ var (
4948

5049
type Proxy struct {
5150
po ProxyOptions
52-
dockerSocket, newSocket, pinUser string
53-
debug, gpu, pinUserEnabled bool
54-
patterns []string
55-
bindMounts,devMappings []string
51+
dockerSocket, newSocket, pinUser,cudaLibPath string
52+
debug, gpu, pinUserEnabled bool
53+
patterns []string
54+
bindMounts,devMappings []string
5655
}
5756

5857
func NewProxy(opts ...ProxyOption) Proxy {
@@ -71,6 +70,7 @@ func NewProxy(opts ...ProxyOption) Proxy {
7170
patterns: options.Patterns,
7271
bindMounts: options.BindMounts,
7372
devMappings: options.DevMappings,
73+
cudaLibPath: options.CudaLibPath,
7474
}
7575
}
7676

@@ -86,7 +86,7 @@ func (p *Proxy) GetOptions() map[string]interface{} {
8686
}
8787

8888
func (p *Proxy) Run() {
89-
upstream := NewUpstream(p.dockerSocket, p.patterns, p.bindMounts, p.devMappings, p.gpu, p.pinUser, p.pinUserEnabled)
89+
upstream := NewUpstream(p.dockerSocket, p.patterns, p.bindMounts, p.devMappings, p.gpu, p.pinUser, p.pinUserEnabled, p.cudaLibPath)
9090
sigc := make(chan os.Signal, 1)
9191
signal.Notify(sigc, os.Interrupt, os.Kill, syscall.SIGTERM)
9292
l, err := ListenToNewSock(p.newSocket, sigc)

proxy/options.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ package proxy
22

33

44
type ProxyOptions struct {
5-
DockerSocket,ProxySocket,PinUser string
6-
Debug,Gpu,PinUserEnabled bool
7-
Patterns,BindMounts,DevMappings []string
5+
DockerSocket,ProxySocket,PinUser,CudaLibPath string
6+
Debug,Gpu,PinUserEnabled bool
7+
Patterns,BindMounts,DevMappings []string
88
}
99

1010
var defaultProxyOptions = ProxyOptions{
@@ -14,6 +14,7 @@ var defaultProxyOptions = ProxyOptions{
1414
PinUserEnabled: false,
1515
Debug: false,
1616
Gpu: false,
17+
CudaLibPath: "",
1718
Patterns: []string{},
1819
BindMounts: []string{},
1920
DevMappings: []string{},
@@ -32,6 +33,11 @@ func WithDockerSocket(s string) ProxyOption {
3233
o.DockerSocket = s
3334
}
3435
}
36+
func WithCudaLibPath(s string) ProxyOption {
37+
return func(o *ProxyOptions) {
38+
o.CudaLibPath = s
39+
}
40+
}
3541

3642
func WithProxySocket(s string) ProxyOption {
3743
return func(o *ProxyOptions) {

proxy/proxy.go

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ import (
2121

2222
// UpStream creates upstream handler struct
2323
type UpStream struct {
24-
Name string
25-
proxy http.Handler
24+
Name string
25+
proxy http.Handler
2626
// TODO: Kick out separat config options and use more generic one
27-
allowed []*regexp.Regexp
28-
bindMounts []string
29-
devMappings []string
30-
gpu bool
31-
pinUser string
32-
pinUserEnabled bool
27+
allowed []*regexp.Regexp
28+
bindMounts []string
29+
devMappings []string
30+
gpu bool
31+
pinUser,cudaLibPath string
32+
pinUserEnabled bool
3333
}
3434

3535
// UnixSocket just provides the path, so that I can test it
@@ -81,11 +81,12 @@ func NewUpstreamPO(po ProxyOptions) *UpStream {
8181
gpu: po.Gpu,
8282
pinUser: po.PinUser,
8383
pinUserEnabled: po.PinUserEnabled,
84+
cudaLibPath: po.CudaLibPath,
8485
}
8586
return upstream
8687
}
8788
// NewUpstream returns a new socket (magic)
88-
func NewUpstream(socket string, regs []string, binds []string, devs []string, gpu bool, pinUser string, pinUserB bool) *UpStream {
89+
func NewUpstream(socket string, regs []string, binds []string, devs []string, gpu bool, pinUser string, pinUserB bool, cudaLibPath string) *UpStream {
8990
us := NewUnixSocket(socket)
9091
a := []*regexp.Regexp{}
9192
for _, r := range regs {
@@ -101,6 +102,7 @@ func NewUpstream(socket string, regs []string, binds []string, devs []string, gp
101102
gpu: gpu,
102103
pinUser: pinUser,
103104
pinUserEnabled: pinUserB,
105+
cudaLibPath: cudaLibPath,
104106
}
105107
return upstream
106108
}
@@ -151,16 +153,30 @@ func (u *UpStream) ServeHTTP(w http.ResponseWriter, req *http.Request) {
151153
}
152154
// prepare devMappings
153155
devMappings := []string{}
154-
for _, dev := range u.devMappings {
156+
for _, dev := range GPUS {
155157
devMappings = append(devMappings, dev)
156158
}
157159
// In case GPU support is enabled add devices and mounts
158160
if u.gpu {
159161
fmt.Println("Add GPU stuff")
160162
// TODO: Be smarter about the version of the driver
161-
hostConfig.Binds = append(hostConfig.Binds, "/usr/lib/nvidia-384/:/usr/local/nvidia/")
163+
if u.cudaLibPath != "" {
164+
hostConfig.Binds = append(hostConfig.Binds, fmt.Sprintf("%s:/usr/local/nvidia/", u.cudaLibPath))
165+
}
162166
config.Env = append(config.Env, "PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
163167
config.Env = append(config.Env, "LD_LIBRARY_PATH=/usr/local/nvidia/")
168+
for _, dev := range devMappings {
169+
if dev == "" {
170+
continue
171+
}
172+
fmt.Printf("New device: %s\n", dev)
173+
174+
dm, err := createDevMapping(dev)
175+
if err != nil {
176+
continue
177+
}
178+
hostConfig.Devices = append(hostConfig.Devices, dm)
179+
}
164180
}
165181
if u.pinUserEnabled {
166182
fmt.Print("Alter User setting ")
@@ -184,18 +200,7 @@ func (u *UpStream) ServeHTTP(w http.ResponseWriter, req *http.Request) {
184200
fmt.Printf("New bindmount: %s\n", bMount)
185201
hostConfig.Binds = append(hostConfig.Binds, bMount)
186202
}
187-
for _, dev := range devMappings {
188-
if dev == "" {
189-
continue
190-
}
191-
fmt.Printf("New device: %s\n", dev)
192203

193-
dm, err := createDevMapping(dev)
194-
if err != nil {
195-
continue
196-
}
197-
hostConfig.Devices = append(hostConfig.Devices, dm)
198-
}
199204
fmt.Printf("Mounts: %v\n", hostConfig.Binds)
200205
cfgBody := configWrapper{
201206
Config: config,

0 commit comments

Comments
 (0)