@@ -19,19 +19,23 @@ package main
1919import (
2020 "crypto/tls"
2121 "encoding/json"
22+ "fmt"
2223 "log"
24+ "net"
2325 "os"
2426 "strconv"
2527 "strings"
2628
2729 "github.com/joho/godotenv"
30+ "gopkg.in/yaml.v3"
2831)
2932
// Package-level configuration defaults and well-known file names.
//
// All values are strings, including the port numbers and the SSL flag.
// NOTE(review): the code that applies the Default* values is outside the
// visible part of this file; they are presumably fallbacks used when the
// corresponding environment variable is unset — confirm against setupConfig.
const (
	// DefaultListenerPort is the default port for the listener, kept as a string.
	DefaultListenerPort = "8080"
	// DefaultMetricsPort is the default port for the metrics endpoint, kept as a string.
	DefaultMetricsPort = "2112"
	// DefaultUseSSL is the default SSL toggle in its textual form.
	DefaultUseSSL = "false"
	// DefaultSeverityConfig is a comma-separated list of severity names.
	DefaultSeverityConfig = "Fatal,Critical,Informational"
	// NodeDrainPolicyFile is the JSON file that setupConfig reads to build the
	// trigger-event table. The path is relative, so it is resolved against the
	// process working directory.
	NodeDrainPolicyFile = "nodeDrainPolicy.json"
)
3640
3741type Config struct {
@@ -56,11 +60,19 @@ type Config struct {
5660 SlurmDrainExcludeStr string
5761 SubscriptionPayload SubscriptionPayload
5862 RedfishServers []RedfishServer
59- TriggerEvents [] TriggerEvent
63+ TriggerEvents map [ string ] map [ string ][] EventInfo //map[Severity][MessageRegistry.MessageId][]EventInfo
6064 PrometheusConfig PrometheusConfig
6165 context * tls.Config
6266 eventCount int
6367 dataBuffer []byte
68+ TlsTimeOut string
69+ }
70+
// EventInfo is the per-event payload stored in Config.TriggerEvents.
//
// setupConfig builds one EventInfo for every enabled entry of the node drain
// policy file, copying the four fields below from the decoded
// TriggerEventsInfo. The values are filed under the entry's severity and then
// under its message key ("MessageRegistry.MessageId", or just the MessageId
// when no registry is given), so several EventInfo values may share one key.
type EventInfo struct {
	// UniqueString is copied verbatim from the policy entry.
	// NOTE(review): presumably a substring used to tell apart events that
	// share a message id — confirm against the event-matching code.
	UniqueString string
	// Category is the policy entry's category.
	Category string
	// Subcategory is the policy entry's subcategory.
	Subcategory string
	// DrainReasonPrefix is copied verbatim from the policy entry.
	DrainReasonPrefix string
}
6577
6678type TriggerEvent struct {
@@ -70,11 +82,27 @@ type TriggerEvent struct {
7082 DrainReasonPrefix string `json:"DrainReasonPrefix"`
7183}
7284
// TriggerEventsInfo is one entry of the node drain policy file.
//
// setupConfig decodes the file named by NodeDrainPolicyFile as a JSON array
// of these entries and folds the enabled ones into Config.TriggerEvents.
type TriggerEventsInfo struct {
	// Category is copied into the resulting EventInfo.
	Category string `json:"Category"`
	// Subcategory is copied into the resulting EventInfo.
	Subcategory string `json:"Subcategory"`
	// MessageRegistry, when non-empty, is joined to MessageId with a "." to
	// form the lookup key; when empty the key is MessageId alone.
	MessageRegistry string `json:"MessageRegistry"`
	// MessageId is the message identifier part of the lookup key.
	MessageId string `json:"MessageId"`
	// UniqueString is copied into the resulting EventInfo.
	UniqueString string `json:"UniqueString"`
	// Severity selects the outer key of the Config.TriggerEvents map.
	Severity string `json:"Severity"`
	// DrainReasonPrefix is copied into the resulting EventInfo.
	DrainReasonPrefix string `json:"DrainReasonPrefix"`
	// Enable gates the entry: setupConfig skips entries where it is false,
	// which is also what an entry gets when the field is absent from the JSON.
	Enable bool `json:"Enable"`
}
95+
// PrometheusConfig holds the settings decoded from the PROMETHEUS_CONFIG
// environment variable, which setupConfig parses as JSON when it is non-empty.
type PrometheusConfig struct {
	// Severity is the list of severity names read from the "Severity" key.
	// NOTE(review): presumably the severities for which metrics are
	// exported — confirm against the metrics code.
	Severity []string `json:"Severity"`
}
7699
77- func setupConfig () Config {
// target is one entry of the YAML target file passed to setupConfig.
//
// The file is decoded as a list of these entries, each with a "targets" list
// and a "labels" map.
type target struct {
	// Targets lists host names. setupConfig appends the configured host
	// suffix to each one, resolves the result to an IP address, and registers
	// a RedfishServer whose SlurmNode is the original host name.
	Targets []string `yaml:"targets"`
	// Labels carries the entry's key/value labels. The visible part of
	// setupConfig decodes them but does not read them.
	Labels map[string]string `yaml:"labels"`
}
104+
105+ func setupConfig (targetFile string ) Config {
78106 // Load .env file
79107 err := godotenv .Load ()
80108 if err != nil {
@@ -125,20 +153,13 @@ func setupConfig() Config {
125153 AppConfig .SlurmUser = os .Getenv ("SLURM_USER" )
126154 AppConfig .SlurmDrainExcludeStr = os .Getenv ("SLURM_DRAIN_EXCLUDE_REASON_LIST" )
127155 AppConfig .SlurmScontrolPath = os .Getenv ("SLURM_SCONTROL_PATH" )
156+ AppConfig .TlsTimeOut = os .Getenv ("TLS_TIMEOUT" )
128157
129158 subscriptionPayloadJSON := os .Getenv ("SUBSCRIPTION_PAYLOAD" )
130159 if err := json .Unmarshal ([]byte (subscriptionPayloadJSON ), & AppConfig .SubscriptionPayload ); err != nil {
131160 log .Fatalf ("Failed to parse SUBSCRIPTION_PAYLOAD: %v" , err )
132161 }
133162
134- triggerEventsJSON := os .Getenv ("TRIGGER_EVENTS" )
135- if triggerEventsJSON != "" {
136- err = json .Unmarshal ([]byte (triggerEventsJSON ), & AppConfig .TriggerEvents )
137- if err != nil {
138- log .Fatalf ("Failed to unmarshal TRIGGER_EVENTS: %v" , err )
139- }
140- }
141-
142163 prometheusConfigJSON := os .Getenv ("PROMETHEUS_CONFIG" )
143164 if prometheusConfigJSON != "" {
144165 err = json .Unmarshal ([]byte (prometheusConfigJSON ), & AppConfig .PrometheusConfig )
@@ -154,10 +175,113 @@ func setupConfig() Config {
154175 redfishServersJSON := os .Getenv ("REDFISH_SERVERS" )
155176 if redfishServersJSON == "" {
156177 log .Println ("REDFISH_SERVERS environment variable is not set or is empty" )
178+ } else {
179+ if err := json .Unmarshal ([]byte (redfishServersJSON ), & AppConfig .RedfishServers ); err != nil {
180+ log .Fatalf ("Failed to parse REDFISH_SERVERS: %v" , err )
181+ }
182+ }
183+
184+ // Read the node drain policy config file
185+ nodeDrainPolicyConfig , err := os .ReadFile (NodeDrainPolicyFile )
186+
187+ if err != nil {
188+ log .Fatalf ("Failed to read: %v" , NodeDrainPolicyFile )
189+ }
190+
191+ triggerEventsInfo := []TriggerEventsInfo {}
192+ err = json .Unmarshal (nodeDrainPolicyConfig , & triggerEventsInfo )
193+ if err != nil {
194+ log .Fatalf ("Failed to unmarshal file: %v | err: %v" , NodeDrainPolicyFile , err )
195+ }
196+
197+ tInfoMap := map [string ]map [string ][]EventInfo {}
198+
199+ for _ , evt := range triggerEventsInfo {
200+ fmt .Printf ("Trigger Event: %+v\n " , evt )
201+ if evt .Enable != true {
202+ continue
203+ }
204+ eInfo := EventInfo {}
205+ eInfo .Category = evt .Category
206+ eInfo .Subcategory = evt .Subcategory
207+ eInfo .DrainReasonPrefix = evt .DrainReasonPrefix
208+ eInfo .UniqueString = evt .UniqueString
209+ key := ""
210+ if evt .MessageRegistry == "" {
211+ key = evt .MessageId
212+ } else {
213+ key = evt .MessageRegistry + "." + evt .MessageId
214+ }
215+ if ee , ok := tInfoMap [evt .Severity ]; ! ok {
216+ eInfoMap := map [string ][]EventInfo {}
217+ eInfoMap [key ] = []EventInfo {eInfo }
218+ tInfoMap [evt .Severity ] = eInfoMap
219+ } else {
220+ ee [key ] = append (ee [key ], eInfo )
221+ }
222+ }
223+
224+ AppConfig .TriggerEvents = tInfoMap
225+
226+ for kk , tt := range AppConfig .TriggerEvents {
227+ fmt .Println ("Severity: " , kk )
228+ for kkk , ttt := range tt {
229+ fmt .Println ("key: " , kkk )
230+ fmt .Printf ("event: %+v\n " , ttt )
231+ }
232+ }
233+
234+ // Read and parse the REDFISH_SERVERS_COMMON_CONFIG environment variable
235+ redfishServersCommonConfigJSON := os .Getenv ("REDFISH_SERVERS_COMMON_CONFIG" )
236+ if redfishServersCommonConfigJSON == "" {
237+ log .Println ("redfishServersCommonConfigJSON environment variable is not set or is empty" )
238+ return AppConfig
239+ }
240+ redfishServersCommonConfig := RedfishServersCommongConfig {}
241+ if err := json .Unmarshal ([]byte (redfishServersCommonConfigJSON ), & redfishServersCommonConfig ); err != nil {
242+ log .Fatalf ("Failed to parse REDFISH_SERVERS_COMMON_CONFIG: %v" , err )
243+ }
244+
245+ if targetFile == "" {
246+ log .Println ("No target file provided" )
157247 return AppConfig
158248 }
159- if err := json .Unmarshal ([]byte (redfishServersJSON ), & AppConfig .RedfishServers ); err != nil {
160- log .Fatalf ("Failed to parse REDFISH_SERVERS: %v" , err )
249+
250+ targetYamlFile , err := os .ReadFile (targetFile )
251+
252+ if err != nil {
253+ log .Fatalf ("Failed to read file: %v" , targetFile )
254+ }
255+
256+ targets := []target {}
257+
258+ err = yaml .Unmarshal (targetYamlFile , & targets )
259+
260+ if err != nil {
261+ log .Fatalf ("Error parsing target file: %v | err: %v" , targetFile , err )
262+ }
263+
264+ for _ , t := range targets {
265+ log .Println ("target: " , t .Targets )
266+
267+ for _ , hostName := range t .Targets {
268+ // add this target to Redfish servers
269+ server := RedfishServer {}
270+ bmcHost := fmt .Sprintf (hostName + ".%v" , redfishServersCommonConfig .HostSuffix )
271+ ips , err := net .LookupIP (bmcHost )
272+ if err != nil || len (ips ) == 0 {
273+ log .Printf ("[error] Couldn't get the IP for host: %v | ips: %v | err: %v" , bmcHost , ips , err )
274+ continue
275+ }
276+ log .Println ("IPs: " , ips )
277+
278+ server .IP = fmt .Sprintf ("https://%v" , ips [0 ])
279+ server .LoginType = "Session"
280+ server .Username = redfishServersCommonConfig .UserName
281+ server .Password = redfishServersCommonConfig .Password
282+ server .SlurmNode = hostName
283+ AppConfig .RedfishServers = append (AppConfig .RedfishServers , server )
284+ }
161285 }
162286
163287 return AppConfig
0 commit comments