@@ -90,6 +90,85 @@ def parse_test_log(
9090 return startup_time , failure_info
9191
9292
def parse_timestamp(timestamp_str: str) -> Optional[datetime]:
    """
    Robustly parse a timestamp string, trying multiple common formats.

    Fractional seconds longer than six digits (e.g. the nanosecond
    precision emitted by Go/Kubernetes RFC3339 loggers, such as
    ``2024-01-02T03:04:05.123456789Z``) are truncated to microseconds
    first, because ``%f`` accepts at most six digits and would otherwise
    reject every such timestamp.

    Args:
        timestamp_str: Timestamp string to parse

    Returns:
        Naive datetime object, or None if no known format matches
    """
    # Normalize over-long fractional seconds to microsecond precision so
    # that "%f" (max 6 digits) can parse them.
    head, sep, frac = timestamp_str.partition(".")
    if sep:
        suffix = ""
        if frac.endswith("Z"):
            frac, suffix = frac[:-1], "Z"
        if len(frac) > 6 and frac.isdigit():
            timestamp_str = f"{head}.{frac[:6]}{suffix}"

    # List of common timestamp formats to try, most specific first.
    timestamp_formats = [
        "%Y-%m-%dT%H:%M:%S.%fZ",  # Full format with microseconds and Z
        "%Y-%m-%dT%H:%M:%SZ",  # Without microseconds, with Z
        "%Y-%m-%dT%H:%M:%S.%f",  # With microseconds, no timezone
        "%Y-%m-%dT%H:%M:%S",  # Basic ISO format
        "%Y-%m-%d %H:%M:%S.%f",  # Space separator with microseconds
        "%Y-%m-%d %H:%M:%S",  # Space separator without microseconds
    ]

    for fmt in timestamp_formats:
        try:
            return datetime.strptime(timestamp_str, fmt)
        except ValueError:
            continue

    # If no format matches, log the issue for troubleshooting.
    logging.debug(f"Could not parse timestamp: {timestamp_str}")
    return None
122+
123+
def extract_timestamp_from_log(
    log_path: str, from_end: bool = False, max_lines: int = 10, debug_message: str = ""
) -> Optional[datetime]:
    """
    Extract a timestamp from a log file by parsing JSON lines.

    Args:
        log_path: Path to the log file
        from_end: If True, search from the end of file (for last timestamp)
                  If False, search from beginning (for first timestamp)
        max_lines: Maximum number of lines to check
        debug_message: Debug message to log when timestamp is found

    Returns:
        datetime object or None if no valid timestamp found
    """
    try:
        with open(log_path, "r") as f:
            # readlines() already returns a list; no extra list() needed.
            lines = f.readlines()
    except IOError as e:
        logging.debug(f"Could not read {log_path}: {e}")
        return None

    if from_end:
        # Only the tail matters: slice the last max_lines first, then walk
        # them newest-first, instead of reversing the whole file.
        lines_to_check = lines[-max_lines:][::-1]
    else:
        # Search the head of the file, oldest-first.
        lines_to_check = lines[:max_lines]

    for line in lines_to_check:
        # Cheap substring check before paying for full JSON parsing.
        if '"time":"' not in line:
            continue
        try:
            log_entry = json.loads(line)
        except (json.JSONDecodeError, ValueError) as e:
            logging.debug(f"Failed to parse JSON line: {e}")
            continue
        timestamp_str = log_entry.get("time", "")
        if not timestamp_str:
            continue
        parsed_time = parse_timestamp(timestamp_str)
        if parsed_time:
            if debug_message:
                logging.debug(f"{debug_message}: {timestamp_str}")
            return parsed_time

    return None
170+
171+
93172def calculate_recovery_time (
94173 failure_info : Optional [List [str ]],
95174 process_logs_dir : str ,
@@ -111,8 +190,10 @@ def calculate_recovery_time(
111190 # Determine component type from failure info (strip any quotes)
112191 component_type = failure_info [0 ].strip ("'\" " ) # e.g., "Frontend" or "decode"
113192 component_dir = os .path .join (process_logs_dir , component_type )
193+ logging .info (f"Component directory: { component_dir } " )
114194
115195 if not os .path .exists (component_dir ):
196+ logging .warning (f"Component directory { component_dir } does not exist" )
116197 return None
117198
118199 last_timestamp_before = None
@@ -122,41 +203,31 @@ def calculate_recovery_time(
122203 for log_file in os .listdir (component_dir ):
123204 if log_file .endswith (".previous.log" ):
124205 log_path = os .path .join (component_dir , log_file )
125- try :
126- with open (log_path , "r" ) as f :
127- # Read last few lines to find last valid timestamp
128- lines = f .readlines ()
129- for line in reversed (lines [- 10 :]): # Check last 10 lines
130- if '"time":"' in line :
131- try :
132- log_entry = json .loads (line )
133- timestamp_str = log_entry .get ("time" , "" )[:19 ]
134- last_timestamp_before = datetime .strptime (
135- timestamp_str , "%Y-%m-%dT%H:%M:%S"
136- )
137- break
138- except (json .JSONDecodeError , ValueError ):
139- continue
140- except IOError as e :
141- logging .debug (f"Could not read { log_file } : { e } " )
206+ logging .info (f"Previous pod log path: { log_path } " )
207+ last_timestamp_before = extract_timestamp_from_log (
208+ log_path ,
209+ from_end = True ,
210+ max_lines = 50 , # Check more lines for better chance of finding timestamp
211+ debug_message = "Last timestamp before failure" ,
212+ )
213+ if last_timestamp_before :
214+ break
142215
143216 # Find the first timestamp from current container log
144217 for log_file in os .listdir (component_dir ):
145218 if log_file .endswith (".log" ) and not log_file .endswith (
146219 (".previous.log" , ".metrics.log" )
147220 ):
148221 log_path = os .path .join (component_dir , log_file )
149- try :
150- with open (log_path , "r" ) as f :
151- first_line = f .readline ()
152- if first_line and '"time":"' in first_line :
153- log_entry = json .loads (first_line )
154- timestamp_str = log_entry .get ("time" , "" )[:19 ]
155- first_timestamp_after = datetime .strptime (
156- timestamp_str , "%Y-%m-%dT%H:%M:%S"
157- )
158- except (json .JSONDecodeError , ValueError , IOError ) as e :
159- logging .debug (f"Could not parse timestamp from { log_file } : { e } " )
222+ logging .info (f"Pod log path: { log_path } " )
223+ first_timestamp_after = extract_timestamp_from_log (
224+ log_path ,
225+ from_end = False ,
226+ max_lines = 100 , # May need to skip initial non-JSON output
227+ debug_message = "First timestamp after recovery" ,
228+ )
229+ if first_timestamp_after :
230+ break
160231
161232 # Calculate recovery time from container timestamps (both in UTC)
162233 if last_timestamp_before and first_timestamp_after :
0 commit comments