Tidegate-Analysis-App/visualization.py at main · cdavisv/Tidegate-Analysis-App · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
"""
Visualization and reporting module for wildlife detection analyses.

This module generates publication-quality and exploratory visualizations for
all stages of the analysis pipeline, including species summaries, environmental
effects, gate interactions, tidal cycle dynamics, and hypothesis testing outputs.

Features:
- Interactive Plotly visualizations (HTML + optional PNG export)
- Safe plotting with data availability checks
- Visualization of both descriptive and inferential analysis results
- Hypothesis-driven tidal cycle diagrams
- Heatmaps, bar charts, time series, scatter plots, and polar plots

All plots are saved to disk to support reproducibility, reporting, and external
review.
"""


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import numpy as np
import os


# --- Shared helper used by the plotting functions below to persist figures ---
def save_plot(fig, filename):
    """Save a figure as interactive HTML and, when possible, a static PNG.

    Both files are written to the ``output_plots`` directory (relative to the
    current working directory), which is created on demand.

    Args:
        fig: Figure object exposing ``write_html(path)`` and
            ``write_image(path, scale=...)`` (e.g. a Plotly figure).
        filename: Base file name, without extension.

    The HTML export is mandatory: any error it raises propagates to the
    caller. The PNG export is best-effort: a failure is reported on stdout
    and otherwise ignored.
    """
    output_dir = 'output_plots'
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when several runs start at the same time.
    os.makedirs(output_dir, exist_ok=True)

    html_path = os.path.join(output_dir, f"{filename}.html")
    png_path = os.path.join(output_dir, f"{filename}.png")

    fig.write_html(html_path)
    print(f"   -> Saved interactive plot to: {html_path}")

    # Static export depends on an optional image engine; a missing engine
    # must not abort the pipeline, so the failure is reported and swallowed.
    try:
        fig.write_image(png_path, scale=2)
        print(f"   -> Saved static image to: {png_path}")
    except Exception as e:
        print(f"   -> Could not save PNG. Is 'kaleido' installed? (pip install kaleido). Error: {e}")


def _detection_rate_bar(summary_df, title, x_axis_title, color_scale=None):
    """Build a bar chart of ``Detection_Rate_Pct`` against the index of *summary_df*."""
    color_kwargs = {}
    if color_scale is not None:
        color_kwargs = {
            'color': 'Detection_Rate_Pct',
            'color_continuous_scale': color_scale,
        }

    fig = px.bar(
        summary_df, x=summary_df.index, y='Detection_Rate_Pct',
        title=title,
        # Plotly Express matches label keys against column names, not
        # against argument names such as 'x' / 'y'.
        labels={'Detection_Rate_Pct': 'Detection Success Rate (%)'},
        **color_kwargs
    )
    # The x values come from the index, whose column name depends on the
    # caller's data, so the axis titles are set explicitly.
    fig.update_layout(
        xaxis_title=x_axis_title,
        yaxis_title='Detection Success Rate (%)',
    )
    return fig


def plot_environmental_factors(mtr_gate_df, hinge_gate_df, tidal_df, temp_df):
    """
    Generates and saves bar charts for the environmental analysis summaries.

    Each summary table is plotted only when it is not None, not empty and
    contains a 'Detection_Rate_Pct' column; its index supplies the x-axis
    categories.

    Args:
        mtr_gate_df: Summary table for the MTR gate states.
        hinge_gate_df: Summary table for the top hinge gate states.
        tidal_df: Summary table for the tidal levels.
        temp_df: Currently unused; kept so existing callers keep working.

    Returns:
        dict mapping a figure key to the figure created for it. Tables that
        were skipped have no entry.
    """
    print("\n--- Generating Environmental Factor Visualizations ---")
    figures = {}

    chart_specs = [
        {
            'data': mtr_gate_df,
            'key': 'mtr_gate_detection_rate',
            'file': "2a_mtr_gate_detection_rate",
            'message': " -> Plotting: Wildlife Detection Success Rate by MTR Gate Angle",
            'title': 'Wildlife Detection Success Rate by MTR Gate Opening Angle<br><sub>Shows detection efficiency when cameras were active at each gate position</sub>',
            'x_title': 'MTR Gate State',
            'color_scale': 'Viridis',
        },
        {
            'data': hinge_gate_df,
            'key': 'hinge_gate_detection_rate',
            'file': "2b_hinge_gate_detection_rate",
            'message': " -> Plotting: Wildlife Detection Success Rate by Top Hinge Gate Angle",
            'title': 'Wildlife Detection Success Rate by Top Hinge Gate Opening Angle<br><sub>Shows detection efficiency when cameras were active at each hinge position</sub>',
            'x_title': 'Top Hinge Gate State',
            'color_scale': 'Plasma',
        },
        {
            'data': tidal_df,
            'key': 'tidal_level_detection_rate',
            'file': "2c_tidal_level_detection_rate",
            'message': " -> Plotting: Wildlife Detection Success Rate by Tidal Level",
            'title': 'Wildlife Detection Success Rate by Tidal Level<br><sub>Shows detection efficiency when cameras were active at each tidal condition</sub>',
            'x_title': 'Tidal Level',
            # The tidal chart has never been colour-coded.
            'color_scale': None,
        },
    ]

    for spec in chart_specs:
        summary_df = spec['data']
        if summary_df is None or summary_df.empty:
            continue
        if 'Detection_Rate_Pct' not in summary_df.columns:
            print(f" -> Skipping '{spec['key']}' plot: 'Detection_Rate_Pct' column not found.")
            continue

        print(spec['message'])
        fig = _detection_rate_bar(
            summary_df, spec['title'], spec['x_title'], spec['color_scale']
        )
        save_plot(fig, spec['file'])
        figures[spec['key']] = fig

    return figures


def create_safe_water_visualizations(df, title_suffix=""):
    """
    Creates one time series plot per available water quality parameter.

    A parameter is plotted only when its column exists in *df* and holds at
    least one non-null value. Plotting is skipped entirely when no such
    column exists or when no 'DateTime' values are available for the x-axis.

    Args:
        df: DataFrame with a 'DateTime' column (or an index named 'DateTime')
            and any of the supported water quality columns.
        title_suffix: Text appended to the progress message and to each
            output file name.

    Returns:
        dict mapping 'water_quality_<column>' to the figure created for that
        column; empty when nothing was plotted.
    """
    print("\n--- Checking for Water Quality Time Series Plots ---")

    water_params_to_plot = {
        'Water_Temp_C': ('Water Temperature (°C)', 'blue'),
        'pH': ('pH', 'green'),
        'DO_mgL': ('Dissolved Oxygen (mg/L)', 'cyan'),
        'Salinity_psu': ('Salinity (psu)', 'orange'),
        'Turbidity_NTU': ('Turbidity (NTU)', 'brown'),
        'Depth': ('Water Depth (m)', 'purple'),
    }

    if df is None:
        print(" -> Skipping water quality plots: No relevant data columns found.")
        return {}

    available_params = {
        column: style
        for column, style in water_params_to_plot.items()
        if column in df.columns and df[column].notna().any()
    }
    if not available_params:
        print(" -> Skipping water quality plots: No relevant data columns found.")
        return {}

    # Plotly Express only resolves column names, so an index named
    # 'DateTime' has to be turned into a regular column first.
    plot_source = df
    if 'DateTime' not in plot_source.columns:
        if df.index.name == 'DateTime':
            plot_source = df.reset_index()
        else:
            print(" -> Skipping water quality plots: 'DateTime' column not found.")
            return {}

    figures = {}
    print(f" -> Plotting: Water Quality Parameters Over Time {title_suffix}")
    for param, (label, color) in available_params.items():
        fig = px.line(
            plot_source, x='DateTime', y=param, title=label,
            # Label keys must be column names for Plotly Express to apply them.
            labels={param: label},
            markers=True
        )
        fig.update_traces(line_color=color, line_width=1.5, marker=dict(size=4))
        save_plot(fig, f"3_water_quality_{param}{title_suffix}")
        figures[f'water_quality_{param}'] = fig

    return figures


def create_analysis_plots(df_combined, species_df):
    """
    Creates and saves the comprehensive analysis plots.

    Two plots are attempted, each only when its inputs are available:
    a bar chart of the ten most frequent values of the 'Species' column of
    *species_df*, and a correlation matrix of the water quality columns
    present in *df_combined* (at least two are required).

    Args:
        df_combined: Combined DataFrame; nothing is plotted when it is None
            or empty.
        species_df: DataFrame with a 'Species' column; may be None or empty.

    Returns:
        None. Figures are written to disk through ``save_plot``.
    """
    print("\n--- Checking for Comprehensive Analysis Plots ---")
    if df_combined is None or df_combined.empty:
        print(" -> Skipping comprehensive plots: Combined DataFrame is empty.")
        return

    has_species = (
        species_df is not None
        and not species_df.empty
        and 'Species' in species_df.columns
    )
    if has_species:
        print(" -> Plotting: Top 10 Species by Detection Events")
        counts = species_df['Species'].value_counts().nlargest(10)
        # Explicit column names keep the axis labels independent of how
        # pandas happens to name the value_counts() index and values.
        top_species = pd.DataFrame({
            'Species': counts.index,
            'Detections': counts.to_numpy(),
        })
        fig_species = px.bar(
            top_species, x='Species', y='Detections',
            title='Top 10 Species by Detection Events',
            labels={'Detections': 'Number of Detections', 'Species': 'Species'}
        )
        save_plot(fig_species, "4a_top_10_species_events")

    candidate_cols = ['Water_Temp_C', 'pH', 'DO_mgL', 'Salinity_psu', 'Turbidity_NTU', 'Depth']
    wq_corr_cols = [col for col in candidate_cols if col in df_combined.columns]
    if len(wq_corr_cols) > 1:
        print(" -> Plotting: Water Quality Correlation Matrix")
        corr_matrix = df_combined[wq_corr_cols].corr()
        fig_corr = px.imshow(corr_matrix, text_auto=True, title='Water Quality Correlation Matrix', aspect="auto")
        save_plot(fig_corr, "4b_water_quality_correlation")

def plot_gate_analysis(gate_analysis_df):
    """
    Visualizes detection rates by tide gate state.

    Args:
        gate_analysis_df: Summary table whose index holds the gate states
            and which has a 'Detection_Rate_Pct' column. Nothing is plotted
            when it is None or empty.

    Returns:
        None. The figure is written to disk through ``save_plot``.
    """
    if gate_analysis_df is None or gate_analysis_df.empty:
        print(" -> Skipping gate analysis plot: No data available.")
        return

    fig = px.bar(
        gate_analysis_df,
        x=gate_analysis_df.index,
        y='Detection_Rate_Pct',
        title='Wildlife Detection Success Rate by Gate Opening Angle',
        # Label keys must be column names for Plotly Express to apply them.
        labels={'Detection_Rate_Pct': 'Detection Success Rate (%)'},
        color='Detection_Rate_Pct',
        color_continuous_scale='Viridis'
    )
    # The x values come from the index, so the axis titles are set directly.
    fig.update_layout(
        xaxis_title='Gate State',
        yaxis_title='Detection Success Rate (%)',
    )
    save_plot(fig, "2_gate_analysis_detection_rate")


def plot_bird_analysis(summary_table, combined_df):
    """
    Creates detailed visualizations for the wildlife behavior analysis.

    Two plots are attempted: a heatmap of *summary_table*, and a scatter
    plot of gate angle against tidal change coloured by detection flag.
    Both are skipped when *combined_df* has no rows flagged as detections.

    Args:
        summary_table: Table rendered as the heatmap; may be None or empty,
            in which case the heatmap is skipped.
        combined_df: DataFrame with an 'is_animal_detection' column. The
            scatter plot additionally needs 'Gate_Opening_MTR_Deg' and
            'tidal_change_m_hr'.

    Returns:
        dict mapping a figure key to the figure created for it.
    """
    print("\n--- Generating Wildlife Behavior Visualizations ---")

    figures = {}
    if combined_df is None or 'is_animal_detection' not in combined_df.columns:
        print(" -> Skipping wildlife plots: 'is_animal_detection' column not found.")
        return figures

    # Only the presence of detections matters here, so no filtered copy of
    # the (potentially large) frame is made. eq(True) treats missing values
    # as "no detection".
    if not combined_df['is_animal_detection'].eq(True).any():
        print(" -> Skipping wildlife plots: No wildlife detections to visualize.")
        return figures

    if summary_table is not None and not summary_table.empty:
        print(" -> Plotting: Heatmap of Wildlife Detection 'Hot-Spots'")
        fig_heatmap = px.imshow(
            summary_table,
            text_auto=True,
            labels=dict(x="Tidal Flow State", y="Gate Status", color="Detection Rate (%)"),
            title="<b>Wildlife Detection Rate (%) by Gate Status and Tidal Flow</b>"
        )
        fig_heatmap.update_xaxes(side="top")
        save_plot(fig_heatmap, "5a_wildlife_detection_heatmap")
        figures['wildlife_detection_heatmap'] = fig_heatmap

    if 'Gate_Opening_MTR_Deg' in combined_df.columns and 'tidal_change_m_hr' in combined_df.columns:
        print(" -> Plotting: Granular Scatter Plot of Detections")

        # Large frames are down-sampled to keep the HTML output small. The
        # fixed seed makes the saved plot identical between runs.
        if len(combined_df) > 5000:
            plot_df = combined_df.sample(n=5000, random_state=42)
        else:
            plot_df = combined_df
        plot_df = plot_df.reset_index()

        fig_scatter = px.scatter(
            plot_df,
            x="Gate_Opening_MTR_Deg",
            y="tidal_change_m_hr",
            color="is_animal_detection",
            color_discrete_map={True: "red", False: "rgba(200, 200, 200, 0.2)"},
            title="<b>Wildlife Detections vs. Gate Angle and Tidal Change</b>",
            labels={
                "Gate_Opening_MTR_Deg": "Gate Opening Angle (Degrees)",
                "tidal_change_m_hr": "Tidal Change (meters/hour)",
                "is_animal_detection": "Wildlife Detected?"
            },
            # Timestamps are shown on hover only when they are available.
            hover_data=['DateTime'] if 'DateTime' in plot_df.columns else None
        )
        fig_scatter.add_hline(y=0, line_dash="dash", annotation_text="Slack Tide")
        fig_scatter.add_vline(x=5, line_dash="dash", annotation_text="Gate Closed")
        save_plot(fig_scatter, "5b_wildlife_detection_scatter")
        figures['wildlife_detection_scatter'] = fig_scatter

    return figures


def plot_species_analysis(species_summary_df):
    """
    Generates and saves a bar plot for the species analysis summary.

    Args:
        species_summary_df: Summary table indexed by species, with
            'Total_Count' and 'Detection_Events' columns. Only the first 15
            rows are plotted.
            NOTE(review): the "Top 15" title presumes the table arrives
            sorted by 'Total_Count' in descending order - confirm with the
            caller.

    Returns:
        The created figure, or None when the table is None or empty.
    """
    if species_summary_df is None or species_summary_df.empty:
        return None

    print("\n--- Generating Species Analysis Visualization ---")
    print(" -> Plotting: Top 15 Species by Total Count")

    plot_df = species_summary_df.head(15)

    fig = px.bar(
        plot_df,
        x=plot_df.index,
        y='Total_Count',
        title='Top 15 Species by Total Individual Count<br><sub>Based on Wildlife Detection Efficiency Analysis</sub>',
        # Label keys must be column names for Plotly Express to apply them.
        labels={
            'Total_Count': 'Total Individual Count',
            'Detection_Events': 'Detection Events',
        },
        color='Detection_Events',
        color_continuous_scale=px.colors.sequential.Viridis,
        hover_name=plot_df.index,
        hover_data={'Detection_Events': True, 'Total_Count': True}
    )
    # The x values come from the index, so the axis titles are set directly.
    fig.update_layout(
        xaxis_title='Species',
        yaxis_title='Total Individual Count',
    )
    save_plot(fig, "1_species_summary")
    return fig

def _plot_tide_cycle_visualization(title, peak_gate_state, peak_tidal_state):
    """
    Helper function to generate and save a stylized plot of the tide cycle
    highlighting the point of peak wildlife activity.

    The tide is drawn as one sine period over a 0-12 axis with four labelled
    states. When *peak_tidal_state* is one of those states, a star and an
    annotation showing *peak_gate_state* are drawn at its position; any
    other value produces the curve without a marker.

    Args:
        title: Analysis name; used in the plot title and, with spaces
            replaced by underscores, in the output file name.
        peak_gate_state: Text shown in the peak annotation.
        peak_tidal_state: One of 'Low Slack', 'Rising', 'High Slack',
            'Falling'.

    The PNG is written to the current working directory.
    """
    time = np.linspace(0, 12, 300)
    depth = np.sin(time * np.pi / 6 - np.pi / 2)

    fig, ax = plt.subplots(figsize=(10, 6))
    # The figure is closed in the finally clause so that a failure while
    # drawing or saving cannot leave it open for the rest of the run.
    try:
        ax.plot(time, depth, color='skyblue', linewidth=2)
        ax.fill_between(time, depth, -1, color='skyblue', alpha=0.3)

        # (time, relative level) of each labelled state on the curve.
        positions = {
            'Low Slack': (0, -1),
            'Rising': (3, 0),
            'High Slack': (6, 1),
            'Falling': (9, 0)
        }

        for state, (t, d) in positions.items():
            # Labels sit above the curve, except at the trough where they
            # are placed below it.
            label_y = d + 0.15 if d >= 0 else d - 0.2
            ax.text(t, label_y, state, ha='center', fontsize=12, weight='bold')

        if peak_tidal_state in positions:
            peak_time, peak_depth = positions[peak_tidal_state]

            ax.plot(peak_time, peak_depth, '*', markersize=20, color='gold', markeredgecolor='black')
            ax.annotate(
                f"Peak Activity:\n{peak_gate_state}",
                xy=(peak_time, peak_depth),
                xytext=(peak_time, peak_depth + 0.5),
                ha='center',
                fontsize=11,
                arrowprops=dict(facecolor='black', shrink=0.05, width=1, headwidth=8),
                bbox=dict(boxstyle="round,pad=0.5", fc="gold", ec="black", lw=1, alpha=0.8)
            )

        ax.set_title(f"Peak Wildlife Activity Condition: {title}", fontsize=16, weight='bold')
        ax.set_xlabel("Tidal Cycle (~12 hours)")
        ax.set_ylabel("Relative Water Level")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylim(-1.5, 2.0)
        sns.despine(ax=ax, left=True, bottom=True)
        fig.tight_layout()

        filename = f"hypothesis_visualization_{title.replace(' ', '_')}.png"
        fig.savefig(filename)
        print(f" -> Saved hypothesis visualization to '{filename}'")
    finally:
        plt.close(fig)


def create_hypothesis_visualizations(df):
    """
    Generates and saves visualizations for each of the HYPOTHESIS TEST outputs,
    showing where peak wildlife activity occurs in the tidal cycle.

    For every gate categorisation column present in *df*, the mean of
    'is_animal_detection' is computed per (gate state, tidal flow) pair and
    the pair with the highest mean is passed to the tide cycle plot.

    Args:
        df: DataFrame with 'detailed_tidal_flow' and 'is_animal_detection'
            columns plus any of the gate category columns listed in
            ``analyses``. Missing gate columns are skipped with a message.

    Returns:
        None. One PNG is written per analysed gate column.
    """
    print("\n\n--- Generating Hypothesis Visualizations ---")
    if 'detailed_tidal_flow' not in df.columns or 'is_animal_detection' not in df.columns:
        print(" -> Skipping hypothesis visualizations: Required columns not found.")
        return

    analyses = {
        "MTR Gate": "Gate_Opening_MTR_Deg_category",
        "Top Hinge Gate": "Gate_Opening_Top_Hinge_Deg_category",
        "Combined Gate (Simple)": "simple_gate_category",
        "Combined Gate (Specific Combos)": "specific_gate_combo"
    }

    for title, gate_col in analyses.items():
        if gate_col not in df.columns:
            print(f" -> Skipping '{title}' visualization: Column '{gate_col}' not found.")
            continue

        summary_table = (
            df.groupby([gate_col, 'detailed_tidal_flow'])['is_animal_detection']
            .mean()
            .unstack()
            .fillna(0)
        )

        if summary_table.empty:
            continue

        # Pick the tidal state from within the peak gate row. Taking the
        # row and column maxima independently can, when several cells tie
        # for the maximum, return a pair whose own cell is not a peak.
        peak_gate_state = summary_table.max(axis=1).idxmax()
        peak_tidal_state = summary_table.loc[peak_gate_state].idxmax()

        # 'Other' lumps several combinations together; when it wins, report
        # the individual MTR/hinge combination with the highest rate.
        can_drill_down = 'MTR_category' in df.columns and 'Hinge_category' in df.columns
        if (
            title == "Combined Gate (Specific Combos)"
            and peak_gate_state == 'Other'
            and can_drill_down
        ):
            other_df = df[
                (df[gate_col] == 'Other') & (df['detailed_tidal_flow'] == peak_tidal_state)
            ]
            if not other_df.empty:
                top_combo = other_df.groupby(['MTR_category', 'Hinge_category'])['is_animal_detection'].mean().idxmax()
                peak_gate_state = f"MTR: {top_combo[0]}\n& Hinge: {top_combo[1]}"

        _plot_tide_cycle_visualization(title, peak_gate_state, peak_tidal_state)

def create_tide_cycle_visualizations(df, tide_analysis_results):
    """
    Creates comprehensive visualizations of wildlife detections across tidal cycles.

    Up to three plots are produced, each only when its input table is
    available: a bar chart of detection rate by tidal state, a polar chart
    of detection rate by tidal phase bin, and a heatmap of per-species
    tidal preferences. The 'Unknown' tidal state is excluded from the bar
    chart and the heatmap.

    Args:
        df: Currently unused; kept so existing callers keep working.
        tide_analysis_results: Tuple ``(detection_by_tide, phase_detection,
            species_tide_table)``; any element may be None. The first two
            tables need a 'detection_rate' column. The index of
            ``phase_detection`` is expected to hold bin labels of the form
            '<start>-<end>' as fractions of the cycle. None skips all plots.

    Returns:
        dict mapping a figure key to the figure created for it.
    """
    print("\n--- Generating Tide Cycle Visualizations ---")
    figures = {}

    if tide_analysis_results is None:
        print(" -> Skipping tide cycle plots: No tide analysis results provided.")
        return figures

    detection_by_tide, phase_detection, species_tide_table = tide_analysis_results

    # Plot 1: Bar chart of detection rate by general tidal state
    if detection_by_tide is not None and not detection_by_tide.empty:
        # 'Unknown' is not a real tidal state and would distort the chart.
        plot_data = detection_by_tide[detection_by_tide.index != 'Unknown']
        print(" -> Plotting: Wildlife Detection Rate by Tidal State (Rising, Falling, etc.)")

        if plot_data.empty:
            print("   -> Skipping plot: No data left after removing 'Unknown' state.")
        else:
            fig_state = px.bar(
                plot_data,
                x=plot_data.index,
                y='detection_rate',
                title='Wildlife Detection Rate vs. Tidal State<br><sub>Shows detection success when cameras were active during each tidal condition</sub>',
                labels={'x': 'Tidal State', 'detection_rate': 'Detection Rate'},
                color='detection_rate',
                color_continuous_scale='Blues'
            )
            save_plot(fig_state, "6a_detection_by_tidal_state")
            figures['detection_by_tidal_state'] = fig_state

    # Plot 2: Polar chart showing detection rate by the phase of the tide
    if phase_detection is not None and not phase_detection.empty:
        print(" -> Plotting: Wildlife Detection Rate by Tidal Phase (Polar Chart)")

        plot_data = phase_detection.copy()

        # Place each bin at its centre. Using only the start edge would
        # shift every point half a bin towards the earlier phase.
        edge_parts = plot_data.index.astype(str).str.split('-')
        bin_start = edge_parts.str[0].astype(float).to_numpy()
        bin_end = pd.to_numeric(edge_parts.str[1], errors='coerce').to_numpy()
        # Labels without a usable end edge fall back to their start edge.
        plot_data['phase_midpoint'] = np.where(
            np.isnan(bin_end), bin_start, (bin_start + bin_end) / 2
        )
        plot_data['phase_degrees'] = plot_data['phase_midpoint'] * 360

        # The subtitle is derived from the data being plotted rather than
        # from a figure hard-coded for one particular dataset.
        polar_title = 'Wildlife Detection Rate Across the Tidal Cycle'
        rates = plot_data['detection_rate']
        if rates.notna().any():
            peak_label = rates.idxmax()
            peak_rate = rates.max()
            polar_title += (
                f'<br><sub>Peak detection rate: {peak_rate:.3g} '
                f'at tidal phase {peak_label}</sub>'
            )

        # Repeating the first point closes the polar line into a loop.
        plot_data_closed = pd.concat([plot_data, plot_data.iloc[[0]]], ignore_index=True)

        fig_polar = px.line_polar(
            plot_data_closed,
            r='detection_rate',
            theta='phase_degrees',
            title=polar_title,
            labels={'detection_rate': 'Detection Rate'},
            template='plotly_dark'
        )
        fig_polar.update_layout(
            polar=dict(
                angularaxis=dict(
                    tickvals=[0, 90, 180, 270],
                    ticktext=['Low Tide', 'Rising Tide', 'High Tide', 'Falling Tide'],
                    direction="clockwise"
                )
            )
        )
        save_plot(fig_polar, "6b_detection_by_tidal_phase")
        figures['detection_by_tidal_phase'] = fig_polar

    # Plot 3: Heatmap of tidal preferences for top species
    if species_tide_table is not None and not species_tide_table.empty:
        # 'Unknown' is dropped for the same reason as in the bar chart.
        plot_data_heatmap = species_tide_table.copy()
        if 'Unknown' in plot_data_heatmap.columns:
            plot_data_heatmap = plot_data_heatmap.drop(columns=['Unknown'])

        print(" -> Plotting: Heatmap of Species Tide Preferences")
        if plot_data_heatmap.empty or plot_data_heatmap.shape[1] == 0:
            print("   -> Skipping plot: No data left after removing 'Unknown' state.")
        else:
            fig_heatmap = px.imshow(
                plot_data_heatmap,
                text_auto=".1f",
                aspect="auto",
                title='Tidal State Preference by Species (% of Detections)<br><sub>Shows when each species is most likely to be detected</sub>',
                labels=dict(x="Tidal State", y="Species", color="Detection %"),
                color_continuous_scale='viridis'
            )
            save_plot(fig_heatmap, "6c_species_tide_preference_heatmap")
            figures['species_tide_preference_heatmap'] = fig_heatmap

    return figures