diff --git a/main.py b/main.py
new file mode 100644
index 0000000..1eb35f8
--- /dev/null
+++ b/main.py
@@ -0,0 +1,151 @@
+import streamlit as st
+import os
+import sys
+from pathlib import Path
+
+# Set page configuration first
+st.set_page_config(
+ page_title="Analytics Dashboard",
+ page_icon="📊",
+ layout="wide"
+)
+
+# Add src directory and subdirectories to sys.path
+project_root = Path(__file__).parent
+src_path = project_root / 'src'
+sys.path.insert(0, str(src_path))
+
+# Add each application's directory to sys.path
+app_dirs = ['DescriptiveAnalysis', 'PredictiveAnalytics1', 'preprocessing', 'PrescriptiveAnalysis1']
+for app_dir in app_dirs:
+ app_path = src_path / app_dir
+ if app_path.exists():
+ sys.path.insert(0, str(app_path))
+ else:
+ print(f"Directory {app_dir} not found in src/. Please check the directory structure.")
+
+# Add PredictiveAnalytics1/Frontend and PrescriptiveAnalysis1 subdirectories to sys.path
+predictive_frontend_path = src_path / 'PredictiveAnalytics1' / 'Frontend'
+if predictive_frontend_path.exists():
+ sys.path.insert(0, str(predictive_frontend_path))
+else:
+ print(f"Frontend directory not found at {predictive_frontend_path}.")
+
+prescriptive_frontend_path = src_path / 'PrescriptiveAnalysis1' / 'Frontend'
+if prescriptive_frontend_path.exists():
+ sys.path.insert(0, str(prescriptive_frontend_path))
+else:
+ print(f"Frontend directory not found at {prescriptive_frontend_path}.")
+
+prescriptive_backend_path = src_path / 'PrescriptiveAnalysis1' / 'Backend'
+if prescriptive_backend_path.exists():
+ sys.path.insert(0, str(prescriptive_backend_path))
+else:
+ print(f"Backend directory not found at {prescriptive_backend_path}.")
+
+# Import main functions from each application
+try:
+ from DescriptiveAnalysis.frontend import main as descriptive_main
+except ModuleNotFoundError as e:
+ print(f"Error importing DescriptiveAnalysis: {e}")
+ descriptive_main = None
+
+try:
+ from PredictiveAnalytics1.Frontend.app import main as predictive_main
+except ModuleNotFoundError as e:
+ print(f"Error importing PredictiveAnalytics1: {e}")
+ predictive_main = None
+
+try:
+ from preprocessing.app import main as preprocessing_main
+except ModuleNotFoundError as e:
+ print(f"Error importing preprocessing: {e}")
+ preprocessing_main = None
+
+try:
+ from PrescriptiveAnalysis1.Frontend.main import main as prescriptive_main
+except ModuleNotFoundError as e:
+ print(f"Error importing PrescriptiveAnalysis1: {e}")
+ prescriptive_main = None
+
+# Custom CSS for styling
+st.markdown("""
+
+""", unsafe_allow_html=True)
+
+def main():
+ """
+ Main function to run the Streamlit dashboard application.
+ Provides a sidebar to navigate between different analytics applications.
+ """
+ st.title("Analytics Dashboard")
+
+ # Display import errors, if any
+ if descriptive_main is None:
+ st.error("Cannot import DescriptiveAnalysis. Please check the directory structure and files.")
+ if predictive_main is None:
+ st.error("Cannot import PredictiveAnalytics1. Please check the directory structure and files.")
+ if preprocessing_main is None:
+ st.error("Cannot import preprocessing. Please check the directory structure and files.")
+ if prescriptive_main is None:
+ st.error("Cannot import PrescriptiveAnalysis1. Please check the directory structure and files.")
+
+ # Sidebar for application selection
+ st.sidebar.title("Navigation")
+ app_options = [
+ "Descriptive Analysis",
+ "Predictive Analytics",
+ "Preprocessing",
+ "Prescriptive Analysis"
+ ]
+ selected_app = st.sidebar.selectbox("Select Application", app_options)
+
+ # Map selected app to the corresponding main function
+ app_functions = {
+ "Descriptive Analysis": descriptive_main,
+ "Predictive Analytics": predictive_main,
+ "Preprocessing": preprocessing_main,
+ "Prescriptive Analysis": prescriptive_main
+ }
+
+ # Run the selected application's main function
+ with st.container():
+ st.markdown('<div class="app-container">', unsafe_allow_html=True)
+ selected_function = app_functions[selected_app]
+ if selected_function is None:
+ st.error(f"Cannot run {selected_app}. The module could not be imported. Please check the error messages above.")
+ else:
+ selected_function()
+ st.markdown('</div>', unsafe_allow_html=True)
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/src/DescriptiveAnalysis/__init__.py b/src/DescriptiveAnalysis/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/DescriptiveAnalysis/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/DescriptiveAnalysis/frontend.py b/src/DescriptiveAnalysis/frontend.py
index 4b9e281..7a0a88a 100644
--- a/src/DescriptiveAnalysis/frontend.py
+++ b/src/DescriptiveAnalysis/frontend.py
@@ -5,42 +5,7 @@
import streamlit.components.v1 as components
import pandas as pd
-from backend import generate_data_cubes, process_download
-
-# Set page configuration and custom CSS styling
-st.set_page_config(layout="wide")
-
-st.markdown("""
-
-""", unsafe_allow_html=True)
+from .backend import generate_data_cubes, process_download
def load_data():
"""
@@ -347,6 +312,39 @@ def main():
2. Drill-Down and Roll-Up
3. Batch Processing
"""
+ # Apply custom CSS styling
+ st.markdown("""
+
+ """, unsafe_allow_html=True)
+
st.title("Data Warehouse & Cube Generator")
# Load data from CSV file
@@ -387,4 +385,4 @@ def main():
batch_processing()
if __name__ == "__main__":
- main()
+ main()
\ No newline at end of file
diff --git a/src/PredictiveAnalytics1/Backend/__init__.py b/src/PredictiveAnalytics1/Backend/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/PredictiveAnalytics1/Backend/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/PredictiveAnalytics1/Frontend/__init__.py b/src/PredictiveAnalytics1/Frontend/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/PredictiveAnalytics1/Frontend/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/PredictiveAnalytics1/Frontend/app.py b/src/PredictiveAnalytics1/Frontend/app.py
index 0c1dd48..01cd523 100644
--- a/src/PredictiveAnalytics1/Frontend/app.py
+++ b/src/PredictiveAnalytics1/Frontend/app.py
@@ -4,20 +4,13 @@
import pandas as pd
import numpy as np
import os
-import sys
from pathlib import Path
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-from Backend.backend import (
- load_data, preprocess_data, apply_pca, determine_problem_type, train_model)
+from ..Backend.backend import load_data, preprocess_data, apply_pca, determine_problem_type, train_model
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import DBSCAN, SpectralClustering
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
-
-st.set_page_config(page_title="Model Comparison Tool", layout="wide")
-
-
@st.cache_data
def cached_load_data(file_path):
return load_data(file_path)
@@ -127,7 +120,11 @@ def main():
else:
current_file = Path(__file__).resolve()
dataset_dir = current_file.parents[3] / 'Datasets' / 'predictive-analytics-1'
- dataset_files = [f for f in os.listdir(dataset_dir) if f.endswith(".csv")]
+ if not dataset_dir.exists():
+ st.warning("Dataset directory not found. Please upload a CSV file.")
+ dataset_files = []
+ else:
+ dataset_files = [f for f in os.listdir(dataset_dir) if f.endswith(".csv")]
if dataset_files:
selected_dataset = st.selectbox("Select a dataset", dataset_files)
@@ -267,13 +264,11 @@ def main():
st.dataframe(comparison_df.set_index('Model').T)
-
st.subheader("Performance Visualization")
chart_data = comparison_df.melt(id_vars=['Model'], var_name='Metric', value_name='Score')
chart_data['Score'] = pd.to_numeric(chart_data['Score'], errors='coerce')
chart_data = chart_data.dropna(subset=['Score'])
-
if not chart_data.empty:
st.bar_chart(chart_data, x='Metric', y='Score', color='Model', stack=False)
else:
diff --git a/src/PredictiveAnalytics1/__init__.py b/src/PredictiveAnalytics1/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/PredictiveAnalytics1/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/PrescriptiveAnalysis1/Backend/__init__.py b/src/PrescriptiveAnalysis1/Backend/__init__.py
index e69de29..8d1c8b6 100644
--- a/src/PrescriptiveAnalysis1/Backend/__init__.py
+++ b/src/PrescriptiveAnalysis1/Backend/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/PrescriptiveAnalysis1/Frontend/__init__.py b/src/PrescriptiveAnalysis1/Frontend/__init__.py
index e69de29..8d1c8b6 100644
--- a/src/PrescriptiveAnalysis1/Frontend/__init__.py
+++ b/src/PrescriptiveAnalysis1/Frontend/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/PrescriptiveAnalysis1/Frontend/main.py b/src/PrescriptiveAnalysis1/Frontend/main.py
index 3e601d7..ef5a9e3 100644
--- a/src/PrescriptiveAnalysis1/Frontend/main.py
+++ b/src/PrescriptiveAnalysis1/Frontend/main.py
@@ -1,19 +1,19 @@
import streamlit as st
-import sys
-import os
import pandas as pd
+import os
import time
from collections import defaultdict
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-from Backend.gspan import run_gspan_analysis, construct_dfs_code, load_graphs_from_json
-from Backend.apriori_graph import parse_graph_file, apriori_graph_mining
-from Backend.gsp import preprocess_sequences_ordered, gsp_algorithm
-from Backend.apriori import run_apriori_analysis
-from Backend.fp_growth import run_fp_growth_analysis
+
+# Relative imports for Backend modules
+from ..Backend.gspan import run_gspan_analysis, construct_dfs_code, load_graphs_from_json
+from ..Backend.apriori_graph import parse_graph_file, apriori_graph_mining
+from ..Backend.gsp import preprocess_sequences_ordered, gsp_algorithm
+from ..Backend.apriori import run_apriori_analysis
+from ..Backend.fp_growth import run_fp_growth_analysis
def apriori_graph_mining_app():
st.title("Apriori-Based Graph Mining")
- uploaded_file = st.file_uploader("Upload your graph dataset file ", type=['txt'], key="apriori_file")
+ uploaded_file = st.file_uploader("Upload your graph dataset file", type=['txt'], key="apriori_file")
if uploaded_file is not None:
graphs = parse_graph_file(uploaded_file)
st.write(f"Number of graphs loaded: {len(graphs)}")
diff --git a/src/PrescriptiveAnalysis1/__init__.py b/src/PrescriptiveAnalysis1/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/PrescriptiveAnalysis1/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/preprocessing/__init__.py b/src/preprocessing/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/preprocessing/__init__.py
@@ -0,0 +1 @@
+
diff --git a/src/preprocessing/app.py b/src/preprocessing/app.py
index 219a5ee..b4b4b5f 100644
--- a/src/preprocessing/app.py
+++ b/src/preprocessing/app.py
@@ -9,10 +9,6 @@
from scipy.ndimage import gaussian_filter1d
from statsmodels.nonparametric.smoothers_lowess import lowess
-
-# Page config and custom UI styling
-st.set_page_config(page_title="Data Preprocessor", page_icon="📊", layout="wide")
-
st.markdown("""
""", unsafe_allow_html=True)
-
def handle_missing_values(data: pd.DataFrame) -> pd.DataFrame:
"""
Handles missing values using multiple imputation strategies.
@@ -54,7 +49,7 @@ def handle_missing_values(data: pd.DataFrame) -> pd.DataFrame:
])
if imputation_method == "Drop Rows with Missing Values":
- data.dropna(subset=columns_with_na, inplace=True)
+ data = data.dropna(subset=columns_with_na)
st.write("Dropped rows with missing values.")
else:
if numeric_columns:
@@ -62,35 +57,28 @@ def handle_missing_values(data: pd.DataFrame) -> pd.DataFrame:
strategy = st.selectbox("Select strategy", ["Mean", "Median", "Mode"])
if strategy == "Mode":
- # Determine threshold based on dataset size
total_rows = len(data)
if total_rows < 100:
- threshold = 0.05 # 5% for small datasets (< 100 rows)
+ threshold = 0.05
elif total_rows < 1000:
- threshold = 0.10 # 10% for medium datasets (100–999 rows)
+ threshold = 0.10
else:
threshold = 0.15
- # Check if data has few unique values or strong central tendency
for column in numeric_columns:
unique_values = data[column].nunique()
total_values = len(data[column].dropna())
unique_ratio = unique_values / total_values if total_values > 0 else 0
- # If less than 10% unique values, assume discrete with central tendency
if unique_ratio < threshold:
- # Can use mode imputation
- strategy = "Mode"
imputed_value = data[column].mode()[0]
data[column] = data[column].fillna(imputed_value)
st.write(f"Filled missing values in {column} with mode (discrete data): {imputed_value:.2f}")
-
else:
- # Offer mean or median for continuous data
strategy = st.radio(
- f"Select strategy for {column} (continuous data)",
- ["Median", "Mean"],
- horizontal=True
+ f"Select strategy for {column} (continuous data)",
+ ["Median", "Mean"],
+ horizontal=True
)
imputed_value = (
data[column].mean() if strategy == "Mean"
@@ -98,10 +86,8 @@ def handle_missing_values(data: pd.DataFrame) -> pd.DataFrame:
)
data[column] = data[column].fillna(imputed_value)
st.write(f"Filled missing values in {column} with {strategy.lower()}: {imputed_value:.2f}")
-
else:
for column in numeric_columns:
- # Compute value based on strategy
imputed_value = (
data[column].mean() if strategy == "Mean"
else data[column].median()
@@ -109,18 +95,15 @@ def handle_missing_values(data: pd.DataFrame) -> pd.DataFrame:
data[column] = data[column].fillna(imputed_value)
st.write(f"Filled missing values in {column} with {strategy.lower()}: {imputed_value:.2f}")
else:
- # Use regression or tree-based imputation
imputer = IterativeImputer(
estimator=DecisionTreeRegressor() if imputation_method == "Decision Tree Imputation" else None,
random_state=42
)
data[numeric_columns] = imputer.fit_transform(data[numeric_columns])
st.write(f"{imputation_method} applied on numeric columns.")
-
else:
- st.warning(f"No missing value is found in numeric columns.")
+ st.warning("No missing values found in numeric columns.")
- # Fill categorical columns with mode
categorical_columns = [col for col in columns_with_na if col not in numeric_columns]
for column in categorical_columns:
mode_value = data[column].mode()[0] if not data[column].mode().empty else ""
@@ -131,7 +114,6 @@ def handle_missing_values(data: pd.DataFrame) -> pd.DataFrame:
st.dataframe(data.head())
return data
-
def smooth_data(data: pd.DataFrame) -> pd.DataFrame:
"""
Applies smoothing to all numeric columns using selected technique.
@@ -148,9 +130,8 @@ def smooth_data(data: pd.DataFrame) -> pd.DataFrame:
smoothing_window = st.slider("Smoothing intensity", 3, 15, 5)
for column in numeric_columns:
- # Apply selected smoothing technique
if smoothing_method == "Moving Average":
- data[f"{column}_smoothed"] = data[column].rolling(window=smoothing_window).mean()
+ data[f"{column}_smoothed"] = data[column].rolling(window=smoothing_window, min_periods=1).mean()
elif smoothing_method == "Exponential":
data[f"{column}_smoothed"] = data[column].ewm(span=smoothing_window).mean()
elif smoothing_method == "Gaussian":
@@ -159,19 +140,15 @@ def smooth_data(data: pd.DataFrame) -> pd.DataFrame:
loess_result = lowess(data[column], np.arange(len(data)), frac=smoothing_window / len(data))
data[f"{column}_smoothed"] = loess_result[:, 1]
- # Store a backup and display results
st.session_state.smoothing_df = data.copy()
st.write("Preview after smoothing:")
st.dataframe(data.head())
- # Plot all smoothed columns
smoothed_cols = [f"{col}_smoothed" for col in numeric_columns if f"{col}_smoothed" in data.columns]
fig = px.line(data[smoothed_cols], title="Smoothed Data Visualization")
-
st.plotly_chart(fig, use_container_width=True)
return data
-
def handle_outliers(data: pd.DataFrame) -> pd.DataFrame:
"""
Detects and treats outliers in numeric columns using selected detection and treatment strategies.
@@ -190,12 +167,10 @@ def handle_outliers(data: pd.DataFrame) -> pd.DataFrame:
threshold, lower_percentile, upper_percentile = 3.0, 1.0, 99.0
iqr_multiplier = 1.5
- # Get user input based on method
if detection_method == "IQR":
iqr_multiplier = st.slider("IQR Multiplier", 1.0, 5.0, 1.5)
-
elif detection_method in ["Z-Score", "Modified Z-Score"]:
- threshold = st.slider("Threshold", 1.0, 5.0, 3.0)
+ threshold = st.slider("Threshold", 1.0, 5.0, 3.0)
else:
lower_percentile = st.slider("Lower percentile", 0.0, 10.0, 1.0)
upper_percentile = st.slider("Upper percentile", 90.0, 100.0, 99.0)
@@ -203,7 +178,6 @@ def handle_outliers(data: pd.DataFrame) -> pd.DataFrame:
total_outlier_count = 0
for column in numeric_columns:
- # Detect outliers based on selected method
if detection_method == "IQR":
q1 = data[column].quantile(0.25)
q3 = data[column].quantile(0.75)
@@ -211,17 +185,14 @@ def handle_outliers(data: pd.DataFrame) -> pd.DataFrame:
lower_bound = q1 - iqr_multiplier * iqr
upper_bound = q3 + iqr_multiplier * iqr
outliers = data[(data[column] < lower_bound) | (data[column] > upper_bound)]
-
elif detection_method == "Z-Score":
z_scores = (data[column] - data[column].mean()) / data[column].std()
outliers = data[np.abs(z_scores) > threshold]
-
elif detection_method == "Modified Z-Score":
median = data[column].median()
median_abs_dev = np.median(np.abs(data[column] - median))
modified_z_scores = 0.6745 * (data[column] - median) / median_abs_dev
outliers = data[np.abs(modified_z_scores) > threshold]
-
else:
lower_bound = data[column].quantile(lower_percentile / 100)
upper_bound = data[column].quantile(upper_percentile / 100)
@@ -230,14 +201,11 @@ def handle_outliers(data: pd.DataFrame) -> pd.DataFrame:
num_outliers = len(outliers)
total_outlier_count += num_outliers
- # Apply chosen treatment to outliers
if num_outliers > 0:
if treatment_strategy == "Remove":
data = data[~data.index.isin(outliers.index)]
-
elif treatment_strategy == "Cap":
data[column] = np.clip(data[column], lower_bound, upper_bound)
-
else:
median_value = data[column].median()
data.loc[outliers.index, column] = median_value
@@ -247,12 +215,10 @@ def handle_outliers(data: pd.DataFrame) -> pd.DataFrame:
st.write(f"Total outliers handled: {total_outlier_count}")
st.dataframe(data.head())
- # Show box plot for updated data
fig = px.box(data.select_dtypes(include=np.number), title="Box Plot After Outlier Treatment")
st.plotly_chart(fig, use_container_width=True)
return data
-
def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
"""
Performs variance analysis and feature selection on numeric columns.
@@ -268,7 +234,6 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
st.error("No numeric columns found for variance analysis!")
return data
- # Compute variance, standard deviation, and coefficient of variation
variance_summary = pd.DataFrame({
'Feature': numeric_columns,
'Variance': [data[column].var() for column in numeric_columns],
@@ -279,13 +244,10 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
]
}).sort_values('Variance', ascending=False).reset_index(drop=True)
- # Tabs for chart view and table view
tab_visuals, tab_table = st.tabs(["Visualization", "Data"])
with tab_visuals:
st.subheader("Feature Variance Distribution")
-
- # Variance bar chart
fig_variance = px.bar(
variance_summary,
x='Feature',
@@ -296,7 +258,6 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
)
st.plotly_chart(fig_variance, use_container_width=True)
- # Coefficient of Variation chart
cv_data = variance_summary.dropna(subset=['Coefficient of Variation'])
if not cv_data.empty:
fig_cv = px.bar(
@@ -313,8 +274,6 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
st.dataframe(variance_summary, use_container_width=True)
st.subheader("Feature Selection")
-
- # Choose selection method
selection_strategy = st.radio(
"Selection method:",
["Variance Threshold", "Top N Features", "Manual Selection"],
@@ -330,7 +289,6 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
step=0.05
)
selected_features = variance_summary[variance_summary['Variance'] >= min_variance]['Feature'].tolist()
-
elif selection_strategy == "Top N Features":
top_n = st.slider(
"Number of top features to keep:",
@@ -340,9 +298,7 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
step=1
)
selected_features = variance_summary.head(top_n)['Feature'].tolist()
-
else:
- # Manual feature picker
selected_features = st.multiselect(
"Select features to keep:",
options=numeric_columns,
@@ -354,18 +310,15 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
st.write("Selected features:")
st.write(", ".join(selected_features))
- # Optionally create new dataset with selected features
if st.checkbox("Create dataset with only selected features"):
include_categorical = st.checkbox("Include non-numeric columns", value=True)
non_numeric_columns = [col for col in data.columns if col not in numeric_columns]
-
final_columns = selected_features + non_numeric_columns if include_categorical else selected_features
selected_data = data[final_columns].copy()
st.write("Preview of dataset with selected features:")
st.dataframe(selected_data.head(), use_container_width=True)
- # Download option
csv_data = selected_data.to_csv(index=False).encode('utf-8')
st.download_button(
"Download selected features dataset",
@@ -374,18 +327,15 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
mime="text/csv"
)
- # Optionally update original dataset
if st.checkbox("Update main dataset to only include selected features"):
data = selected_data.copy()
st.info("Main dataset updated to include only selected features")
else:
st.warning("No features selected. Please adjust your selection criteria.")
- # Correlation analysis among selected features
if len(selected_features) > 1:
st.subheader("Correlation Analysis for Selected Features")
correlation_matrix = data[selected_features].corr()
-
fig_corr = px.imshow(
correlation_matrix,
title="Correlation Matrix for Selected Features",
@@ -396,12 +346,9 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
st.plotly_chart(fig_corr, use_container_width=True)
correlation_threshold = st.slider("Correlation threshold for highlighting:", 0.0, 1.0, 0.8, 0.05)
-
- # Find pairs with high correlation
high_correlation_pairs = correlation_matrix.where(
(np.abs(correlation_matrix) > correlation_threshold) & (np.abs(correlation_matrix) < 1.0)
).stack().reset_index()
-
high_correlation_pairs.columns = ['Feature 1', 'Feature 2', 'Correlation']
if not high_correlation_pairs.empty:
@@ -413,8 +360,7 @@ def analyse_variance(data: pd.DataFrame) -> pd.DataFrame:
else:
st.info(f"No feature pairs with correlation above {correlation_threshold} found.")
- return data
-
+ return data
def main():
"""
@@ -431,9 +377,8 @@ def main():
if uploaded_file:
try:
- # Read the uploaded file
raw_data = pd.read_csv(uploaded_file) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file)
- st.session_state.df = raw_data
+ st.session_state.df = raw_data.copy()
st.success(f"✅ Loaded {len(raw_data)} rows × {len(raw_data.columns)} columns")
@@ -462,13 +407,11 @@ def main():
- **Duplicate Rows:** {raw_data.duplicated().sum()}
""")
- # Apply preprocessing steps sequentially
- processed_data = handle_missing_values(raw_data)
+ processed_data = handle_missing_values(raw_data.copy())
processed_data = smooth_data(processed_data)
processed_data = handle_outliers(processed_data)
processed_data = analyse_variance(processed_data)
- # Downloading feature : Preprocessed dataset
st.subheader("📥 Download Final Preprocessed Dataset")
csv_final = processed_data.to_csv(index=False).encode('utf-8')
st.download_button(
@@ -482,4 +425,4 @@ def main():
st.error(f"Error loading file: {str(error)}")
if __name__ == "__main__":
- main()
+ main()
\ No newline at end of file