diff --git a/README.md b/README.md index c6c6e5d..a1f1cd1 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,128 @@ regression_results = ann_regression.evaluate( print(regression_results) +## **Transfer Learning with VGG16** 🚀 + +PyDeepFlow now supports **transfer learning** with the VGG16 architecture! This powerful feature allows you to leverage pretrained deep learning models for your custom computer vision tasks. + +### **What is VGG16?** + +VGG16 is a deep convolutional neural network with 16 layers (13 convolutional + 3 fully connected) that was developed by the Visual Geometry Group at Oxford. It's widely used for image classification and feature extraction tasks. + +### **Quick Start with VGG16** + +```python +from pydeepflow.pretrained import VGG16 +from pydeepflow.transfer_learning import TransferLearningManager +import numpy as np + +# Load VGG16 for your custom dataset (e.g., 10 classes) +vgg = VGG16(num_classes=10, freeze_features=True) + +# Display architecture +vgg.summary() + +# Use Transfer Learning Manager for structured workflow +manager = TransferLearningManager(vgg) + +# Phase 1: Feature Extraction (train only classifier) +manager.setup_feature_extraction() +# Train your model here with frozen conv layers... + +# Phase 2: Fine-Tuning (unfreeze last conv block) +manager.setup_fine_tuning(num_layers=3) +# Continue training with unfrozen layers... +``` + +### **Transfer Learning Workflows** + +#### **1. Feature Extraction (Small Datasets)** +Best for datasets with < 1000 samples: + +```python +# Freeze all convolutional layers +vgg = VGG16(num_classes=5, freeze_features=True) + +# Train only the classifier +# Recommended: 10-20 epochs, LR = 1e-2 +``` + +#### **2. Fine-Tuning (Medium to Large Datasets)** +Best for datasets with > 1000 samples: + +```python +# Start with frozen features +vgg = VGG16(num_classes=10, freeze_features=True) +# Train classifier first... + +# Then unfreeze last conv block for fine-tuning +vgg.unfreeze_layers(num_layers=3) +# Continue training with lower LR (1e-3 to 1e-4) +``` + +#### **3. Feature Extraction Only** +Use VGG16 as a feature extractor for other classifiers: + +```python +# Create VGG16 without classifier layers +vgg_features = VGG16(include_top=False) + +# Extract features +features = vgg_features.predict(X_images) + +# Use features with SVM, Random Forest, or custom classifier +``` + +### **Key Features** + +- ✅ **Full VGG16 Architecture**: 13 conv layers + 3 FC layers +- ✅ **Layer Freezing/Unfreezing**: Fine-grained control over trainable layers +- ✅ **Transfer Learning Manager**: Structured workflow for best practices +- ✅ **Weight Save/Load**: Save and load pretrained weights +- ✅ **GPU Support**: Accelerate training with CUDA +- ✅ **Progressive Unfreezing**: Gradually unfreeze layers to prevent catastrophic forgetting + +### **Advanced Usage** + +```python +from pydeepflow.transfer_learning import ( + calculate_trainable_params, + print_transfer_learning_guide +) + +# Get detailed parameter information +params = calculate_trainable_params(vgg) +print(f"Trainable: {params['trainable']:,} / {params['total']:,}") + +# Display best practices guide +print_transfer_learning_guide() + +# Progressive unfreezing strategy +manager = TransferLearningManager(vgg) +stages = manager.progressive_unfreeze(stages=3) + +for stage_info in stages: + vgg.unfreeze_layers(num_layers=stage_info['layers_to_unfreeze']) + # Train with recommended learning rate... 
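+    # Optional sanity check (uses this PR's calculate_trainable_params helper,
+    # imported above): report what became trainable after each stage.
+    params = calculate_trainable_params(vgg)
+    print(f"Stage {stage_info['stage']}: {params['trainable']:,} trainable parameters")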
+``` + +### **Examples** + +Check out the `examples/vgg16_transfer_learning.py` file for comprehensive examples including: +- Basic VGG16 usage +- Feature extraction workflow +- Fine-tuning strategies +- Progressive unfreezing +- Feature extraction without classifier + +### **Tests** + +Run VGG16 tests to verify functionality: + +```bash +python -m pytest tests/test_vgg16.py -v +``` + ## **GPU and CPU Support** `PyDeepFlow` is designed to be flexible and can run on both CPUs and NVIDIA GPUs. diff --git a/pydeepflow/__init__.py b/pydeepflow/__init__.py index 2711150..f8d03a3 100644 --- a/pydeepflow/__init__.py +++ b/pydeepflow/__init__.py @@ -26,6 +26,21 @@ except ImportError: _has_cnn = False +# Try to import pretrained models and transfer learning utilities +try: + from .pretrained import VGG16 + from .transfer_learning import ( + TransferLearningManager, + freeze_layers, + unfreeze_layers, + get_layer_info, + calculate_trainable_params, + print_transfer_learning_guide + ) + _has_pretrained = True +except ImportError: + _has_pretrained = False + __all__ = [ "activation", "activation_derivative", @@ -55,3 +70,14 @@ if _has_cnn: __all__.extend(["ConvLayer", "Flatten", "Multi_Layer_CNN"]) + +if _has_pretrained: + __all__.extend([ + "VGG16", + "TransferLearningManager", + "freeze_layers", + "unfreeze_layers", + "get_layer_info", + "calculate_trainable_params", + "print_transfer_learning_guide" + ]) diff --git a/pydeepflow/pretrained/__init__.py b/pydeepflow/pretrained/__init__.py new file mode 100644 index 0000000..7c54932 --- /dev/null +++ b/pydeepflow/pretrained/__init__.py @@ -0,0 +1,10 @@ +""" +Pretrained Models Module for PyDeepFlow + +This module provides pretrained deep learning architectures for transfer learning, +starting with VGG16 and expandable to other architectures like ResNet, VGG19, etc. +""" + +from .vgg16 import VGG16 + +__all__ = ['VGG16'] diff --git a/pydeepflow/pretrained/vgg16.py b/pydeepflow/pretrained/vgg16.py new file mode 100644 index 0000000..cfa7592 --- /dev/null +++ b/pydeepflow/pretrained/vgg16.py @@ -0,0 +1,680 @@ +""" +VGG16 Architecture Implementation for PyDeepFlow + +This module implements the VGG16 deep convolutional neural network architecture +for transfer learning and feature extraction tasks. + +VGG16 Architecture: +- Input: 224x224x3 RGB images +- 5 convolutional blocks with max pooling +- 3 fully connected layers +- Total: 13 conv layers + 3 FC layers = 16 layers with learnable parameters + +Reference: +Simonyan, K., & Zisserman, A. (2014). Very Deep Convolutional Networks for +Large-Scale Image Recognition. arXiv:1409.1556 +""" + +import numpy as np +from pydeepflow.model import ConvLayer, MaxPooling2D, Flatten, Multi_Layer_CNN +from pydeepflow.device import Device +from pydeepflow.weight_initialization import WeightInitializer +from pydeepflow.activations import activation +import warnings + + +class VGG16: + """ + VGG16 Convolutional Neural Network for Transfer Learning. 
+ + This class implements the VGG16 architecture, which consists of: + - Block 1: 2 conv layers (64 filters) + max pool + - Block 2: 2 conv layers (128 filters) + max pool + - Block 3: 3 conv layers (256 filters) + max pool + - Block 4: 3 conv layers (512 filters) + max pool + - Block 5: 3 conv layers (512 filters) + max pool + - Flatten layer + - FC layer 1: 4096 neurons + - FC layer 2: 4096 neurons + - FC layer 3: num_classes neurons (output) + + All convolutional layers use: + - 3x3 kernels + - Stride of 1 + - Padding of 1 (to preserve spatial dimensions) + - ReLU activation + + All max pooling layers use: + - 2x2 window + - Stride of 2 + + Parameters + ---------- + num_classes : int, optional + Number of output classes for classification. Default is 1000 (ImageNet). + input_shape : tuple, optional + Input image shape (height, width, channels). Default is (224, 224, 3). + use_gpu : bool, optional + Whether to use GPU acceleration. Default is False. + include_top : bool, optional + Whether to include the fully connected layers at the top. Default is True. + Set to False for feature extraction. + weights : str or None, optional + Path to pretrained weights file or None for random initialization. + Default is None. + freeze_features : bool, optional + If True, freeze convolutional layers for feature extraction mode. + Default is False. + + Attributes + ---------- + device : Device + Device object for CPU/GPU operations. + num_classes : int + Number of output classes. + input_shape : tuple + Expected input shape. + include_top : bool + Whether fully connected layers are included. + layers : list + List of all layers in the network. + feature_layers : list + List of convolutional/pooling layers only. + classifier_layers : list + List of fully connected layers only. + frozen_layers : set + Set of layer indices that are frozen (not trainable). + + Examples + -------- + Create VGG16 for transfer learning on a custom dataset: + + >>> from pydeepflow.pretrained import VGG16 + >>> + >>> # For feature extraction (freeze conv layers, train only FC layers) + >>> vgg = VGG16(num_classes=10, freeze_features=True) + >>> + >>> # For fine-tuning (train all layers) + >>> vgg = VGG16(num_classes=10, freeze_features=False) + >>> + >>> # For feature extraction without classification head + >>> vgg_features = VGG16(include_top=False) + >>> features = vgg_features.predict(X) # Extract features + """ + + def __init__(self, num_classes=1000, input_shape=(224, 224, 3), + use_gpu=False, include_top=True, weights=None, + freeze_features=False): + """Initialize VGG16 architecture.""" + self.device = Device(use_gpu=use_gpu) + self.num_classes = num_classes + self.input_shape = input_shape + self.include_top = include_top + self.frozen_layers = set() + + # Validate input shape + if len(input_shape) != 3: + raise ValueError(f"input_shape must be 3D (H, W, C), got {input_shape}") + + if input_shape[2] != 3: + warnings.warn(f"VGG16 was designed for RGB images (3 channels), " + f"but got {input_shape[2]} channels. This may affect performance.") + + # Build the architecture + self.layers = [] + self.feature_layers = [] + self.classifier_layers = [] + + self._build_architecture() + + # Load pretrained weights if provided + if weights is not None: + self.load_weights(weights) + + # Freeze feature layers if requested + if freeze_features: + self.freeze_feature_layers() + + def _build_architecture(self): + """ + Build the complete VGG16 architecture. 
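+
+        VGG16 layer configuration (configuration "D" in Simonyan & Zisserman):
+        64, 64, M, 128, 128, M, 256, 256, 256, M, 512, 512, 512, M,
+        512, 512, 512, M (numbers are conv output channels; M is 2x2 max pooling).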
+
+        This method constructs all layers of VGG16 according to the original paper.
+        """
+        # Track current spatial dimensions
+        H, W, C = self.input_shape
+
+        # ====================================================================
+        # BLOCK 1: 2x Conv(64) + MaxPool
+        # ====================================================================
+        # Conv1_1: 3 -> 64 channels (3x3 kernels)
+        conv1_1 = ConvLayer(
+            in_channels=C,
+            out_channels=64,   # number of 3x3 filters in this layer
+            kernel_size=3,     # each filter is convolved across the input
+            stride=1,
+            padding=1,         # padding of 1 preserves spatial dimensions
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv1_1)
+        self.feature_layers.append(conv1_1)
+        C = 64  # update channel count
+
+        # Conv1_2: 64 -> 64 channels
+        conv1_2 = ConvLayer(
+            in_channels=64,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv1_2)
+        self.feature_layers.append(conv1_2)
+
+        # MaxPool1: 2x2, stride 2
+        pool1 = MaxPooling2D(pool_size=(2, 2), stride=2)
+        self.layers.append(pool1)
+        self.feature_layers.append(pool1)
+        H, W = H // 2, W // 2  # spatial dimensions halved
+
+        # ====================================================================
+        # BLOCK 2: 2x Conv(128) + MaxPool
+        # ====================================================================
+        # Conv2_1: 64 -> 128 channels
+        conv2_1 = ConvLayer(
+            in_channels=64,
+            out_channels=128,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv2_1)
+        self.feature_layers.append(conv2_1)
+        C = 128
+
+        # Conv2_2: 128 -> 128 channels
+        conv2_2 = ConvLayer(
+            in_channels=128,
+            out_channels=128,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv2_2)
+        self.feature_layers.append(conv2_2)
+
+        # MaxPool2: 2x2, stride 2
+        pool2 = MaxPooling2D(pool_size=(2, 2), stride=2)
+        self.layers.append(pool2)
+        self.feature_layers.append(pool2)
+        H, W = H // 2, W // 2
+
+        # ====================================================================
+        # BLOCK 3: 3x Conv(256) + MaxPool
+        # ====================================================================
+        # Conv3_1: 128 -> 256 channels
+        conv3_1 = ConvLayer(
+            in_channels=128,
+            out_channels=256,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv3_1)
+        self.feature_layers.append(conv3_1)
+        C = 256
+
+        # Conv3_2: 256 -> 256 channels
+        conv3_2 = ConvLayer(
+            in_channels=256,
+            out_channels=256,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv3_2)
+        self.feature_layers.append(conv3_2)
+
+        # Conv3_3: 256 -> 256 channels
+        conv3_3 = ConvLayer(
+            in_channels=256,
+            out_channels=256,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv3_3)
+        self.feature_layers.append(conv3_3)
+
+        # MaxPool3: 2x2, stride 2
+        pool3 = MaxPooling2D(pool_size=(2, 2), stride=2)
+        self.layers.append(pool3)
+        self.feature_layers.append(pool3)
+        H, W = H // 2, W // 2
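+        # At this point the feature map is (H/8, W/8, 256), i.e. 28x28x256
+        # for the default 224x224x3 input.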
+
+        # ====================================================================
+        # BLOCK 4: 3x Conv(512) + MaxPool
+        # ====================================================================
+        # Conv4_1: 256 -> 512 channels
+        conv4_1 = ConvLayer(
+            in_channels=256,
+            out_channels=512,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv4_1)
+        self.feature_layers.append(conv4_1)
+        C = 512
+
+        # Conv4_2: 512 -> 512 channels
+        conv4_2 = ConvLayer(
+            in_channels=512,
+            out_channels=512,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv4_2)
+        self.feature_layers.append(conv4_2)
+
+        # Conv4_3: 512 -> 512 channels
+        conv4_3 = ConvLayer(
+            in_channels=512,
+            out_channels=512,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv4_3)
+        self.feature_layers.append(conv4_3)
+
+        # MaxPool4: 2x2, stride 2
+        pool4 = MaxPooling2D(pool_size=(2, 2), stride=2)
+        self.layers.append(pool4)
+        self.feature_layers.append(pool4)
+        H, W = H // 2, W // 2
+
+        # ====================================================================
+        # BLOCK 5: 3x Conv(512) + MaxPool
+        # ====================================================================
+        # Conv5_1: 512 -> 512 channels
+        conv5_1 = ConvLayer(
+            in_channels=512,
+            out_channels=512,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv5_1)
+        self.feature_layers.append(conv5_1)
+
+        # Conv5_2: 512 -> 512 channels
+        conv5_2 = ConvLayer(
+            in_channels=512,
+            out_channels=512,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv5_2)
+        self.feature_layers.append(conv5_2)
+
+        # Conv5_3: 512 -> 512 channels
+        conv5_3 = ConvLayer(
+            in_channels=512,
+            out_channels=512,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            device=self.device,
+            activation='relu',
+            weight_init='he_normal'
+        )
+        self.layers.append(conv5_3)
+        self.feature_layers.append(conv5_3)
+
+        # MaxPool5: 2x2, stride 2
+        pool5 = MaxPooling2D(pool_size=(2, 2), stride=2)
+        self.layers.append(pool5)
+        self.feature_layers.append(pool5)
+        H, W = H // 2, W // 2
+
+        # ====================================================================
+        # FULLY CONNECTED LAYERS (Classifier)
+        # ====================================================================
+        if self.include_top:
+            # Flatten layer
+            flatten = Flatten()
+            self.layers.append(flatten)
+
+            # Calculate flattened size (7 * 7 * 512 = 25088 for a 224x224 input)
+            flattened_size = H * W * 512
+
+            # FC1: 4096 neurons with ReLU
+            initializer = WeightInitializer(
+                device=self.device,
+                mode='auto',
+                bias_init='auto'
+            )
+            fc1_w, fc1_b, _ = initializer.initialize_dense_layer(
+                input_dim=flattened_size,
+                output_dim=4096,
+                activation='relu'
+            )
+            fc1 = {
+                'W': self.device.array(fc1_w),
+                'b': self.device.array(fc1_b.reshape(1, -1)),
+                'activation': 'relu'
+            }
+            self.layers.append(fc1)
+            self.classifier_layers.append(fc1)
+
+            # FC2: 4096 neurons with ReLU
+            fc2_w, fc2_b, _ = initializer.initialize_dense_layer(
+                input_dim=4096,
+                output_dim=4096,
+                activation='relu'
+            )
+            fc2 = {
+                'W': self.device.array(fc2_w),
+                'b': self.device.array(fc2_b.reshape(1, -1)),
+                'activation': 'relu'
+            }
+            self.layers.append(fc2)
+            self.classifier_layers.append(fc2)
+
+            # FC3 (Output): num_classes neurons with softmax
+            output_activation = 'softmax' if self.num_classes > 1 else 'sigmoid'
+            fc3_w, fc3_b, _ = initializer.initialize_dense_layer(
+                input_dim=4096,
+                output_dim=self.num_classes,
+                activation=output_activation
+            )
+            fc3 = {
+                'W': self.device.array(fc3_w),
+                'b': self.device.array(fc3_b.reshape(1, -1)),
+                'activation': output_activation
+            }
+            self.layers.append(fc3)
+            self.classifier_layers.append(fc3)
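+
+            # Informational: with the defaults (224x224x3 input, num_classes=1000,
+            # include_top=True) the layers above total ~138M parameters, matching
+            # the original VGG16.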
+
+    def forward(self, X, training=False):
+        """
+        Perform forward pass through the network.
+
+        Parameters
+        ----------
+        X : ndarray
+            Input data with shape (N, H, W, C).
+        training : bool, optional
+            Whether in training mode (affects dropout, batch norm if added).
+            Default is False.
+
+        Returns
+        -------
+        ndarray
+            Output activations. Shape depends on include_top:
+            - If include_top=True: (N, num_classes)
+            - If include_top=False: (N, H', W', 512) feature maps
+        """
+        # Validate input shape
+        if X.ndim != 4:
+            raise ValueError(f"Input must be 4D (N, H, W, C), got shape {X.shape}")
+
+        if X.shape[1:] != self.input_shape:
+            warnings.warn(f"Input shape {X.shape[1:]} differs from expected "
+                          f"{self.input_shape}. This may affect performance.")
+
+        current_output = X
+
+        # Pass through all layers. Freezing does not change the forward pass:
+        # frozen layers still run; they are only excluded from parameter
+        # updates (see get_trainable_params).
+        for i, layer in enumerate(self.layers):
+            if isinstance(layer, (ConvLayer, MaxPooling2D, Flatten)):
+                current_output = layer.forward(current_output)
+            elif isinstance(layer, dict) and 'W' in layer:
+                # Dense layer
+                Z = self.device.dot(current_output, layer['W']) + layer['b']
+                current_output = activation(Z, layer['activation'], self.device)
+
+        return current_output
+
+    def predict(self, X):
+        """
+        Make predictions on input data.
+
+        Parameters
+        ----------
+        X : ndarray
+            Input data with shape (N, H, W, C).
+
+        Returns
+        -------
+        ndarray
+            Predictions. For classification, returns class probabilities.
+        """
+        return self.forward(X, training=False)
+
+    def freeze_feature_layers(self):
+        """
+        Freeze all convolutional layers for feature extraction.
+
+        This is useful for transfer learning when you want to use VGG16
+        as a fixed feature extractor and only train the classifier layers.
+        """
+        for i, layer in enumerate(self.layers):
+            if isinstance(layer, ConvLayer):
+                self.frozen_layers.add(i)
+
+        print(f"Froze {len(self.frozen_layers)} convolutional layers for feature extraction.")
+
+    def unfreeze_layers(self, layer_names=None, num_layers=None):
+        """
+        Unfreeze specific layers or the last N layers for fine-tuning.
+
+        Parameters
+        ----------
+        layer_names : list of int, optional
+            List of layer indices to unfreeze. If None, unfreezes based on num_layers.
+        num_layers : int, optional
+            Number of layers from the end to unfreeze. If None and layer_names is None,
+            unfreezes all layers.
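+
+        Notes
+        -----
+        ``num_layers`` counts convolutional layers only; classifier (FC)
+        layers are never frozen by ``freeze_feature_layers`` and therefore
+        stay trainable throughout.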
+
+        Examples
+        --------
+        >>> # Unfreeze the last 3 conv blocks (3 + 3 + 3 = 9 conv layers)
+        >>> vgg.unfreeze_layers(num_layers=9)
+        >>>
+        >>> # Unfreeze specific layers
+        >>> vgg.unfreeze_layers(layer_names=[10, 11, 12])
+        >>>
+        >>> # Unfreeze all layers
+        >>> vgg.unfreeze_layers()
+        """
+        if layer_names is not None:
+            # Unfreeze specific layers
+            for idx in layer_names:
+                if idx in self.frozen_layers:
+                    self.frozen_layers.remove(idx)
+            print(f"Unfroze layers: {layer_names}")
+        elif num_layers is not None:
+            # Unfreeze last N layers
+            conv_indices = [i for i, layer in enumerate(self.layers)
+                            if isinstance(layer, ConvLayer)]
+            to_unfreeze = conv_indices[-num_layers:] if num_layers <= len(conv_indices) else conv_indices
+            for idx in to_unfreeze:
+                if idx in self.frozen_layers:
+                    self.frozen_layers.remove(idx)
+            print(f"Unfroze the last {len(to_unfreeze)} convolutional layers.")
+        else:
+            # Unfreeze all layers
+            self.frozen_layers.clear()
+            print("Unfroze all layers.")
+
+    def get_trainable_params(self):
+        """
+        Get all trainable parameters (not frozen).
+
+        Returns
+        -------
+        list
+            List of parameter arrays that should be updated during training.
+        """
+        trainable = []
+        for i, layer in enumerate(self.layers):
+            if i not in self.frozen_layers:
+                if isinstance(layer, ConvLayer):
+                    trainable.extend([layer.params['W'], layer.params['b']])
+                elif isinstance(layer, dict) and 'W' in layer:
+                    trainable.extend([layer['W'], layer['b']])
+        return trainable
+
+    def summary(self):
+        """
+        Print a summary of the VGG16 architecture.
+
+        Displays a layer-by-layer breakdown with output shapes and parameter counts.
+        """
+        print("=" * 80)
+        print("VGG16 Architecture Summary")
+        print("=" * 80)
+        print(f"Input Shape: {self.input_shape}")
+        print(f"Number of Classes: {self.num_classes}")
+        print(f"Include Top (FC Layers): {self.include_top}")
+        print(f"Frozen Layers: {len(self.frozen_layers)}")
+        print("=" * 80)
+        print(f"{'Layer':<30} {'Output Shape':<25} {'Params':<15}")
+        print("-" * 80)
+
+        # Track dimensions through network
+        H, W, C = self.input_shape
+        total_params = 0
+
+        for i, layer in enumerate(self.layers):
+            frozen_mark = " [FROZEN]" if i in self.frozen_layers else ""
+
+            if isinstance(layer, ConvLayer):
+                # Conv layer (stride 1, padding 1, so H and W are unchanged)
+                out_c = layer.out_channels
+                params = (layer.Fh * layer.Fw * layer.in_channels * out_c) + out_c
+                output_shape = f"({H}, {W}, {out_c})"
+                layer_name = f"Conv2D_{i}{frozen_mark}"
+                print(f"{layer_name:<30} {output_shape:<25} {params:<15,}")
+                total_params += params
+                C = out_c
+            elif isinstance(layer, MaxPooling2D):
+                # Max pooling
+                H, W = H // layer.stride, W // layer.stride
+                output_shape = f"({H}, {W}, {C})"
+                layer_name = f"MaxPooling2D_{i}"
+                print(f"{layer_name:<30} {output_shape:<25} {'0':<15}")
+            elif isinstance(layer, Flatten):
+                # Flatten
+                flat_size = H * W * C
+                output_shape = f"({flat_size},)"
+                layer_name = "Flatten"
+                print(f"{layer_name:<30} {output_shape:<25} {'0':<15}")
+            elif isinstance(layer, dict) and 'W' in layer:
+                # Dense layer
+                in_size = layer['W'].shape[0]
+                out_size = layer['W'].shape[1]
+                params = (in_size * out_size) + out_size
+                output_shape = f"({out_size},)"
+                layer_name = f"Dense_{i}{frozen_mark}"
+                print(f"{layer_name:<30} {output_shape:<25} {params:<15,}")
+                total_params += params
+
+        print("=" * 80)
+        print(f"Total Parameters: {total_params:,}")
+        trainable_params = sum(
+            np.prod(p.shape) for p in self.get_trainable_params()
+        )
+        print(f"Trainable Parameters: {trainable_params:,}")
+        print(f"Non-trainable Parameters: {total_params - trainable_params:,}")
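+
+        # Note: these counts reflect the current frozen_layers set, so the
+        # summary changes after freeze_feature_layers() / unfreeze_layers().
+        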
print("=" * 80) + + def save_weights(self, filepath): + """ + Save model weights to a file. + + Parameters + ---------- + filepath : str + Path to save the weights file (.npy format). + """ + weights_dict = {} + + for i, layer in enumerate(self.layers): + if isinstance(layer, ConvLayer): + weights_dict[f'conv_{i}_W'] = self.device.asnumpy(layer.params['W']) + weights_dict[f'conv_{i}_b'] = self.device.asnumpy(layer.params['b']) + elif isinstance(layer, dict) and 'W' in layer: + weights_dict[f'dense_{i}_W'] = self.device.asnumpy(layer['W']) + weights_dict[f'dense_{i}_b'] = self.device.asnumpy(layer['b']) + + np.save(filepath, weights_dict) + print(f"Model weights saved to {filepath}") + + def load_weights(self, filepath): + """ + Load model weights from a file. + + Parameters + ---------- + filepath : str + Path to the weights file (.npy format). + """ + try: + weights_dict = np.load(filepath, allow_pickle=True).item() + + for i, layer in enumerate(self.layers): + if isinstance(layer, ConvLayer): + if f'conv_{i}_W' in weights_dict: + layer.params['W'] = self.device.array(weights_dict[f'conv_{i}_W']) + layer.params['b'] = self.device.array(weights_dict[f'conv_{i}_b']) + elif isinstance(layer, dict) and 'W' in layer: + if f'dense_{i}_W' in weights_dict: + layer['W'] = self.device.array(weights_dict[f'dense_{i}_W']) + layer['b'] = self.device.array(weights_dict[f'dense_{i}_b']) + + print(f"Model weights loaded from {filepath}") + except FileNotFoundError: + raise FileNotFoundError(f"Weights file not found: {filepath}") + except Exception as e: + raise RuntimeError(f"Error loading weights: {str(e)}") diff --git a/pydeepflow/transfer_learning.py b/pydeepflow/transfer_learning.py new file mode 100644 index 0000000..15606d2 --- /dev/null +++ b/pydeepflow/transfer_learning.py @@ -0,0 +1,451 @@ +""" +Transfer Learning Utilities for PyDeepFlow + +This module provides utility functions and classes for transfer learning workflows, +including layer freezing, feature extraction, and fine-tuning strategies. +""" + +import numpy as np +from pydeepflow.model import ConvLayer +from pydeepflow.device import Device +import warnings + + +class TransferLearningManager: + """ + Manager class for transfer learning operations. + + This class provides high-level APIs for common transfer learning tasks: + - Feature extraction (frozen backbone) + - Fine-tuning (selective unfreezing) + - Progressive unfreezing strategies + + Parameters + ---------- + model : object + The pretrained model (e.g., VGG16) to manage. + + Attributes + ---------- + model : object + Reference to the model being managed. + training_history : dict + Records of training phases and their results. + + Examples + -------- + >>> from pydeepflow.pretrained import VGG16 + >>> from pydeepflow.transfer_learning import TransferLearningManager + >>> + >>> vgg = VGG16(num_classes=10) + >>> manager = TransferLearningManager(vgg) + >>> + >>> # Phase 1: Feature extraction (train only classifier) + >>> manager.setup_feature_extraction() + >>> # ... train model ... + >>> + >>> # Phase 2: Fine-tuning (unfreeze last conv block) + >>> manager.setup_fine_tuning(num_layers=3) + >>> # ... continue training ... + """ + + def __init__(self, model): + """Initialize the transfer learning manager.""" + self.model = model + self.training_history = { + 'phases': [], + 'frozen_counts': [], + 'learning_rates': [] + } + + def setup_feature_extraction(self): + """ + Set up the model for feature extraction mode. 
+
+        Freezes all convolutional layers, keeping only the classifier
+        layers trainable. This is the recommended first phase of transfer
+        learning when you have a small dataset.
+        """
+        self.model.freeze_feature_layers()
+        self.training_history['phases'].append('feature_extraction')
+        self.training_history['frozen_counts'].append(len(self.model.frozen_layers))
+
+        print("\n" + "=" * 70)
+        print("PHASE: Feature Extraction Mode")
+        print("=" * 70)
+        print("All convolutional layers are frozen.")
+        print("Only classifier (FC) layers will be trained.")
+        print("Recommended learning rate: 1e-3 to 1e-2")
+        print("=" * 70 + "\n")
+
+    def setup_fine_tuning(self, num_layers=None, layer_names=None,
+                          learning_rate_reduction=0.1):
+        """
+        Set up the model for fine-tuning mode.
+
+        Unfreezes the specified layers (or the last N layers) for fine-tuning.
+        This is typically done after initial training with frozen features.
+
+        Parameters
+        ----------
+        num_layers : int, optional
+            Number of layers from the end to unfreeze.
+        layer_names : list of int, optional
+            Specific layer indices to unfreeze.
+        learning_rate_reduction : float, optional
+            Factor to multiply the previous learning rate by. Default is 0.1.
+            Fine-tuning typically uses a lower learning rate than feature extraction.
+
+        Notes
+        -----
+        Common fine-tuning strategies:
+        - Unfreeze the last conv block (num_layers=3 for VGG16)
+        - Unfreeze the last 2 blocks (num_layers=6 for VGG16)
+        - Unfreeze all layers (num_layers=None and layer_names=None)
+        """
+        self.model.unfreeze_layers(layer_names=layer_names, num_layers=num_layers)
+        self.training_history['phases'].append('fine_tuning')
+        self.training_history['frozen_counts'].append(len(self.model.frozen_layers))
+
+        print("\n" + "=" * 70)
+        print("PHASE: Fine-Tuning Mode")
+        print("=" * 70)
+        print("Unfroze the requested layers for fine-tuning.")
+        print(f"Current frozen layer count: {len(self.model.frozen_layers)}")
+        print(f"Recommended LR reduction: {learning_rate_reduction}x")
+        print(f"Example: If previous LR was 1e-2, use {learning_rate_reduction * 1e-2:.0e}")
+        print("=" * 70 + "\n")
+
+    def progressive_unfreeze(self, stages=3):
+        """
+        Implement a progressive unfreezing strategy.
+
+        This gradually unfreezes layers in stages, which can help prevent
+        catastrophic forgetting of pretrained features.
+
+        Parameters
+        ----------
+        stages : int, optional
+            Number of unfreezing stages. Default is 3.
+
+        Returns
+        -------
+        list of dict
+            List of dictionaries defining each stage:
+            - 'stage': Stage number
+            - 'layers_to_unfreeze': Number of layers to unfreeze
+            - 'recommended_lr': Suggested learning rate multiplier
+
+        Examples
+        --------
+        >>> manager = TransferLearningManager(vgg16)
+        >>> stages = manager.progressive_unfreeze(stages=3)
+        >>>
+        >>> for stage_info in stages:
+        ...     print(f"Stage {stage_info['stage']}: Unfreeze {stage_info['layers_to_unfreeze']} layers")
+        ...     manager.model.unfreeze_layers(num_layers=stage_info['layers_to_unfreeze'])
+        ...     # Train with recommended_lr
+        ...     # ...
+        """
+        # Count convolutional layers
+        conv_layers = [i for i, layer in enumerate(self.model.layers)
+                       if isinstance(layer, ConvLayer)]
+        total_conv = len(conv_layers)
+
+        if stages > total_conv:
+            warnings.warn(f"Requested {stages} stages but only {total_conv} conv layers. "
+                          f"Using {total_conv} stages instead.")
+            stages = total_conv
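+
+        # Schedule arithmetic: stage k unfreezes the last
+        # k * (total_conv // stages) convolutional layers.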
" + f"Using {total_conv} stages instead.") + stages = total_conv + + layers_per_stage = total_conv // stages + + unfreeze_schedule = [] + for stage in range(1, stages + 1): + layers_to_unfreeze = layers_per_stage * stage + lr_multiplier = 0.1 ** (stages - stage) # Decrease LR as we unfreeze more + + unfreeze_schedule.append({ + 'stage': stage, + 'layers_to_unfreeze': layers_to_unfreeze, + 'recommended_lr': lr_multiplier, + 'description': f"Unfreeze last {layers_to_unfreeze} conv layers" + }) + + print("\n" + "=" * 70) + print("Progressive Unfreezing Schedule") + print("=" * 70) + for schedule in unfreeze_schedule: + print(f"Stage {schedule['stage']}: {schedule['description']}") + print(f" └─ Recommended LR multiplier: {schedule['recommended_lr']:.2e}") + print("=" * 70 + "\n") + + return unfreeze_schedule + + def get_training_summary(self): + """ + Get a summary of the transfer learning training history. + + Returns + ------- + dict + Dictionary with training phase information. + """ + return { + 'total_phases': len(self.training_history['phases']), + 'phases': self.training_history['phases'], + 'frozen_layer_progression': self.training_history['frozen_counts'] + } + + +def freeze_layers(model, layer_indices): + """ + Freeze specific layers by their indices. + + Parameters + ---------- + model : object + Model with a frozen_layers attribute (set of frozen layer indices). + layer_indices : list of int + Indices of layers to freeze. + + Examples + -------- + >>> from pydeepflow.transfer_learning import freeze_layers + >>> freeze_layers(vgg16, [0, 1, 2, 3, 4]) # Freeze first conv block + """ + if not hasattr(model, 'frozen_layers'): + model.frozen_layers = set() + + for idx in layer_indices: + model.frozen_layers.add(idx) + + print(f"Froze {len(layer_indices)} layers. Total frozen: {len(model.frozen_layers)}") + + +def unfreeze_layers(model, layer_indices=None): + """ + Unfreeze specific layers or all layers. + + Parameters + ---------- + model : object + Model with a frozen_layers attribute. + layer_indices : list of int or None, optional + Indices of layers to unfreeze. If None, unfreezes all layers. + + Examples + -------- + >>> from pydeepflow.transfer_learning import unfreeze_layers + >>> unfreeze_layers(vgg16, [10, 11, 12]) # Unfreeze specific layers + >>> unfreeze_layers(vgg16) # Unfreeze all layers + """ + if not hasattr(model, 'frozen_layers'): + print("No frozen layers found.") + return + + if layer_indices is None: + # Unfreeze all + model.frozen_layers.clear() + print("Unfroze all layers.") + else: + for idx in layer_indices: + if idx in model.frozen_layers: + model.frozen_layers.remove(idx) + print(f"Unfroze {len(layer_indices)} layers. Total frozen: {len(model.frozen_layers)}") + + +def get_layer_info(model): + """ + Get information about all layers in the model. + + Parameters + ---------- + model : object + Model with layers attribute. + + Returns + ------- + list of dict + List of dictionaries containing layer information: + - 'index': Layer index + - 'type': Layer type (ConvLayer, Dense, MaxPooling2D, etc.) + - 'trainable': Whether layer is trainable (not frozen) + - 'params': Number of parameters + + Examples + -------- + >>> from pydeepflow.transfer_learning import get_layer_info + >>> layer_info = get_layer_info(vgg16) + >>> for info in layer_info: + ... 
print(f"Layer {info['index']}: {info['type']} - Trainable: {info['trainable']}") + """ + if not hasattr(model, 'layers'): + raise AttributeError("Model does not have 'layers' attribute") + + frozen = getattr(model, 'frozen_layers', set()) + layer_info = [] + + for i, layer in enumerate(model.layers): + info = { + 'index': i, + 'trainable': i not in frozen + } + + if isinstance(layer, ConvLayer): + info['type'] = 'ConvLayer' + params = (layer.Fh * layer.Fw * layer.in_channels * layer.out_channels) + layer.out_channels + info['params'] = params + elif isinstance(layer, dict) and 'W' in layer: + info['type'] = 'Dense' + params = np.prod(layer['W'].shape) + np.prod(layer['b'].shape) + info['params'] = params + else: + info['type'] = type(layer).__name__ + info['params'] = 0 + + layer_info.append(info) + + return layer_info + + +def calculate_trainable_params(model): + """ + Calculate the number of trainable parameters in the model. + + Parameters + ---------- + model : object + Model to analyze. + + Returns + ------- + dict + Dictionary with: + - 'total': Total number of parameters + - 'trainable': Number of trainable parameters + - 'frozen': Number of frozen parameters + - 'trainable_ratio': Ratio of trainable to total parameters + + Examples + -------- + >>> from pydeepflow.transfer_learning import calculate_trainable_params + >>> params = calculate_trainable_params(vgg16) + >>> print(f"Trainable: {params['trainable']:,} / {params['total']:,}") + """ + layer_info = get_layer_info(model) + + total_params = sum(info['params'] for info in layer_info) + trainable_params = sum(info['params'] for info in layer_info if info['trainable']) + frozen_params = total_params - trainable_params + + return { + 'total': total_params, + 'trainable': trainable_params, + 'frozen': frozen_params, + 'trainable_ratio': trainable_params / total_params if total_params > 0 else 0 + } + + +def print_transfer_learning_guide(): + """ + Print a comprehensive guide for transfer learning with PyDeepFlow. + + This function displays best practices and recommended strategies + for successful transfer learning. + """ + guide = """ +╔══════════════════════════════════════════════════════════════════════════════╗ +║ PyDeepFlow Transfer Learning Guide ║ +╚══════════════════════════════════════════════════════════════════════════════╝ + +📚 RECOMMENDED WORKFLOW +───────────────────────────────────────────────────────────────────────────────── + +Phase 1: Feature Extraction (Recommended duration: 10-20 epochs) + • Freeze all convolutional layers + • Train only the classifier (FC layers) + • Use higher learning rate (1e-3 to 1e-2) + • Goal: Adapt classifier to your dataset + + ```python + vgg = VGG16(num_classes=10, freeze_features=True) + # Train with LR = 1e-2 + ``` + +Phase 2: Fine-Tuning (Recommended duration: 10-30 epochs) + • Unfreeze last conv block or last N layers + • Train with lower learning rate (1e-4 to 1e-3) + • Use learning rate decay + • Goal: Adapt high-level features to your domain + + ```python + vgg.unfreeze_layers(num_layers=3) # Unfreeze last block + # Train with LR = 1e-3 + ``` + +Phase 3: Full Fine-Tuning (Optional, 5-15 epochs) + • Unfreeze all layers + • Train with very low learning rate (1e-5 to 1e-4) + • Monitor for overfitting + • Goal: Full adaptation to your specific task + + ```python + vgg.unfreeze_layers() # Unfreeze all + # Train with LR = 1e-4 + ``` + +🎯 KEY RECOMMENDATIONS +───────────────────────────────────────────────────────────────────────────────── + +1. 
Dataset Size Guidelines: + • Small (<1000 samples): Feature extraction only + • Medium (1k-10k): Feature extraction + fine-tune last block + • Large (>10k): Feature extraction + full fine-tuning + +2. Learning Rate Strategy: + • Start high for frozen features (1e-2) + • Reduce by 10x when unfreezing layers (1e-3) + • Use learning rate decay/scheduling + +3. Regularization: + • Use dropout (0.3-0.5) in FC layers + • Apply data augmentation aggressively + • Consider L2 regularization (1e-4 to 1e-5) + +4. Batch Size: + • Larger batches (32-64) for stable training + • Reduce if memory constrained + +5. Monitoring: + • Watch validation loss carefully + • Stop if validation loss increases + • Save checkpoints frequently + +⚠️ COMMON PITFALLS +───────────────────────────────────────────────────────────────────────────────── + +✗ Using same learning rate for all phases +✗ Unfreezing too early (before classifier converges) +✗ Not using data augmentation +✗ Training for too many epochs (overfitting) +✗ Forgetting to reduce learning rate when unfreezing + +✓ Follow the phased approach above +✓ Monitor metrics closely +✓ Use early stopping +✓ Experiment with different unfreezing strategies + +╚══════════════════════════════════════════════════════════════════════════════╝ +""" + print(guide) + + +# Export all public functions +__all__ = [ + 'TransferLearningManager', + 'freeze_layers', + 'unfreeze_layers', + 'get_layer_info', + 'calculate_trainable_params', + 'print_transfer_learning_guide' +]
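+
+
+if __name__ == "__main__":
+    # Minimal smoke demo (illustrative, not part of the public API): build a
+    # VGG16 with a reduced input size for speed, freeze its features, and
+    # report parameter counts using the helpers defined above.
+    from pydeepflow.pretrained import VGG16
+
+    vgg = VGG16(num_classes=10, input_shape=(64, 64, 3), freeze_features=True)
+    counts = calculate_trainable_params(vgg)
+    print(f"Trainable: {counts['trainable']:,} / {counts['total']:,} "
+          f"({counts['trainable_ratio']:.1%})")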