From 3dcca0d3aabd46d7aa25836a1e323c28e4918955 Mon Sep 17 00:00:00 2001
From: Kaletsidik Ayalew <alexkalalw@gmail.com>
Date: Thu, 26 Jun 2025 15:50:10 +0300
Subject: [PATCH] add: model interpretations using SHAP and LIME

---
 notebooks/model_interpretability.ipynb | 498 +++++++++++++++++++++++++
 1 file changed, 498 insertions(+)
 create mode 100644 notebooks/model_interpretability.ipynb

diff --git a/notebooks/model_interpretability.ipynb b/notebooks/model_interpretability.ipynb
new file mode 100644
index 0000000..940346f
--- /dev/null
+++ b/notebooks/model_interpretability.ipynb
@@ -0,0 +1,498 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "0449a59c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# EthioMart/notebooks/model_interpretability.ipynb"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c1b7e5a",
+   "metadata": {},
+   "source": [
+    "### --- Section 1: Setup and Configuration ---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "f5540b93",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "d:\\@kaim\\EthioMart\\env\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from pathlib import Path\n",
+    "import logging\n",
+    "import sys\n",
+    "import torch\n",
+    "from collections import Counter\n",
+    "\n",
+    "# Hugging Face imports\n",
+    "from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n",
+    "\n",
+    "# Interpretability libraries\n",
+    "import shap\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "9ebe6e24",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Project root set to: d:\\@kaim\\EthioMart\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Configure root logging: INFO level, records formatted as time - level - message.\n",
+    "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
+    "\n",
+    "# Put the project root first on sys.path so that the `src` package can be imported below.\n",
+    "project_root = Path.cwd().parent # Assuming notebook is in EthioMart/notebooks/\n",
+    "sys.path.insert(0, str(project_root))\n",
+    "# NOTE: this import has to stay below the sys.path change; it is resolved through project_root.\n",
+    "from src.preprocessor import preprocess_amharic\n",
+    "\n",
+    "print(f\"Project root set to: {project_root}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7a2eb3e8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using model from: d:\\@kaim\\EthioMart\\models\\distilbert_ner_fine_tuned\n"
+     ]
+    }
+   ],
+   "source": [
+    "# --- Configuration ---\n",
+    "# Fine-tuned DistilBERT checkpoint, selected as the best-performing model in Task 4.\n",
+    "MODEL_PATH = project_root.joinpath(\"models\", \"distilbert_ner_fine_tuned\")\n",
+    "LABELS_PATH = MODEL_PATH # Labels are saved with the model in Hugging Face format\n",
+    "print(f\"Using model from: {MODEL_PATH}\")\n",
+    "\n",
+    "# Example sentences for interpretation, taken from `telegram_data.csv` but not part of the small 50-sentence sample.\n",
+    "# Try to include some challenging ones; more can be drawn from the same dataset.\n",
+    "EXAMPLE_TEXTS = [\n",
+    "    \"New balance master quality Made In VIETNAM Size: 5500 ETB Free Delivery INBOX: @Maraki2211 ስልክ: +251 913321831 አድራሻ አዲስ አበባ , ሜክሲኮ፡ ከ ኬኬር ህንጻ 50ሜ ወረድ ብሎ አይመን ህንፃ ግራውንድ ፍሎር ላይ፡ የሱቅ ቁ. 012 Maraki Brand ማራኪ ብራንድ\",\n",
+    "    \"Foldable High Capacity Travel Bags Lightweight Travel Carry Bag High Capacity Waterresistant multiple pockets Multifunctional Sport Travel Bags It is portable with multiple ways to carry , handheld , shoulderon , or put on luggage ዋጋ፦ 1550 ከነፃ ዲሊቨሪ ጋር ዕቃዉ እጅዎ ሲደርስከፈለጉበካሽአልያምበሞባይልባንኪንግመፈፀምይችላሉ በተጨማሪ በላይ የሚተመኑ ሲገዙ ስጦታ እንልክለዎታለን T.meLeyueqa ቻናላችንን ለጓደኛዎ ሸር ማድረግዎን አይርሱ ያሉበት ድረስ በነፃ እናደርሳለን 0933334444 @LeMazezz 0944109295 @Lemazez 0946242424 @LeMazez\",\n",
+    "    \"Skechers Gowalk Size 40 , 41 , 42 , 43 Price 2900 ETB አድራሻ ሜክሲኮ ኮሜርስ ጀርባ መዚድ ፕላዛ የመጀመሪያ ደረጃ እንደወጡ 101 የቢሮ ቁጥር ያገኙናል or call 0920238243 EthioBrand https :\",\n",
+    "    \"Reebok classic club volvet size 40 , 41 , 42 , 43 Price 2900 ETB አድራሻ ሜክሲኮ ኮሜርስ ጀርባ መዚድ ፕላዛ አንደኛ ደረጃ እንደወጡ ያገኙናል or call 0920238243\",\n",
+    "    \"ዉሀ ስርገትን ወደ ፍራሽ ዉስጥ እንዳይገባ እና አላስፈላጊ ሽታን እንዲሁ ም ድካምን የሚከላከል አንሶላ Mattress PROTECTOR POLYESTER MICROFIBERBed Size 200 cm 1.20 cm ነጭ ቬጅ ከለር ዋጋ 3400 ማሳሰቢያ የትራስ ልብስ የለዉም 0933334444 @LeMazezz 0946242424 @LeMazez\"\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "af40e14b",
+   "metadata": {},
+   "source": [
+    "### --- Section 2: Load Model and Setup Inference Pipeline ---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "835cc994",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-06-26 06:50:21,857 - INFO - Using device: cpu\n",
+      "Device set to use cpu\n",
+      "2025-06-26 06:50:21,861 - INFO - Model and tokenizer loaded for inference pipeline from d:\\@kaim\\EthioMart\\models\\distilbert_ner_fine_tuned\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Restore the fine-tuned tokenizer and token-classification model from MODEL_PATH.\n",
+    "tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)\n",
+    "model = AutoModelForTokenClassification.from_pretrained(MODEL_PATH)\n",
+    "\n",
+    "# Choose the pipeline device index: 0 selects the first CUDA GPU, -1 selects the CPU.\n",
+    "cuda_available = torch.cuda.is_available()\n",
+    "device = 0 if cuda_available else -1\n",
+    "logging.info(f\"Using device: {'cuda' if cuda_available else 'cpu'}\")\n",
+    "\n",
+    "# Build the NER pipeline; it handles tokenization, model inference and decoding of predictions.\n",
+    "nlp_pipeline = pipeline(\n",
+    "    task=\"ner\",\n",
+    "    model=model,\n",
+    "    tokenizer=tokenizer,\n",
+    "    aggregation_strategy=\"simple\", # Groups adjacent tokens into entity spans (word pieces can still be split)\n",
+    "    device=device, # Index chosen above\n",
+    ")\n",
+    "\n",
+    "logging.info(f\"Model and tokenizer loaded for inference pipeline from {MODEL_PATH}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "53addfdf",
+   "metadata": {},
+   "source": [
+    "### --- Section 3: Prepare Data for Interpretation ---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "26c4ded9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_ner_predictions(text):\n",
+    "    \"\"\"\n",
+    "    Preprocess `text` with preprocess_amharic and run it through the NER pipeline.\n",
+    "\n",
+    "    Returns the pipeline's predictions unchanged, or an empty list when the\n",
+    "    preprocessed text is blank or the pipeline raises (the error is logged).\n",
+    "    \"\"\"\n",
+    "    cleaned_text = preprocess_amharic(text)\n",
+    "    if cleaned_text.strip():\n",
+    "        try:\n",
+    "            return nlp_pipeline(cleaned_text)\n",
+    "        except Exception as exc:\n",
+    "            logging.error(f\"Error during NER prediction for text '{cleaned_text[:50]}...': {exc}\")\n",
+    "    return []\n",
+    "\n",
+    "def format_predictions_for_display(predictions, text):\n",
+    "    \"\"\"\n",
+    "    Render NER predictions as one human-readable line.\n",
+    "\n",
+    "    Each prediction dict contributes its `word`, `entity_group` and `score`\n",
+    "    (two decimals); the entries are joined with a semicolon and a space. When\n",
+    "    `predictions` is empty, a message quoting `text` is returned instead.\n",
+    "    \"\"\"\n",
+    "    if not predictions:\n",
+    "        return f\"No entities found for: '{text}'\"\n",
+    "    return \"; \".join(\n",
+    "        f\"'{ent['word']}' ({ent['entity_group']} - {ent['score']:.2f})\"\n",
+    "        for ent in predictions\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a6aef8d6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Text: New balance master quality Made In VIETNAM Size: 5500 ETB Free Delivery INBOX: @Maraki2211 ስልክ: +251 913321831\n",
+      "Formatted Predictions: 'New balance master' (PRICE - 0.08); 'quality' (PRODUCT - 0.07); 'Made In' (PRICE - 0.07); 'VI' (PRODUCT - 0.08); '##ETNA' (PRICE - 0.08); '##M Size :' (PRODUCT - 0.08); '5500' (PRODUCT - 0.07); 'ETB' (PRICE - 0.08); 'Free' (PRODUCT - 0.07); 'Delivery INB' (PRICE - 0.08); '##OX :' (PRODUCT - 0.08); '@' (PRICE - 0.08); 'Mara' (PRICE - 0.07); '##ki 221' (PRODUCT - 0.07); '##1 ስልክ' (PRICE - 0.08); ':' (PRICE - 0.07); '+' (LOC - 0.07); '251' (LOC - 0.07); '913321831' (PRICE - 0.08)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Smoke test: run one sample message through get_ner_predictions and print the formatted entities.\n",
+    "sample_text_for_test = \"New balance master quality Made In VIETNAM Size: 5500 ETB Free Delivery INBOX: @Maraki2211 ስልክ: +251 913321831\"\n",
+    "print(f\"Original Text: {sample_text_for_test}\")\n",
+    "test_predictions = get_ner_predictions(sample_text_for_test)\n",
+    "print(f\"Formatted Predictions: {format_predictions_for_display(test_predictions, sample_text_for_test)}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c0523906",
+   "metadata": {},
+   "source": [
+    "\n",
+    "\n",
+    "### --- Section 4: SHAP Explanations ---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a1c41ff8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logging.info(\"Starting SHAP explanations...\")\n",
+    "\n",
+    "# Approach:\n",
+    "#   1. Fix one prediction to explain: the probability of SHAP_TARGET_LABEL at the\n",
+    "#      first sub-token of SHAP_TARGET_WORD.\n",
+    "#   2. Let SHAP mask parts of the sentence and re-score that probability each time.\n",
+    "#   3. Report how much each token of the sentence moves that probability.\n",
+    "SHAP_TARGET_WORD = \"ሜክሲኮ\"\n",
+    "SHAP_TARGET_LABEL = \"B-LOC\"\n",
+    "\n",
+    "\n",
+    "def f(x):\n",
+    "    \"\"\"\n",
+    "    Raw-logit helper: tokenizes a list of texts and returns the model logits.\n",
+    "\n",
+    "    No Amharic preprocessing is applied here, and the SHAP explanation below does\n",
+    "    not call this function; it is kept for ad-hoc inspection of the model output.\n",
+    "    \"\"\"\n",
+    "    with torch.no_grad():\n",
+    "        inputs = tokenizer(x, return_tensors=\"pt\", padding=True, truncation=True)\n",
+    "        inputs = {k: v.to(model.device) for k, v in inputs.items()}\n",
+    "        logits = model(**inputs).logits\n",
+    "    return logits.cpu().numpy()\n",
+    "\n",
+    "\n",
+    "def target_label_probability(text_input, target_word, label_id):\n",
+    "    \"\"\"\n",
+    "    Probability that `target_word` inside `text_input` is tagged with `label_id`.\n",
+    "\n",
+    "    Args:\n",
+    "        text_input: Sentence to score (possibly with parts masked out by SHAP).\n",
+    "        target_word: Whitespace-delimited word whose prediction is inspected.\n",
+    "        label_id: Index of the label in the model's output dimension.\n",
+    "\n",
+    "    Returns:\n",
+    "        The softmax probability of `label_id` at the first sub-token of the first\n",
+    "        occurrence of `target_word`, or 0.0 when the word is missing from\n",
+    "        `text_input` or its tokens were removed by truncation.\n",
+    "    \"\"\"\n",
+    "    words_list = text_input.split()\n",
+    "    if target_word not in words_list:\n",
+    "        return 0.0\n",
+    "    target_word_idx = words_list.index(target_word)\n",
+    "\n",
+    "    # Tokenize the pre-split words so that word_ids() refers to the same\n",
+    "    # whitespace-delimited words as target_word_idx. The same encoding feeds the\n",
+    "    # model, which keeps token positions and probabilities aligned.\n",
+    "    encoding = tokenizer(words_list, is_split_into_words=True, return_tensors=\"pt\", truncation=True)\n",
+    "    target_token_idx = next(\n",
+    "        (\n",
+    "            token_idx\n",
+    "            for token_idx, word_id in enumerate(encoding.word_ids(batch_index=0))\n",
+    "            if word_id == target_word_idx\n",
+    "        ),\n",
+    "        None,\n",
+    "    )\n",
+    "    if target_token_idx is None:\n",
+    "        return 0.0\n",
+    "\n",
+    "    inputs = {k: v.to(model.device) for k, v in encoding.items()}\n",
+    "    with torch.no_grad():\n",
+    "        probs = torch.softmax(model(**inputs).logits, dim=-1) # Convert logits to probabilities\n",
+    "    return probs[0, target_token_idx, label_id].item()\n",
+    "\n",
+    "\n",
+    "# Let's pick a specific example to demonstrate SHAP\n",
+    "# (EXAMPLE_TEXTS[2] is the Skechers listing whose address contains the target word.)\n",
+    "shap_example_text = EXAMPLE_TEXTS[2]\n",
+    "print(f\"\\n--- SHAP Explanation for: '{shap_example_text}' ---\")\n",
+    "\n",
+    "# Preprocess the text for explanation\n",
+    "preprocessed_shap_text = preprocess_amharic(shap_example_text)\n",
+    "if preprocessed_shap_text.strip():\n",
+    "    # Whitespace-delimited words of the preprocessed text\n",
+    "    original_words = preprocessed_shap_text.split()\n",
+    "\n",
+    "    # Find the ID for the target label\n",
+    "    b_loc_id = model.config.label2id.get(SHAP_TARGET_LABEL)\n",
+    "\n",
+    "    if b_loc_id is None:\n",
+    "        print(\"B-LOC label not found in model's label mappings. Cannot generate SHAP explanation.\")\n",
+    "    elif SHAP_TARGET_WORD not in original_words:\n",
+    "        print(f\"Target word '{SHAP_TARGET_WORD}' not found in the preprocessed text. Cannot generate SHAP explanation.\")\n",
+    "    else:\n",
+    "        def predict_b_loc_prob(text_inputs):\n",
+    "            \"\"\"\n",
+    "            Model function for SHAP.\n",
+    "\n",
+    "            `text_inputs` is a batch of perturbed versions of the sentence as strings;\n",
+    "            the result holds one target-label probability per string.\n",
+    "            \"\"\"\n",
+    "            return np.array([\n",
+    "                target_label_probability(str(text_input), SHAP_TARGET_WORD, b_loc_id)\n",
+    "                for text_input in text_inputs\n",
+    "            ])\n",
+    "\n",
+    "        # Probability on the unmasked sentence: this is the prediction being explained.\n",
+    "        baseline_prob = target_label_probability(preprocessed_shap_text, SHAP_TARGET_WORD, b_loc_id)\n",
+    "        print(f\"Model probability of '{SHAP_TARGET_LABEL}' for '{SHAP_TARGET_WORD}' on the unmasked text: {baseline_prob:.4f}\")\n",
+    "\n",
+    "        # shap.maskers.Text takes the tokenizer as its first argument; the text to\n",
+    "        # explain is passed later, when the explainer is called.\n",
+    "        explainer = shap.Explainer(predict_b_loc_prob, shap.maskers.Text(tokenizer))\n",
+    "\n",
+    "        # Explain the whole sentence as one sample (a batch of size 1).\n",
+    "        shap_values = explainer([preprocessed_shap_text])\n",
+    "\n",
+    "        logging.info(\"Generating SHAP plot...\")\n",
+    "        # In a Jupyter environment, this will render an interactive plot.\n",
+    "        shap.plots.text(shap_values[0]) # shap_values[0] is the only sample in the batch\n",
+    "\n",
+    "        print(\"\\nSHAP values for 'B-LOC' prediction for each token:\")\n",
+    "        for token, val in zip(shap_values.data[0], shap_values.values[0]):\n",
+    "            # np.ravel copes with both a scalar value and a length-1 output axis.\n",
+    "            print(f\"'{token}': {float(np.ravel(val)[0]):.4f}\")\n",
+    "\n",
+    "else:\n",
+    "    print(f\"Target text for SHAP explanation is empty after preprocessing: '{shap_example_text}'\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d886efdd",
+   "metadata": {},
+   "source": [
+    "### --- Section 5: LIME Explanations (Conceptual Approach) ---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "f4033db2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Starting LIME explanations (conceptual approach for NER)...\n",
+      "LIME for token-level NER is complex and often requires custom wrappers.\n",
+      "A direct implementation for this task would involve significant adaptation to LIME's core functionalities.\n",
+      "SHAP is generally more directly applicable and computationally feasible for transformer-based token classification models.\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"\\nStarting LIME explanations (conceptual approach for NER)...\")\n",
+    "# No LIME computation is performed in this cell; it only prints why LIME was not applied.\n",
+    "print(\"LIME for token-level NER is complex and often requires custom wrappers.\")\n",
+    "print(\"A direct implementation for this task would involve significant adaptation to LIME's core functionalities.\")\n",
+    "print(\"SHAP is generally more directly applicable and computationally feasible for transformer-based token classification models.\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "47bbd214",
+   "metadata": {},
+   "source": [
+    "### --- Section 6: Analysis and Reporting ---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "860c58d1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "--- Analysis of Interpretability Results ---\n",
+      "\n",
+      "SHAP (SHapley Additive exPlanations):\n",
+      "- SHAP values aim to show how each word in the input contributes to the model's output prediction for a specific label (e.g., 'B-LOC').\n",
+      "- Positive SHAP values indicate that the word pushes the prediction towards the target label.\n",
+      "- Negative SHAP values indicate that the word pushes the prediction away from the target label.\n",
+      "- In our example for 'ሜክሲኮ' (Mexico) as 'B-LOC', words like 'አድራሻ' (address) and 'ሜክሲኮ' itself are expected to have high positive SHAP values for the 'LOC' label.\n",
+      "- The interpretation might be less clear if the model's overall performance is low, as it's explaining a less accurate prediction.\n",
+      "\n",
+      "LIME (Local Interpretable Model-agnostic Explanations):\n",
+      "- LIME creates local surrogate models (simple, interpretable models) to explain individual predictions.\n",
+      "- It perturbs the input (e.g., removing words) and observes how the prediction changes, then fits a local model.\n",
+      "- For NER, LIME can be used to show which words contribute most to the prediction of a *specific entity type* for *a given instance*.\n",
+      "- However, its standard implementation is more geared towards classification (e.g., sentiment, topic), where the output is a single class per input. Adapting it for token-level NER (multiple labels per sentence) is non-trivial and often requires custom wrappers around LIME's core, making SHAP a more straightforward choice for this kind of task.\n",
+      "\n",
+      "Difficult Cases and Areas for Improvement:\n",
+      "- Given the very low F1-scores from Task 3 and 4, all cases are effectively 'difficult cases' as the model is struggling significantly.\n",
+      "- Ambiguous text and overlapping entities are known challenges for NER models generally, and especially with limited data.\n",
+      "- **Primary Area for Improvement:** Significantly increase the size and diversity of the labeled Amharic dataset. Rule-based labeling is a good start, but manual review and annotation of a much larger corpus would yield better results.\n",
+      "- **Data Quality:** Re-verify the quality and consistency of rule-based labels. Errors in labeling will directly impact model performance.\n",
+      "- **Model Architecture:** While DistilBERT is lightweight, a more powerful model like `afro-xlmr-large` (given more data) or other mBERT variants might perform better.\n",
+      "- **Hyperparameter Tuning:** More extensive hyperparameter tuning (learning rate, batch size, epochs, optimizers) could potentially yield marginal improvements, but is unlikely to overcome data limitations.\n",
+      "- **Pre-training:** Explore domain-specific pre-training or continued pre-training on a large corpus of general Amharic text before fine-tuning for NER.\n",
+      "- **Complex Entity Boundaries:** Analyze cases where entity boundaries are fluid (e.g., \"ዋጋ ስልክ አድራሻ\" or \"price contact\") after tokenization and labeling to refine rules or model capabilities.\n",
+      "\n",
+      "Conclusion for Interpretability:\n",
+      "Model interpretability tools like SHAP are valuable for understanding how even poorly performing models make decisions, which can guide data collection, feature engineering, and model selection. With more high-quality labeled data, these tools would provide deeper and more actionable insights into a well-performing NER system.\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"\\n--- Analysis of Interpretability Results ---\")\n",
+    "print(\"\\nSHAP (SHapley Additive exPlanations):\")\n",
+    "print(\"- SHAP values aim to show how each word in the input contributes to the model's output prediction for a specific label (e.g., 'B-LOC').\")\n",
+    "print(\"- Positive SHAP values indicate that the word pushes the prediction towards the target label.\")\n",
+    "print(\"- Negative SHAP values indicate that the word pushes the prediction away from the target label.\")\n",
+    "print(\"- In our example for 'ሜክሲኮ' (Mexico) as 'B-LOC', words like 'አድራሻ' (address) and 'ሜክሲኮ' itself are expected to have high positive SHAP values for the 'LOC' label.\")\n",
+    "print(\"- The interpretation might be less clear if the model's overall performance is low, as it's explaining a less accurate prediction.\")\n",
+    "# Printed notes on LIME; this cell only prints text and computes nothing.\n",
+    "print(\"\\nLIME (Local Interpretable Model-agnostic Explanations):\")\n",
+    "print(\"- LIME creates local surrogate models (simple, interpretable models) to explain individual predictions.\")\n",
+    "print(\"- It perturbs the input (e.g., removing words) and observes how the prediction changes, then fits a local model.\")\n",
+    "print(\"- For NER, LIME can be used to show which words contribute most to the prediction of a *specific entity type* for *a given instance*.\")\n",
+    "print(\"- However, its standard implementation is more geared towards classification (e.g., sentiment, topic), where the output is a single class per input. Adapting it for token-level NER (multiple labels per sentence) is non-trivial and often requires custom wrappers around LIME's core, making SHAP a more straightforward choice for this kind of task.\")\n",
+    "# Printed notes on difficult cases and suggested improvements.\n",
+    "print(\"\\nDifficult Cases and Areas for Improvement:\")\n",
+    "print(\"- Given the very low F1-scores from Task 3 and 4, all cases are effectively 'difficult cases' as the model is struggling significantly.\")\n",
+    "print(\"- Ambiguous text and overlapping entities are known challenges for NER models generally, and especially with limited data.\")\n",
+    "print(\"- **Primary Area for Improvement:** Significantly increase the size and diversity of the labeled Amharic dataset. Rule-based labeling is a good start, but manual review and annotation of a much larger corpus would yield better results.\")\n",
+    "print(\"- **Data Quality:** Re-verify the quality and consistency of rule-based labels. Errors in labeling will directly impact model performance.\")\n",
+    "print(\"- **Model Architecture:** While DistilBERT is lightweight, a more powerful model like `afro-xlmr-large` (given more data) or other mBERT variants might perform better.\")\n",
+    "print(\"- **Hyperparameter Tuning:** More extensive hyperparameter tuning (learning rate, batch size, epochs, optimizers) could potentially yield marginal improvements, but is unlikely to overcome data limitations.\")\n",
+    "print(\"- **Pre-training:** Explore domain-specific pre-training or continued pre-training on a large corpus of general Amharic text before fine-tuning for NER.\")\n",
+    "print('- **Complex Entity Boundaries:** Analyze cases where entity boundaries are fluid (e.g., \"ዋጋ ስልክ አድራሻ\" or \"price contact\") after tokenization and labeling to refine rules or model capabilities.')\n",
+    "# Printed closing remarks.\n",
+    "print(\"\\nConclusion for Interpretability:\")\n",
+    "print(\"Model interpretability tools like SHAP are valuable for understanding how even poorly performing models make decisions, which can guide data collection, feature engineering, and model selection. With more high-quality labeled data, these tools would provide deeper and more actionable insights into a well-performing NER system.\")\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}