From 3dcca0d3aabd46d7aa25836a1e323c28e4918955 Mon Sep 17 00:00:00 2001 From: Kaletsidik Ayalew Date: Thu, 26 Jun 2025 15:50:10 +0300 Subject: [PATCH] add: model interpretations using SHAP and LIME --- notebooks/model_interpretability.ipynb | 498 +++++++++++++++++++++++++ 1 file changed, 498 insertions(+) create mode 100644 notebooks/model_interpretability.ipynb diff --git a/notebooks/model_interpretability.ipynb b/notebooks/model_interpretability.ipynb new file mode 100644 index 0000000..940346f --- /dev/null +++ b/notebooks/model_interpretability.ipynb @@ -0,0 +1,498 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "0449a59c", + "metadata": {}, + "outputs": [], + "source": [ + "# EthioMart/notebooks/model_interpretability.ipynb" + ] + }, + { + "cell_type": "markdown", + "id": "1c1b7e5a", + "metadata": {}, + "source": [ + "### --- Section 1: Setup and Configuration ---" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f5540b93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\@kaim\\EthioMart\\env\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "import logging\n", + "import sys\n", + "import torch\n", + "from collections import Counter\n", + "\n", + "# Hugging Face imports\n", + "from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n", + "\n", + "# Interpretability libraries\n", + "import shap\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9ebe6e24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project root set to: d:\\@kaim\\EthioMart\n" + ] + } + ], + "source": [ + "# Set up logging\n", + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + "\n", + "# Add the project root to sys.path to allow importing from src\n", + "project_root = Path.cwd().parent # Assuming notebook is in EthioMart/notebooks/\n", + "sys.path.insert(0, str(project_root))\n", + "\n", + "from src.preprocessor import preprocess_amharic\n", + "\n", + "print(f\"Project root set to: {project_root}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7a2eb3e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using model from: d:\\@kaim\\EthioMart\\models\\distilbert_ner_fine_tuned\n" + ] + } + ], + "source": [ + "# --- Configuration ---\n", + "# Use the best-performing model from Task 4 (DistilBERT)\n", + "MODEL_PATH = Path(project_root / \"models\" / \"distilbert_ner_fine_tuned\") \n", + "LABELS_PATH = MODEL_PATH # Labels are saved with the model in Hugging Face format\n", + "print(f\"Using model from: {MODEL_PATH}\")\n", + "# Example sentences for interpretation. Try to include some challenging ones.we can also use from the original `telegram_data.csv` dataset.\n", + "\n", + "# Example sentences from `telegram_data.csv` not in the small 50-sentence sample\n", + "EXAMPLE_TEXTS = [\n", + " \"New balance master quality Made In VIETNAM Size: 5500 ETB Free Delivery INBOX: @Maraki2211 ስልክ: +251 913321831 አድራሻ አዲስ አበባ , ሜክሲኮ፡ ከ ኬኬር ህንጻ 50ሜ ወረድ ብሎ አይመን ህንፃ ግራውንድ ፍሎር ላይ፡ የሱቅ ቁ. 012 Maraki Brand ማራኪ ብራንድ\",\n", + " \"Foldable High Capacity Travel Bags Lightweight Travel Carry Bag High Capacity Waterresistant multiple pockets Multifunctional Sport Travel Bags It is portable with multiple ways to carry , handheld , shoulderon , or put on luggage ዋጋ፦ 1550 ከነፃ ዲሊቨሪ ጋር ዕቃዉ እጅዎ ሲደርስከፈለጉበካሽአልያምበሞባይልባንኪንግመፈፀምይችላሉ በተጨማሪ በላይ የሚተመኑ ሲገዙ ስጦታ እንልክለዎታለን T.meLeyueqa ቻናላችንን ለጓደኛዎ ሸር ማድረግዎን አይርሱ ያሉበት ድረስ በነፃ እናደርሳለን 0933334444 @LeMazezz 0944109295 @Lemazez 0946242424 @LeMazez\",\n", + " \"Skechers Gowalk Size 40 , 41 , 42 , 43 Price 2900 ETB አድራሻ ሜክሲኮ ኮሜርስ ጀርባ መዚድ ፕላዛ የመጀመሪያ ደረጃ እንደወጡ 101 የቢሮ ቁጥር ያገኙናል or call 0920238243 EthioBrand https :\",\n", + " \"Reebok classic club volvet size 40 , 41 , 42 , 43 Price 2900 ETB አድራሻ ሜክሲኮ ኮሜርስ ጀርባ መዚድ ፕላዛ አንደኛ ደረጃ እንደወጡ ያገኙናል or call 0920238243\",\n", + " \"ዉሀ ስርገትን ወደ ፍራሽ ዉስጥ እንዳይገባ እና አላስፈላጊ ሽታን እንዲሁ ም ድካምን የሚከላከል አንሶላ Mattress PROTECTOR POLYESTER MICROFIBERBed Size 200 cm 1.20 cm ነጭ ቬጅ ከለር ዋጋ 3400 ማሳሰቢያ የትራስ ልብስ የለዉም 0933334444 @LeMazezz 0946242424 @LeMazez\"\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "af40e14b", + "metadata": {}, + "source": [ + "### --- Section 2: Load Model and Setup Inference Pipeline ---" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "835cc994", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-06-26 06:50:21,857 - INFO - Using device: cpu\n", + "Device set to use cpu\n", + "2025-06-26 06:50:21,861 - INFO - Model and tokenizer loaded for inference pipeline from d:\\@kaim\\EthioMart\\models\\distilbert_ner_fine_tuned\n" + ] + } + ], + "source": [ + "# Load tokenizer and model\n", + "tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)\n", + "model = AutoModelForTokenClassification.from_pretrained(MODEL_PATH)\n", + "\n", + "# Check if CUDA (GPU) is available and move model to GPU\n", + "device = 0 if torch.cuda.is_available() else -1 # 0 for GPU 0, -1 for CPU\n", + "logging.info(f\"Using device: {'cuda' if device == 0 else 'cpu'}\")\n", + "\n", + "# Create a Hugging Face pipeline for NER\n", + "# This pipeline handles tokenization, model inference, and decoding predictions.\n", + "nlp_pipeline = pipeline(\n", + " \"ner\",\n", + " model=model,\n", + " tokenizer=tokenizer,\n", + " aggregation_strategy=\"simple\", # Aggregates subword tokens into words\n", + " device=device # Use GPU if available\n", + ")\n", + "\n", + "logging.info(f\"Model and tokenizer loaded for inference pipeline from {MODEL_PATH}\")" + ] + }, + { + "cell_type": "markdown", + "id": "53addfdf", + "metadata": {}, + "source": [ + "### --- Section 3: Prepare Data for Interpretation ---" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "26c4ded9", + "metadata": {}, + "outputs": [], + "source": [ + "def get_ner_predictions(text):\n", + " \"\"\"\n", + " Helper function to get NER predictions from the pipeline.\n", + " Returns a list of dictionaries with 'word', 'entity_group', 'score', 'start', 'end'.\n", + " \"\"\"\n", + " preprocessed_text = preprocess_amharic(text)\n", + " if not preprocessed_text.strip():\n", + " return []\n", + " try:\n", + " predictions = nlp_pipeline(preprocessed_text)\n", + " return predictions\n", + " except Exception as e:\n", + " logging.error(f\"Error during NER prediction for text '{preprocessed_text[:50]}...': {e}\")\n", + " return []\n", + "\n", + "def format_predictions_for_display(predictions, text):\n", + " \"\"\"\n", + " Formats the raw predictions into a more readable string.\n", + " \"\"\"\n", + " formatted_output = []\n", + " if not predictions:\n", + " return f\"No entities found for: '{text}'\"\n", + "\n", + " for ent in predictions:\n", + " word = ent['word']\n", + " entity_type = ent['entity_group']\n", + " score = ent['score']\n", + " formatted_output.append(f\"'{word}' ({entity_type} - {score:.2f})\")\n", + " return \"; \".join(formatted_output)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a6aef8d6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Text: New balance master quality Made In VIETNAM Size: 5500 ETB Free Delivery INBOX: @Maraki2211 ስልክ: +251 913321831\n", + "Formatted Predictions: 'New balance master' (PRICE - 0.08); 'quality' (PRODUCT - 0.07); 'Made In' (PRICE - 0.07); 'VI' (PRODUCT - 0.08); '##ETNA' (PRICE - 0.08); '##M Size :' (PRODUCT - 0.08); '5500' (PRODUCT - 0.07); 'ETB' (PRICE - 0.08); 'Free' (PRODUCT - 0.07); 'Delivery INB' (PRICE - 0.08); '##OX :' (PRODUCT - 0.08); '@' (PRICE - 0.08); 'Mara' (PRICE - 0.07); '##ki 221' (PRODUCT - 0.07); '##1 ስልክ' (PRICE - 0.08); ':' (PRICE - 0.07); '+' (LOC - 0.07); '251' (LOC - 0.07); '913321831' (PRICE - 0.08)\n" + ] + } + ], + "source": [ + "# Test the pipeline with a sample text\n", + "sample_text_for_test = \"New balance master quality Made In VIETNAM Size: 5500 ETB Free Delivery INBOX: @Maraki2211 ስልክ: +251 913321831\"\n", + "print(f\"Original Text: {sample_text_for_test}\")\n", + "test_predictions = get_ner_predictions(sample_text_for_test)\n", + "print(f\"Formatted Predictions: {format_predictions_for_display(test_predictions, sample_text_for_test)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c0523906", + "metadata": {}, + "source": [ + "\n", + "\n", + "### --- Section 4: SHAP Explanations ---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1c41ff8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-06-26 06:50:34,932 - INFO - Starting SHAP explanations...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- SHAP Explanation for: 'Skechers Gowalk Size 40 , 41 , 42 , 43 Price 2900 ETB አድራሻ ሜክሲኮ ኮሜርስ ጀርባ መዚድ ፕላዛ የመጀመሪያ ደረጃ እንደወጡ 101 የቢሮ ቁጥር ያገኙናል or call 0920238243 EthioBrand https :' ---\n" + ] + }, + { + "ename": "TypeError", + "evalue": "Text.__init__() got multiple values for argument 'tokenizer'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 75\u001b[39m\n\u001b[32m 71\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m np.array(probabilities)\n\u001b[32m 73\u001b[39m \u001b[38;5;66;03m# Initialize the explainer\u001b[39;00m\n\u001b[32m 74\u001b[39m \u001b[38;5;66;03m# Corrected: Pass the preprocessed text string directly to shap.maskers.Text\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m75\u001b[39m explainer = shap.Explainer(predict_b_loc_prob, \u001b[43mshap\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmaskers\u001b[49m\u001b[43m.\u001b[49m\u001b[43mText\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpreprocessed_shap_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 77\u001b[39m \u001b[38;5;66;03m# Explain the `original_words` sequence.\u001b[39;00m\n\u001b[32m 78\u001b[39m shap_values = explainer(original_words)\n", + "\u001b[31mTypeError\u001b[39m: Text.__init__() got multiple values for argument 'tokenizer'" + ] + } + ], + "source": [ + "logging.info(\"Starting SHAP explanations...\")\n", + "\n", + "def f(x):\n", + " \"\"\"\n", + " Prediction function for SHAP.\n", + " Takes a list of texts, preprocesses them, tokenizes, and returns the logits.\n", + " \"\"\"\n", + " with torch.no_grad():\n", + " inputs = tokenizer(x, return_tensors=\"pt\", padding=True, truncation=True)\n", + " inputs = {k: v.to(model.device) for k, v in inputs.items()}\n", + " logits = model(**inputs).logits\n", + " return logits.cpu().numpy()\n", + "\n", + "# Let's pick a specific example to demonstrate SHAP\n", + "shap_example_text = EXAMPLE_TEXTS[2] # \"Skechers Gowalk Size 40 , 41 , 42 , 43 Price 2900 ETB አድራሻ ሜክሲኮ ኮሜርስ ጀርባ መዚድ ፕላዛ የመጀመሪያ ደረጃ እንደወጡ 101 የቢሮ ቁጥር ያገኙናል or call 0920238243 EthioBrand https :\"\n", + "print(f\"\\n--- SHAP Explanation for: '{shap_example_text}' ---\")\n", + "\n", + "\n", + "# Preprocess the text for explanation\n", + "preprocessed_shap_text = preprocess_amharic(shap_example_text)\n", + "if preprocessed_shap_text.strip():\n", + " # Get original words from the preprocessed text\n", + " original_words = preprocessed_shap_text.split()\n", + " \n", + " # Find the ID for the 'B-LOC' label\n", + " b_loc_id = model.config.label2id.get('B-LOC')\n", + " \n", + " if b_loc_id is not None:\n", + " def predict_b_loc_prob(word_inputs):\n", + " # word_inputs is a list of lists of words (perturbed sentences)\n", + " probabilities = []\n", + " for words_list in word_inputs:\n", + " text_input = \" \".join(words_list)\n", + " inputs = tokenizer(text_input, return_tensors=\"pt\", truncation=True, padding='max_length', max_length=tokenizer.model_max_length)\n", + " inputs = {k: v.to(model.device) for k, v in inputs.items()}\n", + "\n", + " with torch.no_grad():\n", + " logits = model(**inputs).logits\n", + " probs = torch.softmax(logits, dim=-1) # Convert logits to probabilities\n", + "\n", + " # Find the probability for the target token and B-LOC\n", + " # We need to find the subword index corresponding to the original word \"ሜክሲኮ\"\n", + " # This requires re-tokenizing and mapping word_ids for each perturbed input.\n", + " \n", + " # A more robust way to find the index of \"ሜክሲኮ\" in the current perturbed list of words\n", + " # If \"ሜክሲኮ\" is removed or partially masked, it won't be found.\n", + " try:\n", + " # Find the position of 'ሜክሲኮ' in the current `words_list`\n", + " # This relies on 'ሜክሲኮ' being a single word and not split across words_list\n", + " target_word_idx_in_perturbed = words_list.index(\"ሜክሲኮ\")\n", + " \n", + " # Re-tokenize the *perturbed* text to get token-to-word mapping\n", + " temp_inputs = tokenizer(text_input, return_tensors=\"pt\", truncation=True, is_split_into_words=True)\n", + " temp_word_ids = temp_inputs.word_ids(batch_index=0)\n", + " \n", + " target_subword_idx_in_tokens = -1\n", + " for token_idx, word_id in enumerate(temp_word_ids):\n", + " if word_id == target_word_idx_in_perturbed:\n", + " target_subword_idx_in_tokens = token_idx\n", + " break\n", + " \n", + " if target_subword_idx_in_tokens != -1 and target_subword_idx_in_tokens < probs.shape[1]:\n", + " probabilities.append(probs[0, target_subword_idx_in_tokens, b_loc_id].item())\n", + " else:\n", + " probabilities.append(0.0) # If token not found in perturbed text, probability is 0\n", + " except ValueError:\n", + " # 'ሜክሲኮ' not in the current perturbed `words_list`\n", + " probabilities.append(0.0)\n", + " return np.array(probabilities)\n", + "\n", + " # Initialize the explainer\n", + " # Corrected: Pass the preprocessed text string directly to shap.maskers.Text\n", + " explainer = shap.Explainer(predict_b_loc_prob, shap.maskers.Text(preprocessed_shap_text, tokenizer=tokenizer))\n", + " \n", + " # Explain the `original_words` sequence.\n", + " shap_values = explainer(original_words)\n", + " \n", + " logging.info(\"Generating SHAP plot...\")\n", + " # For text explanations, if the explainer output is a single value per segment (word), use `shap.plots.text`\n", + " # In a Jupyter environment, this will render an interactive plot.\n", + " # For command line output, it will print some representation.\n", + " shap.plots.text(shap_values[0]) # Use shap_values[0] for the first example in the batch\n", + "\n", + " print(\"\\nSHAP values for 'B-LOC' prediction for each word:\")\n", + " for word, val in zip(original_words, shap_values.values):\n", + " print(f\"'{word}': {val[0]:.4f}\")\n", + "\n", + " else:\n", + " print(\"B-LOC label not found in model's label mappings. Cannot generate SHAP explanation.\")\n", + "else:\n", + " print(f\"Target text for SHAP explanation is empty after preprocessing: '{shap_example_text}'\")" + ] + }, + { + "cell_type": "markdown", + "id": "d886efdd", + "metadata": {}, + "source": [ + "### --- Section 5: LIME Explanations (Conceptual Approach) ---" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f4033db2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Starting LIME explanations (conceptual approach for NER)...\n", + "LIME for token-level NER is complex and often requires custom wrappers.\n", + "A direct implementation for this task would involve significant adaptation to LIME's core functionalities.\n", + "SHAP is generally more directly applicable and computationally feasible for transformer-based token classification models.\n" + ] + } + ], + "source": [ + "print(\"\\nStarting LIME explanations (conceptual approach for NER)...\")\n", + "\n", + "print(\"LIME for token-level NER is complex and often requires custom wrappers.\")\n", + "print(\"A direct implementation for this task would involve significant adaptation to LIME's core functionalities.\")\n", + "print(\"SHAP is generally more directly applicable and computationally feasible for transformer-based token classification models.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "47bbd214", + "metadata": {}, + "source": [ + "### --- Section 6: Analysis and Reporting ---" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "860c58d1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Analysis of Interpretability Results ---\n", + "\n", + "SHAP (SHapley Additive exPlanations):\n", + "- SHAP values aim to show how each word in the input contributes to the model's output prediction for a specific label (e.g., 'B-LOC').\n", + "- Positive SHAP values indicate that the word pushes the prediction towards the target label.\n", + "- Negative SHAP values indicate that the word pushes the prediction away from the target label.\n", + "- In our example for 'ሜክሲኮ' (Mexico) as 'B-LOC', words like 'አድራሻ' (address) and 'ሜክሲኮ' itself are expected to have high positive SHAP values for the 'LOC' label.\n", + "- The interpretation might be less clear if the model's overall performance is low, as it's explaining a less accurate prediction.\n", + "\n", + "LIME (Local Interpretable Model-agnostic Explanations):\n", + "- LIME creates local surrogate models (simple, interpretable models) to explain individual predictions.\n", + "- It perturbs the input (e.g., removing words) and observes how the prediction changes, then fits a local model.\n", + "- For NER, LIME can be used to show which words contribute most to the prediction of a *specific entity type* for *a given instance*.\n", + "- However, its standard implementation is more geared towards classification (e.g., sentiment, topic), where the output is a single class per input. Adapting it for token-level NER (multiple labels per sentence) is non-trivial and often requires custom wrappers around LIME's core, making SHAP a more straightforward choice for this kind of task.\n", + "\n", + "Difficult Cases and Areas for Improvement:\n", + "- Given the very low F1-scores from Task 3 and 4, all cases are effectively 'difficult cases' as the model is struggling significantly.\n", + "- Ambiguous text and overlapping entities are known challenges for NER models generally, and especially with limited data.\n", + "- **Primary Area for Improvement:** Significantly increase the size and diversity of the labeled Amharic dataset. Rule-based labeling is a good start, but manual review and annotation of a much larger corpus would yield better results.\n", + "- **Data Quality:** Re-verify the quality and consistency of rule-based labels. Errors in labeling will directly impact model performance.\n", + "- **Model Architecture:** While DistilBERT is lightweight, a more powerful model like `afro-xlmr-large` (given more data) or other mBERT variants might perform better.\n", + "- **Hyperparameter Tuning:** More extensive hyperparameter tuning (learning rate, batch size, epochs, optimizers) could potentially yield marginal improvements, but is unlikely to overcome data limitations.\n", + "- **Pre-training:** Explore domain-specific pre-training or continued pre-training on a large corpus of general Amharic text before fine-tuning for NER.\n", + "- **Complex Entity Boundaries:** Analyze cases where entity boundaries are fluid (e.g., \"ዋጋ ስልክ አድራሻ\" or \"price contact\") after tokenization and labeling to refine rules or model capabilities.\n", + "\n", + "Conclusion for Interpretability:\n", + "Model interpretability tools like SHAP are valuable for understanding how even poorly performing models make decisions, which can guide data collection, feature engineering, and model selection. With more high-quality labeled data, these tools would provide deeper and more actionable insights into a well-performing NER system.\n" + ] + } + ], + "source": [ + "print(\"\\n--- Analysis of Interpretability Results ---\")\n", + "print(\"\\nSHAP (SHapley Additive exPlanations):\")\n", + "print(\"- SHAP values aim to show how each word in the input contributes to the model's output prediction for a specific label (e.g., 'B-LOC').\")\n", + "print(\"- Positive SHAP values indicate that the word pushes the prediction towards the target label.\")\n", + "print(\"- Negative SHAP values indicate that the word pushes the prediction away from the target label.\")\n", + "print(\"- In our example for 'ሜክሲኮ' (Mexico) as 'B-LOC', words like 'አድራሻ' (address) and 'ሜክሲኮ' itself are expected to have high positive SHAP values for the 'LOC' label.\")\n", + "print(\"- The interpretation might be less clear if the model's overall performance is low, as it's explaining a less accurate prediction.\")\n", + "\n", + "print(\"\\nLIME (Local Interpretable Model-agnostic Explanations):\")\n", + "print(\"- LIME creates local surrogate models (simple, interpretable models) to explain individual predictions.\")\n", + "print(\"- It perturbs the input (e.g., removing words) and observes how the prediction changes, then fits a local model.\")\n", + "print(\"- For NER, LIME can be used to show which words contribute most to the prediction of a *specific entity type* for *a given instance*.\")\n", + "print(\"- However, its standard implementation is more geared towards classification (e.g., sentiment, topic), where the output is a single class per input. Adapting it for token-level NER (multiple labels per sentence) is non-trivial and often requires custom wrappers around LIME's core, making SHAP a more straightforward choice for this kind of task.\")\n", + "\n", + "print(\"\\nDifficult Cases and Areas for Improvement:\")\n", + "print(\"- Given the very low F1-scores from Task 3 and 4, all cases are effectively 'difficult cases' as the model is struggling significantly.\")\n", + "print(\"- Ambiguous text and overlapping entities are known challenges for NER models generally, and especially with limited data.\")\n", + "print(\"- **Primary Area for Improvement:** Significantly increase the size and diversity of the labeled Amharic dataset. Rule-based labeling is a good start, but manual review and annotation of a much larger corpus would yield better results.\")\n", + "print(\"- **Data Quality:** Re-verify the quality and consistency of rule-based labels. Errors in labeling will directly impact model performance.\")\n", + "print(\"- **Model Architecture:** While DistilBERT is lightweight, a more powerful model like `afro-xlmr-large` (given more data) or other mBERT variants might perform better.\")\n", + "print(\"- **Hyperparameter Tuning:** More extensive hyperparameter tuning (learning rate, batch size, epochs, optimizers) could potentially yield marginal improvements, but is unlikely to overcome data limitations.\")\n", + "print(\"- **Pre-training:** Explore domain-specific pre-training or continued pre-training on a large corpus of general Amharic text before fine-tuning for NER.\")\n", + "print('- **Complex Entity Boundaries:** Analyze cases where entity boundaries are fluid (e.g., \"ዋጋ ስልክ አድራሻ\" or \"price contact\") after tokenization and labeling to refine rules or model capabilities.')\n", + "\n", + "print(\"\\nConclusion for Interpretability:\")\n", + "print(\"Model interpretability tools like SHAP are valuable for understanding how even poorly performing models make decisions, which can guide data collection, feature engineering, and model selection. With more high-quality labeled data, these tools would provide deeper and more actionable insights into a well-performing NER system.\")\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}