diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 86f537b7..8c7e184e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -13,11 +13,14 @@ Change Log
Unreleased
+[2.3.20] - 2026-01-29
+---------------------
+* feat: translate skills, jobs and industries
+
[2.3.19] - 2025-11-26
---------------------
* chore: upgrade python requirements
-
[2.3.18] - 2025-10-30
---------------------
* fix: pin `pip<25.3` to resolve make upgrade build failure
diff --git a/Makefile b/Makefile
index b22fc528..bb6c279e 100644
--- a/Makefile
+++ b/Makefile
@@ -56,7 +56,7 @@ pii_check: ## check for PII annotations on all Django models
requirements: ## install development environment requirements
pip install -qr requirements/pip.txt
- pip install -q -r requirements/pip_tools.txt -c requirements/constraints.txt
+ pip install -q -r requirements/pip-tools.txt -c requirements/constraints.txt
pip-sync requirements/dev.txt
test: clean ## run tests in the current virtualenv
diff --git a/taxonomy/__init__.py b/taxonomy/__init__.py
index 8417a10e..24f5ef26 100644
--- a/taxonomy/__init__.py
+++ b/taxonomy/__init__.py
@@ -15,4 +15,4 @@
# 2. MINOR version when you add functionality in a backwards compatible manner, and
# 3. PATCH version when you make backwards compatible bug fixes.
# More details can be found at https://semver.org/
-__version__ = '2.3.19'
+__version__ = '2.3.20'
diff --git a/taxonomy/admin.py b/taxonomy/admin.py
index 54ac102e..2f055b4b 100644
--- a/taxonomy/admin.py
+++ b/taxonomy/admin.py
@@ -11,6 +11,7 @@
from django.contrib import admin, messages
from django.http import HttpResponseRedirect
from django.urls import re_path, reverse
+from django.utils.html import format_html
from taxonomy.constants import JOB_SKILLS_URL_NAME
from taxonomy.models import (
@@ -30,6 +31,7 @@
SkillsQuiz,
SkillSubCategory,
SkillValidationConfiguration,
+ TaxonomyTranslation,
Translation,
XBlockSkillData,
XBlockSkills,
@@ -301,3 +303,122 @@ class SkillValidationConfiguratonAdmin(admin.ModelAdmin):
"""
Admin view for SkillValidationConfiguration model.
"""
+
+
+@admin.register(TaxonomyTranslation)
+class TaxonomyTranslationAdmin(admin.ModelAdmin):
+ """
+ Admin view for TaxonomyTranslation model.
+
+ Displays translations for jobs, skills, and industries across different languages.
+ """
+
+ list_display = (
+ 'id',
+ 'content_type',
+ 'external_id',
+ 'language_code',
+ 'title_preview',
+ 'view_source_object',
+ )
+
+ list_filter = (
+ 'content_type',
+ 'language_code',
+ )
+
+ search_fields = (
+ 'external_id',
+ 'title',
+ 'description',
+ )
+
+ readonly_fields = (
+ 'created',
+ 'modified',
+ 'source_hash',
+ 'source_object_link',
+ )
+
+ fieldsets = (
+ ('Entity Information', {
+ 'fields': ('content_type', 'external_id', 'language_code', 'source_object_link'),
+ }),
+ ('Translation', {
+ 'fields': ('title', 'description'),
+ }),
+ ('Metadata', {
+ 'fields': ('source_hash', 'created', 'modified'),
+ 'classes': ('collapse',),
+ }),
+ )
+
+ ordering = ('-modified',)
+
+    @admin.display(description='Title')
+    def title_preview(self, obj):
+        """
+        Return the translated title, shortened for the changelist.
+
+        Titles longer than 75 characters are cut to their first 75 characters
+        and suffixed with ``...``; shorter titles are returned unchanged.
+        """
+        title = obj.title
+        if len(title) <= 75:
+            return title
+        return title[:75] + '...'
+
+    @admin.display(description='Source Object')
+    def view_source_object(self, obj):
+        """
+        Display a link to view the source object (Job, Skill, or Industry).
+
+        Jobs and skills are looked up by ``external_id``; industries are looked
+        up by ``code``. Returns ``'-'`` when the content type is not one of the
+        three handled here or when no matching source object exists.
+        """
+        try:
+            if obj.content_type == 'job':
+                job = Job.objects.get(external_id=obj.external_id)
+                url = reverse('admin:taxonomy_job_change', args=[job.pk])
+                # The markup needs a placeholder for ``url``; without one
+                # format_html() ignores the argument and renders plain text.
+                return format_html('<a href="{}">View Job</a>', url)
+            elif obj.content_type == 'skill':
+                skill = Skill.objects.get(external_id=obj.external_id)
+                url = reverse('admin:taxonomy_skill_change', args=[skill.pk])
+                return format_html('<a href="{}">View Skill</a>', url)
+            elif obj.content_type == 'industry':
+                industry = Industry.objects.get(code=obj.external_id)
+                url = reverse('admin:taxonomy_industry_change', args=[industry.pk])
+                return format_html('<a href="{}">View Industry</a>', url)
+        except (Job.DoesNotExist, Skill.DoesNotExist, Industry.DoesNotExist):
+            return '-'
+
+        return '-'
+
+    @admin.display(description='Source Object Details')
+    def source_object_link(self, obj):
+        """
+        Display detailed link to source object in the detail view.
+
+        The link text includes the source object's name and identifier. When
+        the source object cannot be found a plain "not found" message is
+        rendered instead, and ``'-'`` is returned for any content type other
+        than job, skill, or industry.
+        """
+        try:
+            if obj.content_type == 'job':
+                job = Job.objects.get(external_id=obj.external_id)
+                url = reverse('admin:taxonomy_job_change', args=[job.pk])
+                # Three placeholders for three arguments: href, name, ID.
+                return format_html(
+                    '<a href="{}">'
+                    'Open Job: {} (ID: {})</a>',
+                    url, job.name, job.external_id
+                )
+            elif obj.content_type == 'skill':
+                skill = Skill.objects.get(external_id=obj.external_id)
+                url = reverse('admin:taxonomy_skill_change', args=[skill.pk])
+                return format_html(
+                    '<a href="{}">'
+                    'Open Skill: {} (ID: {})</a>',
+                    url, skill.name, skill.external_id
+                )
+            elif obj.content_type == 'industry':
+                industry = Industry.objects.get(code=obj.external_id)
+                url = reverse('admin:taxonomy_industry_change', args=[industry.pk])
+                return format_html(
+                    '<a href="{}">'
+                    'Open Industry: {} (Code: {})</a>',
+                    url, industry.name, industry.code
+                )
+        except (Job.DoesNotExist, Skill.DoesNotExist, Industry.DoesNotExist):
+            return format_html(
+                'Source object not found (external_id: {})',
+                obj.external_id
+            )
+
+        return '-'
diff --git a/taxonomy/management/commands/populate_taxonomy_translations.py b/taxonomy/management/commands/populate_taxonomy_translations.py
new file mode 100644
index 00000000..646b711f
--- /dev/null
+++ b/taxonomy/management/commands/populate_taxonomy_translations.py
@@ -0,0 +1,390 @@
+# -*- coding: utf-8 -*-
+"""
+Management command to populate taxonomy translations using Xpert AI.
+
+This command translates job, skill, and industry data from English to target languages.
+It uses source_hash to detect changes and avoid unnecessary retranslations.
+
+Example usage:
+ python manage.py populate_taxonomy_translations --language es
+ python manage.py populate_taxonomy_translations --language es --content-type job
+ python manage.py populate_taxonomy_translations --language es --force
+"""
+import logging
+
+from django.core.exceptions import ValidationError
+from django.core.management.base import BaseCommand, CommandError
+from django.db import DatabaseError, IntegrityError
+
+from taxonomy.models import Industry, Job, Skill, TaxonomyTranslation
+from taxonomy.translation_utils import (
+ TranslationError,
+ get_supported_languages,
+ translate_item_with_xpert,
+ validate_language_code,
+)
+
+LOGGER = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+ """
+ Populate taxonomy translations using Xpert AI.
+
+ This management command translates jobs, skills, and industries from English
+ to target languages. It intelligently skips translations that are already
+ up-to-date using MD5 hash comparison of source text.
+ """
+
+ help = (
+ 'Populate taxonomy translations using Xpert AI. '
+ 'Translates jobs, skills, and industries to target languages. '
+ 'Uses source_hash to skip unchanged content and avoid unnecessary API calls.'
+ )
+
+    def add_arguments(self, parser):
+        """
+        Register the command-line options accepted by this command.
+
+        Options: ``--language`` (required), ``--content-type`` (defaults to
+        ``all``), ``--batch-size`` (defaults to 100) and the ``--force`` flag.
+        """
+        supported = ', '.join(get_supported_languages())
+        parser.add_argument(
+            '--language',
+            type=str,
+            required=True,
+            help='Target language code (ISO 639-1). Supported: {languages}'.format(languages=supported),
+        )
+
+        parser.add_argument(
+            '--content-type',
+            type=str,
+            choices=['job', 'skill', 'industry', 'all'],
+            default='all',
+            help='Type of content to translate. Default: all',
+        )
+
+        parser.add_argument(
+            '--batch-size',
+            type=int,
+            default=100,
+            help='Number of items to fetch from database in each batch. Default: 100',
+        )
+
+        force_help = (
+            'Force retranslation even if source_hash matches. '
+            'Useful when translation quality has improved.'
+        )
+        parser.add_argument('--force', action='store_true', help=force_help)
+
+    def handle(self, *args, **options):
+        """
+        Execute the command.
+
+        Validates the options, translates each requested content type in the
+        order job, skill, industry, and logs an aggregate summary.
+
+        Raises:
+            CommandError: If the language code is unsupported or the batch
+                size is not a positive integer.
+        """
+        language = options['language']
+        content_type = options['content_type']
+        batch_size = options['batch_size']
+        force = options['force']
+
+        # Validate language code
+        if not validate_language_code(language):
+            raise CommandError(
+                'Unsupported language code: {language}. '
+                'Supported languages: {supported}'.format(
+                    language=language,
+                    supported=', '.join(get_supported_languages())
+                )
+            )
+
+        # The batch size becomes the step of a range(): zero would raise
+        # ValueError and a negative value would silently process nothing.
+        if batch_size < 1:
+            raise CommandError(
+                'Batch size must be a positive integer, got: {batch_size}'.format(batch_size=batch_size)
+            )
+
+        # Log configuration
+        LOGGER.info('=' * 60)
+        LOGGER.info('Taxonomy Translation - Xpert AI')
+        LOGGER.info('=' * 60)
+        LOGGER.info('Configuration:')
+        LOGGER.info(' • Target language: %s', language)
+        LOGGER.info(' • Content type: %s', content_type)
+        LOGGER.info(' • Database batch size: %d', batch_size)
+        LOGGER.info(' • Force retranslation: %s', 'Yes' if force else 'No')
+
+        LOGGER.info(
+            'Starting taxonomy translation: language=%s, content_type=%s, batch_size=%d, force=%s',
+            language, content_type, batch_size, force
+        )
+
+        # Aggregate statistics across all processed content types
+        stats = {'translated': 0, 'skipped': 0, 'errors': 0}
+
+        # Processing order is fixed: jobs, then skills, then industries.
+        models_by_content_type = (
+            ('job', Job),
+            ('skill', Skill),
+            ('industry', Industry),
+        )
+        for content_type_name, model in models_by_content_type:
+            if content_type not in (content_type_name, 'all'):
+                continue
+
+            type_stats = self.translate_content_type(
+                model=model,
+                content_type_name=content_type_name,
+                language=language,
+                batch_size=batch_size,
+                force=force
+            )
+            for key in stats:
+                stats[key] += type_stats[key]
+
+        # Log summary
+        LOGGER.info('=' * 60)
+        LOGGER.info('Translation Summary')
+        LOGGER.info('=' * 60)
+        LOGGER.info('Translated: %d', stats['translated'])
+        LOGGER.info('Skipped (unchanged): %d', stats['skipped'])
+
+        if stats['errors'] > 0:
+            LOGGER.error('Errors: %d', stats['errors'])
+            LOGGER.error('Translation completed with %d errors', stats['errors'])
+        else:
+            LOGGER.info('Errors: 0')
+
+        total_processed = stats['translated'] + stats['skipped'] + stats['errors']
+        LOGGER.info('Total processed: %d', total_processed)
+
+        LOGGER.info(
+            'Translation completed: translated=%d, skipped=%d, errors=%d',
+            stats['translated'], stats['skipped'], stats['errors']
+        )
+
+    def translate_content_type(self, model, content_type_name, language, batch_size, force):
+        """
+        Translate all entities of a content type to target language.
+
+        This is a generic method that works for Job, Skill, and Industry models.
+        Entities are read in slices of ``batch_size`` and each slice is handed
+        to ``process_batch``.
+
+        Args:
+            model: Django model class (Job, Skill, or Industry)
+            content_type_name (str): Content type name ('job', 'skill', 'industry')
+            language (str): Target language code
+            batch_size (int): Number of items to fetch from database per batch
+            force (bool): Force retranslation
+
+        Returns:
+            dict: Statistics for this content type
+        """
+        LOGGER.info('Starting translation for content_type=%s', content_type_name)
+
+        # Exclude items without a name. The explicit ordering matters: the
+        # queryset is sliced below, and without a stable ORDER BY the database
+        # may return overlapping or missing rows between slices.
+        queryset = model.objects.exclude(name__isnull=True).order_by('pk')
+
+        # For models that have an external_id, also exclude rows where it is NULL
+        if hasattr(model, 'external_id'):
+            queryset = queryset.exclude(external_id__isnull=True)
+
+        total = queryset.count()
+
+        if total == 0:
+            LOGGER.info('No %ss found to translate', content_type_name)
+            return {'translated': 0, 'skipped': 0, 'errors': 0}
+
+        LOGGER.info('Found %d %ss to process', total, content_type_name)
+
+        # Track statistics for this content type
+        stats = {'translated': 0, 'skipped': 0, 'errors': 0}
+
+        # Process in batches
+        for i in range(0, total, batch_size):
+            batch = queryset[i:i + batch_size]
+            batch_stats = self.process_batch(
+                entities=batch,
+                content_type_name=content_type_name,
+                language=language,
+                force=force
+            )
+
+            stats['translated'] += batch_stats['translated']
+            stats['skipped'] += batch_stats['skipped']
+            stats['errors'] += batch_stats['errors']
+
+            LOGGER.info(
+                'Processed %d/%d %ss',
+                min(i + batch_size, total),
+                total,
+                content_type_name
+            )
+
+        LOGGER.info(
+            'Completed translation for content_type=%s: translated=%d, skipped=%d, errors=%d',
+            content_type_name,
+            stats['translated'],
+            stats['skipped'],
+            stats['errors']
+        )
+
+        return stats
+
+    def process_batch(self, entities, content_type_name, language, force):
+        """
+        Translate one batch of entities and persist the results.
+
+        The batch is handled in two passes: first every entity is compared
+        with its stored translation to decide whether work is needed, then
+        each pending entity is translated and saved individually so that one
+        failure does not abort the rest of the batch.
+
+        Args:
+            entities (QuerySet): Batch of entity objects
+            content_type_name (str): Content type name
+            language (str): Target language code
+            force (bool): Force retranslation
+
+        Returns:
+            dict: Counts keyed by 'translated', 'skipped' and 'errors'
+        """
+        counts = {'translated': 0, 'skipped': 0, 'errors': 0}
+        pending = []
+
+        # Pass 1: work out which entities are new or stale.
+        for entity in entities:
+            # Entities exposing external_id use it as-is; the rest
+            # (industries) are identified by their stringified code.
+            entity_id = entity.external_id if hasattr(entity, 'external_id') else str(entity.code)
+
+            # A missing or falsy description is treated as an empty string.
+            source_description = getattr(entity, 'description', '') or ''
+            current_hash = TaxonomyTranslation.calculate_source_hash(entity.name, source_description)
+
+            needs_translation, _is_update = self._should_translate(
+                external_id=entity_id,
+                content_type=content_type_name,
+                language=language,
+                source_hash=current_hash,
+                force=force,
+            )
+
+            if needs_translation:
+                pending.append({
+                    'external_id': entity_id,
+                    'title': entity.name,
+                    'description': source_description,
+                    'source_hash': current_hash,
+                })
+            else:
+                counts['skipped'] += 1
+
+        # Pass 2: translate and save the pending items one at a time.
+        pending_total = len(pending)
+        for position, item in enumerate(pending, start=1):
+            item_id = item['external_id']
+            LOGGER.info('Translating %s %d/%d: %s', content_type_name, position, pending_total, item_id)
+
+            try:
+                translation = translate_item_with_xpert(
+                    title=item['title'],
+                    description=item['description'],
+                    target_language=language,
+                    content_type=content_type_name,
+                    external_id=item_id,
+                )
+
+                translated_fields = {
+                    'title': translation.get('title', ''),
+                    'description': translation.get('description', ''),
+                    'source_hash': item['source_hash'],
+                }
+                # Insert a new row or overwrite the existing one for this key.
+                TaxonomyTranslation.objects.update_or_create(
+                    external_id=item_id,
+                    content_type=content_type_name,
+                    language_code=language,
+                    defaults=translated_fields,
+                )
+            except TranslationError as error:
+                LOGGER.error('Translation failed for %s %s: %s', content_type_name, item_id, str(error))
+                counts['errors'] += 1
+            except (IntegrityError, ValidationError, DatabaseError) as error:
+                LOGGER.error(
+                    'Database error saving translation for %s %s: %s',
+                    content_type_name,
+                    item_id,
+                    str(error),
+                    exc_info=True,
+                )
+                counts['errors'] += 1
+            else:
+                counts['translated'] += 1
+                LOGGER.info('Saved translation for %s %s to %s', content_type_name, item_id, language)
+
+        return counts
+
+    def _should_translate(self, external_id, content_type, language, source_hash, force):
+        """
+        Decide whether an entity must be (re)translated.
+
+        Args:
+            external_id (str): External ID of entity
+            content_type (str): Content type name
+            language (str): Target language code
+            source_hash (str): Hash of the current English source text
+            force (bool): Retranslate even when the stored hash matches
+
+        Returns:
+            tuple: ``(should_translate, is_update)``. ``is_update`` is True
+            only when a stored translation exists and will be overwritten.
+        """
+        try:
+            stored = TaxonomyTranslation.objects.get(
+                external_id=external_id,
+                content_type=content_type,
+                language_code=language,
+            )
+        except TaxonomyTranslation.DoesNotExist:
+            # Nothing stored yet: a brand-new translation is required.
+            return (True, False)
+
+        if force or stored.source_hash != source_hash:
+            # Source text changed, or the caller asked to redo it regardless.
+            return (True, True)
+
+        # Stored translation is still current.
+        return (False, False)
diff --git a/taxonomy/migrations/0039_taxonomytranslation.py b/taxonomy/migrations/0039_taxonomytranslation.py
new file mode 100644
index 00000000..3565f09c
--- /dev/null
+++ b/taxonomy/migrations/0039_taxonomytranslation.py
@@ -0,0 +1,36 @@
+# Generated by Django 4.2.16 on 2026-01-27 04:59
+
+import django.utils.timezone
+from django.db import migrations, models
+
+import model_utils.fields
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('taxonomy', '0038_mariadb_uuid_conversion'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='TaxonomyTranslation',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
+ ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
+ ('external_id', models.CharField(db_index=True, help_text='EMSI external ID (e.g., ET1234567890, ES1234567890, IN123)', max_length=255)),
+ ('content_type', models.CharField(choices=[('job', 'Job'), ('skill', 'Skill'), ('industry', 'Industry')], db_index=True, help_text='Type of entity being translated (job, skill, or industry)', max_length=50)),
+ ('language_code', models.CharField(db_index=True, help_text='Target language code (e.g., es, ar, fr) following ISO 639-1', max_length=10)),
+ ('title', models.CharField(help_text='Translated name/title of the entity', max_length=500)),
+ ('description', models.TextField(blank=True, help_text='Translated description field')),
+ ('source_hash', models.CharField(help_text='MD5 hash of English source text (title + description). Used to detect when source content changes and retranslation is needed.', max_length=64)),
+ ],
+ options={
+ 'verbose_name': 'Taxonomy Translation',
+ 'verbose_name_plural': 'Taxonomy Translations',
+ 'indexes': [models.Index(fields=['language_code', 'content_type'], name='taxonomy_ta_languag_ce278d_idx'), models.Index(fields=['external_id', 'language_code'], name='taxonomy_ta_externa_b71780_idx')],
+ 'unique_together': {('external_id', 'content_type', 'language_code')},
+ },
+ ),
+ ]
diff --git a/taxonomy/models.py b/taxonomy/models.py
index 2d0da364..94478147 100644
--- a/taxonomy/models.py
+++ b/taxonomy/models.py
@@ -4,6 +4,7 @@
"""
from __future__ import unicode_literals
+import hashlib
import logging
import uuid
@@ -1117,6 +1118,139 @@ class Meta:
verbose_name_plural = 'Industries'
+class TaxonomyTranslation(TimeStampedModel):
+ """
+ Store translations for taxonomy entities (jobs, skills, industries).
+
+ This model follows the enterprise-catalog ContentTranslation pattern,
+ using source_hash to track when English source content changes and
+ retranslation is needed.
+
+ .. no_pii:
+ """
+
+ CONTENT_TYPE_CHOICES = [
+ ('job', 'Job'),
+ ('skill', 'Skill'),
+ ('industry', 'Industry'),
+ ]
+
+ external_id = models.CharField(
+ max_length=255,
+ db_index=True,
+ help_text=_(
+ 'EMSI external ID (e.g., ET1234567890, ES1234567890, IN123)'
+ )
+ )
+
+ content_type = models.CharField(
+ max_length=50,
+ choices=CONTENT_TYPE_CHOICES,
+ db_index=True,
+ help_text=_(
+ 'Type of entity being translated (job, skill, or industry)'
+ )
+ )
+
+ language_code = models.CharField(
+ max_length=10,
+ db_index=True,
+ help_text=_(
+ 'Target language code (e.g., es, ar, fr) following ISO 639-1'
+ )
+ )
+
+ # Translated fields
+ title = models.CharField(
+ max_length=500,
+ help_text=_(
+ 'Translated name/title of the entity'
+ )
+ )
+
+ description = models.TextField(
+ blank=True,
+ help_text=_(
+ 'Translated description field'
+ )
+ )
+
+ # Change detection
+ source_hash = models.CharField(
+ max_length=64,
+ help_text=_(
+ 'MD5 hash of English source text (title + description). '
+ 'Used to detect when source content changes and retranslation is needed.'
+ )
+ )
+
+ class Meta:
+ """
+ Meta options for TaxonomyTranslation.
+ """
+
+ app_label = 'taxonomy'
+ verbose_name = _('Taxonomy Translation')
+ verbose_name_plural = _('Taxonomy Translations')
+ unique_together = [('external_id', 'content_type', 'language_code')]
+ indexes = [
+ models.Index(fields=['language_code', 'content_type']),
+ models.Index(fields=['external_id', 'language_code']),
+ ]
+
+    def __str__(self):
+        """
+        Return ``content_type:external_id:language_code:title`` for display.
+
+        Only the first 50 characters of the title are included.
+        """
+        short_title = self.title[:50]
+        return '{}:{}:{}:{}'.format(
+            self.content_type,
+            self.external_id,
+            self.language_code,
+            short_title,
+        )
+
+    def __repr__(self):
+        """
+        Create a unique string representation of the object.
+
+        The representation carries the primary key together with the three
+        fields that form the model's unique key, so an instance can be
+        identified unambiguously in logs and debugger output.
+        """
+        # An empty template would discard every argument and return ''.
+        return '<TaxonomyTranslation id="{}" content_type="{}" external_id="{}" language_code="{}">'.format(
+            self.id,
+            self.content_type,
+            self.external_id,
+            self.language_code,
+        )
+
+    @staticmethod
+    def calculate_source_hash(title, description=''):
+        """
+        Return the MD5 hex digest of the English source text.
+
+        The title and description are joined with ``||`` and UTF-8 encoded
+        before hashing; a falsy title or description contributes an empty
+        string. Comparing this digest with a stored one shows whether the
+        source text has changed since the last translation.
+
+        Args:
+            title (str): The title/name of the entity
+            description (str): The description of the entity (optional)
+
+        Returns:
+            str: MD5 hash (32 hex characters)
+        """
+        combined = '{}||{}'.format(title or '', description or '')
+        encoded = combined.encode('utf-8')
+        return hashlib.md5(encoded).hexdigest()
+
+
class B2CJobAllowList(models.Model):
"""
Model for storing admin configuration for B2C Job Allowlist entries.
diff --git a/taxonomy/tests/test_populate_taxonomy_translations.py b/taxonomy/tests/test_populate_taxonomy_translations.py
new file mode 100644
index 00000000..82200c5f
--- /dev/null
+++ b/taxonomy/tests/test_populate_taxonomy_translations.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for populate_taxonomy_translations management command.
+"""
+import unittest
+from unittest.mock import patch
+
+import ddt
+import pytest
+
+from django.core.management import call_command
+from django.core.management.base import CommandError
+from django.db import DatabaseError
+
+from taxonomy.models import Industry, Job, Skill, TaxonomyTranslation
+from taxonomy.translation_utils import TranslationError
+
+
+@pytest.mark.django_db
+@ddt.ddt
+class TestPopulateTaxonomyTranslationsCommand(unittest.TestCase):
+ """Test the populate_taxonomy_translations management command."""
+
+    @ddt.data('job', 'skill', 'industry')
+    def test_translate_content_type_success(self, content_type):
+        """Test successful translation for each content type."""
+        # Create the source entity and record the ID its translation is stored under
+        if content_type == 'job':
+            Job.objects.create(external_id='ET123', name='Software Engineer', description='Develops apps')
+            external_id = 'ET123'
+        elif content_type == 'skill':
+            Skill.objects.create(external_id='ES123', name='Python', description='Programming language')
+            external_id = 'ES123'
+        else:
+            # Industry translations are keyed by the stringified code
+            Industry.objects.create(code=54, name='Technology')
+            external_id = '54'
+
+        with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+            # Plain literal: nothing is interpolated, so no f-string or brace escaping is needed
+            mock_chat.return_value = '{"title": "Translated", "description": "Desc"}'
+
+            call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', content_type)
+
+            # Verify translation created
+            translation = TaxonomyTranslation.objects.get(
+                external_id=external_id,
+                content_type=content_type,
+                language_code='es'
+            )
+            assert translation.title == 'Translated'
+            assert translation.description == 'Desc'
+
+ @pytest.mark.django_db
+ def test_skip_unchanged_translations(self):
+ """Test that unchanged translations are skipped."""
+ job = Job.objects.create(external_id='ET123', name='Engineer', description='Develops')
+ source_hash = TaxonomyTranslation.calculate_source_hash(job.name, job.description)
+
+ # Create existing translation
+ TaxonomyTranslation.objects.create(
+ external_id='ET123',
+ content_type='job',
+ language_code='es',
+ title='Ingeniero',
+ description='Desarrolla',
+ source_hash=source_hash
+ )
+
+ with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+ call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', 'job')
+
+ # API should not be called
+ mock_chat.assert_not_called()
+
+ @pytest.mark.django_db
+ def test_update_stale_translations(self):
+ """Test that stale translations are updated."""
+ job = Job.objects.create(external_id='ET123', name='Engineer Updated', description='New desc')
+
+ # Create stale translation with old hash
+ TaxonomyTranslation.objects.create(
+ external_id='ET123',
+ content_type='job',
+ language_code='es',
+ title='Old Translation',
+ description='Old desc',
+ source_hash='old_hash_123'
+ )
+
+ with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+ mock_chat.return_value = '{"title": "New Translation", "description": "New"}'
+
+ call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', 'job')
+
+ # Translation should be updated
+ translation = TaxonomyTranslation.objects.get(external_id='ET123', content_type='job', language_code='es')
+ assert translation.title == 'New Translation'
+ assert translation.description == 'New'
+
+ @pytest.mark.django_db
+ def test_force_retranslation(self):
+ """Test --force flag retranslates even unchanged items."""
+ job = Job.objects.create(external_id='ET123', name='Engineer', description='Develops')
+ source_hash = TaxonomyTranslation.calculate_source_hash(job.name, job.description)
+
+ TaxonomyTranslation.objects.create(
+ external_id='ET123',
+ content_type='job',
+ language_code='es',
+ title='Ingeniero',
+ description='Desarrolla',
+ source_hash=source_hash
+ )
+
+ with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+ mock_chat.return_value = '{"title": "Forced Translation", "description": "Forced"}'
+
+ call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', 'job', '--force')
+
+ # Translation should be updated despite same hash
+ translation = TaxonomyTranslation.objects.get(external_id='ET123', content_type='job', language_code='es')
+ assert translation.title == 'Forced Translation'
+
+ @pytest.mark.django_db
+ def test_invalid_language_code(self):
+ """Test command rejects invalid language codes."""
+ with pytest.raises(CommandError):
+ call_command('populate_taxonomy_translations', '--language', 'xyz')
+
+ @pytest.mark.django_db
+ def test_no_items_to_translate(self):
+ """Test when there are no items to translate."""
+ # Don't create any jobs - empty database
+ with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+ call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', 'job')
+
+ # API should not be called
+ mock_chat.assert_not_called()
+ # Should complete without errors
+ assert TaxonomyTranslation.objects.count() == 0
+
+ @pytest.mark.django_db
+ def test_translation_error_handling(self):
+ """
+ Test that command handles TranslationError gracefully.
+
+ When translate_item_with_xpert raises TranslationError (e.g., due to missing title),
+ the command should catch it, log the error, increment error counter, and continue
+ without saving the incomplete translation.
+ """
+ # Create test job
+ Job.objects.create(external_id='ET123', name='Software Engineer', description='Develops apps')
+
+ with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+ # Simulate incomplete translation (missing title) which triggers TranslationError
+ mock_chat.return_value = '{"title": "", "description": "Desc"}'
+
+ # Command should complete and handle the error gracefully
+ call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', 'job')
+
+ # No translation should be created because TranslationError was caught
+ assert TaxonomyTranslation.objects.filter(external_id='ET123').count() == 0
+
+ @pytest.mark.django_db
+ def test_multiple_translation_errors(self):
+ """
+ Test that command handles multiple TranslationErrors gracefully.
+
+ When multiple items fail to translate (each raising TranslationError),
+ the command should catch each error, log it, and continue processing
+ remaining items without crashing.
+ """
+ # Create multiple jobs
+ Job.objects.create(external_id='ET001', name='Job 1', description='Desc 1')
+ Job.objects.create(external_id='ET002', name='Job 2', description='Desc 2')
+ Job.objects.create(external_id='ET003', name='Job 3', description='Desc 3')
+
+ with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+ # All translations fail (empty titles trigger TranslationError)
+ mock_chat.return_value = '{"title": "", "description": "Desc"}'
+
+ # Command should complete despite all errors being caught
+ call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', 'job')
+
+ # No translations should be created since all raised TranslationError
+ assert TaxonomyTranslation.objects.count() == 0
+
+ @pytest.mark.django_db
+ def test_database_error_handling(self):
+ """
+ Test that command handles database errors gracefully.
+
+ When a DatabaseError occurs during save (after successful translation),
+ the command should catch it, log the error, increment error counter,
+ and continue without crashing.
+ """
+ # Create test job
+ Job.objects.create(external_id='ET123', name='Software Engineer', description='Develops apps')
+
+ with patch('taxonomy.translation_utils.chat_completion') as mock_chat:
+ mock_chat.return_value = '{"title": "Ingeniero", "description": "Desarrolla"}'
+
+ # Mock update_or_create to raise DatabaseError
+ with patch('taxonomy.models.TaxonomyTranslation.objects.update_or_create') as mock_save:
+ mock_save.side_effect = DatabaseError('Database connection failed')
+
+ # Command should complete and handle the database error gracefully
+ call_command('populate_taxonomy_translations', '--language', 'es', '--content-type', 'job')
+
+ # No translation should be created due to DatabaseError being caught
+ assert TaxonomyTranslation.objects.filter(external_id='ET123').count() == 0
diff --git a/taxonomy/tests/test_translation_utils.py b/taxonomy/tests/test_translation_utils.py
new file mode 100644
index 00000000..34a4eda7
--- /dev/null
+++ b/taxonomy/tests/test_translation_utils.py
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for translation utilities.
+"""
+import unittest
+from unittest.mock import patch
+
+import ddt
+import pytest
+
+from taxonomy.translation_utils import (
+ TranslationError,
+ _build_translation_prompt,
+ _parse_translation_response,
+ get_supported_languages,
+ translate_item_with_xpert,
+ validate_language_code,
+)
+
+
+@ddt.ddt
+class TestTranslationUtils(unittest.TestCase):
+    """Test translation utility functions; every test that reaches Xpert patches ``chat_completion``."""
+
+    @ddt.data(
+        ('es', True),
+        ('en', False),
+        ('ar', False),
+        ('xyz', False),
+    )
+    @ddt.unpack
+    def test_validate_language_code(self, language_code, expected):
+        """Test language code validation - only Spanish supported."""
+        assert validate_language_code(language_code) == expected
+
+    def test_get_supported_languages(self):
+        """Test getting supported languages returns only Spanish."""
+        languages = get_supported_languages()
+        assert languages == ['es']
+
+    def test_build_translation_prompt(self):
+        """Test building translation prompt for a single item."""
+        prompt = _build_translation_prompt(
+            title='Software Engineer',
+            description='Develops software',
+            content_type='job',
+            target_language='es'
+        )
+
+        # The prompt must carry both inputs and the full language name, not the code
+        assert 'Software Engineer' in prompt
+        assert 'Spanish' in prompt
+        assert 'Develops software' in prompt
+
+    @ddt.data(
+        # Valid complete response
+        (
+            '{"title": "Ingeniero", "description": "Desarrolla"}',
+            {'title': 'Ingeniero', 'description': 'Desarrolla'}
+        ),
+        # Response with only title (description defaults to empty)
+        (
+            '{"title": "Ingeniero"}',
+            {'title': 'Ingeniero', 'description': ''}
+        ),
+        # Response with empty description
+        (
+            '{"title": "Ingeniero", "description": ""}',
+            {'title': 'Ingeniero', 'description': ''}
+        ),
+    )
+    @ddt.unpack
+    def test_parse_translation_response_success(self, response, expected):
+        """Test parsing valid single-item translation responses."""
+        result = _parse_translation_response(response)
+
+        assert result['title'] == expected['title']
+        assert result['description'] == expected['description']
+
+    def test_parse_translation_response_missing_description(self):
+        """Test parsing response with missing description field uses empty string."""
+        # NOTE(review): same input as the title-only case of
+        # test_parse_translation_response_success; kept as an explicit, named check.
+        response = '{"title": "Ingeniero"}'
+
+        result = _parse_translation_response(response)
+
+        assert result['title'] == 'Ingeniero'
+        assert result['description'] == ''  # Key absent from the response - falls back to ''
+
+    @ddt.data(
+        'This is not JSON',
+        '{invalid json}',
+        'null',
+        '[]',  # Array instead of object
+        '123',  # Number instead of object
+    )
+    def test_parse_translation_response_invalid(self, response):
+        """Test parsing invalid responses returns a dict with empty title and description."""
+        result = _parse_translation_response(response)
+
+        # Unparseable text and valid-but-non-object JSON both yield empty strings, not an exception
+        assert result == {'title': '', 'description': ''}
+
+    @patch('taxonomy.translation_utils.chat_completion')
+    def test_translate_item_success(self, mock_chat):
+        """Test successful single-item translation."""
+        mock_chat.return_value = '{"title": "Ingeniero", "description": "Desarrolla"}'
+
+        result = translate_item_with_xpert(
+            title='Engineer',
+            description='Develops',
+            target_language='es',
+            content_type='job',
+            external_id='ET123'
+        )
+
+        assert result['title'] == 'Ingeniero'
+        assert result['description'] == 'Desarrolla'
+        # Exactly one API call per item
+        assert mock_chat.call_count == 1
+
+    @patch('taxonomy.translation_utils.chat_completion')
+    def test_translate_item_api_error(self, mock_chat):
+        """Test single-item translation raises TranslationError on API errors."""
+        mock_chat.side_effect = Exception('API Error')
+
+        with pytest.raises(TranslationError) as exc_info:
+            translate_item_with_xpert(
+                title='Engineer',
+                description='',
+                target_language='es',
+                content_type='job',
+                external_id='ET123'
+            )
+
+        # The wrapper message names the item and includes the original error text
+        assert 'Failed to translate job ET123' in str(exc_info.value)
+        assert 'API Error' in str(exc_info.value)
+
+    @patch('taxonomy.translation_utils.chat_completion')
+    def test_translate_item_missing_title_translation(self, mock_chat):
+        """Test that TranslationError is raised when title is provided but translation is missing."""
+        # Mock returns empty title when we provided a non-empty title
+        mock_chat.return_value = '{"title": "", "description": "Desarrolla"}'
+
+        with pytest.raises(TranslationError) as exc_info:
+            translate_item_with_xpert(
+                title='Engineer',  # Non-empty title provided
+                description='Develops',
+                target_language='es',
+                content_type='job',
+                external_id='ET123'
+            )
+
+        assert 'Translation missing title' in str(exc_info.value)
+        assert 'ET123' in str(exc_info.value)
+
+    @patch('taxonomy.translation_utils.chat_completion')
+    def test_translate_item_missing_description_translation(self, mock_chat):
+        """Test that TranslationError is raised when description is provided but translation is missing."""
+        # Mock returns empty description when we provided a non-empty description
+        mock_chat.return_value = '{"title": "Ingeniero", "description": ""}'
+
+        with pytest.raises(TranslationError) as exc_info:
+            translate_item_with_xpert(
+                title='Engineer',
+                description='Develops software applications',  # Non-empty description provided
+                target_language='es',
+                content_type='job',
+                external_id='ET123'
+            )
+
+        assert 'Translation missing description' in str(exc_info.value)
+        assert 'ET123' in str(exc_info.value)
+
+    @patch('taxonomy.translation_utils.chat_completion')
+    def test_translate_item_empty_inputs_no_error(self, mock_chat):
+        """Test that no error is raised when inputs are empty and translations are empty."""
+        # Empty inputs should not trigger validation error
+        mock_chat.return_value = '{"title": "", "description": ""}'
+
+        result = translate_item_with_xpert(
+            title='',  # Empty title
+            description='',  # Empty description
+            target_language='es',
+            content_type='job',
+            external_id='ET123'
+        )
+
+        assert result['title'] == ''
+        assert result['description'] == ''
+        # No exception should be raised
+
+    @patch('taxonomy.translation_utils.chat_completion')
+    def test_translate_item_only_title_provided(self, mock_chat):
+        """Test translation when only title is provided (description is empty)."""
+        # Only title provided, description empty - should work fine
+        mock_chat.return_value = '{"title": "Ingeniero", "description": ""}'
+
+        result = translate_item_with_xpert(
+            title='Engineer',  # Non-empty title
+            description='',  # Empty description
+            target_language='es',
+            content_type='job',
+            external_id='ET123'
+        )
+
+        assert result['title'] == 'Ingeniero'
+        assert result['description'] == ''
+        # No exception should be raised since description was empty in input
+ # No exception should be raised since description was empty in input
diff --git a/taxonomy/translation_utils.py b/taxonomy/translation_utils.py
new file mode 100644
index 00000000..8b8a633f
--- /dev/null
+++ b/taxonomy/translation_utils.py
@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+"""
+Utilities for translating taxonomy data using Xpert AI.
+
+This module provides helper functions to translate job, skill, and industry data
+from English to other languages using the Xpert AI translation service.
+"""
+import json
+import logging
+
+from taxonomy.openai.client import chat_completion
+
+LOGGER = logging.getLogger(__name__)
+
+
+class TranslationError(Exception):
+    """
+    Exception raised when translation fails or returns incomplete results.
+
+    ``translate_item_with_xpert`` raises it when a non-empty title or
+    description comes back without a translation, and wraps any other
+    exception raised during the translation call in it.
+    """
+
+
+def translate_item_with_xpert(title, description, target_language, content_type, external_id):
+    """
+    Translate one taxonomy item's title and description via Xpert AI.
+
+    A prompt is built from the English ``title`` and ``description``, sent
+    through ``chat_completion``, and the reply is parsed into a dict. An input
+    field that was non-empty but comes back empty is treated as a failure.
+
+    Args:
+        title (str): English title/name to translate
+        description (str): English description to translate
+        target_language (str): Target language code (e.g., 'es')
+        content_type (str): Type of content ('job', 'skill', or 'industry')
+        external_id (str): External ID, used only in log and error messages
+
+    Returns:
+        dict: Dict containing translated content:
+            - title (str): Translated title
+            - description (str): Translated description
+
+    Raises:
+        TranslationError: If a provided title or description has no
+            translation, or if building the prompt, calling Xpert or parsing
+            the reply raises any other exception (chained as the cause).
+
+    Example:
+        >>> result = translate_item_with_xpert(
+        ...     title='Software Engineer',
+        ...     description='Develops software applications',
+        ...     target_language='es',
+        ...     content_type='job',
+        ...     external_id='ET123'
+        ... )
+        >>> result['title']
+        'Ingeniero de Software'
+    """
+    LOGGER.debug('Translating %s %s to %s using Xpert AI', content_type, external_id, target_language)
+
+    try:
+        request_prompt = _build_translation_prompt(
+            title=title,
+            description=description,
+            content_type=content_type,
+            target_language=target_language,
+        )
+        translator_role = "You are a professional translator specializing in career and education content."
+
+        # One API call per item.
+        raw_reply = chat_completion(prompt=request_prompt, system_message=translator_role)
+        translation = _parse_translation_response(raw_reply)
+
+        # Completeness check: every field that was supplied must come back translated.
+        if title and not translation['title']:
+            raise TranslationError(
+                f"Translation missing title for {content_type} {external_id}. "
+                f"Input title: '{title}'"
+            )
+        if description and not translation['description']:
+            raise TranslationError(
+                f"Translation missing description for {content_type} {external_id}. "
+                f"Input description length: {len(description)} chars"
+            )
+
+        LOGGER.debug('Successfully translated %s %s to %s', content_type, external_id, target_language)
+        return translation
+
+    except TranslationError:
+        # Completeness failures are already the right type; pass them through untouched.
+        raise
+    except Exception as error:  # pylint: disable=broad-exception-caught
+        reason = str(error)
+        LOGGER.error('Error translating %s %s: %s', content_type, external_id, reason, exc_info=True)
+        # Callers only ever need to handle TranslationError.
+        raise TranslationError(f"Failed to translate {content_type} {external_id}: {reason}") from error
+
+
+def _build_translation_prompt(title, description, content_type, target_language):
+    """
+    Compose the Xpert AI prompt for translating one taxonomy item from English.
+
+    The prompt names the content type and target language, lists the
+    translation rules, embeds the title and description, and asks for a JSON
+    object with exactly the fields "title" and "description".
+
+    Args:
+        title (str): Title/name to translate
+        description (str): Description to translate
+        content_type (str): Type of content ('job', 'skill', 'industry')
+        target_language (str): Target language code; 'es' is rendered as
+            'Spanish', any other code is inserted into the prompt unchanged
+
+    Returns:
+        str: Formatted prompt for Xpert AI
+
+    Example:
+        >>> prompt = _build_translation_prompt(
+        ...     title='Software Engineer',
+        ...     description='Develops apps',
+        ...     content_type='job',
+        ...     target_language='es'
+        ... )
+        >>> 'Software Engineer' in prompt
+        True
+    """
+    # Full language name for the prompt; codes without an entry are used verbatim.
+    language_name = {'es': 'Spanish'}.get(target_language, target_language)
+
+    return f"""Translate the following {content_type} from English to {language_name}.
+
+CRITICAL INSTRUCTIONS:
+1. Maintain professional tone appropriate for career/education content
+2. Preserve technical terms (e.g., "Python", "JavaScript", "SQL", "AWS", "React")
+3. If description is empty, return empty string for description
+4. Return ONLY a valid JSON object - no explanations, no markdown, just the object
+5. The JSON must have exactly two fields: "title" and "description"
+
+Input to translate:
+Title: {title}
+Description: {description}
+
+Return translation in this EXACT format (JSON object):
+{{"title": "TRANSLATED_TITLE", "description": "TRANSLATED_DESCRIPTION"}}
+"""
+
+
+def _parse_translation_response(response):
+    """
+    Parse the Xpert AI reply for a single item into a title/description dict.
+
+    The reply must be a JSON object. A field that is absent, or whose value is
+    JSON ``null``, yields an empty string so that the caller's completeness
+    check sees it as "not translated" rather than as the literal text
+    ``'None'``. Any other value is converted with ``str`` and stripped.
+
+    Args:
+        response (str): Response from Xpert AI API containing JSON object
+
+    Returns:
+        dict: Translated content with:
+            - title (str): Translated title (or empty string on error)
+            - description (str): Translated description (or empty string on error)
+        Both fields are empty when the reply is not valid JSON or is valid
+        JSON that is not an object.
+
+    Example:
+        >>> response = '{"title": "Ingeniero de Software", "description": "Desarrolla aplicaciones"}'
+        >>> result = _parse_translation_response(response)
+        >>> result['title']
+        'Ingeniero de Software'
+    """
+    try:
+        # Parse JSON object
+        translated = json.loads(response)
+    except json.JSONDecodeError as e:
+        LOGGER.error('Failed to parse translation response as JSON: %s', str(e))
+        # Only a prefix is logged to keep log lines bounded
+        LOGGER.debug('Response content: %s', response[:200])
+        return {'title': '', 'description': ''}
+
+    # Validate it's a dict
+    if not isinstance(translated, dict):
+        LOGGER.error('Expected JSON object, got %s', type(translated).__name__)
+        return {'title': '', 'description': ''}
+
+    def _clean(value):
+        # JSON null decodes to None; str(None) would produce the bogus text 'None'.
+        return '' if value is None else str(value).strip()
+
+    return {
+        'title': _clean(translated.get('title')),
+        'description': _clean(translated.get('description')),
+    }
+
+
+def get_supported_languages():
+    """
+    Return the language codes that taxonomy content can be translated into.
+
+    English is the source language; Spanish is currently the only target.
+
+    Returns:
+        list: List of ISO 639-1 language codes (a new list on every call)
+
+    Example:
+        >>> languages = get_supported_languages()
+        >>> 'es' in languages
+        True
+    """
+    supported_codes = ['es']
+    return supported_codes
+
+
+def validate_language_code(language_code):
+    """
+    Tell whether translation into the given language is supported.
+
+    The code is compared by exact match against the list returned by
+    ``get_supported_languages``.
+
+    Args:
+        language_code (str): Language code to validate
+
+    Returns:
+        bool: True if language is supported, False otherwise
+
+    Example:
+        >>> validate_language_code('es')
+        True
+        >>> validate_language_code('xyz')
+        False
+    """
+    supported_codes = get_supported_languages()
+    return language_code in supported_codes
diff --git a/test_utils/factories.py b/test_utils/factories.py
index 5a7a5816..01c106e7 100644
--- a/test_utils/factories.py
+++ b/test_utils/factories.py
@@ -29,6 +29,7 @@
SkillsQuiz,
SkillSubCategory,
SkillValidationConfiguration,
+ TaxonomyTranslation,
Translation,
XBlockSkillData,
XBlockSkills,
@@ -418,3 +419,21 @@ class SkillValidationConfigurationFactory(factory.django.DjangoModelFactory):
class Meta:
model = SkillValidationConfiguration
+
+
+class TaxonomyTranslationFactory(factory.django.DjangoModelFactory):
+    """
+    Factory class for TaxonomyTranslation model.
+
+    By default builds a Spanish ('es') translation of a 'job' item with a
+    generated title and description.
+    """
+
+    class Meta:
+        model = TaxonomyTranslation
+
+    # 'ET' followed by the sequence counter zero-padded to 10 digits
+    external_id = factory.Sequence(lambda n: f'ET{n:010d}')
+    content_type = 'job'
+    language_code = 'es'
+    title = factory.Faker('job')
+    description = factory.Faker('text')
+    # Hash computed from this instance's own title and description.
+    # NOTE(review): presumably the model hashes the English source text rather than
+    # the translated fields - confirm before relying on source_hash in tests.
+    source_hash = factory.LazyAttribute(
+        lambda obj: TaxonomyTranslation.calculate_source_hash(obj.title, obj.description)
+    )
diff --git a/tests/test_models.py b/tests/test_models.py
index 1439c92d..a620d7ab 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -822,3 +822,30 @@ def test_model_object_str_with_org_key(self):
organization = self.courses[0].key.split('+')[0]
disabled_config = factories.SkillValidationConfigurationFactory(organization=organization)
assert str(disabled_config) == 'Skill validation disabled for organization: RichX'
+
+
+@mark.django_db
+class TestTaxonomyTranslation(TestCase):
+    """
+    Tests for the ``TaxonomyTranslation`` model.
+    """
+
+    def test_string_representation(self):
+        """
+        Test the string representation of the TaxonomyTranslation model.
+
+        ``__str__`` is expected to be ``content_type:external_id:language_code:title``
+        and ``__repr__`` an angle-bracketed form carrying the primary key.
+        """
+        translation = factories.TaxonomyTranslationFactory(
+            external_id='ET',
+            content_type='job',
+            language_code='es',
+            title='In',
+            description='De'
+        )
+
+        expected_str = 'job:ET:es:In'
+        # NOTE(review): the format template was an empty string here, which made
+        # expected_repr always '' regardless of the id. The literal below is
+        # reconstructed on the assumption that the model's __repr__ has this
+        # form - confirm against TaxonomyTranslation.__repr__.
+        expected_repr = '<TaxonomyTranslation id="{}">'.format(
+            translation.id
+        )
+
+        assert expected_str == translation.__str__()
+        assert expected_repr == translation.__repr__()
diff --git a/tox.ini b/tox.ini
index e3510787..75acd8c4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -79,6 +79,7 @@ setenv =
DJANGO_SETTINGS_MODULE = test_settings
deps =
setuptools
+ Django>=4.2,<5.3
-r{toxinidir}/requirements/test.txt
commands =
code_annotations django_find_annotations --config_file .pii_annotations.yml --lint --report --coverage