NVIDIA-NeMo · johnnygreco · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
@@ -56,6 +56,7 @@ Supported locales:
 - `en_US`: United States
 - `en_IN`: India (English)
 - `en_SG`: Singapore (English)
+- `fr_FR`: France (French)
 - `hi_Deva_IN`: India (Devanagari script)
 - `hi_Latn_IN`: India (Latin script)
 - `ja_JP`: Japan
@@ -119,6 +120,9 @@ ngc registry resource download-version "nvidia/nemotron-personas/nemotron-person
 ngc registry resource download-version "nvidia/nemotron-personas/nemotron-personas-dataset-hi_latn_in"
 ngc registry resource download-version "nvidia/nemotron-personas/nemotron-personas-dataset-en_in"
 
+# For Nemotron-Personas FR
+ngc registry resource download-version "nvidia/nemotron-personas/nemotron-personas-dataset-fr_fr"
+
 # For Nemotron-Personas JP
 ngc registry resource download-version "nvidia/nemotron-personas/nemotron-personas-dataset-ja_jp"
 
@@ -183,6 +187,16 @@ For more details, see the documentation for [`SamplerColumnConfig`](../code_refe
 | `email_address` | string | |
 | `national_id` | string | |
 
+**France-Specific Fields (`fr_FR`):**
+
+- `commune` - Smallest administrative division (includes arrondissements)
+- `departement` - Mid-level administrative division
+- `household_type` - Household composition (e.g., single person, couple with/without children)
+- `monthly_income_eur` - Estimated monthly income in euros
+- `first_name_heritage` - Cultural origin of the first name
+- `name_heritage` - Cultural, linguistic, or geographic origin of the surname
+- `is_first_gen_immigrant` - Whether the individual is a first-generation immigrant to France
+
 **Japan-Specific Fields (`ja_JP`):**
 
 - `area`
@@ -234,7 +248,7 @@ For more details, see the documentation for [`SamplerColumnConfig`](../code_refe
 
 | Parameter | Type | Description |
 |-----------|------|-------------|
-| `locale` | str | Language/region code - must be one of: "en_US", "en_IN", "en_SG", "hi_Deva_IN", "hi_Latn_IN", "ja_JP", "pt_BR" |
+| `locale` | str | Language/region code - must be one of: "en_US", "en_IN", "en_SG", "fr_FR", "hi_Deva_IN", "hi_Latn_IN", "ja_JP", "pt_BR" |
 | `sex` | str (optional) | Filter by "Male" or "Female" |
 | `city` | str or list[str] (optional) | Filter by specific city or cities within locale |
 | `age_range` | list[int] (optional) | Two-element list [min_age, max_age] (default: [18, 114]) |

@@ -15,6 +15,7 @@
     AVAILABLE_LOCALES,
     DEFAULT_AGE_RANGE,
     LOCALES_WITH_MANAGED_DATASETS,
+    LOCALES_WITH_MANAGED_DATASETS_STR,
     MAX_AGE,
     MIN_AGE,
 )
@@ -446,7 +447,7 @@ class PersonSamplerParams(ConfigBase):
             "Locale that determines the language and geographic location "
             "that a synthetic person will be sampled from. Must be a locale supported by "
             "a managed Nemotron Personas dataset. Managed datasets exist for the following locales: "
-            f"{', '.join(LOCALES_WITH_MANAGED_DATASETS)}."
+            f"{LOCALES_WITH_MANAGED_DATASETS_STR}."
         ),
     )
     sex: SexT | None = Field(
@@ -518,7 +519,7 @@ def _validate_locale_with_managed_datasets(self) -> Self:
         if self.locale not in LOCALES_WITH_MANAGED_DATASETS:
             raise ValueError(
                 "Person sampling from managed datasets is only supported for the following "
-                f"locales: {', '.join(LOCALES_WITH_MANAGED_DATASETS)}."
+                f"locales: {LOCALES_WITH_MANAGED_DATASETS_STR}."
             )
         return self
 

@@ -365,13 +365,15 @@ class NordColor(Enum):
     "en_US": "1.24 GB",
     "en_IN": "2.39 GB",
     "en_SG": "0.30 GB",
+    "fr_FR": "2.71 GB",
     "hi_Deva_IN": "4.14 GB",
     "hi_Latn_IN": "2.7 GB",
     "ja_JP": "1.69 GB",
     "pt_BR": "2.33 GB",
 }
 
 LOCALES_WITH_MANAGED_DATASETS = list[str](NEMOTRON_PERSONAS_DATASET_SIZES.keys())
+LOCALES_WITH_MANAGED_DATASETS_STR = ", ".join(LOCALES_WITH_MANAGED_DATASETS)
 
 NEMOTRON_PERSONAS_DATASET_PREFIX = "nemotron-personas-dataset-"
 

@@ -40,6 +40,14 @@
     "state",
     "email_address",
     "phone_number",
+    # France-specific fields
+    "first_name_heritage",
+    "name_heritage",
+    "is_first_gen_immigrant",
+    "household_type",
+    "monthly_income_eur",
+    "commune",
+    "departement",
     # Brazil-specific fields
     "race",
     # Japan-specific fields

@@ -6,15 +6,15 @@
 import typer
 
 from data_designer.cli.controllers.download_controller import DownloadController
-from data_designer.config.utils.constants import DATA_DESIGNER_HOME
+from data_designer.config.utils.constants import DATA_DESIGNER_HOME, LOCALES_WITH_MANAGED_DATASETS_STR
 
 
 def personas_command(
     locales: list[str] = typer.Option(
         None,
         "--locale",
         "-l",
-        help="Locales to download (en_US, en_IN, hi_Deva_IN, hi_Latn_IN, ja_JP). Can be specified multiple times.",
+        help=f"Locales to download ({LOCALES_WITH_MANAGED_DATASETS_STR}). Can be specified multiple times.",
     ),
     all_locales: bool = typer.Option(
         False,

@@ -85,8 +85,8 @@ def test_run_personas_with_all_flag(
     # Verify NGC check was called
     mock_check_ngc.assert_called_once()
 
-    # Verify all 7 locales were downloaded
-    assert mock_download.call_count == 7
+    # Verify all 8 locales were downloaded
+    assert mock_download.call_count == 8
 
     # Verify each locale was downloaded
     downloaded_locales = [call[0][0] for call in mock_download.call_args_list]
@@ -219,10 +219,11 @@ def test_determine_locales_with_all_flag(controller: DownloadController) -> None
     """Test _determine_locales returns all locales when all_locales=True."""
     result = controller._determine_locales(locales=None, all_locales=True)
 
-    assert len(result) == 7
+    assert len(result) == 8
     assert "en_US" in result
     assert "en_IN" in result
     assert "en_SG" in result
+    assert "fr_FR" in result
     assert "hi_Deva_IN" in result
     assert "hi_Latn_IN" in result
     assert "ja_JP" in result

@@ -15,7 +15,7 @@ def repository() -> PersonaRepository:
 def test_init(repository: PersonaRepository) -> None:
     """Test repository initialization creates registry."""
     assert repository._registry is not None
-    assert len(repository._registry.locales) == 7
+    assert len(repository._registry.locales) == 8
     assert repository._registry.dataset_prefix == "nemotron-personas-dataset-"
 
 
@@ -24,11 +24,11 @@ def test_list_all(repository: PersonaRepository) -> None:
     locales = repository.list_all()
 
     assert isinstance(locales, list)
-    assert len(locales) == 7
+    assert len(locales) == 8
 
     # Verify all expected locales are present
     locale_codes = {locale.code for locale in locales}
-    assert locale_codes == {"en_US", "en_IN", "en_SG", "hi_Deva_IN", "hi_Latn_IN", "ja_JP", "pt_BR"}
+    assert locale_codes == {"en_US", "en_IN", "en_SG", "fr_FR", "hi_Deva_IN", "hi_Latn_IN", "ja_JP", "pt_BR"}
 
     # Verify each locale has required fields
     for locale in locales:

@@ -51,10 +51,11 @@ def test_get_available_locales(service: DownloadService) -> None:
     locales = service.get_available_locales()
 
     assert isinstance(locales, dict)
-    assert len(locales) == 7
+    assert len(locales) == 8
     assert "en_US" in locales
     assert "en_IN" in locales
     assert "en_SG" in locales
+    assert "fr_FR" in locales
     assert "hi_Deva_IN" in locales
     assert "hi_Latn_IN" in locales
     assert "ja_JP" in locales