From 7f16408652654792334185d2cff4266495f64c59 Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Tue, 5 Aug 2025 06:43:12 +0530 Subject: [PATCH 01/12] Update requirements click version Signed-off-by: Anand Joseph --- requirements/requirements_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index a3e90e5dc..6a43afbe1 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -1,5 +1,5 @@ black==19.10b0 -click==8.0.2 +click>=8.0.2 isort[requirements]>5.1.0,<6.0.0 parameterized pynini==2.1.6.post1 From 15f1423136b0ad9f416b40cfed68252ac5b43fc7 Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Thu, 4 Sep 2025 22:56:23 +0530 Subject: [PATCH 02/12] Keep only EN tests for testing CI/CD pipeline Signed-off-by: Anand Joseph --- Jenkinsfile | 302 +--------------------------------------------------- 1 file changed, 4 insertions(+), 298 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 51ce37a10..044121b11 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,12 +1,12 @@ pipeline { agent { docker { - image 'tnitn_ci:py310' - args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' + image 'tnitn_ci:py312' + args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=8g --entrypoint=""' } } options { - timeout(time: 2, unit: 'HOURS') + timeout(time: 4, unit: 'HOURS') disableConcurrentBuilds(abortPrevious: true) } environment { @@ -93,231 +93,7 @@ pipeline { } } - stage('L0: Create HI TN/ITN Grammars') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: Hi TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hi --text="१" --cache_dir ${HI_TN_CACHE}' - } - } - stage('L0: Hi ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi --text="एक" --cache_dir ${HI_TN_CACHE}' - } - } - - } - } - - stage('L0: Create DE/ES TN/ITN Grammars') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: DE TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=de --text="1" --cache_dir ${DEFAULT_TN_CACHE}' - } - } - stage('L0: DE ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=de --text="ein hundert " --cache_dir ${DEFAULT_TN_CACHE}' - } - } - stage('L0: ES TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=es --text="1" --cache_dir ${ES_TN_CACHE}' - } - } - stage('L0: ES ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=es --text="ciento uno " --cache_dir ${ES_TN_CACHE}' - } - } - stage('L0: Codeswitched ES/EN ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=es_en --text="ciento uno " --cache_dir ${ES_EN_TN_CACHE}' - } - } - } - } - - stage('L0: Create AR TN/ITN Grammars') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: AR TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ar --text="2" --cache_dir ${AR_TN_CACHE}' - } - } - stage('L0: AR ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ar --text="اثنان " --cache_dir ${AR_TN_CACHE}' - } - } - - } - } - - stage('L0: Create FR TN/ITN & VI ITN & HU TN & IT TN') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: FR TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=fr --text="2" --cache_dir ${FR_TN_CACHE}' - } - } - stage('L0: FR ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=fr --text="cent " --cache_dir ${FR_TN_CACHE}' - } - } - stage('L0: VI ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}' - } - } - stage('L0: HU TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}' - } - } - stage('L0: IT TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=it --text="122" --cache_dir ${IT_TN_CACHE}' - } - } - } - } - - stage('L0: Create RU TN/ITN Grammars & SV & PT') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: RU TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --lang=ru --text="03" --cache_dir ${RU_TN_CACHE}' - } - } - stage('L0: RU ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ru --text="три " --cache_dir ${RU_TN_CACHE}' - } - } - stage('L0: SV TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=sv --text="100" --cache_dir ${SV_TN_CACHE}' - } - } - // stage('L0: SV ITN grammars') { - // steps { - // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=sv --text="hundra " --cache_dir ${SV_TN_CACHE}' - // } - // } - // stage('L0: PT TN grammars') { - // steps { - // sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=pt --text="2" --cache_dir ${DEFAULT_TN_CACHE}' - // } - // } - stage('L0: PT ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=pt --text="dez " --cache_dir ${PT_TN_CACHE}' - } - } - } - } - stage('L0: Create HY TN/ITN Grammars & MR') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: MR ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=mr --text="शून्य " --cache_dir ${MR_TN_CACHE}' - } - } - stage('L0: HY TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hy --text="6" --cache_dir ${HY_TN_CACHE}' - } - } - stage('L0: HY ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}' - } - } - } - } - stage('L0: Create ZH TN/ITN Grammar') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: ZH ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=zh --text="你" --cache_dir ${ZH_TN_CACHE}' - } - } - stage('L0: ZH TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=zh --text="6" --cache_dir ${ZH_TN_CACHE}' - } - } - } - } - stage('L0: Create JA ITN Grammars') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: JA ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ja --text="100" --cache_dir ${JA_TN_CACHE}' - } - } - } - } // L1 Tests starts here @@ -336,80 +112,10 @@ pipeline { sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/en/ -m "not pleasefixme" --cpu --tn_cache_dir ${EN_TN_CACHE}' } } - stage('L1: Run all DE TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/de/ -m "not pleasefixme" --cpu --tn_cache_dir ${DE_TN_CACHE}' - } - } - stage('L1: Run all ES TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/es/ -m "not pleasefixme" --cpu --tn_cache_dir ${ES_TN_CACHE}' - } - } - stage('L1: Run all HI TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hi/ -m "not pleasefixme" --cpu --tn_cache_dir ${HI_TN_CACHE}' - } - } - stage('L1: Run all Codeswitched ES/EN TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/es_en/ -m "not pleasefixme" --cpu --tn_cache_dir ${ES_EN_TN_CACHE}' - } - } - stage('L1: Run all AR TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ar/ -m "not pleasefixme" --cpu --tn_cache_dir ${AR_TN_CACHE}' - } - } - stage('L1: Run all FR TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/fr/ -m "not pleasefixme" --cpu --tn_cache_dir ${FR_TN_CACHE}' - } - } - stage('L1: Run all PT TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/pt/ -m "not pleasefixme" --cpu --tn_cache_dir ${PT_TN_CACHE}' - } - } - stage('L1: Run all VI TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/vi/ -m "not pleasefixme" --cpu --tn_cache_dir ${VI_TN_CACHE}' - } - } - stage('L1: Run all RU TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ru/ -m "not pleasefixme" --cpu --tn_cache_dir ${RU_TN_CACHE}' - } - } - // stage('L1: Run all SV TN/ITN tests (restore grammars from cache)') { - // steps { - // sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/sv/ -m "not pleasefixme" --cpu --tn_cache_dir ${SV_TN_CACHE}' - // } - // } - stage('L1: Run all ZH TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/zh/ -m "not pleasefixme" --cpu --tn_cache_dir ${ZH_TN_CACHE}' - } - } - stage('L1: Run all JA TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ja/ -m "not pleasefixme" --cpu --tn_cache_dir ${JA_TN_CACHE}' - } - } - stage('L1: Run all MR ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/mr/ -m "not pleasefixme" --cpu --tn_cache_dir ${MR_TN_CACHE}' - } - } - stage('L1: Run all HY TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}' - } - } } } - stage('L2: Sparrowhawk Tests') { + stage('L2: Sparrowhawk Tests') { when { anyOf { branch 'main' From a95d3216ca6ea82f9c4ff8d4c31c12aa95cf8cf0 Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Fri, 5 Sep 2025 14:36:33 +0530 Subject: [PATCH 03/12] Update black version Signed-off-by: Anand Joseph --- requirements/requirements_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index 6a43afbe1..6ee8766a3 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -1,4 +1,4 @@ -black==19.10b0 +black==25.1.0 click>=8.0.2 isort[requirements]>5.1.0,<6.0.0 parameterized From 5c15fe282260952ddab5ada0ae66c8679a6c1325 Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Fri, 5 Sep 2025 14:57:01 +0530 Subject: [PATCH 04/12] Update isort version Signed-off-by: Anand Joseph --- requirements/requirements_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index 6ee8766a3..aacfde319 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -1,6 +1,6 @@ black==25.1.0 click>=8.0.2 -isort[requirements]>5.1.0,<6.0.0 +isort[requirements]>5.1.0,<=6.0.1 parameterized pynini==2.1.6.post1 pytest From c7fa94263d560c0dd05e7d2e70f0b612484a4931 Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Fri, 5 Sep 2025 15:31:04 +0530 Subject: [PATCH 05/12] Update Jenkinfile Signed-off-by: Anand Joseph --- Jenkinsfile | 96 ++++------------------------------------------------- 1 file changed, 7 insertions(+), 89 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 044121b11..74836a5bd 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,34 +1,19 @@ pipeline { agent { docker { - image 'tnitn_ci:py312' - args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=8g --entrypoint=""' + image 'tnitn_ci2:py312' + args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' } } options { - timeout(time: 4, unit: 'HOURS') + timeout(time: 2, unit: 'HOURS') disableConcurrentBuilds(abortPrevious: true) } environment { - AR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-24-24-0' - DE_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-23-24-0' - EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-04-24-0' - ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-25-24-0' - ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-30-24-0' - FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-07-25-0' - HU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-16-24-0' - PT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' - RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' - VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' - SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' - ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-13-24-0' - IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-22-24-0' - HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0' - MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1' - JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1' - HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-22-25-0' - DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' + EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-06-25-1' + DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/02-15-23-0' + } stages { @@ -46,14 +31,6 @@ pipeline { } } - stage('Install test requirements') { - steps { - sh 'apt-get update && apt-get install -y bc' - } - } - - - stage('NeMo Installation') { steps { sh './reinstall.sh release' @@ -61,6 +38,7 @@ pipeline { } + stage('L0: Create EN TN/ITN Grammars') { when { anyOf { @@ -70,11 +48,6 @@ pipeline { } failFast true parallel { - stage('L0: Test utils') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/audio_based_utils/ --cpu' - } - } stage('L0: En TN grammars') { steps { sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --text="1" --cache_dir ${EN_TN_CACHE}' @@ -94,10 +67,7 @@ pipeline { } } - - // L1 Tests starts here - stage('L1: TN/ITN Tests CPU') { when { anyOf { @@ -115,39 +85,6 @@ pipeline { } } - stage('L2: Sparrowhawk Tests') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L2: EN ITN Run Sparrowhawk test - Lower Cased Input') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" cp -r /workspace/sparrowhawk/documentation/grammars /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_lower_cased && cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_itn_grammars" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_ITN --LANGUAGE="en"' - sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_inverse_text_normalization.sh /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_lower_cased `pwd`' - - } - } - stage('L2: EN ITN Run Sparrowhawk test - Cased Input') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" cp -r /workspace/sparrowhawk/documentation/grammars /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_cased && cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_itn_grammars" --INPUT_CASE="cased" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_ITN_cased --LANGUAGE="en"' - sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_inverse_text_normalization_cased.sh /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_cased `pwd`' - - } - } - stage('L2: EN TN Run Sparrowhawk test') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" cp -r /workspace/sparrowhawk/documentation/grammars /workspace/sparrowhawk/documentation/grammars_en_tn_grammars_cased && cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_tn_grammars" --INPUT_CASE="cased" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_TN --GRAMMARS="tn_grammars" --LANGUAGE="en" ' - sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_normalization.sh /workspace/sparrowhawk/documentation/grammars_en_tn_grammars_cased `pwd`' - } - } - - } - } - stage('L2: NeMo text processing') { when { anyOf { @@ -180,25 +117,6 @@ pipeline { } } - - stage('L2: Eng alignment TN') { - steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \ - cd nemo_text_processing/fst_alignment && python alignment.py --text="2615 Forest Av, 90501 CA, Santa Clara. 10kg, 12/16/2018" --grammar=tn --rule=tokenize_and_classify --fst=${EN_TN_CACHE}/en_tn_True_deterministic_cased__tokenize.far 2>&1 | tee $NORM_OUTPUT_DIR/pred.txt && \ - cmp --silent $NORM_OUTPUT_DIR/pred.txt /home/jenkinsci/TestData/text_norm/ci/alignment_gold.txt || exit 1 && \ - rm -rf $NORM_OUTPUT_DIR' - } - } - - stage('L2: Eng alignment ITN') { - steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \ - cd nemo_text_processing/fst_alignment && python alignment.py --text="one million twenty three thousand two hundred eleven ten kilograms one hundred twenty three dollars and twenty five cents" --grammar=itn --rule=tokenize_and_classify --fst=${EN_TN_CACHE}/en_itn_lower_cased.far 2>&1 | tee $DENORM_OUTPUT_DIR/pred.txt && \ - cmp --silent $DENORM_OUTPUT_DIR/pred.txt /home/jenkinsci/TestData/text_denorm/ci/alignment_gold.txt || exit 1 && \ - rm -rf $DENORM_OUTPUT_DIR' - } - } - } } } From e497dd3a3d697c50cf5b81f159dd18eb6d82810f Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Mon, 22 Sep 2025 22:00:35 +0530 Subject: [PATCH 06/12] Simple Jenkinsfile for debug Signed-off-by: Anand Joseph --- Jenkinsfile | 135 +++------------------------------------------------- 1 file changed, 7 insertions(+), 128 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 74836a5bd..5ce6db46a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,131 +1,10 @@ pipeline { - agent { - docker { - image 'tnitn_ci2:py312' - args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' + agent any + stages { + stage('Print hello') { + steps { + echo 'Hello world!' + } } - } - options { - timeout(time: 2, unit: 'HOURS') - disableConcurrentBuilds(abortPrevious: true) - } - environment { - - EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-06-25-1' - DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/02-15-23-0' - - } - stages { - - stage('Add git safe directory'){ - steps{ - sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NTP_$GIT_BRANCH' - sh 'git config --global --add safe.directory /home/jenkinsci/workspace/NTP_$GIT_BRANCH' - } } - - stage('PyTorch version') { - steps { - sh 'python -c "import torch; print(torch.__version__)"' - sh 'python -c "import torchvision; print(torchvision.__version__)"' - } - } - - stage('NeMo Installation') { - steps { - sh './reinstall.sh release' - } - } - - - - stage('L0: Create EN TN/ITN Grammars') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L0: En TN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --text="1" --cache_dir ${EN_TN_CACHE}' - } - } - stage('L0: En TN non-deterministic grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --text="1" --cache_dir ${EN_TN_CACHE}' - } - } - stage('L0: En ITN grammars') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language en --text="twenty" --cache_dir ${EN_TN_CACHE}' - } - } - - } - } - -// L1 Tests starts here - stage('L1: TN/ITN Tests CPU') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - stages { - stage('L1: Test EN non-deterministic TN & Run all En TN/ITN tests (restore grammars from cache)') { - steps { - sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/en/ -m "not pleasefixme" --cpu --tn_cache_dir ${EN_TN_CACHE}' - } - } - } - } - - stage('L2: NeMo text processing') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel { - stage('L2: Eng TN') { - steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && \ - cd tools/text_processing_deployment && python pynini_export.py --output=$NORM_OUTPUT_DIR --grammars=tn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $NORM_OUTPUT_DIR && echo ".far files created "|| exit 1' - sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \ - cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/jenkinsci/TestData/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=$NORM_OUTPUT_DIR/test.pynini.txt --verbose && \ - cat $NORM_OUTPUT_DIR/test.pynini.txt && \ - cmp --silent $NORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_norm/ci/test_goal_py.txt || exit 1 && \ - rm -rf $NORM_OUTPUT_DIR' - } - } - - stage('L2: Eng ITN export') { - steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && \ - cd tools/text_processing_deployment && python pynini_export.py --output=$DENORM_OUTPUT_DIR --grammars=itn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $DENORM_OUTPUT_DIR && echo ".far files created "|| exit 1' - sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \ - cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/jenkinsci/TestData/text_denorm/ci/test.txt --language=en --output_file=$DENORM_OUTPUT_DIR/test.pynini.txt --verbose && \ - cmp --silent $DENORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_denorm/ci/test_goal_py.txt || exit 1 && \ - rm -rf $DENORM_OUTPUT_DIR' - } - } - - } - } - } - - - post { - always { - sh 'chmod -R 777 .' - cleanWs() - } - } -} +} \ No newline at end of file From 9499f97f8fe4e96acb7c73f9eb5cfe85ef9784fe Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Mon, 22 Sep 2025 22:05:21 +0530 Subject: [PATCH 07/12] Add docker Signed-off-by: Anand Joseph --- Jenkinsfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5ce6db46a..c4d421be5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,10 @@ pipeline { - agent any + agent { + docker { + image 'tnitn_ci2:py312' + args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' + } + } stages { stage('Print hello') { steps { From f41389e7f804527f4ae5b3f7ca5b6f43baaafabf Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Mon, 22 Sep 2025 22:16:58 +0530 Subject: [PATCH 08/12] Add docker Signed-off-by: Anand Joseph --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index c4d421be5..2647fd53c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ pipeline { agent { docker { image 'tnitn_ci2:py312' - args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' + args '--user 0:0 -v /mnt/jenkins/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' } } stages { From 218ed3a7c878fc6d3226a6814205f14e830b5f28 Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Mon, 22 Sep 2025 22:32:13 +0530 Subject: [PATCH 09/12] Remove user Signed-off-by: Anand Joseph --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2647fd53c..0df215b2c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ pipeline { agent { docker { image 'tnitn_ci2:py312' - args '--user 0:0 -v /mnt/jenkins/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' + args '-v /mnt/jenkins/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' } } stages { From d218b7c3cb237464465fcb12964cd7c742aad87e Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Mon, 22 Sep 2025 22:42:58 +0530 Subject: [PATCH 10/12] Test1 Signed-off-by: Anand Joseph --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0df215b2c..d2fdc4212 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -8,7 +8,7 @@ pipeline { stages { stage('Print hello') { steps { - echo 'Hello world!' + echo 'Hello world! Hope' } } } From b734c9e67612fd7315ada78bcab4e00774b969ad Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Mon, 22 Sep 2025 22:49:39 +0530 Subject: [PATCH 11/12] Revert to full EN tests, fix user and mounts Signed-off-by: Anand Joseph --- Jenkinsfile | 130 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 123 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d2fdc4212..4bf83cf01 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,15 +1,131 @@ pipeline { - agent { + agent { docker { image 'tnitn_ci2:py312' args '-v /mnt/jenkins/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' } } - stages { - stage('Print hello') { - steps { - echo 'Hello world! Hope' - } + options { + timeout(time: 2, unit: 'HOURS') + disableConcurrentBuilds(abortPrevious: true) + } + environment { + + EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-06-25-1' + DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/02-15-23-0' + + } + stages { + + stage('Add git safe directory'){ + steps{ + sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NTP_$GIT_BRANCH' + sh 'git config --global --add safe.directory /home/jenkinsci/workspace/NTP_$GIT_BRANCH' + } + } + + stage('PyTorch version') { + steps { + sh 'python -c "import torch; print(torch.__version__)"' + sh 'python -c "import torchvision; print(torchvision.__version__)"' + } + } + + stage('NeMo Installation') { + steps { + sh './reinstall.sh release' + } + } + + + + stage('L0: Create EN TN/ITN Grammars') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + parallel { + stage('L0: En TN grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --text="1" --cache_dir ${EN_TN_CACHE}' + } + } + stage('L0: En TN non-deterministic grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --text="1" --cache_dir ${EN_TN_CACHE}' + } + } + stage('L0: En ITN grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language en --text="twenty" --cache_dir ${EN_TN_CACHE}' + } + } + + } + } + +// L1 Tests starts here + stage('L1: TN/ITN Tests CPU') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + stages { + stage('L1: Test EN non-deterministic TN & Run all En TN/ITN tests (restore grammars from cache)') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/en/ -m "not pleasefixme" --cpu --tn_cache_dir ${EN_TN_CACHE}' + } } + } } -} \ No newline at end of file + + stage('L2: NeMo text processing') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + parallel { + stage('L2: Eng TN') { + steps { + sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && \ + cd tools/text_processing_deployment && python pynini_export.py --output=$NORM_OUTPUT_DIR --grammars=tn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $NORM_OUTPUT_DIR && echo ".far files created "|| exit 1' + sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \ + cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/jenkinsci/TestData/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=$NORM_OUTPUT_DIR/test.pynini.txt --verbose && \ + cat $NORM_OUTPUT_DIR/test.pynini.txt && \ + cmp --silent $NORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_norm/ci/test_goal_py.txt || exit 1 && \ + rm -rf $NORM_OUTPUT_DIR' + } + } + + stage('L2: Eng ITN export') { + steps { + sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && \ + cd tools/text_processing_deployment && python pynini_export.py --output=$DENORM_OUTPUT_DIR --grammars=itn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $DENORM_OUTPUT_DIR && echo ".far files created "|| exit 1' + sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \ + cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/jenkinsci/TestData/text_denorm/ci/test.txt --language=en --output_file=$DENORM_OUTPUT_DIR/test.pynini.txt --verbose && \ + cmp --silent $DENORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_denorm/ci/test_goal_py.txt || exit 1 && \ + rm -rf $DENORM_OUTPUT_DIR' + } + } + + } + } + } + + + post { + always { + sh 'chmod -R 777 .' + cleanWs() + } + } +} From 43eeeb9739f47b9bc1734e0ed542a772ffef66fd Mon Sep 17 00:00:00 2001 From: Anand Joseph Date: Mon, 22 Sep 2025 23:19:50 +0530 Subject: [PATCH 12/12] Revert user Signed-off-by: Anand Joseph --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4bf83cf01..d340dbb83 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ pipeline { agent { docker { image 'tnitn_ci2:py312' - args '-v /mnt/jenkins/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' + args '-u 0:120 -v /mnt/jenkins/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' } } options {