Skip to content

Commit 10d139d

Browse files
author
nthaihoc
committed
setup dvc
1 parent 62cdf8e commit 10d139d

13 files changed

+173
-23
lines changed

.dvc/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/config.local
2+
/tmp
3+
/cache

.dvc/config

Whitespace-only changes.

.dvcignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Add patterns of files dvc should ignore, which could improve
2+
# the performance. Learn more at
3+
# https://dvc.org/doc/user-guide/dvcignore

dvc.lock

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
schema: '2.0'
2+
stages:
3+
data_ingestion:
4+
cmd: python3 src/ccs/pipeline/stage_01_data_ingestion.py
5+
deps:
6+
- path: config/config.yaml
7+
hash: md5
8+
md5: 9380aa36e63440ed993bb48e759891e3
9+
size: 873
10+
- path: src/ccs/pipeline/stage_01_data_ingestion.py
11+
hash: md5
12+
md5: 37148056a99fe2812e148b0fc6dc6ee2
13+
size: 851
14+
outs:
15+
- path: artifacts/data_ingestion/dataset_final
16+
hash: md5
17+
md5: fb8c0835550d475bdc6d20ac00304272.dir
18+
size: 2402769676
19+
nfiles: 22434
20+
prepared_model:
21+
cmd: python3 src/ccs/pipeline/stage_02_prepare_model.py
22+
deps:
23+
- path: config/config.yaml
24+
hash: md5
25+
md5: 9380aa36e63440ed993bb48e759891e3
26+
size: 873
27+
- path: params.yaml
28+
hash: md5
29+
md5: 5ebd37d7e1d428cf2d85caa6dcc4edcc
30+
size: 161
31+
- path: src/ccs/pipeline/stage_02_prepare_model.py
32+
hash: md5
33+
md5: a72369570adbf3cfd71d5333702bc6d5
34+
size: 838
35+
outs:
36+
- path: artifacts/model/model_train.keras
37+
hash: md5
38+
md5: d796e2658891a6255829c3ebd397f8e1
39+
size: 13075888
40+
training_model:
41+
cmd: python3 src/ccs/pipeline/stage_03_training_model.py
42+
deps:
43+
- path: config/config.yaml
44+
hash: md5
45+
md5: 9380aa36e63440ed993bb48e759891e3
46+
size: 873
47+
- path: params.yaml
48+
hash: md5
49+
md5: 5ebd37d7e1d428cf2d85caa6dcc4edcc
50+
size: 161
51+
- path: src/ccs/pipeline/stage_03_training_model.py
52+
hash: md5
53+
md5: 422dac12a5b9a82823d9dba7d97e8fd6
54+
size: 776
55+
outs:
56+
- path: artifacts/model/model_trained.keras
57+
hash: md5
58+
md5: b29e93b9d809808ddda2c1c6b67252ee
59+
size: 19985337
60+
evaluate_model:
61+
cmd: python3 src/ccs/pipeline/stage_04_evaluate_model.py
62+
deps:
63+
- path: config/config.yaml
64+
hash: md5
65+
md5: 9380aa36e63440ed993bb48e759891e3
66+
size: 873
67+
- path: params.yaml
68+
hash: md5
69+
md5: 5ebd37d7e1d428cf2d85caa6dcc4edcc
70+
size: 161
71+
- path: src/ccs/pipeline/stage_04_evaluate_model.py
72+
hash: md5
73+
md5: 0ebd6a6f0bba907b99a0e3441095ce66
74+
size: 873
75+
outs:
76+
- path: scores.json
77+
hash: md5
78+
md5: 4d86cd8e65758898762a8f7ee4c21910
79+
size: 79

dvc.yaml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# DVC pipeline definition: one stage per script under src/ccs/pipeline/.
# DVC derives execution order and invalidation from deps -> outs links, so
# every stage lists the upstream artifacts it consumes in addition to its own
# script and configuration files.
stages:
  data_ingestion:
    cmd: python3 src/ccs/pipeline/stage_01_data_ingestion.py
    deps:
      - src/ccs/pipeline/stage_01_data_ingestion.py
      - config/config.yaml
    outs:
      - artifacts/data_ingestion/dataset_final

  prepared_model:
    cmd: python3 src/ccs/pipeline/stage_02_prepare_model.py
    deps:
      - src/ccs/pipeline/stage_02_prepare_model.py
      - config/config.yaml
      - params.yaml
    outs:
      - artifacts/model/model_train.keras

  training_model:
    cmd: python3 src/ccs/pipeline/stage_03_training_model.py
    deps:
      - src/ccs/pipeline/stage_03_training_model.py
      - config/config.yaml
      - params.yaml
      # NOTE(review): assumes training reads the ingested dataset and the
      # prepared model produced by the two stages above -- confirm against
      # the training component before merging.
      - artifacts/data_ingestion/dataset_final
      - artifacts/model/model_train.keras
    outs:
      - artifacts/model/model_trained.keras

  evaluate_model:
    cmd: python3 src/ccs/pipeline/stage_04_evaluate_model.py
    deps:
      - src/ccs/pipeline/stage_04_evaluate_model.py
      - config/config.yaml
      - params.yaml
      # NOTE(review): assumes evaluation loads the trained model and scores it
      # on the ingested dataset -- confirm against the evaluation component.
      - artifacts/data_ingestion/dataset_final
      - artifacts/model/model_trained.keras
    outs:
      # Kept out of the DVC cache so the small score file can be tracked by Git.
      - scores.json:
          cache: false

main.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@
88
import numpy as np
99
from pathlib import Path
1010

11-
# STAGE_NAME = "Data Ingestion Stage"
12-
# try:
13-
# logger.info(f">>>>> stage {STAGE_NAME} started <<<<<")
14-
# data_ingestion = DataIngestionPipeline()
15-
# data_ingestion.main()
16-
# logger.info(f">>>>> stage {STAGE_NAME} completed <<<<<")
17-
# except Exception as e:
18-
# logger.exception(e)
19-
# raise e
11+
# Run the data-ingestion pipeline, bracketing it with start/completed log lines.
STAGE_NAME = "Data Ingestion Stage"
try:
    logger.info(f">>>>> stage {STAGE_NAME} started <<<<<")
    data_ingestion = DataIngestionPipeline()
    data_ingestion.main()
    logger.info(f">>>>> stage {STAGE_NAME} completed <<<<<")
except Exception as e:
    # Log the full traceback, then re-raise the active exception unchanged
    # (bare `raise`) so the failure still aborts the script.
    logger.exception(e)
    raise
2020

2121
STAGE_NAME = "Prepare Model"
2222
try:
@@ -47,6 +47,4 @@
4747
logger.info(f">>>>> stage {STAGE_NAME} completed <<<<<")
4848
except Exception as e:
4949
logger.exception(e)
50-
raise e
51-
52-
50+
raise e

params.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
IMAGE_SIZE: [224, 224, 3]
22
BATCH_SIZE: 16
33
INCLUDE_TOP: False
4-
EPOCHS: 10
4+
EPOCHS: 15
55
CLASSES: 5
66
WEIGHTS: imagenet
7-
LEARNING_RATE: 0.0001
7+
LEARNING_RATE: 0.001
88
BETA_1: 0.9
99
BETA_2: 0.999
1010
DECAY: 0.0001

src/ccs/components/data_ingestion.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ def download_file(self) -> str:
1414
try:
1515
dataset_url = self.config.data_URL
1616
zip_download_dir = self.config.local_data_file
17-
#os.makedirs("artifacts/data_ingestion", exist_ok=True)
1817
logger.info(f"Download data from {dataset_url} into file {zip_download_dir}")
1918

2019
file_id = dataset_url.split("/")[-2]
@@ -28,7 +27,6 @@ def download_file(self) -> str:
2827

2928
def extract_zip_file(self):
3029
unzip_path = self.config.unzip_dir
31-
os.makedirs(unzip_path, exist_ok=True)
3230
with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
3331
zip_ref.extractall(unzip_path)
3432

src/ccs/config/configuration.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,12 @@ def __init__(
1515
self.config = read_yaml(config_filepath)
1616
self.params = read_yaml(params_filepath)
1717

18-
#create_directories([self.config.artifacts_root])
18+
create_directories([self.config.artifacts_root])
1919

20-
2120
def get_data_ingestion_config(self) -> DataIngestionConfig:
2221
config = self.config.data_ingestion
2322

24-
#create_directories([config.root_dir])
23+
create_directories([config.root_dir])
2524

2625
data_ingestion_config = DataIngestionConfig(
2726
root_dir=config.root_dir,

src/ccs/pipeline/stage_01_data_ingestion.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from ccs.components.data_ingestion import DataIngestion
33
from ccs import logger
44

5-
STAGE_NAME = "Data Ingestion"
5+
STAGE_NAME = "DATA INGESTION"
66

77
class DataIngestionPipeline:
88
def __init__(self):
@@ -17,10 +17,10 @@ def main(self):
1717

1818
if __name__ == "__main__":
1919
try:
20-
logger.info(f">>>>> Stage {STAGE_NAME} started <<<<<")
20+
logger.info(f">>>>>> Stage {STAGE_NAME} started <<<<<<")
2121
data_ingestion = DataIngestionPipeline()
2222
data_ingestion.main()
23-
logger.info(f">>>>> Stage {STAGE_NAME} completed <<<<<")
23+
logger.info(f">>>>>> Stage {STAGE_NAME} completed <<<<<<\n\n x============x")
2424
except Exception as e:
2525
logger.exception(e)
2626
raise e

src/ccs/pipeline/stage_02_prepare_model.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from ccs import logger
44

55

6+
STAGE_NAME = "PREPARE MODEL"
7+
68
class PrepareModelPipeline:
79
def __init__(self):
    """Create the pipeline object; no state is initialised here."""
    # Was spelled `__int__` (the int() conversion hook), so it never ran as a
    # constructor; the sibling pipeline classes define `__init__`.
    pass
@@ -14,3 +16,12 @@ def main(self):
1416
model = prepare_model.base_model()
1517
model = prepare_model.full_model()
1618

19+
if __name__ == "__main__":
    # Script entry point: run the prepare-model stage and log start/completed
    # markers around it.
    try:
        logger.info(f">>>>>> Stage {STAGE_NAME} started <<<<<<")
        prepare_model = PrepareModelPipeline()
        prepare_model.main()
        logger.info(f">>>>>> Stage {STAGE_NAME} completed <<<<<<\n\n x============x")
    except Exception as e:
        # Log the full traceback, then re-raise the active exception unchanged
        # (bare `raise`) so the process still exits with a failure status.
        logger.exception(e)
        raise

src/ccs/pipeline/stage_03_training_model.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from ccs.components.model_training import TrainingModel
33
from ccs.config.configuration import ConfigurationManager
44

5+
STAGE_NAME = "TRAINING MODEL"
56

67
class TrainModelPipeline:
78
def __init__ (self):
@@ -12,4 +13,14 @@ def main(self):
1213
prepare_config = config.setup_train_model()
1314
model_train = TrainingModel(config=prepare_config)
1415
model_train.train_model()
16+
17+
if __name__ == "__main__":
    # Script entry point: run the training stage and log start/completed
    # markers around it.
    # NOTE(review): `logger` must be imported in this module (e.g.
    # `from ccs import logger`); the import is not visible in this hunk -- confirm.
    try:
        logger.info(f">>>>>> Stage {STAGE_NAME} started <<<<<<")
        train_model = TrainModelPipeline()
        train_model.main()
        logger.info(f">>>>>> Stage {STAGE_NAME} completed <<<<<<\n\n x============x")
    except Exception as e:
        # Log the full traceback, then re-raise the active exception unchanged
        # (bare `raise`) so the process still exits with a failure status.
        logger.exception(e)
        raise
1526

src/ccs/pipeline/stage_04_evaluate_model.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from ccs.components.evaluate_model import EvaluateModel
33
from ccs.config.configuration import ConfigurationManager
44

5+
STAGE_NAME = "EVALUATION MODEL"
56

67
class EvaluateModelPipeline:
78
def __init__ (self):
@@ -13,4 +14,15 @@ def main(self):
1314
evaluate_model = EvaluateModel(config=evaluate_config)
1415
evaluate_model.evaluation()
1516
evaluate_model.save_score()
16-
evaluate_model.log_into_mlflow()
17+
evaluate_model.log_into_mlflow()
18+
19+
20+
if __name__ == "__main__":
    # Script entry point: run the evaluation stage and log start/completed
    # markers around it.
    # NOTE(review): `logger` must be imported in this module (e.g.
    # `from ccs import logger`); the import is not visible in this hunk -- confirm.
    try:
        logger.info(f">>>>>> Stage {STAGE_NAME} started <<<<<<")
        evaluate_model = EvaluateModelPipeline()
        evaluate_model.main()
        logger.info(f">>>>>> Stage {STAGE_NAME} completed <<<<<<\n\n x============x")
    except Exception as e:
        # Log the full traceback, then re-raise the active exception unchanged
        # (bare `raise`) so the process still exits with a failure status.
        logger.exception(e)
        raise

0 commit comments

Comments
 (0)