From 1614cbadb7ff530dc09201e150c805705a0b9456 Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 7 Jan 2025 16:49:57 +0000 Subject: [PATCH 1/9] include xpu usage --- vae/README.md | 7 ++++--- vae/main.py | 7 +++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vae/README.md b/vae/README.md index cda6a33672..97778c5d8c 100644 --- a/vae/README.md +++ b/vae/README.md @@ -14,8 +14,9 @@ The main.py script accepts the following arguments: optional arguments: --batch-size input batch size for training (default: 128) --epochs number of epochs to train (default: 10) - --no-cuda enables CUDA training - --mps enables GPU on macOS + --no-cuda disables CUDA training + --no-mps disables GPU on macOS + --no-xpu disables XPU training in Intel GPUs --seed random seed (default: 1) --log-interval how many batches to wait before logging training status -``` \ No newline at end of file +``` diff --git a/vae/main.py b/vae/main.py index d69833fbe0..1d0d5ea2a8 100644 --- a/vae/main.py +++ b/vae/main.py @@ -17,6 +17,8 @@ help='disables CUDA training') parser.add_argument('--no-mps', action='store_true', default=False, help='disables macOS GPU training') +parser.add_argument('--no-xpu', action='store_true', default=False, + help='disables intel XPU training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', @@ -24,6 +26,7 @@ args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() +use_xpu = not args.no_xpu and torch.xpu.is_available() torch.manual_seed(args.seed) @@ -31,9 +34,13 @@ device = torch.device("cuda") elif use_mps: device = torch.device("mps") +elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") +print(device) + kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} train_loader = torch.utils.data.DataLoader( datasets.MNIST('../data', train=True, download=True, From ce449717ce7d9a9db537c98b020bca105acdca56 Mon Sep 17 00:00:00 2001 From: jafraustro Date: Fri, 10 Jan 2025 17:40:04 +0000 Subject: [PATCH 2/9] Add support for Intel GPU training with --no-xpu flag Signed-off-by: jafraustro --- mnist/main.py | 12 ++++++++++++ run_python_examples.sh | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/mnist/main.py b/mnist/main.py index 184dc4744f..8235bb8bb3 100644 --- a/mnist/main.py +++ b/mnist/main.py @@ -86,6 +86,8 @@ def main(): help='disables CUDA training') parser.add_argument('--no-mps', action='store_true', default=False, help='disables macOS GPU training') + parser.add_argument('--no-xpu', action='store_true', default=False, + help='disables Intel GPU training') parser.add_argument('--dry-run', action='store_true', default=False, help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', @@ -97,6 +99,7 @@ def main(): args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() + use_xpu = not args.no_mps and torch.xpu.is_available() torch.manual_seed(args.seed) @@ -104,6 +107,8 @@ def main(): device = torch.device("cuda") elif use_mps: device = torch.device("mps") + elif use_xpu: + device = torch.device("xpu") else: device = torch.device("cpu") @@ -116,6 +121,13 @@ def main(): train_kwargs.update(cuda_kwargs) test_kwargs.update(cuda_kwargs) + if use_xpu: + xpu_kwargs = {'num_workers': 1, + 'pin_memory': True, + 'shuffle': True} + train_kwargs.update(xpu_kwargs) + test_kwargs.update(xpu_kwargs) + transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) diff --git a/run_python_examples.sh b/run_python_examples.sh index 0e06e4cfc0..7db53ccd2f 100755 --- a/run_python_examples.sh +++ b/run_python_examples.sh @@ -30,6 +30,22 @@ case $USE_CUDA in ;; esac +USE_XPU=$(python -c "import torchvision, torch; print(torch.xpu.is_available())") +case $USE_XPU in + "True") + echo "using xpu" + XPU=1 + XPU_FLAG="--xpu" + ;; + "False") + echo "not using xpu" + XPU=0 + XPU_FLAG="" + ;; + "") + exit 1; + + function dcgan() { start python main.py --dataset fake $CUDA_FLAG --mps --dry-run || error "dcgan failed" From 16d4ca4c6d003109741fcc1b3810c4d174ae051c Mon Sep 17 00:00:00 2001 From: jafraustro Date: Tue, 14 Jan 2025 16:55:10 +0000 Subject: [PATCH 3/9] ADD XPU training support and add --no_xpu flag for better device management Signed-off-by: jafraustro --- mnist/main.py | 7 ------- mnist_forward_forward/main.py | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/mnist/main.py b/mnist/main.py index 8235bb8bb3..c28210468a 100644 --- a/mnist/main.py +++ b/mnist/main.py @@ -121,13 +121,6 @@ def main(): train_kwargs.update(cuda_kwargs) test_kwargs.update(cuda_kwargs) - if use_xpu: - xpu_kwargs = {'num_workers': 1, - 'pin_memory': True, - 'shuffle': True} - train_kwargs.update(xpu_kwargs) - test_kwargs.update(xpu_kwargs) - transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) diff --git a/mnist_forward_forward/main.py b/mnist_forward_forward/main.py index a175126067..be62cfe382 100644 --- a/mnist_forward_forward/main.py +++ b/mnist_forward_forward/main.py @@ -104,6 +104,9 @@ def train(self, x_pos, x_neg): parser.add_argument( "--no_cuda", action="store_true", default=False, help="disables CUDA training" ) + parser.add_argument( + "--no_xpu", action="store_true", default=False, help="disables XPU training" + ) parser.add_argument( "--no_mps", action="store_true", default=False, help="disables MPS training" ) @@ -138,9 +141,12 @@ def train(self, x_pos, x_neg): ) args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() + use_xpu = not args.no_xpu and torch.xpu.is_available() use_mps = not args.no_mps and torch.backends.mps.is_available() if use_cuda: device = torch.device("cuda") + elif use_xpu: + device = torch.device("xpu") elif use_mps: device = torch.device("mps") else: From 93098dc1a04a24d55b83e0ad76eaa76c47a85e10 Mon Sep 17 00:00:00 2001 From: jafraustro Date: Tue, 14 Jan 2025 16:55:20 +0000 Subject: [PATCH 4/9] Add --no_xpu option to README for disabling XPU training Signed-off-by: jafraustro --- mnist_forward_forward/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/mnist_forward_forward/README.md b/mnist_forward_forward/README.md index f6ae12e56d..2ed14733fc 100644 --- a/mnist_forward_forward/README.md +++ b/mnist_forward_forward/README.md @@ -17,6 +17,7 @@ optional arguments: --epochs EPOCHS number of epochs to train (default: 1000) --lr LR learning rate (default: 0.03) --no_cuda disables CUDA training + --no_xpu disables XPU training --no_mps disables MPS training --seed SEED random seed (default: 1) --save_model For saving the current Model From 3fe53025ed97813e962c30aff2b4d506962390ed Mon Sep 17 00:00:00 2001 From: jafraustro Date: Tue, 14 Jan 2025 17:52:32 +0000 Subject: [PATCH 5/9] Add --xpu option for enabling XPU training in README and main.py Signed-off-by: jafraustro --- mnist_hogwild/README.md | 1 + mnist_hogwild/main.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/mnist_hogwild/README.md b/mnist_hogwild/README.md index 5f12161d53..9c254fe8ab 100644 --- a/mnist_hogwild/README.md +++ b/mnist_hogwild/README.md @@ -21,6 +21,7 @@ optional arguments: --log_interval how many batches to wait before logging training status --num_process how many training processes to use (default: 2) --cuda enables CUDA training + --xpu enables CUDA training --dry-run quickly check a single pass --save-model For Saving the current Model ``` diff --git a/mnist_hogwild/main.py b/mnist_hogwild/main.py index 6fa449233d..6c152d4d2a 100644 --- a/mnist_hogwild/main.py +++ b/mnist_hogwild/main.py @@ -29,6 +29,8 @@ help='how many training processes to use (default: 2)') parser.add_argument('--cuda', action='store_true', default=False, help='enables CUDA training') +parser.add_argument('--xpu', action='store_true', default=False, + help='enables XPU training') parser.add_argument('--mps', action='store_true', default=False, help='enables macOS GPU training') parser.add_argument('--save_model', action='store_true', default=False, @@ -59,11 +61,14 @@ def forward(self, x): args = parser.parse_args() use_cuda = args.cuda and torch.cuda.is_available() + use_xpu = args.xpu and torch.xpu.is_available() use_mps = args.mps and torch.backends.mps.is_available() if use_cuda: device = torch.device("cuda") elif use_mps: device = torch.device("mps") + elif use_mps: + device = torch.device("xpu") else: device = torch.device("cpu") @@ -85,6 +90,7 @@ def forward(self, x): torch.manual_seed(args.seed) mp.set_start_method('spawn', force=True) + multiprocessing_context='fork' if torch.backends.xpu.is_available() else None model = Net().to(device) model.share_memory() # gradients are allocated lazily, so they are not shared here From 420bd70b9962a795d5bf24ce063c1a7935db79b4 Mon Sep 17 00:00:00 2001 From: jafraustro Date: Tue, 14 Jan 2025 18:10:47 +0000 Subject: [PATCH 6/9] Remove unused multiprocessing context assignment in main.py Signed-off-by: jafraustro --- mnist_hogwild/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mnist_hogwild/main.py b/mnist_hogwild/main.py index 6c152d4d2a..d473c603f1 100644 --- a/mnist_hogwild/main.py +++ b/mnist_hogwild/main.py @@ -90,7 +90,6 @@ def forward(self, x): torch.manual_seed(args.seed) mp.set_start_method('spawn', force=True) - multiprocessing_context='fork' if torch.backends.xpu.is_available() else None model = Net().to(device) model.share_memory() # gradients are allocated lazily, so they are not shared here From 917b9d61e67a07998104acc277bd340e17ea5501 Mon Sep 17 00:00:00 2001 From: jafraustro Date: Tue, 14 Jan 2025 18:27:38 +0000 Subject: [PATCH 7/9] Enhance device selection output in main.py for better clarity on active device Signed-off-by: jafraustro --- mnist_hogwild/main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mnist_hogwild/main.py b/mnist_hogwild/main.py index d473c603f1..5e08edff9c 100644 --- a/mnist_hogwild/main.py +++ b/mnist_hogwild/main.py @@ -63,11 +63,14 @@ def forward(self, x): use_cuda = args.cuda and torch.cuda.is_available() use_xpu = args.xpu and torch.xpu.is_available() use_mps = args.mps and torch.backends.mps.is_available() + + print(f"Using device: {'cuda' if use_cuda else 'xpu' if use_xpu else 'mps' if use_mps else 'cpu'}") + if use_cuda: device = torch.device("cuda") elif use_mps: device = torch.device("mps") - elif use_mps: + elif use_xpu: device = torch.device("xpu") else: device = torch.device("cpu") @@ -86,7 +89,7 @@ def forward(self, x): kwargs.update({'num_workers': 1, 'pin_memory': True, }) - + torch.manual_seed(args.seed) mp.set_start_method('spawn', force=True) From 5976f6d0e75b33b4986dced1c72f8b3c7eaddfb6 Mon Sep 17 00:00:00 2001 From: jafraustro Date: Tue, 14 Jan 2025 18:41:18 +0000 Subject: [PATCH 8/9] Remove XPU training option from README and main.py Signed-off-by: jafraustro --- mnist_hogwild/README.md | 1 - mnist_hogwild/main.py | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/mnist_hogwild/README.md b/mnist_hogwild/README.md index 9c254fe8ab..5f12161d53 100644 --- a/mnist_hogwild/README.md +++ b/mnist_hogwild/README.md @@ -21,7 +21,6 @@ optional arguments: --log_interval how many batches to wait before logging training status --num_process how many training processes to use (default: 2) --cuda enables CUDA training - --xpu enables CUDA training --dry-run quickly check a single pass --save-model For Saving the current Model ``` diff --git a/mnist_hogwild/main.py b/mnist_hogwild/main.py index 5e08edff9c..b969cb8689 100644 --- a/mnist_hogwild/main.py +++ b/mnist_hogwild/main.py @@ -29,8 +29,6 @@ help='how many training processes to use (default: 2)') parser.add_argument('--cuda', action='store_true', default=False, help='enables CUDA training') -parser.add_argument('--xpu', action='store_true', default=False, - help='enables XPU training') parser.add_argument('--mps', action='store_true', default=False, help='enables macOS GPU training') parser.add_argument('--save_model', action='store_true', default=False, @@ -61,17 +59,13 @@ def forward(self, x): args = parser.parse_args() use_cuda = args.cuda and torch.cuda.is_available() - use_xpu = args.xpu and torch.xpu.is_available() use_mps = args.mps and torch.backends.mps.is_available() - - print(f"Using device: {'cuda' if use_cuda else 'xpu' if use_xpu else 'mps' if use_mps else 'cpu'}") if use_cuda: device = torch.device("cuda") elif use_mps: device = torch.device("mps") - elif use_xpu: - device = torch.device("xpu") + else: device = torch.device("cpu") From 8349b91627140285b471bed418b76e84901f7397 Mon Sep 17 00:00:00 2001 From: jafraustro Date: Tue, 14 Jan 2025 19:04:55 +0000 Subject: [PATCH 9/9] Add XPU training support in mnist_rnn README and main.py Signed-off-by: jafraustro --- mnist_hogwild/README.md | 1 - mnist_rnn/README.md | 18 ++++++++++++++++++ mnist_rnn/main.py | 5 +++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/mnist_hogwild/README.md b/mnist_hogwild/README.md index 5f12161d53..6d1a613510 100644 --- a/mnist_hogwild/README.md +++ b/mnist_hogwild/README.md @@ -22,5 +22,4 @@ optional arguments: --num_process how many training processes to use (default: 2) --cuda enables CUDA training --dry-run quickly check a single pass - --save-model For Saving the current Model ``` diff --git a/mnist_rnn/README.md b/mnist_rnn/README.md index c879cb367f..e6cfa15cf9 100644 --- a/mnist_rnn/README.md +++ b/mnist_rnn/README.md @@ -8,3 +8,21 @@ pip install -r requirements.txt python main.py # CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 2 ``` +The main.py script accepts the following arguments: + +```bash +optional arguments: + -h, --help show this help message and exit + --batch_size input batch_size for training (default:64) + --testing_batch_size input batch size for testing (default: 1000) + --epochs EPOCHS number of epochs to train (default: 14) + --lr LR learning rate (default: 0.1) + --gamma learning rate step gamma (default: 0.7) + --cuda enables CUDA training + --xpu enables XPU training + --mps enables macos GPU training + --seed SEED random seed (default: 1) + --save_model For saving the current Model + --log_interval how many batches to wait before logging training status + --dry-run quickly check a single pass +``` \ No newline at end of file diff --git a/mnist_rnn/main.py b/mnist_rnn/main.py index 2fa64c00d6..9ab7c3f80d 100644 --- a/mnist_rnn/main.py +++ b/mnist_rnn/main.py @@ -93,6 +93,8 @@ def main(): help='learning rate step gamma (default: 0.7)') parser.add_argument('--cuda', action='store_true', default=False, help='enables CUDA training') + parser.add_argument('--xpu', action='store_true', default=False, + help='enables XPU training') parser.add_argument('--mps', action="store_true", default=False, help="enables MPS training") parser.add_argument('--dry-run', action='store_true', default=False, @@ -109,6 +111,8 @@ def main(): device = "cuda" elif args.mps and not args.cuda: device = "mps" + elif args.xpu: + device = "xpu" else: device = "cpu" @@ -117,6 +121,7 @@ def main(): torch.manual_seed(args.seed) kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} + train_loader = torch.utils.data.DataLoader( datasets.MNIST('../data', train=True, download=True, transform=transforms.Compose([