Open
Description
Add Link
https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html
Describe the bug
Following the tutorial, I wrote this code and found that a segmentation fault occurs when the tensor (and the GraphModule model) is moved to cuda:0.
# main.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
# LeNet Model definition
class Net(nn.Module):
    """LeNet-style convolutional classifier used in the reproduction.

    Two 5x5 convolutions (each followed by 2x2 max-pooling and ReLU) feed
    two fully connected layers that end in 10 log-probabilities.

    The layer attribute names are part of the checkpoint format: the state
    dict keys are derived from them, so they must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc1_drop = nn.Dropout()
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-softmax scores over the 10 output classes.

        Args:
            x: Batch of single-channel images. The flatten step below
                hard-codes 320 features per sample, which is what a
                1x28x28 input produces (20 channels * 4 * 4).

        Returns:
            Tensor of shape ``(batch, 10)`` holding log-probabilities.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten the conv feature maps to match fc1's 320 input features.
        x = x.reshape(-1, 320)
        x = F.relu(self.fc1(x))
        x = self.fc1_drop(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
def save_quant():
    """Quantize the pretrained LeNet with Intel Neural Compressor and save it.

    Loads weights from ``./lenet_mnist_model.pth``, moves the model to
    ``cuda:0`` in eval mode, and runs the ``Quantization`` flow configured by
    ``./conf.yaml``. The MNIST test split (batch size 1) serves as both the
    calibration and the evaluation data. The result is written to
    ``./output``.
    """
    model = Net()
    model.load_state_dict(torch.load('./lenet_mnist_model.pth'))
    print(model)
    model = model.to("cuda:0")
    model.eval()

    from torchvision import datasets, transforms
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            './data', train=False, download=True,
            transform=transforms.Compose([transforms.ToTensor(), ]),
        ),
        batch_size=1)

    # launch code for Intel® Neural Compressor
    from neural_compressor.experimental import Quantization
    quantizer = Quantization("./conf.yaml")
    quantizer.model = model
    quantizer.calib_dataloader = test_loader
    quantizer.eval_dataloader = test_loader
    q_model = quantizer()
    # Disabled in the original report; kept for reference.
    # q_model = q_model.to("cuda:0")
    q_model.save('./output')
def good_quant():
    """Run the saved int8 model without moving it or its input to CUDA.

    Loads the quantized model from ``./output`` into a fresh ``Net`` and
    feeds it one random 1x1x28x28 float32 input. Prints the input device
    and the model output.
    """
    from neural_compressor.utils.pytorch import load

    model = Net()
    int8_model = load('./output', model)
    first = np.random.rand(1, 1, 28, 28).astype(np.float32)
    first = torch.from_numpy(first)
    print(f"when tensor on={first.device}")
    x = int8_model(first)
    print(x)
def bad_quant():
    """Run the saved int8 model with model and input moved to ``cuda:0``.

    Same as the CPU run except that both the loaded model and the random
    1x1x28x28 float32 input are moved to ``cuda:0`` before the forward pass.
    This is the path the issue reports as ending in a segmentation fault,
    so it is intentionally left unfixed as the reproduction.
    """
    from neural_compressor.utils.pytorch import load

    model = Net()
    device = "cuda:0"
    int8_model = load('./output', model).to(device)
    first = np.random.rand(1, 1, 28, 28).astype(np.float32)
    first = torch.from_numpy(first).to(device)
    print(f"when tensor on={first.device}")
    x = int8_model(first)
    print(x)
# Reproduction driver: quantize and save the model, then run inference
# first without CUDA and then with model and input moved to cuda:0.
save_quant()
good_quant()
# The issue reports the segmentation fault for the cuda:0 run.
bad_quant()
and the yaml file:
# conf.yaml
version: 2.0

model:
  name: LeNet
  framework: pytorch_fx  # FX graph mode backend

evaluation:
  accuracy:
    metric:
      topk: 1  # top-1 accuracy

tuning:
  accuracy_criterion:
    relative: 0.01  # tolerated relative accuracy loss (1%)
Expected Result: the quantized model runs smoothly and efficiently on GPU
Actual Result: segmentation fault
Segmentation fault (core dumped)
Describe your environment
- Platform(Linux)
- CUDA(11, 12)
- PyTorch(2.0.1+cu117)
cc @ezyang @gchanan @zou3519 @kadeng @frank-wei @jgong5 @mingfeima @XiaobingSuper @sanchitintel @ashokei @jingxu10 @jerryzh168 @jianyuh @raghuramank100 @jamesr66a @vkuzo @Xia-Weiwen @leslie-fang-intel
Metadata
Metadata
Assignees
Labels
Type
Projects
Milestone
Relationships
Development
No branches or pull requests
Activity
colesbury commented on Nov 6, 2023
Marked as high priority due to segmentation fault
HDCharles commented on Nov 15, 2023
looks like @ftian1 @holly1238 @yqhu wrote/landed the tutorial, can one of you guys take a look at this?
The pytorch quantization oncall is listed for this issue but the tutorial is for an external repo and isn't actually using any of the pytorch quantization APIs.
malfet commented on Nov 20, 2023
Transferring back to tutorials, though it looks like the crash comes from the `neural_compressor` library
ftian1 commented on Nov 21, 2023
thanks for raising this bug. I am checking it and will fix asap.