Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Is the MIG service free? #16

Open
qingfenghcy opened this issue Mar 12, 2023 · 1 comment
Open

Is the MIG service free? #16

qingfenghcy opened this issue Mar 12, 2023 · 1 comment

Comments

@qingfenghcy
Copy link

thanks

@MehdiTantaoui-99
Copy link

I use the following script to check whether each MIG device is free or currently in use:

def get_nvidia_smi_xml():
    """Run ``nvidia-smi -q -x`` and return what it wrote to stdout as text.

    The command's exit status is not inspected; stdout is captured,
    decoded as UTF-8 and returned unchanged.
    """
    command = ['nvidia-smi', '-q', '-x']
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8')

def _int_or_none(elem, unit=''):
    """Return the integer in *elem*'s text, or None when it is unusable.

    None is returned when the element is missing, has no text, or holds a
    non-numeric placeholder such as ``N/A``. *unit* is an optional suffix
    (for example ``' MiB'``) stripped before conversion.
    """
    if elem is None or elem.text is None:
        return None
    text = elem.text.strip()
    if unit:
        text = text.replace(unit, '')
    try:
        return int(text)
    except ValueError:
        return None


def _parse_mig_device(mig_device, gpu_name):
    """Build the info dict for one ``mig_device`` element."""
    mig_info = {}
    mig_info['index'] = int(mig_device.find('index').text)
    mig_info['gpu_instance_id'] = int(mig_device.find('gpu_instance_id').text)
    mig_info['compute_instance_id'] = int(mig_device.find('compute_instance_id').text)
    mig_info['is_in_use'] = False  # flipped once a process is mapped to it

    # Memory size and SM count fall back to 0 when absent or non-numeric.
    memory = _int_or_none(mig_device.find('fb_memory_usage/total'), ' MiB')
    mig_info['memory'] = 0 if memory is None else memory
    sm_count = _int_or_none(
        mig_device.find('device_attributes/shared/multiprocessor_count'))
    mig_info['sm_count'] = 0 if sm_count is None else sm_count

    # Infer profile name based on memory and SM count.
    mig_info['name'] = infer_profile_name(
        gpu_name, mig_info['memory'], mig_info['sm_count'])
    return mig_info


def _attach_processes(gpu, mig_devices):
    """Mark the MIG devices in *mig_devices* that have a process in *gpu*."""
    for proc in gpu.findall('processes/process_info'):
        gpu_instance_id = _int_or_none(proc.find('gpu_instance_id'))
        compute_instance_id = _int_or_none(proc.find('compute_instance_id'))
        if gpu_instance_id is None or compute_instance_id is None:
            # The instance IDs are missing or a placeholder such as "N/A"
            # (seen when the GPU is not in MIG mode), so there is no MIG
            # device to map this process to.
            continue

        process = {
            'pid': int(proc.find('pid').text),
            'process_name': proc.find('process_name').text,
            # None when the memory figure is not a plain "<n> MiB" value.
            'used_memory': _int_or_none(proc.find('used_memory'), ' MiB'),
        }
        for mig_device in mig_devices:
            if (mig_device['gpu_instance_id'] == gpu_instance_id and
                    mig_device['compute_instance_id'] == compute_instance_id):
                mig_device['is_in_use'] = True
                # As before, a later process on the same device replaces
                # an earlier one.
                mig_device['process'] = process


def parse_nvidia_smi_xml(xml_output):
    """Parse ``nvidia-smi -q -x`` XML into a list of per-GPU dicts.

    Args:
        xml_output: The XML report as a string.

    Returns:
        A list with one dict per ``gpu`` element, with the keys ``id``,
        ``name``, ``total_memory`` (MiB), ``supports_mig`` (True when the
        current MIG mode is ``Enabled``) and ``mig_devices``. Each MIG
        device dict has ``index``, ``gpu_instance_id``,
        ``compute_instance_id``, ``is_in_use``, ``memory``, ``sm_count``
        and ``name``, plus ``process`` (``pid``, ``process_name``,
        ``used_memory``) when a process is running on it.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_output* is not valid XML.
    """
    root = ET.fromstring(xml_output)
    gpus = []
    for gpu in root.findall('gpu'):
        gpu_info = {}
        gpu_info['id'] = int(gpu.find('minor_number').text)
        gpu_info['name'] = gpu.find('product_name').text.strip()
        total_memory_str = gpu.find('fb_memory_usage/total').text.strip()
        gpu_info['total_memory'] = int(total_memory_str.replace(' MiB', ''))
        # A missing mig_mode element is treated as "MIG not enabled"
        # instead of raising AttributeError.
        mig_mode = (gpu.findtext('mig_mode/current_mig') or '').strip()
        gpu_info['supports_mig'] = (mig_mode == 'Enabled')
        gpu_info['mig_devices'] = [
            _parse_mig_device(mig_device, gpu_info['name'])
            for mig_device in gpu.findall('mig_devices/mig_device')
        ]

        _attach_processes(gpu, gpu_info['mig_devices'])
        gpus.append(gpu_info)
    return gpus
# Script entry: fetch the nvidia-smi report, parse it, and print the result.
# NOTE(review): this runs at import time because there is no __main__ guard.
xml_output = get_nvidia_smi_xml()
gpus = parse_nvidia_smi_xml(xml_output)
# Prints the parsed list of per-GPU dicts.
print(f"Parsed GPUs: {gpus}")

Sample output:

Parsed GPUs: [{'id': 0, 'name': 'NVIDIA A30', 'total_memory': 24576, 'supports_mig': True, 'mig_devices': [{'index': 0, 'gpu_instance_id': 3, 'compute_instance_id': 0, 'is_in_use': True, 'memory': 6016, 'sm_count': 14, 'name': '1g.6gb', 'process': {'pid': 1295432, 'process_name': 'tritonserver', 'used_memory': 214}}]}]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants