diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig index 9e055b5ce03d..682c53245286 100644 --- a/drivers/accel/ivpu/Kconfig +++ b/drivers/accel/ivpu/Kconfig @@ -8,7 +8,6 @@ config DRM_ACCEL_IVPU select FW_LOADER select DRM_GEM_SHMEM_HELPER select GENERIC_ALLOCATOR - select WANT_DEV_COREDUMP help Choose this option if you have a system with an 14th generation Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU) @@ -16,12 +15,3 @@ config DRM_ACCEL_IVPU and Deep Learning applications. If "M" is selected, the module will be called intel_vpu. - -config DRM_ACCEL_IVPU_DEBUG - bool "Intel NPU debug mode" - depends on DRM_ACCEL_IVPU - help - Choose this option to enable additional - debug features for the Intel NPU driver: - - Always print debug messages regardless of dyndbg config, - - Enable unsafe module params. diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 1029e0bab061..ebd682a42eb1 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -16,14 +16,8 @@ intel_vpu-y := \ ivpu_mmu_context.o \ ivpu_ms.o \ ivpu_pm.o \ - ivpu_sysfs.o \ - ivpu_trace_points.o + ivpu_sysfs.o intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o -intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o - -subdir-ccflags-$(CONFIG_DRM_ACCEL_IVPU_DEBUG) += -DDEBUG - -CFLAGS_ivpu_trace_points.o = -I$(src) diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c deleted file mode 100644 index 16ad0c30818c..000000000000 --- a/drivers/accel/ivpu/ivpu_coredump.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020-2024 Intel Corporation - */ - -#include -#include - -#include "ivpu_coredump.h" -#include "ivpu_fw.h" -#include "ivpu_gem.h" -#include "vpu_boot_api.h" - -#define CRASH_DUMP_HEADER "Intel NPU crash dump" -#define CRASH_DUMP_HEADERS_SIZE SZ_4K - -void ivpu_dev_coredump(struct ivpu_device *vdev) -{ - 
struct drm_print_iterator pi = {}; - struct drm_printer p; - size_t coredump_size; - char *coredump; - - coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE + - ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb); - coredump = vmalloc(coredump_size); - if (!coredump) - return; - - pi.data = coredump; - pi.remain = coredump_size; - p = drm_coredump_printer(&pi); - - drm_printf(&p, "%s\n", CRASH_DUMP_HEADER); - drm_printf(&p, "FW version: %s\n", vdev->fw->version); - ivpu_fw_log_print(vdev, false, &p); - - dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL); -} diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h deleted file mode 100644 index 8efb09d02441..000000000000 --- a/drivers/accel/ivpu/ivpu_coredump.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2020-2024 Intel Corporation - */ - -#ifndef __IVPU_COREDUMP_H__ -#define __IVPU_COREDUMP_H__ - -#include - -#include "ivpu_drv.h" -#include "ivpu_fw_log.h" - -#ifdef CONFIG_DEV_COREDUMP -void ivpu_dev_coredump(struct ivpu_device *vdev); -#else -static inline void ivpu_dev_coredump(struct ivpu_device *vdev) -{ - struct drm_printer p = drm_info_printer(vdev->drm.dev); - - ivpu_fw_log_print(vdev, false, &p); -} -#endif - -#endif /* __IVPU_COREDUMP_H__ */ diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 8180b95ed69d..8d50981594d1 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -45,14 +45,6 @@ static int fw_name_show(struct seq_file *s, void *v) return 0; } -static int fw_version_show(struct seq_file *s, void *v) -{ - struct ivpu_device *vdev = seq_to_ivpu(s); - - seq_printf(s, "%s\n", vdev->fw->version); - return 0; -} - static int fw_trace_capability_show(struct seq_file *s, void *v) { struct ivpu_device *vdev = seq_to_ivpu(s); @@ -127,7 +119,6 @@ static int firewall_irq_counter_show(struct seq_file *s, void 
*v) static const struct drm_debugfs_info vdev_debugfs_list[] = { {"bo_list", bo_list_show, 0}, {"fw_name", fw_name_show, 0}, - {"fw_version", fw_version_show, 0}, {"fw_trace_capability", fw_trace_capability_show, 0}, {"fw_trace_config", fw_trace_config_show, 0}, {"last_bootmode", last_bootmode_show, 0}, @@ -136,23 +127,32 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = { {"firewall_irq_counter", firewall_irq_counter_show, 0}, }; -static int dvfs_mode_get(void *data, u64 *dvfs_mode) +static ssize_t +dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { - struct ivpu_device *vdev = (struct ivpu_device *)data; + struct ivpu_device *vdev = file->private_data; + struct ivpu_fw_info *fw = vdev->fw; + u32 dvfs_mode; + int ret; - *dvfs_mode = vdev->fw->dvfs_mode; - return 0; -} + ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode); + if (ret < 0) + return ret; -static int dvfs_mode_set(void *data, u64 dvfs_mode) -{ - struct ivpu_device *vdev = (struct ivpu_device *)data; + fw->dvfs_mode = dvfs_mode; - vdev->fw->dvfs_mode = (u32)dvfs_mode; - return pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + return ret; + + return size; } -DEFINE_DEBUGFS_ATTRIBUTE(dvfs_mode_fops, dvfs_mode_get, dvfs_mode_set, "%llu\n"); +static const struct file_operations dvfs_mode_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dvfs_mode_fops_write, +}; static ssize_t fw_dyndbg_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) @@ -201,7 +201,7 @@ fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, l if (!size) return -EINVAL; - ivpu_fw_log_mark_read(vdev); + ivpu_fw_log_clear(vdev); return size; } @@ -346,23 +346,49 @@ static const struct file_operations ivpu_force_recovery_fops = { .write = ivpu_force_recovery_fn, }; -static int ivpu_reset_engine_fn(void *data, u64 val) +static ssize_t 
+ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { - struct ivpu_device *vdev = (struct ivpu_device *)data; + struct ivpu_device *vdev = file->private_data; + + if (!size) + return -EINVAL; - return ivpu_jsm_reset_engine(vdev, (u32)val); + if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE)) + return -ENODEV; + if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY)) + return -ENODEV; + + return size; } -DEFINE_DEBUGFS_ATTRIBUTE(ivpu_reset_engine_fops, NULL, ivpu_reset_engine_fn, "0x%02llx\n"); +static const struct file_operations ivpu_reset_engine_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = ivpu_reset_engine_fn, +}; -static int ivpu_resume_engine_fn(void *data, u64 val) +static ssize_t +ivpu_resume_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { - struct ivpu_device *vdev = (struct ivpu_device *)data; + struct ivpu_device *vdev = file->private_data; + + if (!size) + return -EINVAL; - return ivpu_jsm_hws_resume_engine(vdev, (u32)val); + if (ivpu_jsm_hws_resume_engine(vdev, DRM_IVPU_ENGINE_COMPUTE)) + return -ENODEV; + if (ivpu_jsm_hws_resume_engine(vdev, DRM_IVPU_ENGINE_COPY)) + return -ENODEV; + + return size; } -DEFINE_DEBUGFS_ATTRIBUTE(ivpu_resume_engine_fops, NULL, ivpu_resume_engine_fn, "0x%02llx\n"); +static const struct file_operations ivpu_resume_engine_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = ivpu_resume_engine_fn, +}; static int dct_active_get(void *data, u64 *active_percent) { @@ -406,7 +432,7 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) debugfs_create_file("force_recovery", 0200, debugfs_root, vdev, &ivpu_force_recovery_fops); - debugfs_create_file("dvfs_mode", 0644, debugfs_root, vdev, + debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev, &dvfs_mode_fops); debugfs_create_file("fw_dyndbg", 0200, debugfs_root, vdev, diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 
1e8ffbe25eee..cc90466bdb24 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -15,7 +14,7 @@ #include #include -#include "ivpu_coredump.h" +#include "vpu_boot_api.h" #include "ivpu_debugfs.h" #include "ivpu_drv.h" #include "ivpu_fw.h" @@ -30,10 +29,10 @@ #include "ivpu_ms.h" #include "ivpu_pm.h" #include "ivpu_sysfs.h" -#include "vpu_boot_api.h" #ifndef DRIVER_VERSION_STR -#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE +#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ + __stringify(DRM_IVPU_DRIVER_MINOR) "." #endif static struct lock_class_key submitted_jobs_xa_lock_class_key; @@ -43,10 +42,8 @@ module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); int ivpu_test_mode; -#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644); MODULE_PARM_DESC(test_mode, "Test mode mask. 
See IVPU_TEST_MODE_* macros."); -#endif u8 ivpu_pll_min_ratio; module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); @@ -56,9 +53,9 @@ u8 ivpu_pll_max_ratio = U8_MAX; module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644); MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency"); -int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO; +int ivpu_sched_mode; module_param_named(sched_mode, ivpu_sched_mode, int, 0444); -MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler, 1 - Use HW scheduler"); +MODULE_PARM_DESC(sched_mode, "Scheduler mode: 0 - Default scheduler, 1 - Force HW scheduler"); bool ivpu_disable_mmu_cont_pages; module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444); @@ -106,8 +103,6 @@ static void file_priv_release(struct kref *ref) pm_runtime_get_sync(vdev->drm.dev); mutex_lock(&vdev->context_list_lock); file_priv_unbind(vdev, file_priv); - drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa)); - xa_destroy(&file_priv->cmdq_xa); mutex_unlock(&vdev->context_list_lock); pm_runtime_put_autosuspend(vdev->drm.dev); @@ -121,6 +116,8 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link) struct ivpu_file_priv *file_priv = *link; struct ivpu_device *vdev = file_priv->vdev; + drm_WARN_ON(&vdev->drm, !file_priv); + ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", file_priv->ctx.id, kref_read(&file_priv->ref)); @@ -260,13 +257,6 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id); - file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); - file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK; - - xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1); - file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID; - file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID; - mutex_unlock(&vdev->context_list_lock); drm_dev_exit(idx); @@ -352,7 +342,7 @@ 
static int ivpu_hw_sched_init(struct ivpu_device *vdev) { int ret = 0; - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) { ret = ivpu_jsm_hws_setup_priority_bands(vdev); if (ret) { ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret); @@ -386,7 +376,10 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); - goto err_diagnose_failure; + ivpu_hw_diagnose_failure(vdev); + ivpu_mmu_evtq_dump(vdev); + ivpu_fw_log_dump(vdev); + return ret; } ivpu_hw_irq_clear(vdev); @@ -397,20 +390,12 @@ int ivpu_boot(struct ivpu_device *vdev) if (ivpu_fw_is_cold_boot(vdev)) { ret = ivpu_pm_dct_init(vdev); if (ret) - goto err_diagnose_failure; + return ret; - ret = ivpu_hw_sched_init(vdev); - if (ret) - goto err_diagnose_failure; + return ivpu_hw_sched_init(vdev); } return 0; - -err_diagnose_failure: - ivpu_hw_diagnose_failure(vdev); - ivpu_mmu_evtq_dump(vdev); - ivpu_dev_coredump(vdev); - return ret; } void ivpu_prepare_for_reset(struct ivpu_device *vdev) @@ -457,8 +442,8 @@ static const struct drm_driver driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - - .major = 1, + .major = DRM_IVPU_DRIVER_MAJOR, + .minor = DRM_IVPU_DRIVER_MINOR, }; static void ivpu_context_abort_invalid(struct ivpu_device *vdev) @@ -616,9 +601,6 @@ static int ivpu_dev_init(struct ivpu_device *vdev) lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list); - vdev->db_limit.min = IVPU_MIN_DB; - vdev->db_limit.max = IVPU_MAX_DB; - ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); if (ret) goto err_xa_destroy; diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 3fdff3f6cffd..d0f750c5c453 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -21,6 +21,7 @@ #define DRIVER_NAME "intel_vpu" #define DRIVER_DESC "Driver for Intel NPU 
(Neural Processing Unit)" +#define DRIVER_DATE "20230117" #define PCI_DEVICE_ID_MTL 0x7d1d #define PCI_DEVICE_ID_ARL 0xad1d @@ -46,22 +47,17 @@ #define IVPU_MIN_DB 1 #define IVPU_MAX_DB 255 -#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0) -#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8) - +#define IVPU_NUM_ENGINES 2 #define IVPU_NUM_PRIORITIES 4 -#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES) +#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_ENGINES * IVPU_NUM_PRIORITIES) -#define IVPU_CMDQ_MIN_ID 1 -#define IVPU_CMDQ_MAX_ID 255 +#define IVPU_CMDQ_INDEX(engine, priority) ((engine) * IVPU_NUM_PRIORITIES + (priority)) #define IVPU_PLATFORM_SILICON 0 #define IVPU_PLATFORM_SIMICS 2 #define IVPU_PLATFORM_FPGA 3 #define IVPU_PLATFORM_INVALID 8 -#define IVPU_SCHED_MODE_AUTO -1 - #define IVPU_DBG_REG BIT(0) #define IVPU_DBG_IRQ BIT(1) #define IVPU_DBG_MMU BIT(2) @@ -139,8 +135,6 @@ struct ivpu_device { struct xa_limit context_xa_limit; struct xarray db_xa; - struct xa_limit db_limit; - u32 db_next; struct mutex bo_list_lock; /* Protects bo_list */ struct list_head bo_list; @@ -159,7 +153,6 @@ struct ivpu_device { int tdr; int autosuspend; int d0i3_entry_msg; - int state_dump_msg; } timeout; }; @@ -171,15 +164,11 @@ struct ivpu_file_priv { struct kref ref; struct ivpu_device *vdev; struct mutex lock; /* Protects cmdq */ - struct xarray cmdq_xa; + struct ivpu_cmdq *cmdq[IVPU_NUM_CMDQS_PER_CTX]; struct ivpu_mmu_context ctx; struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */ struct list_head ms_instance_list; struct ivpu_bo *ms_info_bo; - struct xa_limit job_limit; - u32 job_id_next; - struct xa_limit cmdq_limit; - u32 cmdq_id_next; bool has_mmu_faults; bool bound; bool aborted; @@ -197,9 +186,9 @@ extern bool ivpu_force_snoop; #define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2) #define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4) #define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5) -#define IVPU_TEST_MODE_MIP_DISABLE BIT(6) +#define IVPU_TEST_MODE_PREEMPTION_DISABLE BIT(6) +#define 
IVPU_TEST_MODE_HWS_EXTRA_EVENTS BIT(7) #define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8) -#define IVPU_TEST_MODE_TURBO BIT(9) extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 6037ec0b3096..ab7c7f157c60 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -25,6 +25,7 @@ #define FW_SHAVE_NN_MAX_SIZE SZ_2M #define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START) #define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE) +#define FW_VERSION_HEADER_SIZE SZ_4K #define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE) #define WATCHDOG_MSS_REDIRECT 32 @@ -46,10 +47,8 @@ #define IVPU_FOCUS_PRESENT_TIMER_MS 1000 static char *ivpu_firmware; -#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644); MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/.."); -#endif static struct { int gen; @@ -139,15 +138,6 @@ static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range return true; } -static u32 -ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr) -{ - if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO) - return ivpu_sched_mode; - - return VPU_SCHEDULING_MODE_OS; -} - static int ivpu_fw_parse(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; @@ -204,10 +194,8 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", fw_hdr->header_version, fw_hdr->image_format); - if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE)) - ivpu_warn(vdev, "Missing firmware version\n"); - - ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version); + ivpu_info(vdev, "Firmware: %s, version: %s", fw->name, + (const char *)fw_hdr + VPU_FW_HEADER_SIZE); if (IVPU_FW_CHECK_API_COMPAT(vdev, 
fw_hdr, BOOT, 3)) return -EINVAL; @@ -223,16 +211,14 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->cold_boot_entry_point = fw_hdr->entry_point; fw->entry_point = fw->cold_boot_entry_point; - fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL); + fw->trace_level = min_t(u32, ivpu_log_level, IVPU_FW_LOG_FATAL); fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING; fw->trace_hw_component_mask = -1; fw->dvfs_mode = 0; - fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; - ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size, @@ -328,7 +314,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev) goto err_free_fw_mem; } - if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO) + if (ivpu_log_level <= IVPU_FW_LOG_INFO) log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE; else log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE; @@ -623,8 +609,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev); boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev); boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev); - boot_params->vpu_scheduling_mode = vdev->fw->sched_mode; - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + boot_params->vpu_scheduling_mode = vdev->hw->sched_mode; + if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS; boot_params->dvfs_mode = vdev->fw->dvfs_mode; if (!IVPU_WA(disable_d0i3_msg)) diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 1d0b2bd9d65c..40d9d17be3f5 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ 
b/drivers/accel/ivpu/ivpu_fw.h @@ -1,16 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation */ #ifndef __IVPU_FW_H__ #define __IVPU_FW_H__ -#include "vpu_jsm_api.h" - -#define FW_VERSION_HEADER_SIZE SZ_4K -#define FW_VERSION_STR_SIZE SZ_256 - struct ivpu_device; struct ivpu_bo; struct vpu_boot_params; @@ -18,7 +13,6 @@ struct vpu_boot_params; struct ivpu_fw_info { const struct firmware *file; const char *name; - char version[FW_VERSION_STR_SIZE]; struct ivpu_bo *mem; struct ivpu_bo *mem_shave_nn; struct ivpu_bo *mem_log_crit; @@ -38,7 +32,6 @@ struct ivpu_fw_info { u32 secondary_preempt_buf_size; u64 read_only_addr; u32 read_only_size; - u32 sched_mode; }; int ivpu_fw_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c index 337c906b0210..ef0adb5e0fbe 100644 --- a/drivers/accel/ivpu/ivpu_fw_log.c +++ b/drivers/accel/ivpu/ivpu_fw_log.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation */ #include @@ -15,19 +15,19 @@ #include "ivpu_fw_log.h" #include "ivpu_gem.h" -#define IVPU_FW_LOG_LINE_LENGTH 256 +#define IVPU_FW_LOG_LINE_LENGTH 256 -unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR; -module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444); -MODULE_PARM_DESC(fw_log_level, - "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG) +unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR; +module_param(ivpu_log_level, uint, 0444); +MODULE_PARM_DESC(ivpu_log_level, + "NPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG) " info=" __stringify(IVPU_FW_LOG_INFO) " warn=" __stringify(IVPU_FW_LOG_WARN) " error=" __stringify(IVPU_FW_LOG_ERROR) " fatal=" __stringify(IVPU_FW_LOG_FATAL)); -static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, - struct 
vpu_tracing_buffer_header **out_log) +static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, + struct vpu_tracing_buffer_header **log_header) { struct vpu_tracing_buffer_header *log; @@ -48,7 +48,7 @@ static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *off return -EINVAL; } - *out_log = log; + *log_header = log; *offset += log->size; ivpu_dbg(vdev, FW_BOOT, @@ -59,7 +59,7 @@ static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *off return 0; } -static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p) +static void buffer_print(char *buffer, u32 size, struct drm_printer *p) { char line[IVPU_FW_LOG_LINE_LENGTH]; u32 index = 0; @@ -87,89 +87,56 @@ static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p) } line[index] = 0; if (index != 0) - drm_printf(p, "%s", line); + drm_printf(p, "%s\n", line); } -static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix, - bool only_new_msgs, struct drm_printer *p) +static void fw_log_print_buffer(struct ivpu_device *vdev, struct vpu_tracing_buffer_header *log, + const char *prefix, bool only_new_msgs, struct drm_printer *p) { - char *log_data = (void *)log + log->header_size; - u32 data_size = log->size - log->header_size; - u32 log_start = only_new_msgs ? 
READ_ONCE(log->read_index) : 0; - u32 log_end = READ_ONCE(log->write_index); - - if (log->wrap_count == log->read_wrap_count) { - if (log_end <= log_start) { - drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name); - return; - } - } else if (log->wrap_count == log->read_wrap_count + 1) { - if (log_end > log_start) - log_start = log_end; - } else { - log_start = log_end; + char *log_buffer = (void *)log + log->header_size; + u32 log_size = log->size - log->header_size; + u32 log_start = log->read_index; + u32 log_end = log->write_index; + + if (!(log->write_index || log->wrap_count) || + (log->write_index == log->read_index && only_new_msgs)) { + drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name); + return; } drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name); - if (log_end > log_start) { - fw_log_print_lines(log_data + log_start, log_end - log_start, p); + if (log->write_index > log->read_index) { + buffer_print(log_buffer + log_start, log_end - log_start, p); } else { - fw_log_print_lines(log_data + log_start, data_size - log_start, p); - fw_log_print_lines(log_data, log_end, p); + buffer_print(log_buffer + log_end, log_size - log_end, p); + buffer_print(log_buffer, log_end, p); } - drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */ + drm_printf(p, "\x1b[0m"); drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name); } -static void -fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name, - struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p) -{ - struct vpu_tracing_buffer_header *log; - u32 next = 0; - - while (fw_log_from_bo(vdev, bo, &next, &log) == 0) - fw_log_print_buffer(log, name, only_new_msgs, p); -} - void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p) { - fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p); - fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, 
only_new_msgs, p); -} - -void ivpu_fw_log_mark_read(struct ivpu_device *vdev) -{ - struct vpu_tracing_buffer_header *log; - u32 next; + struct vpu_tracing_buffer_header *log_header; + u32 next = 0; - next = 0; - while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) { - log->read_index = READ_ONCE(log->write_index); - log->read_wrap_count = READ_ONCE(log->wrap_count); - } + while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0) + fw_log_print_buffer(vdev, log_header, "NPU critical", only_new_msgs, p); next = 0; - while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) { - log->read_index = READ_ONCE(log->write_index); - log->read_wrap_count = READ_ONCE(log->wrap_count); - } + while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0) + fw_log_print_buffer(vdev, log_header, "NPU verbose", only_new_msgs, p); } -void ivpu_fw_log_reset(struct ivpu_device *vdev) +void ivpu_fw_log_clear(struct ivpu_device *vdev) { - struct vpu_tracing_buffer_header *log; - u32 next; + struct vpu_tracing_buffer_header *log_header; + u32 next = 0; - next = 0; - while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) { - log->read_index = 0; - log->read_wrap_count = 0; - } + while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0) + log_header->read_index = log_header->write_index; next = 0; - while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) { - log->read_index = 0; - log->read_wrap_count = 0; - } + while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0) + log_header->read_index = log_header->write_index; } diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h index 8bb528a73cb7..0b2573f6f315 100644 --- a/drivers/accel/ivpu/ivpu_fw_log.h +++ b/drivers/accel/ivpu/ivpu_fw_log.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation 
*/ #ifndef __IVPU_FW_LOG_H__ @@ -8,6 +8,8 @@ #include +#include + #include "ivpu_drv.h" #define IVPU_FW_LOG_DEFAULT 0 @@ -17,15 +19,20 @@ #define IVPU_FW_LOG_ERROR 4 #define IVPU_FW_LOG_FATAL 5 +extern unsigned int ivpu_log_level; + #define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE SZ_1M #define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE SZ_8M #define IVPU_FW_CRITICAL_BUFFER_SIZE SZ_512K -extern unsigned int ivpu_fw_log_level; - void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); -void ivpu_fw_log_mark_read(struct ivpu_device *vdev); -void ivpu_fw_log_reset(struct ivpu_device *vdev); +void ivpu_fw_log_clear(struct ivpu_device *vdev); + +static inline void ivpu_fw_log_dump(struct ivpu_device *vdev) +{ + struct drm_printer p = drm_info_printer(vdev->drm.dev); + ivpu_fw_log_print(vdev, false, &p); +} #endif /* __IVPU_FW_LOG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 16178054e629..c8daffd90f30 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -384,9 +384,6 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); - /* Add 1 jiffy to ensure the wait function never times out before intended timeout_ns */ - timeout += 1; - obj = drm_gem_object_lookup(file, args->handle); if (!obj) return -EINVAL; diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index 4e1054f3466e..ac095fa00973 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -89,14 +89,12 @@ static void timeouts_init(struct ivpu_device *vdev) vdev->timeout.tdr = 2000000; vdev->timeout.autosuspend = -1; vdev->timeout.d0i3_entry_msg = 500; - vdev->timeout.state_dump_msg = 10; } else if (ivpu_is_simics(vdev)) { vdev->timeout.boot = 50; vdev->timeout.jsm = 500; vdev->timeout.tdr = 10000; - vdev->timeout.autosuspend = 100; + vdev->timeout.autosuspend = -1; vdev->timeout.d0i3_entry_msg = 100; - 
vdev->timeout.state_dump_msg = 10; } else { vdev->timeout.boot = 1000; vdev->timeout.jsm = 500; @@ -106,7 +104,6 @@ static void timeouts_init(struct ivpu_device *vdev) else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; - vdev->timeout.state_dump_msg = 10; } } diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index fc4dbfc980c8..a96a05b2acda 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -46,6 +46,7 @@ struct ivpu_hw_info { u32 profiling_freq; } pll; u32 tile_fuse; + u32 sched_mode; u32 sku; u16 config; int dma_bits; diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c index 3212c99f3682..f3a0312f4e7f 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.c +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -157,6 +157,7 @@ static int info_init_mtl(struct ivpu_device *vdev) hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; hw->sku = BTRS_MTL_TILE_SKU_BOTH; hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; + hw->sched_mode = ivpu_sched_mode; return 0; } @@ -171,6 +172,7 @@ static int info_init_lnl(struct ivpu_device *vdev) if (ret) return ret; + hw->sched_mode = ivpu_sched_mode; hw->tile_fuse = tile_fuse_config; hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; @@ -307,6 +309,10 @@ static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp, wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0; wp->epp = enable ? 
PLL_EPP_DEFAULT : 0; } + + /* Simics cannot start without at least one tile */ + if (enable && ivpu_is_simics(vdev)) + wp->cfg = 1; } static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable) @@ -453,6 +459,9 @@ int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev) if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) return 0; + if (ivpu_is_simics(vdev)) + return 0; + return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); } diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c index 029dd065614b..bd2582a8c80f 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.c +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -303,6 +303,9 @@ static void pwr_island_trickle_drive_40xx(struct ivpu_device *vdev, bool enable) val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); + + if (enable) + ndelay(500); } static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable) @@ -315,6 +318,9 @@ static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable) val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val); + + if (!enable) + ndelay(500); } static void pwr_island_drive_40xx(struct ivpu_device *vdev, bool enable) @@ -333,11 +339,9 @@ static void pwr_island_enable(struct ivpu_device *vdev) { if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { pwr_island_trickle_drive_37xx(vdev, true); - ndelay(500); pwr_island_drive_37xx(vdev, true); } else { pwr_island_trickle_drive_40xx(vdev, true); - ndelay(500); pwr_island_drive_40xx(vdev, true); } } diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 01ebf88fe6ef..29b723039a34 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -15,7 +15,6 @@ #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" -#include "ivpu_trace.h" #define IPC_MAX_RX_MSG 128 
@@ -228,7 +227,6 @@ int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, stru goto unlock; ivpu_ipc_tx(vdev, cons->tx_vpu_addr); - trace_jsm("[tx]", req); unlock: mutex_unlock(&ipc->lock); @@ -280,13 +278,12 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg)); if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { - ivpu_err(vdev, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); + ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); ret = -EBADMSG; } if (jsm_msg) memcpy(jsm_msg, rx_msg->jsm_msg, size); - trace_jsm("[rx]", rx_msg->jsm_msg); } ivpu_ipc_rx_msg_del(vdev, rx_msg); @@ -356,32 +353,6 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, return ret; } -int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - u32 channel, unsigned long timeout_ms) -{ - struct ivpu_ipc_consumer cons; - int ret; - - ret = ivpu_rpm_get(vdev); - if (ret < 0) - return ret; - - ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); - - ret = ivpu_ipc_send(vdev, &cons, req); - if (ret) { - ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); - goto consumer_del; - } - - msleep(timeout_ms); - -consumer_del: - ivpu_ipc_consumer_del(vdev, &cons); - ivpu_rpm_put(vdev); - return ret; -} - static bool ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) @@ -536,6 +507,7 @@ void ivpu_ipc_fini(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; + drm_WARN_ON(&vdev->drm, ipc->on); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list)); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index b4dfb504679b..fb4de7fb8210 100644 --- 
a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -107,7 +107,5 @@ int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); -int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - u32 channel, unsigned long timeout_ms); #endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 7149312f16e1..be2e2bf0f43f 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -18,10 +18,11 @@ #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" -#include "ivpu_trace.h" #include "vpu_boot_api.h" #define CMD_BUF_IDX 0 +#define JOB_ID_JOB_MASK GENMASK(7, 0) +#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) #define JOB_MAX_BUFFER_COUNT 65535 static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) @@ -34,20 +35,24 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev, { u64 primary_size = ALIGN(vdev->fw->primary_preempt_buf_size, PAGE_SIZE); u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE); + struct ivpu_addr_range range; - if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW || - ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE) + if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW) return 0; - cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user, - primary_size, DRM_IVPU_BO_WC); + range.start = vdev->hw->ranges.user.end - (primary_size * IVPU_NUM_CMDQS_PER_CTX); + range.end = vdev->hw->ranges.user.end; + cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &range, primary_size, + DRM_IVPU_BO_WC); if (!cmdq->primary_preempt_buf) { ivpu_err(vdev, "Failed to create primary preemption buffer\n"); return -ENOMEM; } - cmdq->secondary_preempt_buf = 
ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma, - secondary_size, DRM_IVPU_BO_WC); + range.start = vdev->hw->ranges.shave.end - (secondary_size * IVPU_NUM_CMDQS_PER_CTX); + range.end = vdev->hw->ranges.shave.end; + cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &range, secondary_size, + DRM_IVPU_BO_WC); if (!cmdq->secondary_preempt_buf) { ivpu_err(vdev, "Failed to create secondary preemption buffer\n"); goto err_free_primary; @@ -57,24 +62,24 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev, err_free_primary: ivpu_bo_free(cmdq->primary_preempt_buf); - cmdq->primary_preempt_buf = NULL; return -ENOMEM; } static void ivpu_preemption_buffers_free(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) + if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW) return; - if (cmdq->primary_preempt_buf) - ivpu_bo_free(cmdq->primary_preempt_buf); - if (cmdq->secondary_preempt_buf) - ivpu_bo_free(cmdq->secondary_preempt_buf); + drm_WARN_ON(&vdev->drm, !cmdq->primary_preempt_buf); + drm_WARN_ON(&vdev->drm, !cmdq->secondary_preempt_buf); + ivpu_bo_free(cmdq->primary_preempt_buf); + ivpu_bo_free(cmdq->secondary_preempt_buf); } static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) { + struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB}; struct ivpu_device *vdev = file_priv->vdev; struct ivpu_cmdq *cmdq; int ret; @@ -83,33 +88,25 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) if (!cmdq) return NULL; - ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, - GFP_KERNEL); - if (ret < 0) { + ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL); + if (ret) { ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret); goto err_free_cmdq; } - ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, - 
&file_priv->cmdq_id_next, GFP_KERNEL); - if (ret < 0) { - ivpu_err(vdev, "Failed to allocate command queue id: %d\n", ret); - goto err_erase_db_xa; - } - cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!cmdq->mem) - goto err_erase_cmdq_xa; + goto err_erase_xa; ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq); if (ret) - ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n"); + goto err_free_cmdq_mem; return cmdq; -err_erase_cmdq_xa: - xa_erase(&file_priv->cmdq_xa, cmdq->id); -err_erase_db_xa: +err_free_cmdq_mem: + ivpu_bo_free(cmdq->mem); +err_erase_xa: xa_erase(&vdev->db_xa, cmdq->db_id); err_free_cmdq: kfree(cmdq); @@ -133,13 +130,13 @@ static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq struct ivpu_device *vdev = file_priv->vdev; int ret; - ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id, + ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->db_id, task_pid_nr(current), engine, cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); if (ret) return ret; - ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id, + ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->db_id, priority); if (ret) return ret; @@ -152,22 +149,21 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq * struct ivpu_device *vdev = file_priv->vdev; int ret; - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) - ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id, + if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) + ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->db_id, cmdq->db_id, cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); else ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); if (!ret) - ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx 
%d\n", - cmdq->db_id, cmdq->id, file_priv->ctx.id); + ivpu_dbg(vdev, JOB, "DB %d registered to ctx %d\n", cmdq->db_id, file_priv->ctx.id); return ret; } static int -ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority) +ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, u8 priority) { struct ivpu_device *vdev = file_priv->vdev; struct vpu_job_queue_header *jobq_header; @@ -183,18 +179,13 @@ ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 prio cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); jobq_header = &cmdq->jobq->header; - jobq_header->engine_idx = VPU_ENGINE_COMPUTE; + jobq_header->engine_idx = engine; jobq_header->head = 0; jobq_header->tail = 0; - if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) { - ivpu_dbg(vdev, JOB, "Turbo mode enabled"); - jobq_header->flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE; - } - wmb(); /* Flush WC buffer for jobq->header */ - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { - ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority); + if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_hws_cmdq_init(file_priv, cmdq, engine, priority); if (ret) return ret; } @@ -220,10 +211,10 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm cmdq->db_registered = false; - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { - ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); + if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->db_id); if (!ret) - ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id); + ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->db_id); } ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); @@ -233,46 +224,55 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm return 0; } -static struct ivpu_cmdq *ivpu_cmdq_acquire(struct 
ivpu_file_priv *file_priv, u8 priority) +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine, + u8 priority) { - struct ivpu_cmdq *cmdq; - unsigned long cmdq_id; + int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); + struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx]; int ret; lockdep_assert_held(&file_priv->lock); - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) - if (cmdq->priority == priority) - break; - if (!cmdq) { cmdq = ivpu_cmdq_alloc(file_priv); if (!cmdq) return NULL; - cmdq->priority = priority; + file_priv->cmdq[cmdq_idx] = cmdq; } - ret = ivpu_cmdq_init(file_priv, cmdq, priority); + ret = ivpu_cmdq_init(file_priv, cmdq, engine, priority); if (ret) return NULL; return cmdq; } -void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) +static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine, u8 priority) { - struct ivpu_cmdq *cmdq; - unsigned long cmdq_id; + int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); + struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx]; lockdep_assert_held(&file_priv->lock); - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) { - xa_erase(&file_priv->cmdq_xa, cmdq_id); + if (cmdq) { + file_priv->cmdq[cmdq_idx] = NULL; ivpu_cmdq_fini(file_priv, cmdq); ivpu_cmdq_free(file_priv, cmdq); } } +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) +{ + u16 engine; + u8 priority; + + lockdep_assert_held(&file_priv->lock); + + for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) + for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) + ivpu_cmdq_release_locked(file_priv, engine, priority); +} + /* * Mark the doorbell as unregistered * This function needs to be called when the VPU hardware is restarted @@ -281,13 +281,20 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) */ static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv) { - struct ivpu_cmdq *cmdq; - unsigned long cmdq_id; + u16 engine; + u8 priority; 
mutex_lock(&file_priv->lock); - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) - cmdq->db_registered = false; + for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) { + for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) { + int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); + struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx]; + + if (cmdq) + cmdq->db_registered = false; + } + } mutex_unlock(&file_priv->lock); } @@ -307,11 +314,17 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv) { - struct ivpu_cmdq *cmdq; - unsigned long cmdq_id; + u16 engine; + u8 priority; - xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) - ivpu_cmdq_fini(file_priv, cmdq); + for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) { + for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) { + int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); + + if (file_priv->cmdq[cmdq_idx]) + ivpu_cmdq_fini(file_priv, file_priv->cmdq[cmdq_idx]); + } + } } void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) @@ -322,7 +335,7 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) ivpu_cmdq_fini_all(file_priv); - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) + if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_OS) ivpu_jsm_context_release(vdev, file_priv->ctx.id); } @@ -336,29 +349,24 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) /* Check if there is space left in job queue */ if (next_entry == header->head) { - ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n", - job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail); + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", + job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); return -EBUSY; } - entry = &cmdq->jobq->slot[tail].job; + entry = &cmdq->jobq->job[tail]; entry->batch_buf_addr = job->cmd_buf_vpu_addr; 
entry->job_id = job->job_id; entry->flags = 0; if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION)) entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK; - if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { - if (cmdq->primary_preempt_buf) { - entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr; - entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf); - } - - if (cmdq->secondary_preempt_buf) { - entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr; - entry->secondary_preempt_buf_size = - ivpu_bo_size(cmdq->secondary_preempt_buf); - } + if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW && + (unlikely(!(ivpu_test_mode & IVPU_TEST_MODE_PREEMPTION_DISABLE)))) { + entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr; + entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf); + entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr; + entry->secondary_preempt_buf_size = ivpu_bo_size(cmdq->secondary_preempt_buf); } wmb(); /* Ensure that tail is updated after filling entry */ @@ -449,7 +457,6 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) job->file_priv = ivpu_file_priv_get(file_priv); - trace_job("create", job); ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); return job; @@ -489,7 +496,6 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job->bos[CMD_BUF_IDX]->job_status = job_status; dma_fence_signal(job->done_fence); - trace_job("done", job); ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); @@ -513,6 +519,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; + struct xa_limit job_id_range; struct ivpu_cmdq *cmdq; bool is_first_job; int ret; @@ 
-523,7 +530,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) mutex_lock(&file_priv->lock); - cmdq = ivpu_cmdq_acquire(file_priv, priority); + cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx, priority); if (!cmdq) { ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", file_priv->ctx.id, job->engine_idx, priority); @@ -531,11 +538,13 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) goto err_unlock_file_priv; } + job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; + xa_lock(&vdev->submitted_jobs_xa); is_first_job = xa_empty(&vdev->submitted_jobs_xa); - ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, - &file_priv->job_id_next, GFP_KERNEL); - if (ret < 0) { + ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + if (ret) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; @@ -557,7 +566,6 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) vdev->busy_start_ts = ktime_get(); } - trace_job("submit", job); ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", job->job_id, file_priv->ctx.id, job->engine_idx, priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); @@ -665,7 +673,7 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) int idx, ret; u8 priority; - if (params->engine != DRM_IVPU_ENGINE_COMPUTE) + if (params->engine > DRM_IVPU_ENGINE_COPY) return -EINVAL; if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index 8b19e3f8b4cf..6accb94028c7 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -28,10 +28,8 @@ struct ivpu_cmdq { struct ivpu_bo *secondary_preempt_buf; struct ivpu_bo *mem; u32 entry_count; - 
u32 id; u32 db_id; bool db_registered; - u8 priority; }; /** diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 30a40be76930..88105963c1b2 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -48,10 +48,9 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP); - IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT); IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); - IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE); @@ -132,7 +131,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) struct vpu_jsm_msg resp; int ret; - if (engine != VPU_ENGINE_COMPUTE) + if (engine > VPU_ENGINE_COPY) return -EINVAL; req.payload.query_engine_hb.engine_idx = engine; @@ -155,7 +154,7 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) struct vpu_jsm_msg resp; int ret; - if (engine != VPU_ENGINE_COMPUTE) + if (engine > VPU_ENGINE_COPY) return -EINVAL; req.payload.engine_reset.engine_idx = engine; @@ -174,7 +173,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id struct vpu_jsm_msg resp; int ret; - if (engine != VPU_ENGINE_COMPUTE) + if (engine > VPU_ENGINE_COPY) return -EINVAL; req.payload.engine_preempt.engine_idx = engine; @@ -197,7 +196,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN); ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, - VPU_IPC_CHAN_GEN_CMD, vdev->timeout.jsm); + VPU_IPC_CHAN_ASYNC_CMD, 
vdev->timeout.jsm); if (ret) ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); @@ -346,7 +345,7 @@ int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine) struct vpu_jsm_msg resp; int ret; - if (engine != VPU_ENGINE_COMPUTE) + if (engine >= VPU_ENGINE_NB) return -EINVAL; req.payload.hws_resume_engine.engine_idx = engine; @@ -394,6 +393,8 @@ int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u3 req.payload.hws_set_scheduling_log.host_ssid = host_ssid; req.payload.hws_set_scheduling_log.vpu_log_buffer_va = vpu_log_buffer_va; req.payload.hws_set_scheduling_log.notify_index = 0; + req.payload.hws_set_scheduling_log.enable_extra_events = + ivpu_test_mode & IVPU_TEST_MODE_HWS_EXTRA_EVENTS; ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); @@ -554,11 +555,3 @@ int ivpu_jsm_dct_disable(struct ivpu_device *vdev) return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); } - -int ivpu_jsm_state_dump(struct ivpu_device *vdev) -{ - struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP }; - - return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD, - vdev->timeout.state_dump_msg); -} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index 9e84d3526a14..e4e42c0ff6e6 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -43,6 +43,4 @@ int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mas u64 buffer_size, u32 *sample_size, u64 *info_size); int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us); int ivpu_jsm_dct_disable(struct ivpu_device *vdev); -int ivpu_jsm_state_dump(struct ivpu_device *vdev); - #endif diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 
0af614dfb6f9..891967a95bc3 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -612,22 +612,18 @@ int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev) if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) { ivpu_err(vdev, "Failed to allocate root page table for reserved context\n"); ret = -ENOMEM; - goto err_ctx_fini; + goto unlock; } ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable); if (ret) { ivpu_err(vdev, "Failed to set context descriptor for reserved context\n"); - goto err_ctx_fini; + goto unlock; } +unlock: mutex_unlock(&vdev->rctx.lock); return ret; - -err_ctx_fini: - mutex_unlock(&vdev->rctx.lock); - ivpu_mmu_context_fini(vdev, &vdev->rctx); - return ret; } void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index ffe7b10f8a76..2f9d37f5c208 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -11,7 +11,7 @@ #include "ivpu_ms.h" #include "ivpu_pm.h" -#define MS_INFO_BUFFER_SIZE SZ_64K +#define MS_INFO_BUFFER_SIZE SZ_16K #define MS_NUM_BUFFERS 2 #define MS_READ_PERIOD_MULTIPLIER 2 #define MS_MIN_SAMPLE_PERIOD_NS 1000000 diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 87d7411ae059..10b7ae0f866c 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -9,25 +9,21 @@ #include #include -#include "ivpu_coredump.h" +#include "vpu_boot_api.h" #include "ivpu_drv.h" +#include "ivpu_hw.h" #include "ivpu_fw.h" #include "ivpu_fw_log.h" -#include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_ms.h" #include "ivpu_pm.h" -#include "ivpu_trace.h" -#include "vpu_boot_api.h" static bool ivpu_disable_recovery; -#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); MODULE_PARM_DESC(disable_recovery, "Disables 
recovery when NPU hang is detected"); -#endif static unsigned long ivpu_tdr_timeout_ms; module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); @@ -41,7 +37,6 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_cmdq_reset_all_contexts(vdev); ivpu_ipc_reset(vdev); - ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; } @@ -78,8 +73,8 @@ static int ivpu_resume(struct ivpu_device *vdev) int ret; retry: - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); pci_restore_state(to_pci_dev(vdev->drm.dev)); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); ret = ivpu_hw_power_up(vdev); if (ret) { @@ -128,8 +123,7 @@ static void ivpu_pm_recovery_work(struct work_struct *work) if (ret) ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); - ivpu_jsm_state_dump(vdev); - ivpu_dev_coredump(vdev); + ivpu_fw_log_dump(vdev); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->reset_pending, 1); @@ -201,7 +195,6 @@ int ivpu_pm_suspend_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); unsigned long timeout; - trace_pm("suspend"); ivpu_dbg(vdev, PM, "Suspend..\n"); timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr); @@ -219,7 +212,6 @@ int ivpu_pm_suspend_cb(struct device *dev) ivpu_pm_prepare_warm_boot(vdev); ivpu_dbg(vdev, PM, "Suspend done.\n"); - trace_pm("suspend done"); return 0; } @@ -230,7 +222,6 @@ int ivpu_pm_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; - trace_pm("resume"); ivpu_dbg(vdev, PM, "Resume..\n"); ret = ivpu_resume(vdev); @@ -238,7 +229,6 @@ int ivpu_pm_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to resume: %d\n", ret); ivpu_dbg(vdev, PM, "Resume done.\n"); - trace_pm("resume done"); return ret; } @@ -253,7 +243,6 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work)); - 
trace_pm("runtime suspend"); ivpu_dbg(vdev, PM, "Runtime suspend..\n"); ivpu_mmu_disable(vdev); @@ -273,14 +262,13 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) if (!is_idle || ret_d0i3) { ivpu_err(vdev, "Forcing cold boot due to previous errors\n"); atomic_inc(&vdev->pm->reset_counter); - ivpu_dev_coredump(vdev); + ivpu_fw_log_dump(vdev); ivpu_pm_prepare_cold_boot(vdev); } else { ivpu_pm_prepare_warm_boot(vdev); } ivpu_dbg(vdev, PM, "Runtime suspend done.\n"); - trace_pm("runtime suspend done"); return 0; } @@ -291,7 +279,6 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; - trace_pm("runtime resume"); ivpu_dbg(vdev, PM, "Runtime resume..\n"); ret = ivpu_resume(vdev); @@ -299,7 +286,6 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); ivpu_dbg(vdev, PM, "Runtime resume done.\n"); - trace_pm("runtime resume done"); return ret; } @@ -425,7 +411,7 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { - ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); + ivpu_err_ratelimited(vdev, "Filed to enable DCT: %d\n", ret); return ret; } @@ -442,7 +428,7 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) ret = ivpu_jsm_dct_disable(vdev); if (ret) { - ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); + ivpu_err_ratelimited(vdev, "Filed to disable DCT: %d\n", ret); return ret; } diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 616477fc17fa..913669f1786e 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -6,8 +6,6 @@ #include #include -#include "ivpu_drv.h" -#include "ivpu_fw.h" #include "ivpu_hw.h" #include "ivpu_sysfs.h" @@ -41,30 +39,8 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b static DEVICE_ATTR_RO(npu_busy_time_us); -/** - * 
DOC: sched_mode - * - * The sched_mode is used to report current NPU scheduling mode. - * - * It returns following strings: - * - "HW" - Hardware Scheduler mode - * - "OS" - Operating System Scheduler mode - * - */ -static ssize_t -sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct drm_device *drm = dev_get_drvdata(dev); - struct ivpu_device *vdev = to_ivpu_device(drm); - - return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? "HW" : "OS"); -} - -static DEVICE_ATTR_RO(sched_mode); - static struct attribute *ivpu_dev_attrs[] = { &dev_attr_npu_busy_time_us.attr, - &dev_attr_sched_mode.attr, NULL, }; diff --git a/drivers/accel/ivpu/ivpu_trace.h b/drivers/accel/ivpu/ivpu_trace.h deleted file mode 100644 index eb792038e701..000000000000 --- a/drivers/accel/ivpu/ivpu_trace.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2020-2024 Intel Corporation - */ - -#if !defined(__IVPU_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) -#define __IVPU_TRACE_H__ - -#include -#include "ivpu_drv.h" -#include "ivpu_job.h" -#include "vpu_jsm_api.h" -#include "ivpu_jsm_msg.h" -#include "ivpu_ipc.h" - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM vpu -#define TRACE_INCLUDE_FILE ivpu_trace - -TRACE_EVENT(pm, - TP_PROTO(const char *event), - TP_ARGS(event), - TP_STRUCT__entry(__field(const char *, event)), - TP_fast_assign(__entry->event = event;), - TP_printk("%s", __entry->event) -); - -TRACE_EVENT(job, - TP_PROTO(const char *event, struct ivpu_job *job), - TP_ARGS(event, job), - TP_STRUCT__entry(__field(const char *, event) - __field(u32, ctx_id) - __field(u32, engine_id) - __field(u32, job_id) - ), - TP_fast_assign(__entry->event = event; - __entry->ctx_id = job->file_priv->ctx.id; - __entry->engine_id = job->engine_idx; - __entry->job_id = job->job_id;), - TP_printk("%s context:%d engine:%d job:%d", - __entry->event, - __entry->ctx_id, - __entry->engine_id, - __entry->job_id) -); - -TRACE_EVENT(jsm, - 
TP_PROTO(const char *event, struct vpu_jsm_msg *msg), - TP_ARGS(event, msg), - TP_STRUCT__entry(__field(const char *, event) - __field(const char *, type) - __field(enum vpu_ipc_msg_status, status) - __field(u32, request_id) - __field(u32, result) - ), - TP_fast_assign(__entry->event = event; - __entry->type = ivpu_jsm_msg_type_to_str(msg->type); - __entry->status = msg->status; - __entry->request_id = msg->request_id; - __entry->result = msg->result;), - TP_printk("%s type:%s, status:%#x, id:%#x, result:%#x", - __entry->event, - __entry->type, - __entry->status, - __entry->request_id, - __entry->result) -); - -#endif /* __IVPU_TRACE_H__ */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#include diff --git a/drivers/accel/ivpu/ivpu_trace_points.c b/drivers/accel/ivpu/ivpu_trace_points.c deleted file mode 100644 index f8fb99de0de3..000000000000 --- a/drivers/accel/ivpu/ivpu_trace_points.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020-2024 Intel Corporation - */ - -#ifndef __CHECKER__ -#define CREATE_TRACE_POINTS -#include "ivpu_trace.h" -#endif diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 908e68ea1c39..82954b91b748 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -1,13 +1,14 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (c) 2020-2024, Intel Corporation. + * Copyright (c) 2020-2023, Intel Corporation. */ #ifndef VPU_BOOT_API_H #define VPU_BOOT_API_H /* - * The below values will be used to construct the version info this way: + * =========== FW API version information beginning ================ + * The below values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; * VPU_BOOT_API_VER_PATCH will be ignored. 
KMD and compatibility is not affected if this changes @@ -26,18 +27,19 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 26 +#define VPU_BOOT_API_VER_MINOR 24 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_BOOT_API_VER_PATCH 3 +#define VPU_BOOT_API_VER_PATCH 0 /* * Index in the API version table * Must be unique for each API */ #define VPU_BOOT_API_VER_INDEX 0 +/* ------------ FW API version information end ---------------------*/ #pragma pack(push, 4) @@ -162,6 +164,8 @@ enum vpu_trace_destination { /* VPU 30xx HW component IDs are sequential, so define first and last IDs. */ #define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT #define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15 +#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST +#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST struct vpu_boot_l2_cache_config { u8 use; @@ -195,17 +199,6 @@ struct vpu_warm_boot_section { */ #define POWER_PROFILE_SURVIVABILITY 0x1 -/** - * Enum for dvfs_mode boot param. 
- */ -enum vpu_governor { - VPU_GOV_DEFAULT = 0, /* Default Governor for the system */ - VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */ - VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */ - VPU_GOV_POWER_SAVE = 3, /* Power save governor */ - VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */ -}; - struct vpu_boot_params { u32 magic; u32 vpu_id; @@ -308,14 +301,7 @@ struct vpu_boot_params { u32 temp_sensor_period_ms; /** PLL ratio for efficient clock frequency */ u32 pn_freq_pll_ratio; - /** - * DVFS Mode: - * 0 - Default, DVFS mode selected by the firmware - * 1 - Max Performance - * 2 - On Demand - * 3 - Power Save - * 4 - On Demand Priority Aware - */ + /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */ u32 dvfs_mode; /** * Depending on DVFS Mode: @@ -346,8 +332,8 @@ struct vpu_boot_params { u64 d0i3_entry_vpu_ts; /* * The system time of the host operating system in microseconds. - * E.g the number of microseconds since 1st of January 1970, or whatever - * date the host operating system uses to maintain system time. + * E.g the number of microseconds since 1st of January 1970, or whatever date the + * host operating system uses to maintain system time. * This value will be used to track system time on the VPU. * The KMD is required to update this value on every VPU reset. 
*/ @@ -396,7 +382,10 @@ struct vpu_boot_params { u32 pad6[734]; }; -/* Magic numbers set between host and vpu to detect corruption of tracing init */ +/* + * Magic numbers set between host and vpu to detect corruption of tracing init + */ + #define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE) /* Tracing buffer message format definitions */ @@ -416,9 +405,7 @@ struct vpu_tracing_buffer_header { u32 host_canary_start; /* offset from start of buffer for trace entries */ u32 read_index; - /* keeps track of wrapping on the reader side */ - u32 read_wrap_count; - u32 pad_to_cache_line_size_0[13]; + u32 pad_to_cache_line_size_0[14]; /* End of first cache line */ /** diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 7215c144158c..33f462b1a25d 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 25 +#define VPU_JSM_API_VER_MINOR 16 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -36,7 +36,7 @@ /* * Number of Priority Bands for Hardware Scheduling - * Bands: Idle(0), Normal(1), Focus(2), RealTime(3) + * Bands: RealTime, Focus, Normal, Idle */ #define VPU_HWS_NUM_PRIORITY_BANDS 4 @@ -74,7 +74,6 @@ #define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU /* Job status returned when the job was preempted mid-inference */ #define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU -#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU /* * Host <-> VPU IPC channels. @@ -87,58 +86,18 @@ /* * Job flags bit masks. */ -enum { - /* - * Null submission mask. - * When set, batch buffer's commands are not processed but returned as - * successful immediately, except fences and timestamps. - * When cleared, batch buffer's commands are processed normally. - * Used for testing and profiling purposes. 
- */ - VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U), - /* - * Inline command mask. - * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below). - * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below). - */ - VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U), - /* - * VPU private data mask. - * Reserved for the VPU to store private data about the job (or inline command) - * while being processed. - */ - VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U -}; +#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 +#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000 /* - * Job queue flags bit masks. + * Sizes of the reserved areas in jobs, in bytes. */ -enum { - /* - * No job done notification mask. - * When set, indicates that no job done notification should be sent for any - * job from this queue. When cleared, indicates that job done notification - * should be sent for every job completed from this queue. - */ - VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U), - /* - * Native fence usage mask. - * When set, indicates that job queue uses native fences (as inline commands - * in job queue). Such queues may also use legacy fences (as commands in batch buffers). - * When cleared, indicates the job queue only uses legacy fences. - * NOTE: For queues using native fences, VPU expects that all jobs in the queue - * are immediately followed by an inline command object. This object is expected - * to be a fence signal command in most cases, but can also be a NOP in case the host - * does not need per-job fence signalling. Other inline commands objects can be - * inserted between "job and inline command" pairs. - */ - VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), +#define VPU_JOB_RESERVED_BYTES 8 - /* - * Enable turbo mode for testing NPU performance; not recommended for regular usage. - */ - VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U) -}; +/* + * Sizes of the reserved areas in job queues, in bytes. 
+ */ +#define VPU_JOB_QUEUE_RESERVED_BYTES 52 /* * Max length (including trailing NULL char) of trace entity name (e.g., the @@ -181,113 +140,24 @@ enum { */ #define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL -/* - * Inline commands types. - */ -/* - * NOP. - * VPU does nothing other than consuming the inline command object. - */ -#define VPU_INLINE_CMD_TYPE_NOP 0x0 -/* - * Fence wait. - * VPU waits for the fence current value to reach monitored value. - * Fence wait operations are executed upon job dispatching. While waiting for - * the fence to be satisfied, VPU blocks fetching of the next objects in the queue. - * Jobs present in the queue prior to the fence wait object may be processed - * concurrently. - */ -#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1 -/* - * Fence signal. - * VPU sets the fence current value to the provided value. If new current value - * is equal to or higher than monitored value, VPU sends fence signalled notification - * to the host. Fence signal operations are executed upon completion of all the jobs - * present in the queue prior to them, and in-order relative to each other in the queue. - * But jobs in-between them may be processed concurrently and may complete out-of-order. - */ -#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2 - -/* - * Job scheduling priority bands for both hardware scheduling and OS scheduling. - */ -enum vpu_job_scheduling_priority_band { - VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0, - VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1, - VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2, - VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3, - VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4, -}; - /* * Job format. - * Jobs defines the actual workloads to be executed by a given engine. 
*/ struct vpu_job_queue_entry { - /**< Address of VPU commands batch buffer */ - u64 batch_buf_addr; - /**< Job ID */ - u32 job_id; - /**< Flags bit field, see VPU_JOB_FLAGS_* above */ - u32 flags; - /** - * Doorbell ring timestamp taken by KMD from SoC's global system clock, in - * microseconds. NPU can convert this value to its own fixed clock's timebase, - * to match other profiling timestamps. - */ - u64 doorbell_timestamp; - /**< Extra id for job tracking, used only in the firmware perf traces */ - u64 host_tracking_id; - /**< Address of the primary preemption buffer to use for this job */ + u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ + u32 job_id; /**< Job ID */ + u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ + u64 root_page_table_addr; /**< Address of root page table to use for this job */ + u64 root_page_table_update_counter; /**< Page tables update events counter */ u64 primary_preempt_buf_addr; - /**< Size of the primary preemption buffer to use for this job */ + /**< Address of the primary preemption buffer to use for this job */ u32 primary_preempt_buf_size; - /**< Size of secondary preemption buffer to use for this job */ + /**< Size of the primary preemption buffer to use for this job */ u32 secondary_preempt_buf_size; - /**< Address of secondary preemption buffer to use for this job */ + /**< Size of secondary preemption buffer to use for this job */ u64 secondary_preempt_buf_addr; - u64 reserved_0; -}; - -/* - * Inline command format. - * Inline commands are the commands executed at scheduler level (typically, - * synchronization directives). Inline command and job objects must be of - * the same size and have flags field at same offset. - */ -struct vpu_inline_cmd { - u64 reserved_0; - /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */ - u32 type; - /* Flags bit field, see VPU_JOB_FLAGS_* above. */ - u32 flags; - /* Inline command payload. Depends on inline command type. 
*/ - union { - /* Fence (wait and signal) commands' payload. */ - struct { - /* Fence object handle. */ - u64 fence_handle; - /* User VA of the current fence value. */ - u64 current_value_va; - /* User VA of the monitored fence value (read-only). */ - u64 monitored_value_va; - /* Value to wait for or write in fence location. */ - u64 value; - /* User VA of the log buffer in which to add log entry on completion. */ - u64 log_buffer_va; - } fence; - /* Other commands do not have a payload. */ - /* Payload definition for future inline commands can be inserted here. */ - u64 reserved_1[6]; - } payload; -}; - -/* - * Job queue slots can be populated either with job objects or inline command objects. - */ -union vpu_jobq_slot { - struct vpu_job_queue_entry job; - struct vpu_inline_cmd inline_cmd; + /**< Address of secondary preemption buffer to use for this job */ + u8 reserved_0[VPU_JOB_RESERVED_BYTES]; }; /* @@ -297,21 +167,7 @@ struct vpu_job_queue_header { u32 engine_idx; u32 head; u32 tail; - u32 flags; - /* Set to 1 to indicate priority_band field is valid */ - u32 priority_band_valid; - /* - * Priority for the work of this job queue, valid only if the HWS is NOT used - * and the `priority_band_valid` is set to 1. It is applied only during - * the VPU_JSM_MSG_REGISTER_DB message processing. - * The device firmware might use the `priority_band` to optimize the power - * management logic, but it will not affect the order of jobs. 
- * Available priority bands: @see enum vpu_job_scheduling_priority_band - */ - u32 priority_band; - /* Inside realtime band assigns a further priority, limited to 0..31 range */ - u32 realtime_priority_level; - u32 reserved_0[9]; + u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES]; }; /* @@ -319,7 +175,7 @@ struct vpu_job_queue_header { */ struct vpu_job_queue { struct vpu_job_queue_header header; - union vpu_jobq_slot slot[]; + struct vpu_job_queue_entry job[]; }; /** @@ -341,7 +197,9 @@ enum vpu_trace_entity_type { struct vpu_hws_log_buffer_header { /* Written by VPU after adding a log entry. Initialised by host to 0. */ u32 first_free_entry_index; - /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */ + /* Incremented by VPU every time the VPU overwrites the 0th entry; + * initialised by host to 0. + */ u32 wraparound_count; /* * This is the number of buffers that can be stored in the log buffer provided by the host. @@ -372,80 +230,14 @@ struct vpu_hws_log_buffer_entry { u64 operation_data[2]; }; -/* Native fence log buffer types. */ -enum vpu_hws_native_fence_log_type { - VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1, - VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2 -}; - -/* HWS native fence log buffer header. */ -struct vpu_hws_native_fence_log_header { - union { - struct { - /* Index of the first free entry in buffer. */ - u32 first_free_entry_idx; - /* Incremented each time NPU wraps around the buffer to write next entry. */ - u32 wraparound_count; - }; - /* Field allowing atomic update of both fields above. */ - u64 atomic_wraparound_and_entry_idx; - }; - /* Log buffer type, see enum vpu_hws_native_fence_log_type. */ - u64 type; - /* Allocated number of entries in the log buffer. */ - u64 entry_nb; - u64 reserved[2]; -}; - -/* Native fence log operation types. 
*/ -enum vpu_hws_native_fence_log_op { - VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0, - VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1 -}; - -/* HWS native fence log entry. */ -struct vpu_hws_native_fence_log_entry { - /* Newly signaled/unblocked fence value. */ - u64 fence_value; - /* Native fence object handle to which this operation belongs. */ - u64 fence_handle; - /* Operation type, see enum vpu_hws_native_fence_log_op. */ - u64 op_type; - u64 reserved_0; - /* - * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence - * wait was started (in NPU SysTime). - */ - u64 fence_wait_start_ts; - u64 reserved_1; - /* Timestamp at which fence operation was completed (in NPU SysTime). */ - u64 fence_end_ts; -}; - -/* Native fence log buffer. */ -struct vpu_hws_native_fence_log_buffer { - struct vpu_hws_native_fence_log_header header; - struct vpu_hws_native_fence_log_entry entry[]; -}; - /* * Host <-> VPU IPC messages types. */ enum vpu_ipc_msg_type { VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, - /* IPC Host -> Device, Async commands */ VPU_JSM_MSG_ASYNC_CMD = 0x1100, VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, - /** - * Preempt engine. The NPU stops (preempts) all the jobs currently - * executing on the target engine making the engine become idle and ready to - * execute new jobs. - * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of - * the target engine, but it stops processing them (until the queue doorbell - * is rung again); the host is responsible to reset the job queue, either - * after preemption or when resubmitting jobs to the queue. - */ VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, VPU_JSM_MSG_REGISTER_DB = 0x1102, VPU_JSM_MSG_UNREGISTER_DB = 0x1103, @@ -531,10 +323,9 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC commands before this one. 
* */ VPU_JSM_MSG_STATE_DUMP = 0x11FF, - /* IPC Host -> Device, General commands */ VPU_JSM_MSG_GENERAL_CMD = 0x1200, - VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD, + VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, /** * Control dyndbg behavior by executing a dyndbg command; equivalent to * Linux command: `echo '' > /dynamic_debug/control`. @@ -544,12 +335,8 @@ enum vpu_ipc_msg_type { * Perform the save procedure for the D0i3 entry */ VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202, - /* IPC Device -> Host, Job completion */ VPU_JSM_MSG_JOB_DONE = 0x2100, - /* IPC Device -> Host, Fence signalled */ - VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101, - /* IPC Device -> Host, Async command completion */ VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, @@ -635,7 +422,6 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC responses before this one. * */ VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF, - /* IPC Device -> Host, General command completion */ VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, @@ -814,6 +600,11 @@ struct vpu_jsm_metric_streamer_update { u64 next_buffer_size; }; +struct vpu_ipc_msg_payload_blob_deinit { + /* 64-bit unique ID for the blob to be de-initialized. */ + u64 blob_id; +}; + struct vpu_ipc_msg_payload_job_done { /* Engine to which the job was submitted. */ u32 engine_idx; @@ -831,21 +622,6 @@ struct vpu_ipc_msg_payload_job_done { u64 cmdq_id; }; -/* - * Notification message upon native fence signalling. - * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED - */ -struct vpu_ipc_msg_payload_native_fence_signalled { - /* Engine ID. */ - u32 engine_idx; - /* Host SSID. */ - u32 host_ssid; - /* CMDQ ID */ - u64 cmdq_id; - /* Fence object handle. 
*/ - u64 fence_handle; -}; - struct vpu_jsm_engine_reset_context { /* Host SSID */ u32 host_ssid; @@ -924,6 +700,11 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { u8 power_limit[16]; }; +struct vpu_ipc_msg_payload_blob_deinit_done { + /* 64-bit unique ID for the blob de-initialized. */ + u64 blob_id; +}; + /* HWS priority band setup request / response */ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* @@ -1013,10 +794,7 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { u32 reserved_0; /* Command queue id */ u64 cmdq_id; - /* - * Priority band to assign to work of this context. - * Available priority bands: @see enum vpu_job_scheduling_priority_band - */ + /* Priority band to assign to work of this context */ u32 priority_band; /* Inside realtime band assigns a further priority */ u32 realtime_priority_level; @@ -1091,7 +869,9 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log { */ u64 notify_index; /* - * Field is now deprecated, will be removed when KMD is updated to support removal + * Enable extra events to be output to log for debug of scheduling algorithm. + * Interpreted by VPU as a boolean to enable or disable, expected values are + * 0 and 1. 
*/ u32 enable_extra_events; /* Zero Padding */ @@ -1463,10 +1243,10 @@ union vpu_ipc_msg_payload { struct vpu_jsm_metric_streamer_start metric_streamer_start; struct vpu_jsm_metric_streamer_stop metric_streamer_stop; struct vpu_jsm_metric_streamer_update metric_streamer_update; + struct vpu_ipc_msg_payload_blob_deinit blob_deinit; struct vpu_ipc_msg_payload_ssid_release ssid_release; struct vpu_jsm_hws_register_db hws_register_db; struct vpu_ipc_msg_payload_job_done job_done; - struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled; struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; struct vpu_ipc_msg_payload_register_db_done register_db_done; @@ -1474,6 +1254,7 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; struct vpu_jsm_metric_streamer_done metric_streamer_done; + struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; struct vpu_ipc_msg_payload_trace_config trace_config; struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; struct vpu_ipc_msg_payload_trace_get_name trace_get_name; diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index a35b97b097bf..084fb529e1e9 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -12,6 +12,9 @@ extern "C" { #endif +#define DRM_IVPU_DRIVER_MAJOR 1 +#define DRM_IVPU_DRIVER_MINOR 0 + #define DRM_IVPU_GET_PARAM 0x00 #define DRM_IVPU_SET_PARAM 0x01 #define DRM_IVPU_BO_CREATE 0x02 @@ -258,7 +261,7 @@ struct drm_ivpu_bo_info { /* drm_ivpu_submit engines */ #define DRM_IVPU_ENGINE_COMPUTE 0 -#define DRM_IVPU_ENGINE_COPY 1 /* Deprecated */ +#define DRM_IVPU_ENGINE_COPY 1 /** * struct drm_ivpu_submit - Submit commands to the VPU @@ -289,6 +292,10 @@ struct drm_ivpu_submit { * %DRM_IVPU_ENGINE_COMPUTE: * * Performs Deep Learning Neural 
Compute Inference Operations + * + * %DRM_IVPU_ENGINE_COPY: + * + * Performs memory copy operations to/from system memory allocated for VPU */ __u32 engine;