diff --git a/README.md b/README.md index 3ce76b044..2c76fc972 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ This is a fork with many changes, including but not limited to: The `NEWS` file contains a more detailed changelog. -[✻] Works on IVB and newer, broken on SNB and ILK (for now) +[✻] Works on IVB and newer, semi-broken on SNB and ILK, unsupported on CTG # Release schedule diff --git a/configure.ac b/configure.ac index c36cb4e76..afcea3d24 100644 --- a/configure.ac +++ b/configure.ac @@ -62,6 +62,11 @@ AC_ARG_ENABLE([hybrid-codec], [build with hybrid codec support @<:@default=no@:>@])], [], [enable_hybrid_codec="no"]) +AC_ARG_ENABLE([h264-ctg], + [AC_HELP_STRING([--enable-h264-ctg], + [build with experimental H.264 support for G45 @<:@default=no@:>@])], + [], [enable_h264_ctg="no"]) + AC_ARG_ENABLE([tests], [AC_HELP_STRING([--enable-tests], [build tests @<:@default=no@:>@])], @@ -131,6 +136,10 @@ if test "$enable_hybrid_codec" = "yes"; then AC_DEFINE([HAVE_HYBRID_CODEC], [1], [Defined to 1 if hybrid codec is needed]) fi +if test "$enable_h264_ctg" = "yes"; then + AC_DEFINE([I965_H264_ENABLE_CTG], [1], [Defined to 1 if H264 support on CTG is needed]) +fi + AM_CONDITIONAL(ENABLE_TESTS, test "$enable_tests" = "yes") VA_VERSION=`$PKG_CONFIG --modversion libva` diff --git a/meson_options.txt b/meson_options.txt index ec07a066d..4c679a7e9 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -2,4 +2,6 @@ option('driverdir', type : 'string', description : 'drivers path') option('with_x11', type : 'combo', choices : ['yes', 'no', 'auto'], value : 'auto') option('with_wayland', type : 'combo', choices : ['yes', 'no', 'auto'], value : 'auto') option('enable_hybrid_codec', type : 'boolean', value : false) +option('enable_h264_ctg', type : 'boolean', value : false, + description: 'Enables shader-based H.264 decoding for GMA 4500 (M)HD hardware.') option('enable_tests', type : 'boolean', value : false) diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c 
index b23164754..d67a7c01c 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -327,7 +327,12 @@ i965_avc_bsd_slice_state(VADriverContextP ctx, slice_param->chroma_weight_l0[j][1] == 128) i965_h264_context->weight128_chroma_l0 |= (1 << j); } else { - /* FIXME: workaround for weight 128 */ + /** + * FIXME: workaround for weight 128 + * + * TODO(irql): There is an alternate version in the G4X patchset, + * we might have to switch to it, for now leave as is. + */ if (slice_param->luma_weight_l0[j] == 128 || slice_param->chroma_weight_l0[j][0] == 128 || slice_param->chroma_weight_l0[j][1] == 128) @@ -357,6 +362,10 @@ i965_avc_bsd_slice_state(VADriverContextP ctx, slice_param->chroma_weight_l1[j][1] == 128) i965_h264_context->weight128_chroma_l1 |= (1 << j); } else { + /** + * TODO(irql): There is an alternate version in the G4X patchset, + * we might have to switch to it, for now leave as is. + */ if (slice_param->luma_weight_l0[j] == 128 || slice_param->chroma_weight_l0[j][0] == 128 || slice_param->chroma_weight_l0[j][1] == 128) @@ -610,7 +619,7 @@ g4x_avc_bsd_object(VADriverContextP ctx, OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (8 - 2)); OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */ OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */ - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, slice_index); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); @@ -781,10 +790,132 @@ static void i965_avc_bsd_phantom_slice(VADriverContextP ctx, struct decode_state *decode_state, VAPictureParameterBufferH264 *pic_param, + int prev_slice_type, struct i965_h264_context *i965_h264_context) { - i965_avc_bsd_object(ctx, decode_state, pic_param, NULL, 0, i965_h264_context); + i965_avc_bsd_object(ctx, decode_state, pic_param, NULL, prev_slice_type, i965_h264_context); +} + +#if defined(I965_H264_ENABLE_CTG) + +static int 
+i965_list_find_weight(short *list, int size, short value) +{ + int i; + + for (i = 0; i < size; i++) + { + if (list[i] == value) + return 1; + } + + return 0; +} + +static void +i965_weight128_workaround(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context) +{ + struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context; + VAPictureParameterBufferH264 *pic_param; + VASliceParameterBufferH264 *slice_param; + short weight128_offset0 = 0; + int i, j; + + i965_h264_context->weight128_offset0_flag = 0; + i965_h264_context->weight128_offset0 = 0; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + + for (j = 0; j < decode_state->num_slice_params; j++) + { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) + { + + if ((slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) && + (pic_param->pic_fields.bits.weighted_pred_flag == 1)) + { + i965_h264_context->weight128_offset0_flag = + i965_list_find_weight(&slice_param->luma_weight_l0[0], 32, 128) || + i965_list_find_weight(&slice_param->chroma_weight_l0[0][0], 64, 128); + } + + if ((slice_param->slice_type == SLICE_TYPE_B) && + (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) + { + i965_h264_context->weight128_offset0_flag = + i965_list_find_weight(&slice_param->luma_weight_l0[0], 32, 128) || + i965_list_find_weight(&slice_param->chroma_weight_l0[0][0], 64, 128) || + i965_list_find_weight(&slice_param->luma_weight_l1[0], 32, 128) || + i965_list_find_weight(&slice_param->chroma_weight_l1[0][0], 64, 128); + } + + if (i965_h264_context->weight128_offset0_flag) + break; + + slice_param++; + } + + if (i965_h264_context->weight128_offset0_flag) + break; + } + + 
if (!i965_h264_context->weight128_offset0_flag) + return; + + for (weight128_offset0 = 0; weight128_offset0 < 128; weight128_offset0++) + { + int bfound = 0; + + for (j = 0; j < decode_state->num_slice_params; j++) + { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) + { + + if ((slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) && + (pic_param->pic_fields.bits.weighted_pred_flag == 1)) + { + bfound = + i965_list_find_weight(&slice_param->luma_weight_l0[0], 32, weight128_offset0) || + i965_list_find_weight(&slice_param->chroma_weight_l0[0][0], 64, weight128_offset0); + } + + if ((slice_param->slice_type == SLICE_TYPE_B) && + (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) + { + bfound = + i965_list_find_weight(&slice_param->luma_weight_l0[0], 32, weight128_offset0) || + i965_list_find_weight(&slice_param->chroma_weight_l0[0][0], 64, weight128_offset0) || + i965_list_find_weight(&slice_param->luma_weight_l1[0], 32, weight128_offset0) || + i965_list_find_weight(&slice_param->chroma_weight_l1[0][0], 64, weight128_offset0); + } + + if (bfound) + break; + + slice_param++; + } + + if (bfound) + break; + } + + if (!bfound) + { + i965_h264_context->weight128_offset0 = weight128_offset0; + break; + } + } } +#endif void i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context) @@ -793,13 +924,17 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v struct intel_batchbuffer *batch = i965_h264_context->batch; VAPictureParameterBufferH264 *pic_param; VASliceParameterBufferH264 *slice_param; - int i, j; + int i, j, prev_slice_type = SLICE_TYPE_I; assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 
*)decode_state->pic_param->buffer; intel_update_avc_frame_store_index(ctx, decode_state, pic_param, i965_h264_context->fsid_list, &i965_h264_context->fs_ctx); +#if defined(I965_H264_ENABLE_CTG) + i965_weight128_workaround(ctx, decode_state, h264_context); +#endif + i965_h264_context->enable_avc_ildb = 0; i965_h264_context->picture.i_flag = 1; @@ -820,6 +955,7 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v break; } + prev_slice_type = slice_param->slice_type; slice_param++; } } @@ -855,7 +991,7 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v } } - i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param, i965_h264_context); + i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param, prev_slice_type, i965_h264_context); intel_batchbuffer_emit_mi_flush(batch); intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); diff --git a/src/i965_avc_ildb.c b/src/i965_avc_ildb.c index efdd9ff7b..eaf85a66d 100644 --- a/src/i965_avc_ildb.c +++ b/src/i965_avc_ildb.c @@ -595,8 +595,11 @@ i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context) avc_ildb_context->urb.vfe_start = 0; avc_ildb_context->urb.cs_start = avc_ildb_context->urb.vfe_start + avc_ildb_context->urb.num_vfe_entries * avc_ildb_context->urb.size_vfe_entry; + +#if !defined(I965_H264_ENABLE_CTG) assert(avc_ildb_context->urb.cs_start + avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); +#endif for (i = 0; i < NUM_AVC_ILDB_SURFACES; i++) { dri_bo_unreference(avc_ildb_context->surface[i].s_bo); diff --git a/src/i965_defines.h b/src/i965_defines.h index c0c04e147..d1976860d 100644 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -35,6 +35,7 @@ #define CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD CMD(2, 0, 2) #define CMD_MEDIA_GATEWAY_STATE CMD(2, 0, 3) #define CMD_MEDIA_STATE_FLUSH CMD(2, 0, 4) +#define CMD_MEDIA_OBJECT_PRT CMD(2, 1, 2) #define CMD_MEDIA_OBJECT_WALKER 
CMD(2, 1, 3) #define CMD_PIPELINED_POINTERS CMD(3, 0, 0) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 7fd765aa5..a7da83af2 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -88,6 +88,9 @@ static struct hw_codec_info g4x_hw_codec_info = { .min_linear_hpitch = 4, .has_mpeg2_decoding = 1, +#if defined(I965_H264_ENABLE_CTG) + .has_h264_decoding = 1, +#endif .num_filters = 0, }; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 393df8917..71af981fc 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -6274,7 +6274,17 @@ i965_GetSurfaceAttributes( if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { attrib_list[i].value.value.i = VA_FOURCC_I420; - } else { + } +#if defined(I965_H264_ENABLE_CTG) + else if (obj_config->profile == VAProfileH264ConstrainedBaseline || + obj_config->profile == VAProfileH264Main || + obj_config->profile == VAProfileH264High) + { + attrib_list[i].value.value.i = VA_FOURCC_NV12; + } +#endif + else + { assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } @@ -6306,12 +6316,27 @@ i965_GetSurfaceAttributes( } else { if (IS_G4X(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || - obj_config->profile == VAProfileMPEG2Main) { + obj_config->profile == VAProfileMPEG2Main) + { if (attrib_list[i].value.value.i != VA_FOURCC_I420) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } - } else { + } +#if defined(I965_H264_ENABLE_CTG) + else if (obj_config->profile == VAProfileH264ConstrainedBaseline || + obj_config->profile == VAProfileH264Main || + obj_config->profile == VAProfileH264High) + { + if (attrib_list[i].value.value.i != VA_FOURCC_NV12) + { + attrib_list[i].value.value.i = 0; + attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; + } + } +#endif + else + { assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } diff --git a/src/i965_media.c 
b/src/i965_media.c index 28cb18754..0ae80dc8b 100644 --- a/src/i965_media.c +++ b/src/i965_media.c @@ -352,6 +352,10 @@ g4x_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: +#if defined(I965_H264_ENABLE_CTG) + i965_media_h264_dec_context_init(ctx, media_context); + break; +#endif case VAProfileVC1Simple: case VAProfileVC1Main: case VAProfileVC1Advanced: diff --git a/src/i965_media.h b/src/i965_media.h index e7fadda3c..0c126aa90 100644 --- a/src/i965_media.h +++ b/src/i965_media.h @@ -37,7 +37,12 @@ #include "i965_structs.h" +#if defined(I965_H264_ENABLE_CTG) +#define MAX_INTERFACE_DESC 32 +#else #define MAX_INTERFACE_DESC 16 +#endif + #define MAX_MEDIA_SURFACES 34 struct decode_state; diff --git a/src/i965_media_h264.c b/src/i965_media_h264.c index fd1ed5f88..6c760caf0 100644 --- a/src/i965_media_h264.c +++ b/src/i965_media_h264.c @@ -152,6 +152,16 @@ struct intra_kernel_header intra_kernel_header_gen4 = { (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP) }; +#if defined(I965_H264_ENABLE_CTG) +/* software scoreboard kernel entry offsets: [0] non-MBAFF, [1] MBAFF */ +static unsigned long avc_sw_scoreboard_kernel_offset[] = { +#define GEN4_SCOREBOARD_IP 2484 +#define GEN4_SCOREBOARD_MBAFF_IP 2568 + GEN4_SCOREBOARD_IP * INST_UNIT_GEN4, + GEN4_SCOREBOARD_MBAFF_IP * INST_UNIT_GEN4 +}; +#endif + static const uint32_t h264_avc_combined_gen4[][4] = { #include "shaders/h264/mc/avc_mc.g4b" }; @@ -448,7 +458,18 @@ i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i9 memset(desc, 0, sizeof(*desc)); desc->desc0.grf_reg_blocks = 7; desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */ - desc->desc1.const_urb_entry_read_offset = 0; + +#if defined(I965_H264_ENABLE_CTG) + if (!i965_h264_context->use_avc_hw_scoreboard) + { + desc->desc1.const_urb_entry_read_offset = (i < FRAMEMB_MOTION) ? 
0 : 2; + } + else +#endif + { + desc->desc1.const_urb_entry_read_offset = 0; + } + desc->desc1.const_urb_entry_read_len = 2; desc->desc3.binding_table_entry_count = 0; desc->desc3.binding_table_pointer = @@ -468,6 +489,38 @@ i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i9 desc++; } +#if defined(I965_H264_ENABLE_CTG) + if (!i965_h264_context->use_avc_hw_scoreboard) + { + /* Index [0-15] goes through the Interface Descriptor Remap Table + * so don't use it for software scoreboard kernel + */ + for (; i < 16; i++) + { + desc++; + } + + for (; i < 18; i++) + { + int kernel_offset = avc_sw_scoreboard_kernel_offset[i - 16]; + memset(desc, 0, sizeof(*desc)); + desc->desc0.grf_reg_blocks = 15; + desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */ + desc->desc1.const_urb_entry_read_offset = 0; + desc->desc1.const_urb_entry_read_len = 0; + desc->desc3.binding_table_entry_count = 0; + desc->desc3.binding_table_pointer = 0; + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + desc->desc0.grf_reg_blocks + kernel_offset, + i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0), + i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo); + desc++; + } + } +#endif + dri_bo_unmap(bo); } @@ -490,6 +543,11 @@ i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media vfe_state->vfe1.children_present = 0; vfe_state->vfe2.interface_descriptor_base = media_context->idrt.bo->offset >> 4; /* reloc */ + +#if defined(I965_H264_ENABLE_CTG) + vfe_state->vfe1.debug_counter_control = 2; /* for software scoreboard kernel */ +#endif + dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0, @@ -648,6 +706,25 @@ i965_media_h264_upload_constants(VADriverContextP ctx, assert(media_context->curbe.bo->virtual); constant_buffer = media_context->curbe.bo->virtual; +#if 0 +/** + * TODO(irql): I have no idea if the G4X variant is better of the one left 
here. + */ + if (unlikely(i965_h264_context->is_g4x_context)) + { + /* constant (64 bytes) for Intra kernel */ + memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header)); + + struct inter_kernel_header inter_header; + inter_header.weight_offset = i965_h264_context->weight128_offset0; + inter_header.weight_offset_flag = !i965_h264_context->weight128_offset0_flag; + inter_header.pad0 = 0; + + constant_buffer += 64; + memcpy(constant_buffer, &inter_header, sizeof(inter_header)); + } +#endif + /* HW solution for W=128 */ if (i965_h264_context->use_hw_w128) { memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header)); @@ -693,6 +770,41 @@ i965_media_h264_states_setup(VADriverContextP ctx, i965_media_h264_upload_constants(ctx, decode_state, media_context); } +#if defined(I965_H264_ENABLE_CTG) +static void +i965_avc_sw_scoreboard_objects(VADriverContextP ctx, struct i965_h264_context *i965_h264_context) +{ + struct intel_batchbuffer *batch = i965_h264_context->batch; + int width_in_mb_minus_1 = i965_h264_context->picture.width_in_mbs - 1; + int height_in_mb_minus_1 = i965_h264_context->avc_it_command_mb_info.mbs / (width_in_mb_minus_1 + 1) / (1 + !!i965_h264_context->picture.mbaff_frame_flag) - 1; + int total_mb = i965_h264_context->avc_it_command_mb_info.mbs; + int kernel_index = i965_h264_context->picture.mbaff_frame_flag ? 17 : 16; + + BEGIN_BATCH(batch, 16); + OUT_BATCH(batch, CMD_MEDIA_OBJECT_PRT | (16 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + ((kernel_index << 24) | /* Interface Descriptor Offset. 
*/ + (1 << 23))); /* PRT_Fence Needed */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + ((height_in_mb_minus_1 << 16) | + (width_in_mb_minus_1))); + OUT_BATCH(batch, total_mb); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} +#endif + static void i965_media_h264_objects(VADriverContextP ctx, struct decode_state *decode_state, @@ -705,6 +817,13 @@ i965_media_h264_objects(VADriverContextP ctx, assert(media_context->private_context); i965_h264_context = (struct i965_h264_context *)media_context->private_context; +#if defined(I965_H264_ENABLE_CTG) + if (!i965_h264_context->use_avc_hw_scoreboard) + { + i965_avc_sw_scoreboard_objects(ctx, i965_h264_context); + } +#endif + dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True); assert(i965_h264_context->avc_it_command_mb_info.bo->virtual); object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual; @@ -715,7 +834,7 @@ i965_media_h264_objects(VADriverContextP ctx, dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6) /* | (1 << 8) */); OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo, I915_GEM_DOMAIN_COMMAND, 0, 0); @@ -886,6 +1005,12 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context media_context->urb.size_vfe_entry = 16; media_context->urb.num_cs_entries = 1; + + /** + * TODO(irql): + * + * This is two on the G4X patchset. 
+ */ media_context->urb.size_cs_entry = 1; media_context->urb.vfe_start = 0; diff --git a/src/i965_media_h264.h b/src/i965_media_h264.h index 9b0b8d624..ffe1f39af 100644 --- a/src/i965_media_h264.h +++ b/src/i965_media_h264.h @@ -46,7 +46,11 @@ struct i965_h264_context { } picture; int enable_avc_ildb; - int use_avc_hw_scoreboard; + /** + * 0 = G4X (which requires a shader-based scoreboard) + * 1 = Ironlake + */ + unsigned int use_avc_hw_scoreboard; int use_hw_w128; unsigned int weight128_luma_l0; diff --git a/src/intel_driver.c b/src/intel_driver.c index b1358c40d..1e3702160 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -177,9 +177,25 @@ intel_driver_init(VADriverContextP ctx) intel->mocs_state = GEN9_PTE_CACHE; intel_driver_get_revid(intel, &intel->revision); + + if (g_intel_debug_option_flags & VA_INTEL_DUMP_KERNEL_CAPS) + intel_driver_dump_kernel_caps(*intel); + return true; } +void intel_driver_dump_kernel_caps(struct intel_driver_data intel) +{ + fprintf(stderr, "Dumping kernel caps...\r\n\r\n"); + + fprintf(stderr, "HAS_EXECBUFFER2: %s\r\n", (intel.has_exec2 ? "true" : "false")); + fprintf(stderr, "HAS_BSD_RING : %s\r\n", (intel.has_bsd ? "true" : "false")); + fprintf(stderr, "HAS_BLT_ENGINE : %s\r\n", (intel.has_blt ? "true" : "false")); + fprintf(stderr, "HAS_VEBOX_RING : %s\r\n", (intel.has_vebox ? "true" : "false")); + fprintf(stderr, "HAS_BSD2_RING : %s\r\n", (intel.has_bsd2 ? "true" : "false")); + fprintf(stderr, "HAS_LOADED_HUC : %s\r\n", (intel.has_huc ? 
"true" : "false")); +} + void intel_driver_terminate(VADriverContextP ctx) { diff --git a/src/intel_driver.h b/src/intel_driver.h index 0f916780b..bfd1a3b52 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -111,6 +111,7 @@ extern uint32_t g_intel_debug_option_flags; #define VA_INTEL_DEBUG_OPTION_BENCH (1 << 1) #define VA_INTEL_DEBUG_OPTION_DUMP_AUB (1 << 2) #define VA_INTEL_DEBUG_VERBOSE (1 << 3) +#define VA_INTEL_DUMP_KERNEL_CAPS (1 << 4) #define ASSERT_RET(value, fail_ret) do { \ if (!(value)) { \ @@ -237,6 +238,7 @@ struct intel_driver_data { }; bool intel_driver_init(VADriverContextP ctx); +void intel_driver_dump_kernel_caps(struct intel_driver_data intel); void intel_driver_terminate(VADriverContextP ctx); static INLINE struct intel_driver_data * diff --git a/src/meson.build b/src/meson.build index a957cd3c8..79fa8a0d6 100644 --- a/src/meson.build +++ b/src/meson.build @@ -6,6 +6,9 @@ config_cfg.set('INTEL_DRIVER_MINOR_VERSION', intel_vaapi_driver_minor_version) config_cfg.set('INTEL_DRIVER_MICRO_VERSION', intel_vaapi_driver_micro_version) config_cfg.set('INTEL_DRIVER_PRE_VERSION', intel_vaapi_driver_pre_version) config_cfg.set10('HAVE_HYBRID_CODEC', get_option('enable_hybrid_codec')) +# set(), not set10(): the sources test this with "#if defined(...)", so the macro must stay undefined when the option is off. +config_cfg.set('I965_H264_ENABLE_CTG', get_option('enable_h264_ctg')) + if WITH_X11 config_cfg.set('HAVE_VA_X11', 1) endif diff --git a/src/shaders/h264/mc/AVCMCInter.asm b/src/shaders/h264/mc/AVCMCInter.asm index 96f1e8d50..d485f72a7 100644 --- a/src/shaders/h264/mc/AVCMCInter.asm +++ b/src/shaders/h264/mc/AVCMCInter.asm @@ -1,40 +1,40 @@ /* * All inter-prediction macroblock kernels * Copyright © <2010>, Intel Corporation. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * This file was originally licensed under the following license - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file was originally licensed under the following license + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
* */ // Kernel name: AVCMCInter.asm @@ -258,7 +258,7 @@ INTERLABEL(EXIT_LOOP): #ifdef SW_SCOREBOARD wait n0:ud // Now wait for scoreboard to response - #include "Soreboard_update.asm" // scorboard update function + #include "scoreboard_update.asm" // scoreboard update function #else // Check for write commit first if SW scoreboard is disabled mov (1) gREG_WRITE_COMMIT_Y<1>:ud gREG_WRITE_COMMIT_Y<0;1,0>:ud // Make sure Y write is committed diff --git a/src/shaders/h264/mc/Makefile.am b/src/shaders/h264/mc/Makefile.am index a207389dc..17d3f177b 100644 --- a/src/shaders/h264/mc/Makefile.am +++ b/src/shaders/h264/mc/Makefile.am @@ -74,7 +74,6 @@ INTEL_MC_ASM = \ INTEL_MC_INC = \ AllAVC_Build.inc \ AllAVC_Export.inc \ - export.inc \ header.inc \ HwmcOnlyHeader.inc \ inter_Header.inc \ @@ -166,10 +165,12 @@ INTEL_ILDB_ASM = \ ../ildb/writeURB_UV_Child.asm \ ../ildb/writeURB_Y_Child.asm +INTEL_MC_GEN4_ASM = avc_mc.gen4.asm INTEL_MC_GEN5_ASM = avc_mc.gen5.asm TARGETS = if HAVE_GEN4ASM +TARGETS += $(INTEL_MC_G4B) TARGETS += $(INTEL_MC_G4B_GEN5) TARGETS += $(INTEL_G4B) TARGETS += $(INTEL_G4B_GEN5) @@ -177,7 +178,7 @@ endif all-local: $(TARGETS) -SUFFIXES = .g4a .g4b .g4b.gen5 .gen5.asm +SUFFIXES = .g4a .g4b .g4b.gen5 .gen4.asm .gen5.asm if HAVE_GEN4ASM .g4a.g4m: @@ -187,6 +188,20 @@ if HAVE_GEN4ASM .g4m.g4b.gen5: $(AM_V_GEN)$(GEN4ASM) -g 5 -o $@ $*.g4m +$(INTEL_MC_GEN4_ASM): $(INTEL_MC_ASM) $(INTEL_MC_INC) $(INTEL_ILDB_ASM) + $(AM_V_GEN)cpp -DDEV_CTG -DBOOTSTRAP -I $(srcdir)/../ildb/ $(srcdir)/AllAVC.asm > _mc0.$@ && \ + $(PYTHON3) $(top_srcdir)/src/shaders/gpp.py _mc0.$@ $@ && \ + $(GEN4ASM) -l $(srcdir)/list -a -e tmp.$(INTEL_MC_EXPORT) $@ \ + -o /dev/null && \ + mv tmp.$(INTEL_MC_EXPORT) $(INTEL_MC_EXPORT) && \ + cpp -DDEV_CTG -I $(srcdir)/../ildb/ $(srcdir)/AllAVC.asm > _mc1.$@ && \ + $(PYTHON3) $(top_srcdir)/src/shaders/gpp.py _mc1.$@ $@ && \ + rm _mc0.$@ _mc1.$@ + +$(INTEL_MC_G4B): $(INTEL_MC_GEN4_ASM) + $(AM_V_GEN)$(GEN4ASM) -l $(srcdir)/list -a -e tmp.$(INTEL_MC_EXPORT) $< -o $@ && \ + mv tmp.$(INTEL_MC_EXPORT) $(INTEL_MC_EXPORT) + $(INTEL_MC_GEN5_ASM): $(INTEL_MC_ASM) $(INTEL_MC_INC) $(INTEL_ILDB_ASM) 
$(AM_V_GEN)cpp -DDEV_ILK -DBOOTSTRAP -I $(srcdir)/../ildb/ $(srcdir)/AllAVC.asm > _mc0.$@ && \ $(PYTHON3) $(top_srcdir)/src/shaders/gpp.py _mc0.$@ $@ && \ @@ -207,11 +222,8 @@ $(INTEL_MC_G4B_GEN5): $(INTEL_MC_GEN5_ASM) $(INTEL_G4B): $(INTEL_G4I) endif -CLEANFILES = \ - $(INTEL_G4M) \ - $(INTEL_MC_GEN5_ASM) - DISTCLEANFILES = $(TARGETS) $(INTEL_MC_EXPORT_GEN5) +CLEANFILES = $(INTEL_MC_GEN4_ASM) $(INTEL_MC_GEN5_ASM) EXTRA_DIST = \ $(INTEL_G4A) \ diff --git a/src/shaders/h264/mc/list b/src/shaders/h264/mc/list index 000e0fbad..53132c11e 100644 --- a/src/shaders/h264/mc/list +++ b/src/shaders/h264/mc/list @@ -5,6 +5,8 @@ INTRA_PCM FRAME_MB FIELD_MB MBAFF_MB +SCOREBOARD +SCOREBOARD_MBAFF SETHWSCOREBOARD SETHWSCOREBOARD_MBAFF AVC_ILDB_ROOT_Y_ILDB_FRAME diff --git a/src/shaders/h264/mc/scoreboard.asm b/src/shaders/h264/mc/scoreboard.asm index 54a619e6d..40454a81d 100644 --- a/src/shaders/h264/mc/scoreboard.asm +++ b/src/shaders/h264/mc/scoreboard.asm @@ -1,40 +1,40 @@ /* * Dependency control scoreboard kernel * Copyright © <2010>, Intel Corporation. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * This file was originally licensed under the following license - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file was originally licensed under the following license + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. * */ // Kernel name: scoreboard.asm @@ -73,7 +73,7 @@ mov (1) acc0:ud 0xf0aa55a5:ud #endif #include "header.inc" -#include "scoreboard_header.inc" +#include "Scoreboard_header.inc" // // Now, begin source code.... diff --git a/src/shaders/h264/mc/scoreboard_MBAFF.asm b/src/shaders/h264/mc/scoreboard_MBAFF.asm index 1a6472a96..8d40385ee 100644 --- a/src/shaders/h264/mc/scoreboard_MBAFF.asm +++ b/src/shaders/h264/mc/scoreboard_MBAFF.asm @@ -1,40 +1,40 @@ /* * Dependency control scoreboard kernel for MBAFF frame * Copyright © <2010>, Intel Corporation. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * This file was originally licensed under the following license - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file was originally licensed under the following license + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. * */ // Kernel name: scoreboard_MBAFF.asm @@ -73,7 +73,7 @@ mov (1) acc0:ud 0xffaa55a5:ud #endif #include "header.inc" -#include "scoreboard_header.inc" +#include "Scoreboard_header.inc" // // Now, begin source code....