diff --git a/RELEASENOTES-1.4.docu b/RELEASENOTES-1.4.docu index d001e604..2a684414 100644 --- a/RELEASENOTES-1.4.docu +++ b/RELEASENOTES-1.4.docu @@ -529,4 +529,9 @@ Fixed a bug where executed callbacks posted via immediate after statements would not trigger the device state change notifier. + Added the `thread_aware` device + template to provide support for writing thread-aware device models. + For more information, see the Device templates subsection of the + Libraries and Built-ins section of the DML 1.4 Reference + Manual. diff --git a/include/simics/dmllib.h b/include/simics/dmllib.h index dfed0bdd..1263c074 100644 --- a/include/simics/dmllib.h +++ b/include/simics/dmllib.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -633,6 +634,22 @@ FOR_ENDIAN_VARIANTS(DEFINE_PRECHANGE); #undef FOR_ALL_BITSIZES #undef FOR_ENDIAN_VARIANTS + +#define DML_THREAD_AWARE_IFACE_CALL(target_obj, call_expr) (({ \ + domain_lock_t *__lock; \ + SIM_ACQUIRE_TARGET(target_obj, &__lock); \ + __auto_type __iface_ret = call_expr; \ + SIM_RELEASE_TARGET(target_obj, &__lock); \ + __iface_ret; \ + })) + +#define DML_THREAD_AWARE_IFACE_CALL_VOID(target_obj, call_expr) (({ \ + domain_lock_t *__lock; \ + SIM_ACQUIRE_TARGET(target_obj, &__lock); \ + call_expr; \ + SIM_RELEASE_TARGET(target_obj, &__lock); \ + })) + UNUSED static attr_value_t _serialize_identity(const _id_info_t *id_info_array, const _identity_t id) { if (unlikely(id.id) == 0) { @@ -1293,15 +1310,23 @@ _DML_execute_immediate_afters_now(conf_object_t *dev, UNUSED static void _DML_execute_immediate_afters(conf_object_t *dev, lang_void *aux) { _dml_immediate_after_state_t *state = (_dml_immediate_after_state_t *)aux; - ASSERT(state->posted); + // Only acquire the device if it hasn't been deleted to avoid a + // use-after-free. There's no possibility of a data race on state->deleted + // (nor any other part of the state if state->deleted is true) as the + // device can only be deleted in global context. if (unlikely(state->deleted)) { + ASSERT(state->posted); ASSERT(QEMPTY(state->queue)); // No need to call QFREE, already done MM_FREE(state); return; } + domain_lock_t *lock; + SIM_ACQUIRE_OBJECT(dev, &lock); + ASSERT(state->posted); if (unlikely(QEMPTY(state->queue))) { state->posted = false; + SIM_RELEASE_OBJECT(dev, &lock); return; } _dml_immediate_after_queue_elem_t elem = QREMOVE(state->queue); @@ -1314,6 +1339,7 @@ _DML_execute_immediate_afters(conf_object_t *dev, lang_void *aux) { } elem.callback(dev, elem.data.indices, elem.data.args); _free_simple_event_data(elem.data); + SIM_RELEASE_OBJECT(dev, &lock); } UNUSED static void diff --git a/lib/1.4/dml-builtins.dml b/lib/1.4/dml-builtins.dml index 711484ae..78b33847 100644 --- a/lib/1.4/dml-builtins.dml +++ b/lib/1.4/dml-builtins.dml @@ -16,6 +16,7 @@ header %{ import "simics/C.dml"; import "simics/devs/io-memory.dml"; import "simics/model-iface/bank-instrumentation.dml"; +import "simics/model-iface/concurrency.dml"; import "simics/model-iface/int-register.dml"; import "simics/model-iface/register-view.dml"; import "simics/model-iface/register-view-read-only.dml"; @@ -46,6 +47,15 @@ extern size_t strlen(const char *s); extern char *strcpy(char *s, const char *s); extern void *memcpy(void *dest, const void *src, size_t n); +extern void SIM_ACQUIRE_OBJECT(conf_object_t *obj, domain_lock_t **lock); +extern void SIM_RELEASE_OBJECT(conf_object_t *obj, domain_lock_t **lock); + +extern void SIM_ACQUIRE_CELL(conf_object_t *obj, domain_lock_t **lock); +extern void SIM_RELEASE_CELL(conf_object_t *obj, domain_lock_t **lock); + +extern void SIM_ACQUIRE_TARGET(conf_object_t *obj, domain_lock_t **lock); +extern void SIM_RELEASE_TARGET(conf_object_t *obj, domain_lock_t **lock); + extern typedef struct { } _callback_entry_t; @@ -528,6 +538,9 @@ The `device` template contains the following parameters: this device, as specified by the `--simics-api` command-line argument; e.g. `"6"` for the Simics 6 API. +* `_thread_aware` *[bool]*: Whether the device model is declared to be + *thread-aware*. This is `true` if and only if the [`_thread_aware` + template](#device-templates) has been instantiated for the device. */ template device { is object; @@ -570,6 +583,9 @@ template device { // this carried semantics in DML 1.2; deprecated in 1.4 param _confidentiality = undefined; + param __thread_aware default false; + param _thread_aware = __thread_aware; + method _init() { _rec_init(); } method _post_init() { _rec_post_init(); } method init() default {} @@ -578,6 +594,139 @@ template device { method destroy() default { } } +/** +## Device templates + +As the top-level scope defines the device object, device templates are +templates intended to be instantiated on the top-level. Such templates are +typically written in order to describe logic which affects the entire device +model as a whole. + +The only built-in device template (save for the `device` template which defines +the object type itself) is the `_thread_aware` template; however, the DML +standard library (`utility.dml`) also defines the device templates [`poreset`, +`hreset`, and `sreset`](utility.html#templates-for-reset), which declare device +support for different kinds of resets. + +The `_thread_aware` template is offered as a means of writing *thread-aware* +DML device models, in the sense defined by the *Simics API Reference Manual* +(in section 2.7). This can be necessary in a multicore-acceleration or +subsystem-threading setting, if the models would otherwise present a bottleneck +in the simulation. The `_thread_aware` template **must be** and **should only +be** used for this purpose. + +Instantiating the `_thread_aware` template on top-level declares the device +model as being thread-aware, which carries a large number of ramifications and +responsibilities. + +The consequences of instantiating the `_thread_aware` template are as follows: +* The [`_thread_aware` parameter](#device-objects) of the device will resolve + to constant `true` rather than `false`. This can be leveraged by model code: + `dev._thread_aware` is suitable for use together with `#if` to either + conditionally generate additional code needed to support thread-aware device + models, or to conditionally raise compile-time errors via the [`error` + statement/declaration](language.html#error-statements) if some piece of code + does not support thread-aware device models. +* An implementation of the `concurrency_mode` interface is provided for the + device object + * The `_thread_aware` template declares the top-level `supported_modes` + parameter to allow the modeller to configure the supported concurrency + modes that the device reports. Its default value is + `Sim_Concurrency_Mode_Serialized | Sim_Concurrency_Mode_Serialized_Memory`. + * The `_thread_aware` template declares the top-level + `init_concurrency_mode` parameter to allow the modeller to configure + the default concurrency mode the device places itself if never told + to switch modes. Its default value is `Sim_Concurrency_Mode_Serialized`. + * The current concurrency mode of the device can be accessed within DML via + `dev.concurrency_mode.current`. +* All events of the device model (including those generated for `after` + statements) are registered with the `Sim_EC_No_Serialize` flag. +* **DMLC will insert code to automatically acquire and release locks required + by any feature directly provided by DML.** The following is guaranteed: + * DMLC will ensure that the thread domain associated with the device object + is automatically acquired and later released together with **any** entry + into the device. This includes interface and attribute implementations, + callbacks of event objects and `after` statements, exported methods, + and method references converted to function pointers (except if the + method is `independent`.) + * Any call to an interface of a `connect` is automatically enclosed by + `SIM_ACQUIRE_TARGET`/`SIM_RELEASE_TARGET` on the connected object. + * When a connect is being configured by Simics, the cell domain of the + connected device is guaranteed to be acquired over the course of the + subsequent calls to `set()` and `validate()` of the `connect` object. + * Any operations performed on the device clock by `event` objects, the + `after` statement, or the `cancel_after()` operation will automatically + hold the cell domain of the device clock as the operation is performed. + (Note: *only* as the operation is performed. If an event is posted, then + cell domain is held only for the post operation — it will *not* + then keep being held while the event is pending.) + * Any built-in template or template provided by the DML standard library + can be used with thread-aware device models unless explicitly noted + otherwise (in which case trying to use that template in a thread-aware + model will lead to a compile-time error.) +* **For all other cases, it is the modeller's responsibility to ensure that all + DML code used by the model acquires and releases locks as needed.** + * Any direct invocation of a Simics API function (`SIM_` functions) must + be guarded by acquiring any relevant locks as given by API Reference + Manual. For example, if the specified execution contet of a Simics API + function is cell context, the cell thread domain must be held when the + call is made. + * Any calls to DML methods must ensure that any lock the methods require + to be held are indeed being held (except for the thread domain of the + device object, as DMLC automatically ensures it's acquired.) + * For example, any manual calls to `set()` and `validate()` methods + of connects must ensure that the cell domain of the passed target is + acquired (if non-`NULL`) before the call is made. + * **Each DML library used by the model must be able to support + thread-aware models**, unless the modeller can **trivially** ensure + that the library is used safely. + * If the DML library is not written to support thread-aware devices, + then usage of it is only safe if the modeller can ensure that every + method of the DML library is only called in contexts when the + relevant locks the method requires be held are indeed are being held + (as stated previously.) + * As libraries may feature object declarations, templates, and + `in each` declarations that may override and invoke methods + in ways that might not be clear to the modeller, safe usage is + very difficult to ensure for all but the simplest of libraries. + It is therefore better to prefer libraries have been + intentionally written to support thread-aware devices. +*/ +template _thread_aware is device { + param __thread_aware = true; + param init_concurrency_mode default Sim_Concurrency_Mode_Serialized; + param supported_modes default Sim_Concurrency_Mode_Serialized + | Sim_Concurrency_Mode_Serialized_Memory; + + group _init_current_concurrency_mode is init { + method init() { + dev.concurrency_mode.current = dev.init_concurrency_mode; + assert (dev.concurrency_mode.current + & dev.concurrency_mode.current - 1) == 0; + assert (dev.concurrency_mode.current + & dev.supported_modes) != 0; + } + } + + implement concurrency_mode { + saved concurrency_mode_t current; + + method supported_modes() -> (concurrency_mode_t) default { + return dev.supported_modes; + } + + method switch_mode(concurrency_mode_t mode) default { + assert (mode & mode - 1) == 0; + assert (mode & dev.supported_modes) != 0; + current = mode; + } + + method current_mode() -> (concurrency_mode_t) default { + return current; + } + } +} + /** ## Group objects @@ -1308,8 +1457,11 @@ template connect is _conf_attribute { : (port != NULL && strcmp(port, this.port) == 0))) return Sim_Set_Ok; + local domain_lock_t *lock; + // Check if the new setting is valid if (obj) { + #if (dev._thread_aware) SIM_ACQUIRE_CELL(obj, &lock); local bool valid = true; foreach iface in (this._each_interface) { if (iface._required) { @@ -1329,13 +1481,16 @@ template connect is _conf_attribute { } } } - if (!valid) + if (!valid) { + #if (dev._thread_aware) SIM_RELEASE_CELL(obj, &lock); return Sim_Set_Interface_Not_Found; + } local const char *old_port = this.port; this.port = port; local bool ok = validate(obj); this.port = old_port; if (!ok) { + #if (dev._thread_aware) SIM_RELEASE_CELL(obj, &lock); return Sim_Set_Illegal_Value; } } @@ -1346,6 +1501,7 @@ template connect is _conf_attribute { this.port = port ? MM_STRDUP(port) : NULL; /*% COVERITY var_deref_model %*/ this.set(obj); + #if (dev._thread_aware) if (obj) SIM_RELEASE_CELL(obj, &lock); return Sim_Set_Ok; } @@ -1386,6 +1542,9 @@ The `init_as_subobj` inherits the [`init`](#init) and template init_as_subobj is (connect, init) { param classname : const char *; param configuration default "none"; + #if (dev._thread_aware) { + error "`init_as_subobj` can't be used in a thread-aware device model"; + } method init() default { local int portname_len = this._nongroup_parent.objtype == "device" #? 0 #: strlen(this._nongroup_parent._qname()) + 1; @@ -3640,7 +3799,10 @@ template event is (object, shown_desc) { if (!SIM_object_clock(dev.obj)) // without a clock, we cannot have posted any events return; + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); SIM_event_cancel_time(dev.obj, evclass, dev.obj, NULL, NULL); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); } // internal callbacks, invoked by auto-generated callback functions method __describe_event(void *data) -> (char *) default { @@ -3785,12 +3947,20 @@ template _time_event is _event { if (clk == NULL) log error: "The 'queue' attribute is not set," + " cannot post event"; - else + else { + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); SIM_event_post_time(clk, *_pevclass, dev.obj, when, data); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); + } } shared method _next(void *data) -> (double) { - return SIM_event_find_next_time(dev.obj, *_pevclass, - dev.obj, DML_pointer_eq, data); + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); + local double upcoming = SIM_event_find_next_time( + dev.obj, *_pevclass, dev.obj, DML_pointer_eq, data); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); + return upcoming; } } @@ -3800,12 +3970,20 @@ template _cycle_event is _event { if (clk == NULL) log error: "The 'queue' attribute is not set," + " cannot post event"; - else + else { + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); SIM_event_post_cycle(clk, *_pevclass, dev.obj, when, data); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); + } } shared method _next(void *data) -> (uint64) { - return SIM_event_find_next_cycle(dev.obj, *_pevclass, - dev.obj, DML_pointer_eq, data); + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); + local double upcoming = SIM_event_find_next_cycle( + dev.obj, *_pevclass, dev.obj, DML_pointer_eq, data); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); + return upcoming; } } @@ -3834,13 +4012,20 @@ template _simple_event is _event { shared method _destroy(void *data) {} shared method posted() -> (bool) { - return SIM_event_find_next_cycle(dev.obj, *_pevclass, - dev.obj, DML_pointer_eq, NULL) >= 0; + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); + local double upcoming = SIM_event_find_next_cycle( + dev.obj, *_pevclass, dev.obj, DML_pointer_eq, NULL); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); + return upcoming >= 0; } shared method remove() { + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); SIM_event_cancel_time(dev.obj, *_pevclass, dev.obj, DML_pointer_eq, NULL); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); } } @@ -3940,14 +4125,20 @@ template _uint64_event is _event { } shared method posted(uint64 data) -> (bool) { - return SIM_event_find_next_cycle(dev.obj, *_pevclass, - dev.obj, DML_pointer_eq, - _int_to_voidp(data)) >= 0; + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); + local double upcoming = SIM_event_find_next_cycle( + dev.obj, *_pevclass, dev.obj, DML_pointer_eq, _int_to_voidp(data)); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); + return upcoming >= 0; } shared method remove(uint64 data) { + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); SIM_event_cancel_time(dev.obj, *_pevclass, dev.obj, DML_pointer_eq, _int_to_voidp(data)); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); } shared method event(uint64 data); diff --git a/lib/1.4/utility.dml b/lib/1.4/utility.dml index e2d68170..cb002878 100644 --- a/lib/1.4/utility.dml +++ b/lib/1.4/utility.dml @@ -1127,6 +1127,8 @@ template function_io_memory { local uint64 offset = map_info.start - map_info.base; foreach b in (each function_mapped_bank in (dev)) { if (b.function == map_info.function) { + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); if (!b.use_io_memory) { local map_target_t *mt = SIM_new_map_target(b._bank_obj(), NULL, NULL); @@ -1134,6 +1136,7 @@ template function_io_memory { local exception_type_t _exc = SIM_clear_exception(); log error: "failed to create map target for %s: %s", SIM_object_name(b._bank_obj()), SIM_last_error(); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); return Sim_PE_IO_Not_Taken; } // hack: VT_map_target_access doesn't accept a map_info @@ -1146,14 +1149,17 @@ template function_io_memory { = VT_map_target_access(mt, mem_op); SIM_set_mem_op_physical_address(mem_op, before); SIM_free_map_target(mt); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); return ret; } if (b.io_memory_access( mem_op, SIM_get_mem_op_physical_address(mem_op) + offset, NULL)) { + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); return Sim_PE_No_Exception; } else { + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); return Sim_PE_IO_Not_Taken; } } @@ -1373,12 +1379,15 @@ template map_target is (connect, _qname) { return Sim_PE_IO_Not_Taken; } } + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(obj, &lock); local exception_type_t exc = SIM_issue_transaction(map_target, t, addr); local bool fail = exc != Sim_PE_No_Exception; log info, (fail ? 2 : 4) : "%s%s %d bytes @ 0x%x in %s", fail ? "failed to " : "", SIM_transaction_is_read(t) ? "read" : fail ? "write" : "wrote", SIM_transaction_size(t), addr, SIM_object_name(obj); + #if (dev._thread_aware) SIM_RELEASE_CELL(obj, &lock); return exc; } } @@ -1441,8 +1450,11 @@ template signal_connect is (connect, post_init) { } method post_init() default { + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); if (!SIM_is_restoring_state(dev.obj) && signal.val && signal.high) signal.signal_raise(); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); } method set(conf_object_t *obj) default { @@ -1456,8 +1468,11 @@ template signal_connect is (connect, post_init) { // * reverse executing a hotplug (quite unlikely), i.e. the signal is // currently high but the effects of the hotplug has already taken // place so we should NOT treat it as a hotplug. + local domain_lock_t *lock; + #if (dev._thread_aware) SIM_ACQUIRE_CELL(dev.obj, &lock); local bool hotplug = SIM_object_is_configured(dev.obj) && !SIM_is_restoring_state(dev.obj); + #if (dev._thread_aware) SIM_RELEASE_CELL(dev.obj, &lock); if (hotplug && signal.val && signal.high) signal.signal_lower(); default(obj); diff --git a/py/dml/c_backend.py b/py/dml/c_backend.py index 48229685..0113e491 100644 --- a/py/dml/c_backend.py +++ b/py/dml/c_backend.py @@ -94,6 +94,26 @@ def output_dml_state_change(device_ref): f"{device_ref});\n") out("}\n", preindent = -1) +def output_domain_lock_decl(lock_name): + if dml.globals.thread_aware: + out(f'domain_lock_t *{lock_name};\n') + +def output_acquire_object(device_ref, lock): + if dml.globals.thread_aware: + out(f'SIM_ACQUIRE_OBJECT({device_ref}, &{lock});\n') + +def output_release_object(device_ref, lock): + if dml.globals.thread_aware: + out(f'SIM_RELEASE_OBJECT({device_ref}, &{lock});\n') + +def output_acquire_cell(device_ref, lock): + if dml.globals.thread_aware: + out(f'SIM_ACQUIRE_CELL({device_ref}, &{lock});\n') + +def output_release_cell(device_ref, lock): + if dml.globals.thread_aware: + out(f'SIM_RELEASE_CELL({device_ref}, &{lock});\n') + registered_attribute_names = {} def register_attribute(site, port, name): '''remember that attribute 'name' is registered on port 'port', @@ -402,6 +422,9 @@ def generate_attr_setter(fname, node, port, dimsizes, cprefix, loopvars, out(crep.structtype(device)+' *_dev UNUSED = (' + crep.structtype(device)+'*)_obj;\n') port_indices = () + + output_domain_lock_decl('_lock') + output_acquire_object('&_dev->obj', '_lock') out('set_error_t _status = Sim_Set_Illegal_Value;\n') if loopvars: out('uint32 ' + ','.join(v.str for v in loopvars) + ';\n') @@ -442,6 +465,7 @@ def generate_attr_setter(fname, node, port, dimsizes, cprefix, loopvars, out('}\n', preindent = -1) out('exit:\n') output_dml_state_change('_dev') + output_release_object('&_dev->obj', '_lock') out('return _status;\n') out('}\n\n', preindent = -1) splitting_point() @@ -464,6 +488,8 @@ def generate_attr_getter(fname, node, port, dimsizes, cprefix, loopvars): out(crep.structtype(device)+' *_dev UNUSED = (' + crep.structtype(device)+'*)_obj;\n') port_indices = () + output_domain_lock_decl('_lock') + output_acquire_object('&_dev->obj', '_lock') out('attr_value_t _val0;\n') if loopvars: @@ -497,6 +523,7 @@ def generate_attr_getter(fname, node, port, dimsizes, cprefix, loopvars): out('}\n', preindent = -1) output_dml_state_change('_dev') + output_release_object('&_dev->obj', '_lock') out('return _val0;\n') out('}\n\n', preindent = -1) splitting_point() @@ -744,6 +771,8 @@ def wrap_method(meth, wrapper_name, indices=()): assert meth.dimensions == len(indices) out(devstruct+' *_dev UNUSED = ('+devstruct+'*)_obj;\n') indices = tuple(mkIntegerLiteral(meth.site, i) for i in indices) + output_domain_lock_decl('_lock') + output_acquire_object('&_dev->obj', '_lock') with crep.DeviceInstanceContext(): if retvar: mkDeclaration(meth.site, retvar, rettype, @@ -757,6 +786,7 @@ def wrap_method(meth, wrapper_name, indices=()): indices, inargs, outargs).toc() output_dml_state_change('_dev') + output_release_object('&_dev->obj', '_lock') if retvar: out('return '+retvar+';\n') out('}\n', preindent = -1) @@ -1036,6 +1066,8 @@ def generate_simple_events(device): out('_simple_event_data_t *data = ' + '(_simple_event_data_t *)_data;\n') + output_domain_lock_decl('_lock') + output_acquire_object('_obj', '_lock') # If data is NULL, report it and use emergency indices/args if info.dimensions or info.args_type: out('if (!data) {', postindent=1) @@ -1066,6 +1098,7 @@ def generate_simple_events(device): out('MM_FREE(data);\n') out('}\n', preindent=-1) output_dml_state_change('_dev') + output_release_object('_obj', '_lock') out('}\n\n', preindent = -1) splitting_point() @@ -1222,6 +1255,10 @@ def generate_immediate_after_callbacks(device): indices_lit = 'indices' if info.dimensions else None args_lit = 'args' if info.args_type else None info.generate_callback_call(indices_lit, args_lit) + # Minor HACK: this callback function takes care of state change + # notification, but not lock acquisition and release. That's because + # that must be taken care of by _DML_execute_immediate_afters, + # as it does its own inspection on the device state. output_dml_state_change('_dev') out('}\n\n', preindent = -1) splitting_point() @@ -1232,10 +1269,14 @@ def generate_simple_events_control_methods(device): out('{\n', postindent = 1) out(crep.structtype(device) + ' *_dev UNUSED = (' + crep.structtype(device) + '*)_obj;\n') - for key in dml.globals.after_delay_infos: - out('SIM_event_cancel_time(' - + f'SIM_object_clock(_obj), {crep.get_evclass(key)}, _obj, ' - + '_simple_event_predicate, (lang_void *) &domain);\n') + if dml.globals.after_delay_infos: + output_domain_lock_decl('_lock') + output_acquire_cell('_obj', '_lock') + for key in dml.globals.after_delay_infos: + out('SIM_event_cancel_time(' + + f'SIM_object_clock(_obj), {crep.get_evclass(key)}, _obj, ' + + '_simple_event_predicate, (lang_void *) &domain);\n') + output_release_cell('_obj', '_lock') site = logging.SimpleSite('<_cancel_simple_events>') by_dims = {} @@ -1296,25 +1337,27 @@ def generate_register_events(device): if not events and not dml.globals.after_delay_infos: out('return;\n') else: + flags = 'Sim_EC_No_Serialize' if dml.globals.thread_aware else '0' for event in events: if (dml.globals.dml_version == (1, 2) and param_str(event, 'timebase') == 'stacked' and event.dimensions > 0): raise ICE(event, "stacked event array not supported") for indices in event.all_indices(): - out('%s%s = SIM_register_event("%s", class, 0, %s);\n' + out('%s%s = SIM_register_event("%s", class, %s, %s);\n' % (crep.get_evclass(event), - ''.join('[' + str(i) + ']' for i in indices), - event.logname_anonymized(indices), - ', '.join( - cname - for (_, cname) in event_callbacks(event, + ''.join('[' + str(i) + ']' for i in indices), + event.logname_anonymized(indices), + flags, + ', '.join( + cname + for (_, cname) in event_callbacks(event, indices)))) for (key, info) in dml.globals.after_delay_infos.items(): - out(('%s = SIM_register_event(%s, class, 0, %s, %s, %s, %s, ' + out(('%s = SIM_register_event(%s, class, %s, %s, %s, %s, %s, ' + 'NULL);\n') % (crep.get_evclass(key), string_literal(info.string_key), - info.cident_callback, '_destroy_simple_event_data', + flags, info.cident_callback, '_destroy_simple_event_data', info.cident_get_value, info.cident_set_value)) out('}\n\n', preindent = -1) splitting_point() @@ -1429,9 +1472,12 @@ def generate_state_notify(device): out("if (!dev->_issuing_state_callbacks) {\n", postindent = 1) out("dev->_issuing_state_callbacks = true;\n") + output_domain_lock_decl('_cell_lock') + output_acquire_cell('&dev->obj', '_cell_lock') out("SIM_notify(&dev->obj, Sim_Notify_State_Change)" ";\n") out("dev->_issuing_state_callbacks = false;\n") + output_release_cell('&dev->obj', '_cell_lock') out("}\n", preindent = -1) out("}\n", preindent = -1) @@ -1654,9 +1700,13 @@ def generate_deinit(device): for i in range(len(dims)): out('}\n', preindent=-1) - for key in dml.globals.after_delay_infos: - out(f'SIM_event_cancel_time(_obj, {crep.get_evclass(key)}, _obj, ' - + '0, NULL);\n') + if dml.globals.after_delay_infos: + output_domain_lock_decl('_cell_lock') + output_acquire_cell('_obj', '_cell_lock') + for key in dml.globals.after_delay_infos: + out(f'SIM_event_cancel_time(_obj, {crep.get_evclass(key)}, ' + + '_obj, 0, NULL);\n') + output_release_cell('_obj', '_cell_lock') with LogFailure( device.get_component('destroy', 'method').site, device, ()): @@ -1996,6 +2046,8 @@ def generate_static_trampoline(func): "%s(%s)" % ("_trampoline" + func.get_cname(), params_string))) out("{\n", postindent=1) out('ASSERT(_obj);\n') + output_domain_lock_decl('_lock') + output_acquire_object('_obj', '_lock') out('ASSERT(SIM_object_class(_obj) == _dev_class);\n') (name, typ) = func.cparams[0] out("%s = (%s)_obj;\n" % (typ.declaration(name), typ.declaration(""))) @@ -2004,6 +2056,7 @@ def generate_static_trampoline(func): func.get_cname(), ", ".join(n for (n, t) in func.cparams))) output_dml_state_change(name) + output_release_object('_obj', '_lock') if not func.rettype.void: out("return result;\n") out("}\n", preindent=-1) diff --git a/py/dml/ctree.py b/py/dml/ctree.py index d9481a70..33803cd0 100644 --- a/py/dml/ctree.py +++ b/py/dml/ctree.py @@ -701,9 +701,14 @@ def lm_out(*args, **kwargs): data = '(lang_void *)_data' else: data = 'NULL' + if dml.globals.thread_aware: + lm_out(f'domain_lock_t *_cell_lock;\n') + lm_out(f'SIM_ACQUIRE_CELL({objarg}, &_cell_lock);\n') lm_out(f'SIM_event_post_{self.unit}(SIM_object_clock({objarg}), ' + f'{crep.get_evclass(self.info.key)}, {objarg}, ' + f'{self.delay.read()}, {data});\n') + if dml.globals.thread_aware: + lm_out(f'SIM_RELEASE_CELL({objarg}, &_cell_lock);\n') lm_out("}\n", preindent = -1) mkAfter = After @@ -2880,9 +2885,15 @@ def __str__(self): self.node_expr, self.method_name, '(' + ", ".join(str(e) for e in self.args) + ')') def read(self): - return "(%s)->%s(%s)" % ( + args = [e.read() for e in self.args] + call = "(%s)->%s(%s)" % ( read_iface_struct(self.node_expr), self.method_name, - ", ".join(e.read() for e in self.args)) + ", ".join(args)) + if not dml.globals.thread_aware: + return call + + maybe_void = '_VOID' * realtype_shallow(self.type).void + return f'DML_THREAD_AWARE_IFACE_CALL{maybe_void}({args[0]}, {call})' class InterfaceMethodRef(NonValue): '''Reference to an interface method''' diff --git a/py/dml/globals.py b/py/dml/globals.py index d3a50181..ed698033 100644 --- a/py/dml/globals.py +++ b/py/dml/globals.py @@ -69,6 +69,10 @@ def compat_dml12_int(site): coverity_pragmas = {} +# Whether the device instantiates the `_thread_aware` template, and should be +# generating lock acquisition and releases +thread_aware = False + # Enable features that are not supported in any capacity, for testing purposes. # This is reserved for features that are fully implemented, but whose design # cannot be finalized until later in the future. diff --git a/py/dml/structure.py b/py/dml/structure.py index c04cff86..88f6bed8 100644 --- a/py/dml/structure.py +++ b/py/dml/structure.py @@ -1491,6 +1491,13 @@ def mkobj2(obj, obj_specs, params, each_stmts): for (issite, tpl) in obj_spec.templates: if tpl.trait: obj_traits.append((issite, tpl.trait)) + # TODO remove once thread-awareness support is public. + # Determining thread-awareness via the _thread_aware param is + # cleaner, but doesn't provide is-site info. + if (obj.objtype == 'device' + and tpl.name == '_thread_aware' + and dml.globals.dml_version != (1, 2)): + report(WEXPERIMENTAL(issite, 'thread-aware device model')) for obj_spec in obj_specs: for (templates, spec) in obj_spec.in_eachs: @@ -1955,6 +1962,9 @@ def mkobj2(obj, obj_specs, params, each_stmts): mark_method_referenced(func) mark_method_exported(func, name, export.site) + if dml.globals.dml_version != (1, 2): + dml.globals.thread_aware = param_bool(obj, '_thread_aware') + elif obj.objtype == 'bank': set_confidential_object(obj) if logging.show_porting: diff --git a/test/1.4/misc/thread_aware.dml b/test/1.4/misc/thread_aware.dml new file mode 100644 index 00000000..6535337f --- /dev/null +++ b/test/1.4/misc/thread_aware.dml @@ -0,0 +1,241 @@ +/* + © 2024 Intel Corporation + SPDX-License-Identifier: MPL-2.0 +*/ + +dml 1.4; +device test; + +import "utility.dml"; +import "simics/util/os.dml"; + +saved int s; + +// This test should accomplish two things: +// 1. Only pass if the device is actually considered thread-aware, and is +// entered without the cell domain being taken. +// 2. Only pass if the device acquires and releases locks correctly +// +// Accomplished by two cooperative devices that have a piece of shared state: +// 1. is accomplished by manipulating the shared state without locking, +// relying on sleeps for one thread to observe the changes made by the +// other, and performing tests on that state that can only succeed if +// the two devices are entered concurrently. +// 2. is accomplished by manipulations on the shared state while holding +// an object lock; then having a device entry to inspect the +// state be blocked by an existing entry that modifies it. This blocking is +// ensured by also leveraging sleeps. The test is practically guaranteed to +// succeed only if object locking is done correctly. + +/// WARNING WEXPERIMENTAL +is _thread_aware; + +header %{ + static volatile int32 shared_state = 0; +%} + +extern int32 shared_state; + +connect buddy { + session int32 *state; + param configuration = "optional"; + interface pulse; +} + +attribute setup_main is write_only_attr { + param type = "n"; + method set(attr_value_t val) throws default { + after 1 cycles: event(); + } + + method event() { + os_millisleep(50); + // Second resolved (due to the sleep in setup_buddy.event() being + // longer) + + // This essentially serves as a check that setup_buddy.event() is + // ongoing; and thus the buddy is holding its own lock + assert shared_state == 1; + // Modify shared state without acquiring lock. + shared_state = 2; + // Then try to enter the buddy, and become blocked + buddy.pulse.pulse(); + } +} + +attribute setup_buddy is write_only_attr { + param type = "n"; + method set(attr_value_t val) throws default { + after 1 cycles: event(); + } + + method event() { + // First resolved (due to the sleep in setup_main.event()) + shared_state = 1; + os_millisleep(100); + // Third resolved. + assert shared_state == 2; + shared_state = 3; + // Once this is left, the object lock of the buddy will be released + } +} + +implement pulse { + method pulse() { + // Last resolved, when setup_main.event() becomes unblocked and may + // enter the buddy + assert shared_state == 3; + } +} + +attribute shared_state_attr { + param type = "i"; + method get() -> (attr_value_t) { + return SIM_make_attr_int64(shared_state); + } + method set(attr_value_t val) throws { + shared_state = SIM_attr_integer(val); + } +} + +bank b; +port p; +subdevice sd { + subdevice sd { + bank b; + port p; + } + bank b; + port p; +} + +// Remainder is skeleton code stolen from 1.4/misc/notify_state. +// We have to do similar tests that object locking is done for the various ways +// the device may be entered. +// ... That, or write those tests via ctree_tests.py + +header %{ + #include + extern uint64 *get_count_pointer(conf_object_t *obj); + void state_cb(conf_object_t *obj, conf_object_t *_, lang_void *__) { + (*get_count_pointer(obj))++; + } + extern void exported_method(conf_object_t *); + void exported_trampoline(conf_object_t * obj, conf_object_t *_, + lang_void *__) { + exported_method(obj); + } + extern void independent_exported_method(conf_object_t *); + void independent_exported_trampoline(conf_object_t * obj, conf_object_t *_, + lang_void *__) { + independent_exported_method(obj); + } + void register_callbacks(conf_object_t *obj) { + SIM_add_notifier(obj, + Sim_Notify_State_Change, + obj, + state_cb, + NULL); + notifier_type_t notifier_type = SIM_notifier_type("exported-entry"); + SIM_register_notifier(SIM_object_class(obj), notifier_type, + "exported entry"); + SIM_add_notifier(obj, notifier_type, obj, exported_trampoline, NULL); + + notifier_type = SIM_notifier_type("statically-exported-entry"); + SIM_register_notifier(SIM_object_class(obj), notifier_type, + "statically exported entry"); + SIM_add_notifier(obj, notifier_type, obj, + independent_exported_trampoline, NULL); + } +%} + +extern void register_callbacks(conf_object_t *obj); + +method init() { + register_callbacks(dev.obj); +} + +// These will both only be called from the callbacks registered +// so, we do not need to be concerned with this being called twice +method get_count() -> (uint64 *) { + return &count.count; +} +export get_count as "get_count_pointer"; + +attribute count is read_only_attr { + param type = "i"; + session uint64 count; + method get() -> (attr_value_t) { + // The count is incremented at exit of this method, so we + // must compensate + return SIM_make_attr_int64(count--); + } +} + +method exposed_method() { + log info, 1: "Exposed method called"; +} +export exposed_method as "exported_method"; + +method indirectly_exposed_method() { + log info, 1: "Indirectly exposed method called"; +} + +independent method indie_exposed_method(conf_object_t *obj) { + (&indirectly_exposed_method)(obj); +} +export indie_exposed_method as "independent_exported_method"; + +attribute a is pseudo_attr { + param type = "n"; + method set(attr_value_t val) throws default { + log info, 1: "Attribute set called"; + } + method get() -> (attr_value_t) { + log info, 1: "Attribute get called"; + return SIM_make_attr_nil(); + } +} + +attribute ev is write_only_attr { + param type = "n"; + method event() { + log info, 1: "Event triggered"; + } + method set(attr_value_t val) throws default { + after 0.1 s: event(); + } +} + +attribute immediate_after is write_only_attr { + param type = "n"; + method event() { + log info, 1: "Immediate after triggered"; + } + method set(attr_value_t val) throws default { + after: event(); + } +} + +implement signal { + method signal_raise() { + log info, 1: "Interface method called"; + } + method signal_lower() { + // noop + } +} + + +// port insig is signal_port; +// connect outsig is signal_connect; + +// attribute test_outsig is (pseudo_attr, bool_attr) { +// method set(attr_value_t value) throws { +// default(value); +// if (this.val) +// outsig.set_level(1); +// else +// outsig.set_level(0); +// } +// } diff --git a/test/1.4/misc/thread_aware.py b/test/1.4/misc/thread_aware.py new file mode 100644 index 00000000..26455eb4 --- /dev/null +++ b/test/1.4/misc/thread_aware.py @@ -0,0 +1,65 @@ +# © 2024 Intel Corporation +# SPDX-License-Identifier: MPL-2.0 + +import stest + +cpu1 = SIM_create_object("clock", "clock1", freq_mhz=1) +cpu2 = SIM_create_object("clock", "clock2", freq_mhz=1, cell=cpu1.cell) + +buddy = SIM_create_object("test", "buddy") +obj.queue = cpu1 +obj.buddy = buddy +buddy.queue = cpu2 + +SIM_run_command('enable-multicore-accelerator') +for cpu in (cpu1, cpu2): + stest.expect_equal(cpu.multicore_accelerator_enabled, True) + +for dev in (obj, buddy): + stest.expect_equal(dev.iface.concurrency_mode.current_mode(), + Sim_Concurrency_Mode_Serialized_Memory) + +obj.setup_main = None +buddy.setup_buddy = None + +SIM_continue(1) +stest.expect_equal(obj.shared_state_attr, 3) + + +# # stest.expect_equal(obj.count, 0) + +# obj.a = None + +# # stest.expect_equal(obj.count, 1) + +# local = obj.a + +# # stest.expect_equal(obj.count, 2) + +# obj.ev = None + +# # stest.expect_equal(obj.count, 3) + +# SIM_continue(100000) + +# # stest.expect_equal(obj.count, 4) + +# obj.iface.signal.signal_raise() + +# # stest.expect_equal(obj.count, 5) + +# SIM_notify(obj, SIM_notifier_type("exported-entry")) + +# # stest.expect_equal(obj.count, 6) + +# SIM_notify(obj, SIM_notifier_type("statically-exported-entry")) + +# # stest.expect_equal(obj.count, 7) + +# obj.immediate_after = None + +# # stest.expect_equal(obj.count, 8) + +# SIM_process_pending_work() + +# # stest.expect_equal(obj.count, 9) diff --git a/test/tests.py b/test/tests.py index 96564af0..65a56d4c 100644 --- a/test/tests.py +++ b/test/tests.py @@ -193,7 +193,9 @@ class DMLFileTestCase(BaseTestCase): 'simics_stderr', 'extraenv', # Extra environment variables - 'status' # Expected status + 'status', # Expected status + + 'thread_safe', # If the device module is considered thread-safe ) def __init__(self, fullname, filename, **info): BaseTestCase.__init__(self, fullname) @@ -205,6 +207,7 @@ def __init__(self, fullname, filename, **info): self.cc_extraargs = [] self.status = 0 self.extraenv = {} + self.thread_safe = False # Override defaults for k,v in info.items(): setattr(self, k, v) @@ -649,7 +652,7 @@ class options(object): cpumod = None date = None product = None - thread_safe = "no" + thread_safe = "yes" if self.thread_safe else "no" host_type = host_type() py_version = None py_iface_lists = [] @@ -1077,6 +1080,11 @@ def run_cc(self, cc_extraargs): status = 2, dmlc_extraargs = ["--werror"])) +all_tests.append(CTestCase( + ["1.4", "misc", "thread_aware"], + join(testdir, "1.4", "misc", "thread_aware.dml"), + thread_safe=True)) + if get_simics_major() == "6": all_tests.append(CTestCase( ["1.2", "errors", "WREF"],