diff --git a/RELEASENOTES-1.4.docu b/RELEASENOTES-1.4.docu
index 592e86d75..1fd00839e 100644
--- a/RELEASENOTES-1.4.docu
+++ b/RELEASENOTES-1.4.docu
@@ -521,4 +521,6 @@
This fix is only enabled by default with Simics API version 7 or above.
With version 6 or below it must be explicitly enabled by passing
--no-compat=shared_logs_on_device to DMLC.
+ Improved the run-time performance
+ of accesses to registers with many fields.
diff --git a/lib/1.4/dml-builtins.dml b/lib/1.4/dml-builtins.dml
index 91747a226..46fea4424 100644
--- a/lib/1.4/dml-builtins.dml
+++ b/lib/1.4/dml-builtins.dml
@@ -21,6 +21,7 @@ import "simics/model-iface/register-view.dml";
import "simics/model-iface/register-view-read-only.dml";
import "simics/model-iface/transaction.dml";
import "simics/util/hashtab.dml";
+import "simics/util/bitcount.dml";
import "simics/simulator/python.dml";
import "simics/simulator/sim-get-class.dml";
@@ -2825,55 +2826,23 @@ template register is (_conf_attribute, get, set, shown_desc,
// fills in the passed array with all fields in this register, and returns
// the number of elements and a bitmask showing bits not covered by fields.
- shared method _get_all_fields(field *fields) -> (int, uint64) /* n, unmapped */ {
+ shared method _get_get_fields(_get_field *fields) -> (int, uint64) /* n, unmapped */ {
local uint64 unmapped;
- unmapped[this.bitsize-1:0] = -1;
+ unmapped[this.bitsize - 1 : 0 ] = -1;
local int n = 0;
- // TODO: it is inefficient to re-generate the field list each call
- foreach f in (this.fields) {
+ local uint64 lsbs = 0;
+
+ // counting sort: mark each lsb here; popcount below gives the slot
+ foreach f in (_get_fields) {
local int lsb = f.lsb;
- // sort 'fields' by lsb
- local int i;
- for (i = n; i > 0; --i) {
- local int next_lsb = fields[i - 1].lsb;
- if (next_lsb > lsb) {
- fields[i] = fields[i - 1];
- } else {
- break;
- }
- }
- fields[i] = f;
+ lsbs[lsb] = 1;
unmapped[lsb + f.bitsize - 1 : lsb] = 0;
++n;
}
- assert n <= 64;
- return (n, unmapped);
- }
- // fills in the passed array with all fields in this register, and returns
- // the number of elements and a bitmask showing bits not covered by fields.
- shared method _get_get_fields(_get_field *fields) -> (int, uint64) /* n, unmapped */ {
- local uint64 unmapped;
- unmapped[this.bitsize-1:0] = -1;
- local int n = 0;
- // TODO: it is inefficient to re-generate the field list each call
- foreach f in (this._get_fields) {
- local int lsb = f.lsb;
- // sort 'fields' by lsb
- local int i;
- for (i = n; i > 0; --i) {
- local int next_lsb = fields[i - 1].lsb;
- if (next_lsb > lsb) {
- fields[i] = fields[i - 1];
- } else {
- break;
- }
- }
- fields[i] = f;
- unmapped[lsb + f.bitsize - 1 : lsb] = 0;
- ++n;
+ foreach f in (_get_fields) {
+ fields[bit_count64(lsbs[f.lsb : 0]) - 1] = f;
}
- assert n <= 64;
return (n, unmapped);
}
@@ -2881,26 +2850,21 @@ template register is (_conf_attribute, get, set, shown_desc,
// the number of elements and a bitmask showing bits not covered by fields.
shared method _get_set_fields(_set_field *fields) -> (int, uint64) /* n, unmapped */ {
local uint64 unmapped;
- unmapped[this.bitsize-1:0] = -1;
+ unmapped[this.bitsize - 1 : 0 ] = -1;
local int n = 0;
- // TODO: it is inefficient to re-generate the field list each call
- foreach f in (this._set_fields) {
+ local uint64 lsbs = 0;
+
+ // counting sort: mark each lsb here; popcount below gives the slot
+ foreach f in (_set_fields) {
local int lsb = f.lsb;
- // sort 'fields' by lsb
- local int i;
- for (i = n; i > 0; --i) {
- local int next_lsb = fields[i - 1].lsb;
- if (next_lsb > lsb) {
- fields[i] = fields[i - 1];
- } else {
- break;
- }
- }
- fields[i] = f;
+ lsbs[lsb] = 1;
unmapped[lsb + f.bitsize - 1 : lsb] = 0;
++n;
}
- assert n <= 64;
+
+ foreach f in (_set_fields) {
+ fields[bit_count64(lsbs[f.lsb : 0]) - 1] = f;
+ }
return (n, unmapped);
}
@@ -2908,26 +2872,21 @@ template register is (_conf_attribute, get, set, shown_desc,
// the number of elements and a bitmask showing bits not covered by fields.
shared method _get_write_fields(_write_field *fields) -> (int, uint64) /* n, unmapped */ {
local uint64 unmapped;
- unmapped[this.bitsize-1:0] = -1;
+ unmapped[this.bitsize - 1 : 0 ] = -1;
local int n = 0;
- // TODO: it is inefficient to re-generate the field list each call
- foreach f in (this._write_fields) {
+ local uint64 lsbs = 0;
+
+ // counting sort: mark each lsb here; popcount below gives the slot
+ foreach f in (_write_fields) {
local int lsb = f.lsb;
- // sort 'fields' by lsb
- local int i;
- for (i = n; i > 0; --i) {
- local int next_lsb = fields[i - 1].lsb;
- if (next_lsb > lsb) {
- fields[i] = fields[i - 1];
- } else {
- break;
- }
- }
- fields[i] = f;
+ lsbs[lsb] = 1;
unmapped[lsb + f.bitsize - 1 : lsb] = 0;
++n;
}
- assert n <= 64;
+
+ foreach f in (_write_fields) {
+ fields[bit_count64(lsbs[f.lsb : 0]) - 1] = f;
+ }
return (n, unmapped);
}
@@ -2935,26 +2894,21 @@ template register is (_conf_attribute, get, set, shown_desc,
// the number of elements and a bitmask showing bits not covered by fields.
shared method _get_read_fields(_read_field *fields) -> (int, uint64) /* n, unmapped */ {
local uint64 unmapped;
- unmapped[this.bitsize-1:0] = -1;
+ unmapped[this.bitsize - 1 : 0 ] = -1;
local int n = 0;
- // TODO: it is inefficient to re-generate the field list each call
- foreach f in (this._read_fields) {
+ local uint64 lsbs = 0;
+
+ // counting sort: mark each lsb here; popcount below gives the slot
+ foreach f in (_read_fields) {
local int lsb = f.lsb;
- // sort 'fields' by lsb
- local int i;
- for (i = n; i > 0; --i) {
- local int next_lsb = fields[i - 1].lsb;
- if (next_lsb > lsb) {
- fields[i] = fields[i - 1];
- } else {
- break;
- }
- }
- fields[i] = f;
+ lsbs[lsb] = 1;
unmapped[lsb + f.bitsize - 1 : lsb] = 0;
++n;
}
- assert n <= 64;
+
+ foreach f in (_read_fields) {
+ fields[bit_count64(lsbs[f.lsb : 0]) - 1] = f;
+ }
return (n, unmapped);
}