Skip to content

Conversation

@Max042004
Copy link

@Max042004 Max042004 commented Nov 7, 2025

Previous commit 78836cf prevents frequently update of rv->csr_cycle by manipulate it with register. However, currently rv->csr_cycle only update after a block end or JIT clearing block map. This results csr cycle instructions cannot fetch up-to-date cycle. If csr cycle instructions are located in a same block, leading to the result cycle count becomes zero.

This commit updates rv->csr_cycle everytime before csr instruction is executed.


Summary by cubic

Synchronizes CSR cycle reads by updating rv->csr_cycle before each CSR instruction, so cycle returns the current value even within the same block. Fixes cases where CSR cycle could read zero or stale counts until block end.

  • Bug Fixes
    • Set rv->csr_cycle = cycle at the start of csrrw, csrrs, csrrc, csrrwi, csrrsi, and csrrci handlers.

Written for commit 3fede2a. Summary will update automatically on new commits.

Copy link
Contributor

@jserv jserv left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarks

Benchmark suite Current: 3fede2a Previous: 3e71dc3 Ratio
Dhrystone 1302 Average DMIPS over 10 runs 1133 Average DMIPS over 10 runs 0.87
Coremark 918.848 Average iterations/sec over 10 runs 920.513 Average iterations/sec over 10 runs 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Copy link

@cubic-dev-ai cubic-dev-ai bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No issues found across 1 file

@jserv
Copy link
Contributor

jserv commented Nov 7, 2025

Consider the changes below:

diff --git a/src/emulate.c b/src/emulate.c
index e5e4cdd..caff2d4 100644
--- a/src/emulate.c
+++ b/src/emulate.c
@@ -190,8 +190,19 @@ static uint32_t *csr_get_ptr(riscv_t *rv, uint32_t csr)
  * If rd == x0, then the instruction shall not read the CSR and shall not cause
  * any of the side effects that might occur on a CSR read.
  */
-static uint32_t csr_csrrw(riscv_t *rv, uint32_t csr, uint32_t val)
+static uint32_t csr_csrrw(riscv_t *rv, uint32_t csr, uint32_t val, uint64_t cycle)
 {
+    /* Sync cycle counter for cycle-related CSRs only */
+    switch (csr & 0xFFF) {
+    case CSR_CYCLE:
+    case CSR_CYCLEH:
+    case CSR_INSTRET:
+    case CSR_INSTRETH:
+        if (rv->csr_cycle != cycle)
+            rv->csr_cycle = cycle;
+        break;
+    }
+
     uint32_t *c = csr_get_ptr(rv, csr);
     if (!c)
         return 0;
@@ -222,8 +233,19 @@ static uint32_t csr_csrrw(riscv_t *rv, uint32_t csr, uint32_t val)
 }
 
 /* perform csrrs (atomic read and set) */
-static uint32_t csr_csrrs(riscv_t *rv, uint32_t csr, uint32_t val)
+static uint32_t csr_csrrs(riscv_t *rv, uint32_t csr, uint32_t val, uint64_t cycle)
 {
+    /* Sync cycle counter for cycle-related CSRs only */
+    switch (csr & 0xFFF) {
+    case CSR_CYCLE:
+    case CSR_CYCLEH:
+    case CSR_INSTRET:
+    case CSR_INSTRETH:
+        if (rv->csr_cycle != cycle)
+            rv->csr_cycle = cycle;
+        break;
+    }
+
     uint32_t *c = csr_get_ptr(rv, csr);
     if (!c)
         return 0;
@@ -243,8 +265,19 @@ static uint32_t csr_csrrs(riscv_t *rv, uint32_t csr, uint32_t val)
  * Read old value of CSR, zero-extend to XLEN bits, write to rd.
  * Read value from rs1, use as bit mask to clear bits in CSR.
  */
-static uint32_t csr_csrrc(riscv_t *rv, uint32_t csr, uint32_t val)
+static uint32_t csr_csrrc(riscv_t *rv, uint32_t csr, uint32_t val, uint64_t cycle)
 {
+    /* Sync cycle counter for cycle-related CSRs only */
+    switch (csr & 0xFFF) {
+    case CSR_CYCLE:
+    case CSR_CYCLEH:
+    case CSR_INSTRET:
+    case CSR_INSTRETH:
+        if (rv->csr_cycle != cycle)
+            rv->csr_cycle = cycle;
+        break;
+    }
+
     uint32_t *c = csr_get_ptr(rv, csr);
     if (!c)
         return 0;
diff --git a/src/rv32_template.c b/src/rv32_template.c
index 6b46f36..9a4f913 100644
--- a/src/rv32_template.c
+++ b/src/rv32_template.c
@@ -1216,8 +1216,7 @@ RVOP(
 RVOP(
     csrrw,
     {
-        rv->csr_cycle = cycle;
-        uint32_t tmp = csr_csrrw(rv, ir->imm, rv->X[ir->rs1]);
+        uint32_t tmp = csr_csrrw(rv, ir->imm, rv->X[ir->rs1], cycle);
         rv->X[ir->rd] = ir->rd ? tmp : rv->X[ir->rd];
     },
     GEN({
@@ -1236,9 +1235,8 @@ RVOP(
 RVOP(
     csrrs,
     {
-        rv->csr_cycle = cycle;
         uint32_t tmp = csr_csrrs(
-            rv, ir->imm, (ir->rs1 == rv_reg_zero) ? 0U : rv->X[ir->rs1]);
+            rv, ir->imm, (ir->rs1 == rv_reg_zero) ? 0U : rv->X[ir->rs1], cycle);
         rv->X[ir->rd] = ir->rd ? tmp : rv->X[ir->rd];
     },
     GEN({
@@ -1249,9 +1247,8 @@ RVOP(
 RVOP(
     csrrc,
     {
-        rv->csr_cycle = cycle;
         uint32_t tmp = csr_csrrc(
-            rv, ir->imm, (ir->rs1 == rv_reg_zero) ? 0U : rv->X[ir->rs1]);
+            rv, ir->imm, (ir->rs1 == rv_reg_zero) ? 0U : rv->X[ir->rs1], cycle);
         rv->X[ir->rd] = ir->rd ? tmp : rv->X[ir->rd];
     },
     GEN({
@@ -1262,8 +1259,7 @@ RVOP(
 RVOP(
     csrrwi,
     {
-        rv->csr_cycle = cycle;
-        uint32_t tmp = csr_csrrw(rv, ir->imm, ir->rs1);
+        uint32_t tmp = csr_csrrw(rv, ir->imm, ir->rs1, cycle);
         rv->X[ir->rd] = ir->rd ? tmp : rv->X[ir->rd];
     },
     GEN({
@@ -1274,8 +1270,7 @@ RVOP(
 RVOP(
     csrrsi,
     {
-        rv->csr_cycle = cycle;
-        uint32_t tmp = csr_csrrs(rv, ir->imm, ir->rs1);
+        uint32_t tmp = csr_csrrs(rv, ir->imm, ir->rs1, cycle);
         rv->X[ir->rd] = ir->rd ? tmp : rv->X[ir->rd];
     },
     GEN({
@@ -1286,8 +1281,7 @@ RVOP(
 RVOP(
     csrrci,
     {
-        rv->csr_cycle = cycle;
-        uint32_t tmp = csr_csrrc(rv, ir->imm, ir->rs1);
+        uint32_t tmp = csr_csrrc(rv, ir->imm, ir->rs1, cycle);
         rv->X[ir->rd] = ir->rd ? tmp : rv->X[ir->rd];
     },
     GEN({

The above eliminates a fundamental defect: the original PR writes the same sync logic six times, creating six potential failure points. When you modify sync behavior, you must find and update six scattered locations. Miss one and you have a silent bug that only appears with specific instruction sequences. It writes the logic once in each of three helper functions, cutting failure points in half and grouping all sync logic in adjacent functions visible.

@jserv jserv requested a review from vacantron November 7, 2025 15:20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants