Skip to content

Commit 10cdcbc

Browse files
authored
Merge pull request #103 from yy214123/direct-mapped-cache
Optimize the instruction fetch path
2 parents 55af56c + 7658a3f commit 10cdcbc

File tree

3 files changed

+143
-25
lines changed

3 files changed

+143
-25
lines changed

main.c

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,8 +1024,24 @@ static void print_mmu_cache_stats(vm_t *vm)
10241024
fprintf(stderr, "\n=== MMU Cache Statistics ===\n");
10251025
for (uint32_t i = 0; i < vm->n_hart; i++) {
10261026
hart_t *hart = vm->hart[i];
1027-
uint64_t fetch_total =
1028-
hart->cache_fetch.hits + hart->cache_fetch.misses;
1027+
1028+
/* Combine 2-entry TLB statistics */
1029+
uint64_t fetch_hits_tlb = 0, fetch_misses_tlb = 0;
1030+
fetch_hits_tlb =
1031+
hart->cache_fetch[0].tlb_hits + hart->cache_fetch[1].tlb_hits;
1032+
fetch_misses_tlb =
1033+
hart->cache_fetch[0].tlb_misses + hart->cache_fetch[1].tlb_misses;
1034+
uint64_t tlb_total = fetch_hits_tlb + fetch_misses_tlb;
1035+
1036+
/* Combine I-cache statistics */
1037+
uint64_t fetch_hits_icache = 0, fetch_misses_icache = 0;
1038+
fetch_hits_icache =
1039+
hart->cache_fetch[0].icache_hits + hart->cache_fetch[1].icache_hits;
1040+
fetch_misses_icache = hart->cache_fetch[0].icache_misses +
1041+
hart->cache_fetch[1].icache_misses;
1042+
1043+
uint64_t access_total =
1044+
hart->cache_fetch[0].total_fetch + hart->cache_fetch[1].total_fetch;
10291045

10301046
/* Combine 8-set × 2-way load cache statistics */
10311047
uint64_t load_hits = 0, load_misses = 0;
@@ -1048,13 +1064,24 @@ static void print_mmu_cache_stats(vm_t *vm)
10481064
uint64_t store_total = store_hits + store_misses;
10491065

10501066
fprintf(stderr, "\nHart %u:\n", i);
1051-
fprintf(stderr, " Fetch: %12llu hits, %12llu misses",
1052-
hart->cache_fetch.hits, hart->cache_fetch.misses);
1053-
if (fetch_total > 0)
1054-
fprintf(stderr, " (%.2f%% hit rate)",
1055-
100.0 * hart->cache_fetch.hits / fetch_total);
1056-
fprintf(stderr, "\n");
1057-
1067+
/* Section header for the per-hart instruction-fetch counters (I-cache and TLB) printed below. */
fprintf(stderr, "\n=== Instruction Cache Statistics ===\n");
1068+
fprintf(stderr, " Total access: %12llu\n", access_total);
1069+
if (access_total > 0) {
1070+
fprintf(stderr, " Icache hits: %12llu (%.2f%%)\n",
1071+
fetch_hits_icache,
1072+
(fetch_hits_icache * 100.0) / access_total);
1073+
1074+
fprintf(stderr, " Icache misses: %12llu (%.2f%%)\n",
1075+
fetch_misses_icache,
1076+
(fetch_misses_icache * 100.0) / access_total);
1077+
}
1078+
if (tlb_total > 0) {
1079+
fprintf(stderr, " ├ TLB hits: %12llu (%.2f%%)\n",
1080+
fetch_hits_tlb, (fetch_hits_tlb * 100.0) / (tlb_total));
1081+
fprintf(stderr, " └ TLB misses: %12llu (%.2f%%)\n",
1082+
fetch_misses_tlb, (fetch_misses_tlb * 100.0) / (tlb_total));
1083+
}
1084+
fprintf(stderr, "\n=== Data Cache Statistics ===\n");
10581085
fprintf(stderr, " Load: %12llu hits, %12llu misses (8x2)", load_hits,
10591086
load_misses);
10601087
if (load_total > 0)

riscv.c

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <stdio.h>
2+
#include <string.h>
23

34
#include "common.h"
45
#include "device.h"
@@ -180,11 +181,17 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn)
180181
return vm->x_regs[decode_rs2(insn)];
181182
}
182183

184+
static inline void icache_invalidate_all(hart_t *vm)
185+
{
186+
memset(&vm->icache, 0, sizeof(vm->icache));
187+
}
188+
183189
/* virtual addressing */
184190

185191
void mmu_invalidate(hart_t *vm)
186192
{
187-
vm->cache_fetch.n_pages = 0xFFFFFFFF;
193+
vm->cache_fetch[0].n_pages = 0xFFFFFFFF;
194+
vm->cache_fetch[1].n_pages = 0xFFFFFFFF;
188195
/* Invalidate all 8 sets × 2 ways for load cache */
189196
for (int set = 0; set < 8; set++) {
190197
for (int way = 0; way < 2; way++)
@@ -197,6 +204,7 @@ void mmu_invalidate(hart_t *vm)
197204
vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF;
198205
vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */
199206
}
207+
icache_invalidate_all(vm);
200208
}
201209

202210
/* Invalidate MMU caches for a specific virtual address range.
@@ -226,10 +234,24 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size)
226234
end_addr = UINT32_MAX;
227235
uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT;
228236

229-
/* Cache invalidation for fetch cache */
230-
if (vm->cache_fetch.n_pages >= start_vpn &&
231-
vm->cache_fetch.n_pages <= end_vpn)
232-
vm->cache_fetch.n_pages = 0xFFFFFFFF;
237+
/* Invalidate fetch cache: 2 entries */
238+
for (int i = 0; i < 2; i++) {
239+
if (vm->cache_fetch[i].n_pages >= start_vpn &&
240+
vm->cache_fetch[i].n_pages <= end_vpn)
241+
vm->cache_fetch[i].n_pages = 0xFFFFFFFF;
242+
}
243+
244+
/* Invalidate I-cache: 256 blocks */
245+
for (int i = 0; i < ICACHE_BLOCKS; i++) {
246+
icache_block_t *blk = &vm->icache.block[i];
247+
if (!blk->valid)
248+
continue;
249+
250+
uint32_t icache_vpn = (blk->tag << ICACHE_INDEX_BITS) | i;
251+
icache_vpn >>= (RV_PAGE_SHIFT - ICACHE_OFFSET_BITS);
252+
if (icache_vpn >= start_vpn && icache_vpn <= end_vpn)
253+
blk->valid = false;
254+
}
233255

234256
/* Invalidate load cache: 8 sets × 2 ways */
235257
for (int set = 0; set < 8; set++) {
@@ -361,10 +383,38 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED)
361383

362384
static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
363385
{
386+
uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
387+
uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
388+
icache_block_t *blk = &vm->icache.block[idx];
364389
uint32_t vpn = addr >> RV_PAGE_SHIFT;
365-
if (unlikely(vpn != vm->cache_fetch.n_pages)) {
390+
uint32_t index = __builtin_parity(vpn) & 0x1;
391+
392+
#ifdef MMU_CACHE_STATS
393+
vm->cache_fetch[index].total_fetch++;
394+
#endif
395+
396+
/* I-cache lookup */
397+
if (likely(blk->valid && blk->tag == tag)) {
398+
/* I-cache hit */
399+
#ifdef MMU_CACHE_STATS
400+
vm->cache_fetch[index].icache_hits++;
401+
#endif
402+
uint32_t ofs = addr & ICACHE_BLOCK_MASK;
403+
*value = *(const uint32_t *) (blk->base + ofs);
404+
return;
405+
}
406+
/* I-cache miss */
407+
else {
408+
#ifdef MMU_CACHE_STATS
409+
vm->cache_fetch[index].icache_misses++;
410+
#endif
411+
}
412+
413+
/* I-cache miss, 2-entry TLB lookup */
414+
if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
415+
/* TLB miss */
366416
#ifdef MMU_CACHE_STATS
367-
vm->cache_fetch.misses++;
417+
vm->cache_fetch[index].tlb_misses++;
368418
#endif
369419
mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
370420
RV_EXC_FETCH_PFAULT);
@@ -374,15 +424,24 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
374424
vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr);
375425
if (vm->error)
376426
return;
377-
vm->cache_fetch.n_pages = vpn;
378-
vm->cache_fetch.page_addr = page_addr;
427+
vm->cache_fetch[index].n_pages = vpn;
428+
vm->cache_fetch[index].page_addr = page_addr;
379429
}
380-
#ifdef MMU_CACHE_STATS
430+
/* TLB hit */
381431
else {
382-
vm->cache_fetch.hits++;
383-
}
432+
#ifdef MMU_CACHE_STATS
433+
vm->cache_fetch[index].tlb_hits++;
384434
#endif
385-
*value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
435+
}
436+
437+
*value =
438+
vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
439+
440+
/* fill into the I-cache */
441+
uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
442+
blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
443+
blk->tag = tag;
444+
blk->valid = true;
386445
}
387446

388447
static void mmu_load(hart_t *vm,

riscv.h

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ typedef struct {
3636
uint32_t n_pages;
3737
uint32_t *page_addr;
3838
#ifdef MMU_CACHE_STATS
39-
uint64_t hits;
40-
uint64_t misses;
39+
uint64_t total_fetch;
40+
uint64_t tlb_hits, tlb_misses;
41+
uint64_t icache_hits, icache_misses;
4142
#endif
4243
} mmu_fetch_cache_t;
4344

@@ -75,7 +76,37 @@ typedef struct {
7576
typedef struct __hart_internal hart_t;
7677
typedef struct __vm_internel vm_t;
7778

79+
/* ICACHE_BLOCKS_SIZE: Size of one instruction-cache block (line).
80+
* ICACHE_BLOCKS: Number of blocks (lines) in the instruction cache.
81+
*
82+
* The cache address is decomposed into [ tag | index | offset ] fields:
83+
* - block-offset bits = log2(ICACHE_BLOCKS_SIZE)
84+
* - index bits = log2(ICACHE_BLOCKS)
85+
*/
86+
#define ICACHE_BLOCKS_SIZE 256
87+
#define ICACHE_BLOCKS 256
88+
#define ICACHE_OFFSET_BITS 8
89+
#define ICACHE_INDEX_BITS 8
90+
91+
/* For power-of-two sizes, (size - 1) sets all low bits to 1,
92+
* allowing fast extraction of address fields with a bitwise AND.
93+
*/
94+
#define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1)
95+
#define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1)
96+
#define RV_PAGE_MASK (RV_PAGE_SIZE - 1)
97+
98+
typedef struct {
99+
uint32_t tag;
100+
const uint8_t *base;
101+
bool valid;
102+
} icache_block_t;
103+
104+
typedef struct {
105+
icache_block_t block[ICACHE_BLOCKS];
106+
} icache_t;
107+
78108
struct __hart_internal {
109+
icache_t icache;
79110
uint32_t x_regs[32];
80111

81112
/* LR reservation virtual address. last bit is 1 if valid */
@@ -106,7 +137,8 @@ struct __hart_internal {
106137
*/
107138
uint32_t exc_cause, exc_val;
108139

109-
mmu_fetch_cache_t cache_fetch;
140+
/* 2-entry direct-mapped fetch TLB, indexed by the parity of the VPN */
141+
mmu_fetch_cache_t cache_fetch[2];
110142
/* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */
111143
mmu_cache_set_t cache_load[8];
112144
/* 8-set × 2-way set-associative cache for store operations */

0 commit comments

Comments
 (0)