diff --git a/.gitignore b/.gitignore index 2d6a863fe..ab8faab70 100644 --- a/.gitignore +++ b/.gitignore @@ -350,3 +350,4 @@ fpga/.Xil/ # vscode temp file .vscode/ .bloop +.metals/ \ No newline at end of file diff --git a/.metals/metals.lock.db b/.metals/metals.lock.db new file mode 100644 index 000000000..cb74576f7 --- /dev/null +++ b/.metals/metals.lock.db @@ -0,0 +1,6 @@ +#FileLock +#Mon Oct 24 22:46:26 CST 2022 +server=localhost\:42597 +hostName=localhost +method=file +id=1840a758cb97dd43e099648973046290a4902dd73eb diff --git a/.metals/metals.mv.db b/.metals/metals.mv.db new file mode 100644 index 000000000..0cb6c4190 Binary files /dev/null and b/.metals/metals.mv.db differ diff --git a/src/main/scala/bus/simplebus/SimpleBus.scala b/src/main/scala/bus/simplebus/SimpleBus.scala index 320072e78..107172223 100644 --- a/src/main/scala/bus/simplebus/SimpleBus.scala +++ b/src/main/scala/bus/simplebus/SimpleBus.scala @@ -24,7 +24,7 @@ import utils._ import bus.axi4._ import bus.memport._ -sealed abstract class SimpleBusBundle extends Bundle with HasNutCoreParameter +abstract class SimpleBusBundle extends Bundle with HasNutCoreParameter object SimpleBusCmd { // req diff --git a/src/main/scala/nutcore/Bundle.scala b/src/main/scala/nutcore/Bundle.scala index 4193c191c..65c73fb43 100644 --- a/src/main/scala/nutcore/Bundle.scala +++ b/src/main/scala/nutcore/Bundle.scala @@ -19,6 +19,8 @@ package nutcore import chisel3._ import chisel3.util._ +import nutcore.backend._ + class CtrlSignalIO extends NutCoreBundle { val src1Type = Output(SrcType()) val src2Type = Output(SrcType()) diff --git a/src/main/scala/nutcore/NutCore.scala b/src/main/scala/nutcore/NutCore.scala index d3ba936cd..6f3e1a4c5 100644 --- a/src/main/scala/nutcore/NutCore.scala +++ b/src/main/scala/nutcore/NutCore.scala @@ -20,6 +20,12 @@ import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore.frontend._ +import nutcore.backend._ + +import nutcore.mem.cache._ +import 
nutcore.mem.tlb._ + import bus.simplebus._ import bus.axi4._ import utils._ @@ -98,15 +104,15 @@ class NutCore(implicit val p: NutCoreConfig) extends NutCoreModule { // Frontend val frontend = (Settings.get("IsRV32"), Settings.get("EnableOutOfOrderExec")) match { - case (true, _) => Module(new Frontend_embedded) - case (false, true) => Module(new Frontend_ooo) - case (false, false) => Module(new Frontend_inorder) + case (true, _) => Module(new FrontendEmbedded) + case (false, true) => Module(new FrontendDynamic) + case (false, false) => Module(new FrontendSequential) } // Backend if (EnableOutOfOrderExec) { val mmioXbar = Module(new SimpleBusCrossbarNto1(if (HasDcache) 2 else 3)) - val backend = Module(new Backend_ooo) + val backend = Module(new BackendDynamic) PipelineVector2Connect(new DecodeIO, frontend.io.out(0), frontend.io.out(1), backend.io.in(0), backend.io.in(1), frontend.io.flushVec(1), 16) backend.io.flush := frontend.io.flushVec(2) frontend.io.redirect <> backend.io.redirect @@ -143,7 +149,7 @@ class NutCore(implicit val p: NutCoreConfig) extends NutCoreModule { io.mmio <> mmioXbar.io.out } else { - val backend = Module(new Backend_inorder) + val backend = Module(new BackendSequential) PipelineVector2Connect(new DecodeIO, frontend.io.out(0), frontend.io.out(1), backend.io.in(0), backend.io.in(1), frontend.io.flushVec(1), 4) diff --git a/src/main/scala/nutcore/backend/BackendCommons.scala b/src/main/scala/nutcore/backend/BackendCommons.scala new file mode 100644 index 000000000..9c7c5c9ef --- /dev/null +++ b/src/main/scala/nutcore/backend/BackendCommons.scala @@ -0,0 +1,43 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. 
+* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ + +package nutcore.backend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import bus.simplebus._ +import difftest._ + +trait HasBackendConst{ + // val multiIssue = true + val robSize = 16 + val robWidth = 2 + val robInstCapacity = robSize * robWidth + val checkpointSize = 4 // register map checkpoint size + val brTagWidth = log2Up(checkpointSize) + val prfAddrWidth = log2Up(robSize) + log2Up(robWidth) // physical rf addr width + + val DispatchWidth = 2 + val CommitWidth = 2 + val RetireWidth = 2 + + val enableCheckpoint = true +} diff --git a/src/main/scala/nutcore/backend/ooo/Backend.scala b/src/main/scala/nutcore/backend/dynamic/BackendDynamic.scala similarity index 92% rename from src/main/scala/nutcore/backend/ooo/Backend.scala rename to src/main/scala/nutcore/backend/dynamic/BackendDynamic.scala index 9655c85a8..d8a7a893e 100644 --- a/src/main/scala/nutcore/backend/ooo/Backend.scala +++ b/src/main/scala/nutcore/backend/dynamic/BackendDynamic.scala @@ -1,47 +1,19 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. 
-* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ - -package nutcore + +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ import bus.simplebus._ import difftest._ -trait HasBackendConst{ - // val multiIssue = true - val robSize = 16 - val robWidth = 2 - val robInstCapacity = robSize * robWidth - val checkpointSize = 4 // register map checkpoint size - val brTagWidth = log2Up(checkpointSize) - val prfAddrWidth = log2Up(robSize) + log2Up(robWidth) // physical rf addr width - - val DispatchWidth = 2 - val CommitWidth = 2 - val RetireWidth = 2 - - val enableCheckpoint = true -} // NutShell/Argo Out Of Order Execution Backend -class Backend_ooo(implicit val p: NutCoreConfig) extends NutCoreModule with HasRegFileParameter with HasBackendConst{ +class BackendDynamic(implicit val p: NutCoreConfig) extends NutCoreModule with HasRegFileParameter with HasBackendConst{ val io = IO(new Bundle { // EXU @@ -666,35 +638,3 @@ class Backend_ooo(implicit val p: NutCoreConfig) extends NutCoreModule with HasR } } - -class Backend_inorder(implicit val p: NutCoreConfig) extends NutCoreModule { - val io = IO(new Bundle { - val in = Vec(2, Flipped(Decoupled(new DecodeIO))) - val flush = Input(UInt(2.W)) - val dmem = new SimpleBusUC(addrBits = VAddrBits) - val memMMU = Flipped(new MemMMUIO) - - val redirect = new RedirectIO - }) - - val isu = Module(new ISU) - val exu = Module(new EXU) - val wbu = Module(new WBU) - - PipelineConnect(isu.io.out, exu.io.in, exu.io.out.fire(), io.flush(0)) - PipelineConnect(exu.io.out, 
wbu.io.in, true.B, io.flush(1)) - - isu.io.in <> io.in - - isu.io.flush := io.flush(0) - exu.io.flush := io.flush(1) - - isu.io.wb <> wbu.io.wb - io.redirect <> wbu.io.redirect - // forward - isu.io.forward <> exu.io.forward - - io.memMMU.imem <> exu.io.memMMU.imem - io.memMMU.dmem <> exu.io.memMMU.dmem - io.dmem <> exu.io.dmem -} \ No newline at end of file diff --git a/src/main/scala/nutcore/backend/ooo/EP.scala b/src/main/scala/nutcore/backend/dynamic/EP.scala similarity index 93% rename from src/main/scala/nutcore/backend/ooo/EP.scala rename to src/main/scala/nutcore/backend/dynamic/EP.scala index 94293f9c4..d666d283e 100644 --- a/src/main/scala/nutcore/backend/ooo/EP.scala +++ b/src/main/scala/nutcore/backend/dynamic/EP.scala @@ -1,9 +1,12 @@ -package nutcore + +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ // Out of Order Execution Pipeline for NutShell/Argo diff --git a/src/main/scala/nutcore/backend/ooo/ROB.scala b/src/main/scala/nutcore/backend/dynamic/ROB.scala similarity index 99% rename from src/main/scala/nutcore/backend/ooo/ROB.scala rename to src/main/scala/nutcore/backend/dynamic/ROB.scala index 615c9b94b..a5bd3801f 100644 --- a/src/main/scala/nutcore/backend/ooo/ROB.scala +++ b/src/main/scala/nutcore/backend/dynamic/ROB.scala @@ -14,15 +14,18 @@ * See the Mulan PSL v2 for more details. 
***************************************************************************************/ -package nutcore +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ import difftest._ + object physicalRFTools{ def getPRFAddr(robIndex: UInt, bank: UInt): UInt = { Cat(robIndex, bank(0)) @@ -110,7 +113,7 @@ class ROB(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType val rmtMap = Reg(Vec(NRReg, UInt(prfAddrWidth.W))) val rmtValid = RegInit(VecInit(Seq.fill(NRReg)(false.B))) - sealed class Checkpoint extends NutCoreBundle { + class Checkpoint extends NutCoreBundle { val map = Vec(NRReg, UInt(prfAddrWidth.W)) val valid = Vec(NRReg, Bool()) } diff --git a/src/main/scala/nutcore/backend/ooo/RS.scala b/src/main/scala/nutcore/backend/dynamic/RS.scala similarity index 99% rename from src/main/scala/nutcore/backend/ooo/RS.scala rename to src/main/scala/nutcore/backend/dynamic/RS.scala index 54ad1298f..10675c8e3 100644 --- a/src/main/scala/nutcore/backend/ooo/RS.scala +++ b/src/main/scala/nutcore/backend/dynamic/RS.scala @@ -14,12 +14,14 @@ * See the Mulan PSL v2 for more details. 
***************************************************************************************/ -package nutcore +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ trait HasRSConst{ diff --git a/src/main/scala/nutcore/backend/fu/ALU.scala b/src/main/scala/nutcore/backend/fu/ALU.scala index 7b506c1e0..5071e5326 100644 --- a/src/main/scala/nutcore/backend/fu/ALU.scala +++ b/src/main/scala/nutcore/backend/fu/ALU.scala @@ -20,6 +20,8 @@ import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore.frontend.instr_fetch.branch_predict._ + import utils._ import difftest._ import top.Settings diff --git a/src/main/scala/nutcore/backend/sequential/BackendSequential.scala b/src/main/scala/nutcore/backend/sequential/BackendSequential.scala new file mode 100644 index 000000000..4348ba248 --- /dev/null +++ b/src/main/scala/nutcore/backend/sequential/BackendSequential.scala @@ -0,0 +1,44 @@ + +package nutcore.backend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import bus.simplebus._ +import difftest._ + +class BackendSequential(implicit val p: NutCoreConfig) extends NutCoreModule { + val io = IO(new Bundle { + val in = Vec(2, Flipped(Decoupled(new DecodeIO))) + val flush = Input(UInt(2.W)) + val dmem = new SimpleBusUC(addrBits = VAddrBits) + val memMMU = Flipped(new MemMMUIO) + + val redirect = new RedirectIO + }) + + val isu = Module(new ISU) + val exu = Module(new EXU) + val wbu = Module(new WBU) + + PipelineConnect(isu.io.out, exu.io.in, exu.io.out.fire(), io.flush(0)) + PipelineConnect(exu.io.out, wbu.io.in, true.B, io.flush(1)) + + isu.io.in <> io.in + + isu.io.flush := io.flush(0) + exu.io.flush := io.flush(1) + + isu.io.wb <> wbu.io.wb + io.redirect <> wbu.io.redirect + // forward + isu.io.forward <> exu.io.forward + + io.memMMU.imem <> exu.io.memMMU.imem + 
io.memMMU.dmem <> exu.io.memMMU.dmem + io.dmem <> exu.io.dmem +} diff --git a/src/main/scala/nutcore/backend/seq/EXU.scala b/src/main/scala/nutcore/backend/sequential/EXU.scala similarity index 100% rename from src/main/scala/nutcore/backend/seq/EXU.scala rename to src/main/scala/nutcore/backend/sequential/EXU.scala diff --git a/src/main/scala/nutcore/backend/seq/ISU.scala b/src/main/scala/nutcore/backend/sequential/ISU.scala similarity index 100% rename from src/main/scala/nutcore/backend/seq/ISU.scala rename to src/main/scala/nutcore/backend/sequential/ISU.scala diff --git a/src/main/scala/nutcore/backend/seq/WBU.scala b/src/main/scala/nutcore/backend/sequential/WBU.scala similarity index 100% rename from src/main/scala/nutcore/backend/seq/WBU.scala rename to src/main/scala/nutcore/backend/sequential/WBU.scala diff --git a/src/main/scala/nutcore/frontend/BPU.scala b/src/main/scala/nutcore/frontend/BPU.scala deleted file mode 100644 index 470fe9ab1..000000000 --- a/src/main/scala/nutcore/frontend/BPU.scala +++ /dev/null @@ -1,476 +0,0 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. 
-***************************************************************************************/ - -package nutcore - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ -import top.Settings - -class TableAddr(val idxBits: Int) extends NutCoreBundle { - val padLen = if (Settings.get("IsRV32") || !Settings.get("EnableOutOfOrderExec")) 2 else 3 - def tagBits = VAddrBits - padLen - idxBits - - //val res = UInt((AddrBits - VAddrBits).W) - val tag = UInt(tagBits.W) - val idx = UInt(idxBits.W) - val pad = UInt(padLen.W) - - def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this) - def getTag(x: UInt) = fromUInt(x).tag - def getIdx(x: UInt) = fromUInt(x).idx -} - -object BTBtype { - def B = "b00".U // branch - def J = "b01".U // jump - def I = "b10".U // indirect - def R = "b11".U // return - - def apply() = UInt(2.W) -} - -class BPUUpdateReq extends NutCoreBundle { - val valid = Output(Bool()) - val pc = Output(UInt(VAddrBits.W)) - val isMissPredict = Output(Bool()) - val actualTarget = Output(UInt(VAddrBits.W)) - val actualTaken = Output(Bool()) // for branch - val fuOpType = Output(FuOpType()) - val btbType = Output(BTBtype()) - val isRVC = Output(Bool()) // for ras, save PC+2 to stack if is RVC -} - -// nextline predicter generates NPC from current NPC in 1 cycle -class BPU_ooo extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } - val out = new RedirectIO - val flush = Input(Bool()) - val brIdx = Output(Vec(4, Bool())) - // val target = Output(Vec(4, UInt(VAddrBits.W))) - // val instValid = Output(UInt(4.W)) // now instValid is generated in IFU - val crosslineJump = Output(Bool()) - }) - - val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) - - // BTB - val NRbtb = 512 - val btbAddr = new TableAddr(log2Up(NRbtb >> 2)) - def btbEntry() = new Bundle { - val tag = UInt(btbAddr.tagBits.W) - val _type = UInt(2.W) - val 
target = UInt(VAddrBits.W) - val crosslineJump = Bool() - val valid = Bool() - } - - val btb = List.fill(4)(Module(new SRAMTemplate(btbEntry(), set = NRbtb >> 2, shouldReset = true, holdRead = true, singlePort = true))) - // flush BTB when executing fence.i - val flushBTB = WireInit(false.B) - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushBTB, "MOUFlushICache") - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - (0 to 3).map(i => (btb(i).reset := reset.asBool || (flushBTB || flushTLB))) - - Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) - - (0 to 3).map(i => (btb(i).io.r.req.valid := io.in.pc.valid)) - (0 to 3).map(i => (btb(i).io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits))) - - - val btbRead = Wire(Vec(4, btbEntry())) - (0 to 3).map(i => (btbRead(i) := btb(i).io.r.resp.data(0))) - // since there is one cycle latency to read SyncReadMem, - // we should latch the input pc for one cycle - val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) - val btbHit = Wire(Vec(4, Bool())) - (0 to 3).map(i => btbHit(i) := btbRead(i).valid && btbRead(i).tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb(i).io.r.req.fire(), init = false.B)) - // btbHit will ignore pc(2,0). 
pc(2,0) is used to build brIdx - val crosslineJump = btbRead(3).crosslineJump && btbHit(3) && !io.brIdx(0) && !io.brIdx(1) && !io.brIdx(2) - io.crosslineJump := crosslineJump - // val crosslineJumpLatch = RegNext(crosslineJump) - // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) - - // PHT - val pht = List.fill(4)(Mem(NRbtb >> 2, UInt(2.W))) - val phtTaken = Wire(Vec(4, Bool())) - (0 to 3).map(i => (phtTaken(i) := RegEnable(pht(i).read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid))) - - // RAS - val NRras = 16 - val ras = Mem(NRras, UInt(VAddrBits.W)) - val sp = Counter(NRras) - val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) - - // update - val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) - BoringUtils.addSink(req, "bpuUpdateReq") - - btbWrite.tag := btbAddr.getTag(req.pc) - btbWrite.target := req.actualTarget - btbWrite._type := req.btbType - btbWrite.crosslineJump := req.pc(2,1)==="h3".U && !req.isRVC // ((pc_offset % 8) == 6) && inst is 32bit in length - btbWrite.valid := true.B - // NOTE: We only update BTB at a miss prediction. - // If a miss prediction is found, the pipeline will be flushed - // in the next cycle. Therefore it is safe to use single-port - // SRAM to implement BTB, since write requests have higher priority - // than read request. Again, since the pipeline will be flushed - // in the next cycle, the read request will be useless. 
- (0 to 3).map(i => btb(i).io.w.req.valid := req.isMissPredict && req.valid && i.U === req.pc(2,1)) - (0 to 3).map(i => btb(i).io.w.req.bits.setIdx := btbAddr.getIdx(req.pc)) - (0 to 3).map(i => btb(i).io.w.req.bits.data := btbWrite) - - val getpht = LookupTree(req.pc(2,1), List.tabulate(4)(i => (i.U -> pht(i).read(btbAddr.getIdx(req.pc))))) - val cnt = RegNext(getpht) - val reqLatch = RegNext(req) - when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { - val taken = reqLatch.actualTaken - val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) - val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) - when (wen) { - (0 to 3).map(i => when(i.U === reqLatch.pc(2,1)){pht(i).write(btbAddr.getIdx(reqLatch.pc), newCnt)}) - } - } - when (req.valid) { - when (req.fuOpType === ALUOpType.call) { - ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) - sp.value := sp.value + 1.U - } - .elsewhen (req.fuOpType === ALUOpType.ret) { - when(sp.value === 0.U) { - // RAS empty, do nothing - } - sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) - } - } - - def genInstValid(pc: UInt) = LookupTree(pc(2,1), List( - "b00".U -> "b1111".U, - "b01".U -> "b1110".U, - "b10".U -> "b1100".U, - "b11".U -> "b1000".U - )) - - val pcLatchValid = genInstValid(pcLatch) - - val target = Wire(Vec(4, UInt(VAddrBits.W))) - (0 to 3).map(i => target(i) := Mux(btbRead(i)._type === BTBtype.R, rasTarget, btbRead(i).target)) - (0 to 3).map(i => io.brIdx(i) := btbHit(i) && pcLatchValid(i).asBool && Mux(btbRead(i)._type === BTBtype.B, phtTaken(i), true.B) && btbRead(i).valid) - io.out.target := PriorityMux(io.brIdx, target) - io.out.valid := io.brIdx.asUInt.orR - io.out.rtype := 0.U - Debug(io.out.valid, "[BPU] pc %x io.brIdx.asUInt %b phtTaken %x %x %x %x valid %x %x %x %x\n", pcLatch, io.brIdx.asUInt, phtTaken(0), phtTaken(1), phtTaken(2), phtTaken(3), btbRead(0).valid, btbRead(1).valid, btbRead(2).valid, btbRead(3).valid) - - // io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump - // Note: - // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict - // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline - // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) - // by using `instline`, we mean a 64 bit instfetch result from imem - // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. -} - -class BPU_embedded extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(32.W)))) } - val out = new RedirectIO - val flush = Input(Bool()) - }) - - val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) - - // BTB - val NRbtb = 512 - val btbAddr = new TableAddr(log2Up(NRbtb)) - def btbEntry() = new Bundle { - val tag = UInt(btbAddr.tagBits.W) - val _type = UInt(2.W) - val target = UInt(32.W) - } - - val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) - btb.io.r.req.valid := io.in.pc.valid - btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) - - val btbRead = Wire(btbEntry()) - btbRead := btb.io.r.resp.data(0) - // since there is one cycle latency to read SyncReadMem, - // we should latch the input pc for one cycle - val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) - val btbHit = btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.ready, init = false.B) - - // PHT - val pht = Mem(NRbtb, UInt(2.W)) - val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) - - // RAS - val NRras = 16 - val ras = Mem(NRras, UInt(32.W)) - val sp = Counter(NRras) - val rasTarget = RegEnable(ras.read(sp.value), 
io.in.pc.valid) - - // update - val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) - BoringUtils.addSink(req, "bpuUpdateReq") - - btbWrite.tag := btbAddr.getTag(req.pc) - btbWrite.target := req.actualTarget - btbWrite._type := req.btbType - // NOTE: We only update BTB at a miss prediction. - // If a miss prediction is found, the pipeline will be flushed - // in the next cycle. Therefore it is safe to use single-port - // SRAM to implement BTB, since write requests have higher priority - // than read request. Again, since the pipeline will be flushed - // in the next cycle, the read request will be useless. - btb.io.w.req.valid := req.isMissPredict && req.valid - btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) - btb.io.w.req.bits.data := btbWrite - - val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) - val reqLatch = RegNext(req) - when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { - val taken = reqLatch.actualTaken - val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) - val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) - when (wen) { - pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) - } - } - when (req.valid) { - when (req.fuOpType === ALUOpType.call) { - ras.write(sp.value + 1.U, req.pc + 4.U) - sp.value := sp.value + 1.U - } - .elsewhen (req.fuOpType === ALUOpType.ret) { - sp.value := sp.value - 1.U - } - } - - val flushBTB = WireInit(false.B) - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushBTB, "MOUFlushICache") - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - - io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) - io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) - io.out.rtype := 0.U -} - -class BPU_inorder extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } - val out = new RedirectIO - val flush = Input(Bool()) - val brIdx = Output(UInt(3.W)) 
- val crosslineJump = Output(Bool()) - }) - - val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) - - // BTB - val NRbtb = 512 - val btbAddr = new TableAddr(log2Up(NRbtb)) - def btbEntry() = new Bundle { - val tag = UInt(btbAddr.tagBits.W) - val _type = UInt(2.W) - val target = UInt(VAddrBits.W) - val brIdx = UInt(3.W) - val valid = Bool() - } - - val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) - // flush BTB when executing fence.i - val flushBTB = WireInit(false.B) - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushBTB, "MOUFlushICache") - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - btb.reset := reset.asBool || (flushBTB || flushTLB) - Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) - - btb.io.r.req.valid := io.in.pc.valid - btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) - - - val btbRead = Wire(btbEntry()) - btbRead := btb.io.r.resp.data(0) - // since there is one cycle latency to read SyncReadMem, - // we should latch the input pc for one cycle - val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) - val btbHit = btbRead.valid && btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.fire(), init = false.B) && !(pcLatch(1) && btbRead.brIdx(0)) - // btbHit will ignore pc(1,0). 
pc(1,0) is used to build brIdx - // !(pcLatch(1) && btbRead.brIdx(0)) is used to deal with the following case: - // ------------------------------------------------- - // 0 jump rvc // marked as "take branch" in BTB - // 2 xxx rvc <-- pc // misrecognize this instr as "btb hit" with target of previous jump instr - // ------------------------------------------------- - val crosslineJump = btbRead.brIdx(2) && btbHit - io.crosslineJump := crosslineJump - // val crosslineJumpLatch = RegNext(crosslineJump) - // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) - Debug(btbHit, "[BTBHT1] %d pc=%x tag=%x,%x index=%x bridx=%x tgt=%x,%x flush %x type:%x\n", GTimer(), pcLatch, btbRead.tag, btbAddr.getTag(pcLatch), btbAddr.getIdx(pcLatch), btbRead.brIdx, btbRead.target, io.out.target, flush,btbRead._type) - Debug(btbHit, "[BTBHT2] btbRead.brIdx %x mask %x\n", btbRead.brIdx, Cat(crosslineJump, Fill(2, io.out.valid))) - // Debug(btbHit, "[BTBHT5] btbReqValid:%d btbReqSetIdx:%x\n",btb.io.r.req.valid, btb.io.r.req.bits.setId) - - // PHT - val pht = Mem(NRbtb, UInt(2.W)) - val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) - - // RAS - - val NRras = 16 - val ras = Mem(NRras, UInt(VAddrBits.W)) - // val raBrIdxs = Mem(NRras, UInt(2.W)) - val sp = Counter(NRras) - val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) - // val rasBrIdx = RegEnable(raBrIdxs.read(sp.value), io.in.pc.valid) - - // update - val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) - BoringUtils.addSink(req, "bpuUpdateReq") - - Debug(req.valid, "[BTBUP] pc=%x tag=%x index=%x bridx=%x tgt=%x type=%x\n", req.pc, btbAddr.getTag(req.pc), btbAddr.getIdx(req.pc), Cat(req.pc(1), ~req.pc(1)), req.actualTarget, req.btbType) - - //val fflag = req.btbType===3.U && btb.io.w.req.valid && btb.io.w.req.bits.setIdx==="hc9".U - //when(fflag && GTimer()>2888000.U) { - // Debug("%d\n", GTimer()) - // Debug("[BTBHT6] 
btbWrite.type is BTBtype.R/RET!!! Inpc:%x btbWrite.brIdx:%x setIdx:%x\n", io.in.pc.bits, btbWrite.brIdx, btb.io.w.req.bits.setIdx) - // Debug("[BTBHT6] tag:%x target:%x _type:%x bridx:%x\n", btbWrite.tag,btbWrite.target,btbWrite._type,btbWrite.brIdx) - // Debug(p"[BTBHT6] req:${req} \n") - //} - //Debug("[BTBHT5] tag: target:%x type:%d brIdx:%d\n", req.actualTarget, req.btbType, Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1))) - - btbWrite.tag := btbAddr.getTag(req.pc) - btbWrite.target := req.actualTarget - btbWrite._type := req.btbType - btbWrite.brIdx := Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1)) - btbWrite.valid := true.B - // NOTE: We only update BTB at a miss prediction. - // If a miss prediction is found, the pipeline will be flushed - // in the next cycle. Therefore it is safe to use single-port - // SRAM to implement BTB, since write requests have higher priority - // than read request. Again, since the pipeline will be flushed - // in the next cycle, the read request will be useless. 
- btb.io.w.req.valid := req.isMissPredict && req.valid - btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) - btb.io.w.req.bits.data := btbWrite - - //Debug(true) { - //when (btb.io.w.req.valid && btbWrite.tag === btbAddr.getTag("hffffffff803541a4".U)) { - // Debug("[BTBWrite] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) - //} - //} - - //when (GTimer() > 77437484.U && btb.io.w.req.valid) { - // Debug("[BTBWrite-ALL] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) - //} - - val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) - val reqLatch = RegNext(req) - when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { - val taken = reqLatch.actualTaken - val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) - val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) - when (wen) { - pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) - //Debug(){ - //Debug("BPUPDATE: pc %x cnt %x\n", reqLatch.pc, newCnt) - //} - } - } - when (req.valid) { - when (req.fuOpType === ALUOpType.call) { - ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) - // raBrIdxs.write(sp.value + 1.U, Mux(req.pc(1), 2.U, 1.U)) - sp.value := sp.value + 1.U - } - .elsewhen (req.fuOpType === ALUOpType.ret) { - when(sp.value === 0.U) { - //Debug("ATTTTT: sp.value is 0.U\n") //TODO: sp.value may equal to 0.U - } - sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) //TODO: sp.value may less than 0.U - } - } - - io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) - // io.out.target := Mux(crosslineJumpLatch && !flush, crosslineJumpTarget, Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target)) - // io.out.brIdx := btbRead.brIdx & Fill(3, io.out.valid) - io.brIdx := btbRead.brIdx & Cat(true.B, crosslineJump, Fill(2, io.out.valid)) - io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B && rasTarget=/=0.U) //TODO: add rasTarget=/=0.U, need fix - io.out.rtype := 0.U - // io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump - // Note: - // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict - // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline - // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) - // by using `instline`, we mean a 64 bit instfetch result from imem - // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. -} - -class DummyPredicter extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } - val out = new RedirectIO - val valid = Output(Bool()) - val flush = Input(Bool()) - val ignore = Input(Bool()) - val brIdx = Output(Vec(4, Bool())) - }) - // Note: when io.ignore, io.out.valid must be false.B for this pc - // This limitation is for cross instline inst fetch logic - io.valid := io.in.pc.valid // Predicter is returning a result - io.out.valid := false.B // Need redirect - io.out.target := DontCare // Redirect target - io.out.rtype := DontCare // Predicter does not need to care about it - io.brIdx := VecInit(Seq.fill(4)(false.B)) // Which inst triggers jump -} - -//---- Legacy BPUs ---- -/* -class BPU_nodelay extends NutCoreModule { - val io = IO(new Bundle { - val in = Flipped(Valid(new CtrlFlowIO)) - val out = new RedirectIO - }) - - val instr = io.in.bits.instr - val immJ = SignExt(Cat(instr(31), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W)), XLEN) - val immB = SignExt(Cat(instr(31), instr(7), instr(30, 25), instr(11, 8), 0.U(1.W)), XLEN) - val table = Array( - 
RV32I_BRUInstr.JAL -> List(immJ, true.B), - RV32I_BRUInstr.BNE -> List(immB, instr(31)), - RV32I_BRUInstr.BEQ -> List(immB, instr(31)), - RV32I_BRUInstr.BLT -> List(immB, instr(31)), - RV32I_BRUInstr.BGE -> List(immB, instr(31)), - RV32I_BRUInstr.BLTU -> List(immB, instr(31)), - RV32I_BRUInstr.BGEU -> List(immB, instr(31)) - ) - val default = List(immB, false.B) - val offset :: predict :: Nil = ListLookup(instr, default, table) - - io.out.target := io.in.bits.pc + offset - io.out.valid := io.in.valid && predict(0) - io.out.rtype := 0.U -} -*/ \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/Frontend.scala b/src/main/scala/nutcore/frontend/Frontend.scala deleted file mode 100644 index f34f160d0..000000000 --- a/src/main/scala/nutcore/frontend/Frontend.scala +++ /dev/null @@ -1,124 +0,0 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. 
-***************************************************************************************/ - -package nutcore - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import utils._ -import bus.simplebus._ -import chisel3.experimental.IO - -class FrontendIO(implicit val p: NutCoreConfig) extends Bundle with HasNutCoreConst { - val imem = new SimpleBusUC(userBits = ICacheUserBundleWidth, addrBits = VAddrBits) - val out = Vec(2, Decoupled(new DecodeIO)) - val flushVec = Output(UInt(4.W)) - val redirect = Flipped(new RedirectIO) - val bpFlush = Output(Bool()) - val ipf = Input(Bool()) -} - - -trait HasFrontendIO { - implicit val p: NutCoreConfig - val io = IO(new FrontendIO) -} - -class Frontend_ooo(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { - def pipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], - isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { - // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 - // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) - right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) - } - - val ifu = Module(new IFU_ooo) - val ibf = Module(new IBF) - val idu = Module(new IDU) - - pipelineConnect2(ifu.io.out, ibf.io.in, ifu.io.flushVec(0)) - PipelineVector2Connect(new CtrlFlowIO, ibf.io.out(0), ibf.io.out(1), idu.io.in(0), idu.io.in(1), ifu.io.flushVec(1), if (EnableOutOfOrderExec) 8 else 4) - ibf.io.flush := ifu.io.flushVec(1) - - io.out <> idu.io.out - io.redirect <> ifu.io.redirect - io.flushVec <> ifu.io.flushVec - io.bpFlush <> ifu.io.bpFlush - io.ipf <> ifu.io.ipf - io.imem <> ifu.io.imem - - Debug("------------------------ FRONTEND:------------------------\n") - Debug("flush = %b, ifu:(%d,%d), ibf:(%d,%d), idu:(%d,%d)\n", - ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, - ibf.io.in.valid, ibf.io.in.ready, idu.io.in(0).valid, idu.io.in(0).ready) - Debug(ifu.io.out.valid, 
"IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) - Debug(ibf.io.in.valid, "IBF: pc = 0x%x, instr = 0x%x\n", ibf.io.in.bits.pc, ibf.io.in.bits.instr) - Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) - Debug(idu.io.in(1).valid, "IDU2: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(1).bits.pc, idu.io.in(1).bits.instr, idu.io.in(1).bits.pnpc) -} - -class Frontend_embedded(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { - val ifu = Module(new IFU_embedded) - val idu = Module(new IDU) - - PipelineConnect(ifu.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(0)) - idu.io.in(1) := DontCare - - io.out <> idu.io.out - io.redirect <> ifu.io.redirect - io.flushVec <> ifu.io.flushVec - io.bpFlush <> ifu.io.bpFlush - io.ipf <> ifu.io.ipf - io.imem <> ifu.io.imem - - Debug("------------------------ FRONTEND:------------------------\n") - Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", - ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) - Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) - Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) -} - -class Frontend_inorder(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { - val ifu = Module(new IFU_inorder) - val ibf = Module(new NaiveRVCAlignBuffer) - val idu = Module(new IDU) - - def PipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], - isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { - // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 - // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) - right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) - } - - 
PipelineConnect2(ifu.io.out, ibf.io.in, ifu.io.flushVec(0)) - PipelineConnect(ibf.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(1)) - idu.io.in(1) := DontCare - - ibf.io.flush := ifu.io.flushVec(1) - io.out <> idu.io.out - io.redirect <> ifu.io.redirect - io.flushVec <> ifu.io.flushVec - io.bpFlush <> ifu.io.bpFlush - io.ipf <> ifu.io.ipf - io.imem <> ifu.io.imem - - Debug("------------------------ FRONTEND:------------------------\n") - Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", - ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) - Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) - Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) -} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/FrontendCommons.scala b/src/main/scala/nutcore/frontend/FrontendCommons.scala new file mode 100644 index 000000000..55c7b65b7 --- /dev/null +++ b/src/main/scala/nutcore/frontend/FrontendCommons.scala @@ -0,0 +1,46 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+***************************************************************************************/ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + +class FrontendIO(implicit val p: NutCoreConfig) extends Bundle with HasNutCoreConst { + val imem = new SimpleBusUC(userBits = ICacheUserBundleWidth, addrBits = VAddrBits) + val out = Vec(2, Decoupled(new DecodeIO)) + val flushVec = Output(UInt(4.W)) + val redirect = Flipped(new RedirectIO) + val bpFlush = Output(Bool()) + val ipf = Input(Bool()) +} + + +trait HasFrontendIO { + implicit val p: NutCoreConfig + val io = IO(new FrontendIO) +} + + diff --git a/src/main/scala/nutcore/frontend/FrontendDynamic.scala b/src/main/scala/nutcore/frontend/FrontendDynamic.scala new file mode 100644 index 000000000..9d47d0cc0 --- /dev/null +++ b/src/main/scala/nutcore/frontend/FrontendDynamic.scala @@ -0,0 +1,48 @@ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + +class FrontendDynamic(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { + def pipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], + isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { + // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 + // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) + right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) + } + + val ifu = Module(new InstrFetchDynamic) + val ibf = Module(new IBF) + val idu = Module(new Decode) + + pipelineConnect2(ifu.io.out, ibf.io.in, 
ifu.io.flushVec(0)) + PipelineVector2Connect(new CtrlFlowIO, ibf.io.out(0), ibf.io.out(1), idu.io.in(0), idu.io.in(1), ifu.io.flushVec(1), if (EnableOutOfOrderExec) 8 else 4) + ibf.io.flush := ifu.io.flushVec(1) + + io.out <> idu.io.out + io.redirect <> ifu.io.redirect + io.flushVec <> ifu.io.flushVec + io.bpFlush <> ifu.io.bpFlush + io.ipf <> ifu.io.ipf + io.imem <> ifu.io.imem + + Debug("------------------------ FRONTEND:------------------------\n") + Debug("flush = %b, ifu:(%d,%d), ibf:(%d,%d), idu:(%d,%d)\n", + ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, + ibf.io.in.valid, ibf.io.in.ready, idu.io.in(0).valid, idu.io.in(0).ready) + Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) + Debug(ibf.io.in.valid, "IBF: pc = 0x%x, instr = 0x%x\n", ibf.io.in.bits.pc, ibf.io.in.bits.instr) + Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) + Debug(idu.io.in(1).valid, "IDU2: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(1).bits.pc, idu.io.in(1).bits.instr, idu.io.in(1).bits.pnpc) +} + diff --git a/src/main/scala/nutcore/frontend/FrontendEmbedded.scala b/src/main/scala/nutcore/frontend/FrontendEmbedded.scala new file mode 100644 index 000000000..3ad4e710a --- /dev/null +++ b/src/main/scala/nutcore/frontend/FrontendEmbedded.scala @@ -0,0 +1,36 @@ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + + +class FrontendEmbedded(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { + val ifu = Module(new InstrFetchEmbedded) + val idu = Module(new Decode) + + PipelineConnect(ifu.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(0)) + idu.io.in(1) := DontCare 
+ + io.out <> idu.io.out + io.redirect <> ifu.io.redirect + io.flushVec <> ifu.io.flushVec + io.bpFlush <> ifu.io.bpFlush + io.ipf <> ifu.io.ipf + io.imem <> ifu.io.imem + + Debug("------------------------ FRONTEND:------------------------\n") + Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", + ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) + Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) + Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) +} diff --git a/src/main/scala/nutcore/frontend/FrontendSequential.scala b/src/main/scala/nutcore/frontend/FrontendSequential.scala new file mode 100644 index 000000000..554a40543 --- /dev/null +++ b/src/main/scala/nutcore/frontend/FrontendSequential.scala @@ -0,0 +1,45 @@ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + +class FrontendSequential(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { + val ifu = Module(new InstrFetchSequential) + val ibf = Module(new NaiveRVCAlignBuffer) + val idu = Module(new Decode) + + def PipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], + isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { + // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 + // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) + right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) + } + + PipelineConnect2(ifu.io.out, ibf.io.in, ifu.io.flushVec(0)) + PipelineConnect(ibf.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(1)) + idu.io.in(1) := DontCare + + ibf.io.flush := 
ifu.io.flushVec(1) + io.out <> idu.io.out + io.redirect <> ifu.io.redirect + io.flushVec <> ifu.io.flushVec + io.bpFlush <> ifu.io.bpFlush + io.ipf <> ifu.io.ipf + io.imem <> ifu.io.imem + + Debug("------------------------ FRONTEND:------------------------\n") + Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", + ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) + Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) + Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) +} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/decode/Decode.scala b/src/main/scala/nutcore/frontend/decode/Decode.scala new file mode 100644 index 000000000..0c7e7dc37 --- /dev/null +++ b/src/main/scala/nutcore/frontend/decode/Decode.scala @@ -0,0 +1,66 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+***************************************************************************************/ + +package nutcore.frontend.decode + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import difftest._ + +class Decode(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { + val io = IO(new Bundle { + val in = Vec(2, Flipped(Decoupled(new CtrlFlowIO))) + val out = Vec(2, Decoupled(new DecodeIO)) + }) + val decoder1 = Module(new DecodeUnit) + val decoder2 = Module(new DecodeUnit) + io.in(0) <> decoder1.io.in + io.in(1) <> decoder2.io.in + io.out(0) <> decoder1.io.out + io.out(1) <> decoder2.io.out + if(!EnableMultiIssue){ + io.in(1).ready := false.B + decoder2.io.in.valid := false.B + } + + val checkpoint_id = RegInit(0.U(64.W)) + + // debug runahead + val runahead = Module(new DifftestRunaheadEvent) + runahead.io.clock := clock + runahead.io.coreid := 0.U + runahead.io.valid := io.out(0).fire() + runahead.io.branch := decoder1.io.isBranch + runahead.io.pc := io.out(0).bits.cf.pc + runahead.io.checkpoint_id := checkpoint_id + when(runahead.io.valid && runahead.io.branch) { + checkpoint_id := checkpoint_id + 1.U // allocate a new checkpoint_id + } + io.out(0).bits.cf.isBranch := decoder1.io.isBranch + io.out(0).bits.cf.runahead_checkpoint_id := checkpoint_id + // when(runahead.io.valid) { + // printf("fire pc %x branch %x inst %x\n", runahead.io.pc, runahead.io.branch, io.out(0).bits.cf.instr) + // } + + if (!p.FPGAPlatform) { + BoringUtils.addSource(decoder1.io.isWFI | decoder2.io.isWFI, "isWFI") + } +} diff --git a/src/main/scala/nutcore/frontend/IDU.scala b/src/main/scala/nutcore/frontend/decode/DecodeUnit.scala similarity index 77% rename from src/main/scala/nutcore/frontend/IDU.scala rename to src/main/scala/nutcore/frontend/decode/DecodeUnit.scala index e9d9d7b85..3154d821e 100644 --- a/src/main/scala/nutcore/frontend/IDU.scala +++ 
b/src/main/scala/nutcore/frontend/decode/DecodeUnit.scala @@ -1,29 +1,17 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ - -package nutcore + +package nutcore.frontend.decode import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ import difftest._ -class Decoder(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { + +class DecodeUnit(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { val io = IO(new Bundle { val in = Flipped(Decoupled(new CtrlFlowIO)) val out = Decoupled(new DecodeIO) @@ -188,43 +176,3 @@ class Decoder(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstr io.isBranch := VecInit(RV32I_BRUInstr.table.map(i => i._2.tail(1) === fuOpType)).asUInt.orR && fuType === FuType.bru } - -class IDU(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { - val io = IO(new Bundle { - val in = Vec(2, Flipped(Decoupled(new CtrlFlowIO))) - val out = Vec(2, Decoupled(new DecodeIO)) - }) - val decoder1 = Module(new Decoder) - val decoder2 = Module(new Decoder) - io.in(0) <> decoder1.io.in - io.in(1) <> decoder2.io.in - io.out(0) <> decoder1.io.out - io.out(1) <> decoder2.io.out - if(!EnableMultiIssue){ - 
io.in(1).ready := false.B - decoder2.io.in.valid := false.B - } - - val checkpoint_id = RegInit(0.U(64.W)) - - // debug runahead - val runahead = Module(new DifftestRunaheadEvent) - runahead.io.clock := clock - runahead.io.coreid := 0.U - runahead.io.valid := io.out(0).fire() - runahead.io.branch := decoder1.io.isBranch - runahead.io.pc := io.out(0).bits.cf.pc - runahead.io.checkpoint_id := checkpoint_id - when(runahead.io.valid && runahead.io.branch) { - checkpoint_id := checkpoint_id + 1.U // allocate a new checkpoint_id - } - io.out(0).bits.cf.isBranch := decoder1.io.isBranch - io.out(0).bits.cf.runahead_checkpoint_id := checkpoint_id - // when(runahead.io.valid) { - // printf("fire pc %x branch %x inst %x\n", runahead.io.pc, runahead.io.branch, io.out(0).bits.cf.instr) - // } - - if (!p.FPGAPlatform) { - BoringUtils.addSource(decoder1.io.isWFI | decoder2.io.isWFI, "isWFI") - } -} diff --git a/src/main/scala/nutcore/frontend/IBF.scala b/src/main/scala/nutcore/frontend/instr_align_buffer/IBF.scala similarity index 100% rename from src/main/scala/nutcore/frontend/IBF.scala rename to src/main/scala/nutcore/frontend/instr_align_buffer/IBF.scala diff --git a/src/main/scala/nutcore/frontend/NaiveIBF.scala b/src/main/scala/nutcore/frontend/instr_align_buffer/NaiveIBF.scala similarity index 100% rename from src/main/scala/nutcore/frontend/NaiveIBF.scala rename to src/main/scala/nutcore/frontend/instr_align_buffer/NaiveIBF.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchCommons.scala b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchCommons.scala new file mode 100644 index 000000000..511b30e9f --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchCommons.scala @@ -0,0 +1,41 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under 
Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ + +package nutcore.frontend.instr_fetch + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + +import utils._ +import bus.simplebus._ +import top.Settings +import difftest._ + +trait HasResetVector { + val resetVector = Settings.getLong("ResetVector") +} + +class ICacheUserBundle extends NutCoreBundle { + val pc = UInt(VAddrBits.W) + val brIdx = UInt(4.W) // mark if an inst is predicted to branch + val pnpc = UInt(VAddrBits.W) + val instValid = UInt(4.W) // mark which part of this inst line is valid +} +// Note: update ICacheUserBundleWidth when change ICacheUserBundle diff --git a/src/main/scala/nutcore/frontend/IFU.scala b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchDynamic.scala similarity index 56% rename from src/main/scala/nutcore/frontend/IFU.scala rename to src/main/scala/nutcore/frontend/instr_fetch/InstrFetchDynamic.scala index c585db2eb..62d3e2bbe 100644 --- a/src/main/scala/nutcore/frontend/IFU.scala +++ b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchDynamic.scala @@ -1,43 +1,20 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. 
-* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ - -package nutcore + +package nutcore.frontend.instr_fetch import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + import utils._ import bus.simplebus._ import top.Settings import difftest._ -trait HasResetVector { - val resetVector = Settings.getLong("ResetVector") -} - -class ICacheUserBundle extends NutCoreBundle { - val pc = UInt(VAddrBits.W) - val brIdx = UInt(4.W) // mark if an inst is predicted to branch - val pnpc = UInt(VAddrBits.W) - val instValid = UInt(4.W) // mark which part of this inst line is valid -} -// Note: update ICacheUserBundleWidth when change ICacheUserBundle -class IFU_ooo extends NutCoreModule with HasResetVector { +class InstrFetchDynamic extends NutCoreModule with HasResetVector { val io = IO(new Bundle { val imem = new SimpleBusUC(userBits = ICacheUserBundleWidth, addrBits = VAddrBits) @@ -60,7 +37,7 @@ class IFU_ooo extends NutCoreModule with HasResetVector { // Note: we define instline as 8 Byte aligned data from icache // Next-line branch predictor - val nlp = Module(new BPU_ooo) + val nlp = Module(new BranchPredictDynamic) // nlpxxx_latch is used for the situation when I$ is disabled val nlpvalidreg = RegInit(false.B) @@ -145,7 +122,7 @@ class IFU_ooo extends NutCoreModule with HasResetVector { // Multi-cycle branch predictor // Multi-cycle branch predictor will not be synthesized if EnableMultiCyclePredictor is set to 
false - val mcp = Module(new DummyPredicter) + val mcp = Module(new BranchPredictDummy) mcp.io.in.pc.valid := io.imem.req.fire() mcp.io.in.pc.bits := pc mcp.io.flush := io.redirect.valid @@ -240,7 +217,7 @@ class IFU_ooo extends NutCoreModule with HasResetVector { val maybeBranch = Wire(Vec(4, Bool())) val brIdxByPredictor = Mux(validMCPRedirect, mcpResultQueue.io.deq.bits.brIdx.asUInt, io.imem.resp.bits.user.get(VAddrBits*2 + 3, VAddrBits*2)) (0 until 4).map(i => maybeBranch(i) := preDecodeIsBranch(io.out.bits.instr(16*(i+1)-1, 16*i))) //TODO: use icache pre-decode result - // When branch predicter set non-sequential npc for a non-branch inst, + // When branch predictor set non-sequential npc for a non-branch inst, // flush IFU, fetch sequential inst instead. when((brIdxByPredictor & ~maybeBranch.asUInt).orR && io.out.fire()){ Debug("[ERROR] FixInvalidBranchPredict\n") @@ -255,141 +232,3 @@ class IFU_ooo extends NutCoreModule with HasResetVector { BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") } - -class IFU_embedded extends NutCoreModule with HasResetVector { - val io = IO(new Bundle { - val imem = new SimpleBusUC(userBits = 64, addrBits = VAddrBits) - val out = Decoupled(new CtrlFlowIO) - val redirect = Flipped(new RedirectIO) - val flushVec = Output(UInt(4.W)) - val bpFlush = Output(Bool()) - val ipf = Input(Bool()) - }) - - // pc - val pc = RegInit(resetVector.U(32.W)) - val pcUpdate = io.redirect.valid || io.imem.req.fire() - val snpc = pc + 4.U // sequential next pc - - val bpu = Module(new BPU_embedded) - - // predicted next pc - val pnpc = bpu.io.out.target - val npc = Mux(io.redirect.valid, io.redirect.target, Mux(bpu.io.out.valid, pnpc, snpc)) - - bpu.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request - bpu.io.in.pc.bits := npc // predict one cycle early - bpu.io.flush := io.redirect.valid - - when 
(pcUpdate) { pc := npc } - - io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) - io.bpFlush := false.B - - io.imem := DontCare - io.imem.req.bits.apply(addr = pc, size = "b10".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(pc, npc)) - io.imem.req.valid := io.out.ready - io.imem.resp.ready := io.out.ready || io.flushVec(0) - - io.out.bits := DontCare - io.out.bits.instr := io.imem.resp.bits.rdata - io.imem.resp.bits.user.map{ case x => - io.out.bits.pc := x(2*VAddrBits-1, VAddrBits) - io.out.bits.pnpc := x(VAddrBits-1, 0) - } - io.out.valid := io.imem.resp.valid && !io.flushVec(0) - - Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x redirect %x npc %x pc %x pnpc %x\n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, npc, pc, bpu.io.out.target) - Debug(io.out.fire(), "[IFO] pc=%x user=%x inst=%x npc=%x ipf %x\n", io.out.bits.pc, io.imem.resp.bits.user.get, io.out.bits.instr, io.out.bits.pnpc, io.ipf) - - BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") - BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") -} - -class IFU_inorder extends NutCoreModule with HasResetVector { - val io = IO(new Bundle { - - val imem = new SimpleBusUC(userBits = VAddrBits*2 + 4, addrBits = VAddrBits) - val out = Decoupled(new CtrlFlowIO) - - val redirect = Flipped(new RedirectIO) - val flushVec = Output(UInt(4.W)) - val bpFlush = Output(Bool()) - val ipf = Input(Bool()) - }) - - // pc - val pc = RegInit(resetVector.U(VAddrBits.W)) - val pcUpdate = io.redirect.valid || io.imem.req.fire() - val snpc = Mux(pc(1), pc + 2.U, pc + 4.U) // sequential next pc - - val bp1 = Module(new BPU_inorder) - - val crosslineJump = bp1.io.crosslineJump - val crosslineJumpLatch = RegInit(false.B) - when(pcUpdate || bp1.io.flush) { - crosslineJumpLatch := Mux(bp1.io.flush, false.B, crosslineJump && !crosslineJumpLatch) - } - val crosslineJumpTarget = RegEnable(bp1.io.out.target, crosslineJump) - 
val crosslineJumpForceSeq = crosslineJump && bp1.io.out.valid - val crosslineJumpForceTgt = crosslineJumpLatch && !bp1.io.flush - - // predicted next pc - val pnpc = Mux(crosslineJump, snpc, bp1.io.out.target) - val pbrIdx = bp1.io.brIdx - val npc = Mux(io.redirect.valid, io.redirect.target, Mux(crosslineJumpLatch, crosslineJumpTarget, Mux(bp1.io.out.valid, pnpc, snpc))) - val npcIsSeq = Mux(io.redirect.valid , false.B, Mux(crosslineJumpLatch, false.B, Mux(crosslineJump, true.B, Mux(bp1.io.out.valid, false.B, true.B)))) - // Debug("[NPC] %x %x %x %x %x %x\n",crosslineJumpLatch, crosslineJumpTarget, crosslineJump, bp1.io.out.valid, pnpc, snpc) - - // val npc = Mux(io.redirect.valid, io.redirect.target, Mux(io.redirectRVC.valid, io.redirectRVC.target, snpc)) - val brIdx = Wire(UInt(4.W)) - // brIdx(0) -> branch at pc offset 0 (mod 4) - // brIdx(1) -> branch at pc offset 2 (mod 4) - // brIdx(2) -> branch at pc offset 6 (mod 8), and this inst is not rvc inst - brIdx := Cat(npcIsSeq, Mux(io.redirect.valid, 0.U, pbrIdx)) - //TODO: BP will be disabled shortly after a redirect request - - bp1.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request - bp1.io.in.pc.bits := npc // predict one cycle early - - // Debug(bp1.io.in.pc.valid, p"pc: ${Hexadecimal(pc)} npc: ${Hexadecimal(npc)}\n") - // Debug(bp1.io.out.valid, p"valid!!\n") - - bp1.io.flush := io.redirect.valid - - when (pcUpdate) { - pc := npc - // printf("[IF1] pc=%x\n", pc) - } - - Debug(pcUpdate, "[IFUPC] pc:%x pcUpdate:%d npc:%x RedValid:%d RedTarget:%x LJL:%d LJTarget:%x LJ:%d snpc:%x bpValid:%d pnpn:%x \n",pc, pcUpdate, npc, io.redirect.valid,io.redirect.target,crosslineJumpLatch,crosslineJumpTarget,crosslineJump,snpc,bp1.io.out.valid,pnpc) - - io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) - io.bpFlush := false.B - - io.imem.req.bits.apply(addr = Cat(pc(VAddrBits-1,1),0.U(1.W)), //cache will treat it as Cat(pc(63,3),0.U(3.W)) - size = "b11".U, cmd = SimpleBusCmd.read, wdata = 
0.U, wmask = 0.U, user = Cat(brIdx(3,0), npc(VAddrBits-1, 0), pc(VAddrBits-1, 0))) - io.imem.req.valid := io.out.ready - //TODO: add ctrlFlow.exceptionVec - io.imem.resp.ready := io.out.ready || io.flushVec(0) - - io.out.bits := DontCare - //inst path only uses 32bit inst, get the right inst according to pc(2) - - Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x %x %x %x \n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, pbrIdx, brIdx) - Debug(io.out.fire(), "[IFO] pc=%x inst=%x\n", io.out.bits.pc, io.out.bits.instr) - - // io.out.bits.instr := (if (XLEN == 64) io.imem.resp.bits.rdata.asTypeOf(Vec(2, UInt(32.W)))(io.out.bits.pc(2)) - // else io.imem.resp.bits.rdata) - io.out.bits.instr := io.imem.resp.bits.rdata - io.imem.resp.bits.user.map{ case x => - io.out.bits.pc := x(VAddrBits-1,0) - io.out.bits.pnpc := x(VAddrBits*2-1,VAddrBits) - io.out.bits.brIdx := x(VAddrBits*2 + 3, VAddrBits*2) - } - io.out.bits.exceptionVec(instrPageFault) := io.ipf - io.out.valid := io.imem.resp.valid && !io.flushVec(0) - - BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") - BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") -} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchEmbedded.scala b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchEmbedded.scala new file mode 100644 index 000000000..608049aad --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchEmbedded.scala @@ -0,0 +1,63 @@ +package nutcore.frontend.instr_fetch + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + +import utils._ +import bus.simplebus._ +import top.Settings +import difftest._ + +class InstrFetchEmbedded extends NutCoreModule with HasResetVector { + val io = IO(new Bundle { + val imem = new SimpleBusUC(userBits = 64, addrBits = 
VAddrBits) + val out = Decoupled(new CtrlFlowIO) + val redirect = Flipped(new RedirectIO) + val flushVec = Output(UInt(4.W)) + val bpFlush = Output(Bool()) + val ipf = Input(Bool()) + }) + + // pc + val pc = RegInit(resetVector.U(32.W)) + val pcUpdate = io.redirect.valid || io.imem.req.fire() + val snpc = pc + 4.U // sequential next pc + + val bpu = Module(new BranchPredictEmbedded) + + // predicted next pc + val pnpc = bpu.io.out.target + val npc = Mux(io.redirect.valid, io.redirect.target, Mux(bpu.io.out.valid, pnpc, snpc)) + + bpu.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request + bpu.io.in.pc.bits := npc // predict one cycle early + bpu.io.flush := io.redirect.valid + + when (pcUpdate) { pc := npc } + + io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) + io.bpFlush := false.B + + io.imem := DontCare + io.imem.req.bits.apply(addr = pc, size = "b10".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(pc, npc)) + io.imem.req.valid := io.out.ready + io.imem.resp.ready := io.out.ready || io.flushVec(0) + + io.out.bits := DontCare + io.out.bits.instr := io.imem.resp.bits.rdata + io.imem.resp.bits.user.map{ case x => + io.out.bits.pc := x(2*VAddrBits-1, VAddrBits) + io.out.bits.pnpc := x(VAddrBits-1, 0) + } + io.out.valid := io.imem.resp.valid && !io.flushVec(0) + + Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x redirect %x npc %x pc %x pnpc %x\n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, npc, pc, bpu.io.out.target) + Debug(io.out.fire(), "[IFO] pc=%x user=%x inst=%x npc=%x ipf %x\n", io.out.bits.pc, io.imem.resp.bits.user.get, io.out.bits.instr, io.out.bits.pnpc, io.ipf) + + BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") + BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchSequential.scala 
b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchSequential.scala new file mode 100644 index 000000000..d2059bd16 --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchSequential.scala @@ -0,0 +1,102 @@ + +package nutcore.frontend.instr_fetch + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + +import utils._ +import bus.simplebus._ +import top.Settings +import difftest._ + +class InstrFetchSequential extends NutCoreModule with HasResetVector { + val io = IO(new Bundle { + + val imem = new SimpleBusUC(userBits = VAddrBits*2 + 4, addrBits = VAddrBits) + val out = Decoupled(new CtrlFlowIO) + + val redirect = Flipped(new RedirectIO) + val flushVec = Output(UInt(4.W)) + val bpFlush = Output(Bool()) + val ipf = Input(Bool()) + }) + + // pc + val pc = RegInit(resetVector.U(VAddrBits.W)) + val pcUpdate = io.redirect.valid || io.imem.req.fire() + val snpc = Mux(pc(1), pc + 2.U, pc + 4.U) // sequential next pc + + val bp1 = Module(new BranchPredictSequential) + + val crosslineJump = bp1.io.crosslineJump + val crosslineJumpLatch = RegInit(false.B) + when(pcUpdate || bp1.io.flush) { + crosslineJumpLatch := Mux(bp1.io.flush, false.B, crosslineJump && !crosslineJumpLatch) + } + val crosslineJumpTarget = RegEnable(bp1.io.out.target, crosslineJump) + val crosslineJumpForceSeq = crosslineJump && bp1.io.out.valid + val crosslineJumpForceTgt = crosslineJumpLatch && !bp1.io.flush + + // predicted next pc + val pnpc = Mux(crosslineJump, snpc, bp1.io.out.target) + val pbrIdx = bp1.io.brIdx + val npc = Mux(io.redirect.valid, io.redirect.target, Mux(crosslineJumpLatch, crosslineJumpTarget, Mux(bp1.io.out.valid, pnpc, snpc))) + val npcIsSeq = Mux(io.redirect.valid , false.B, Mux(crosslineJumpLatch, false.B, Mux(crosslineJump, true.B, Mux(bp1.io.out.valid, false.B, true.B)))) + // Debug("[NPC] %x %x %x %x %x %x\n",crosslineJumpLatch, 
crosslineJumpTarget, crosslineJump, bp1.io.out.valid, pnpc, snpc) + + // val npc = Mux(io.redirect.valid, io.redirect.target, Mux(io.redirectRVC.valid, io.redirectRVC.target, snpc)) + val brIdx = Wire(UInt(4.W)) + // brIdx(0) -> branch at pc offset 0 (mod 4) + // brIdx(1) -> branch at pc offset 2 (mod 4) + // brIdx(2) -> branch at pc offset 6 (mod 8), and this inst is not rvc inst + brIdx := Cat(npcIsSeq, Mux(io.redirect.valid, 0.U, pbrIdx)) + //TODO: BP will be disabled shortly after a redirect request + + bp1.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request + bp1.io.in.pc.bits := npc // predict one cycle early + + // Debug(bp1.io.in.pc.valid, p"pc: ${Hexadecimal(pc)} npc: ${Hexadecimal(npc)}\n") + // Debug(bp1.io.out.valid, p"valid!!\n") + + bp1.io.flush := io.redirect.valid + + when (pcUpdate) { + pc := npc + // printf("[IF1] pc=%x\n", pc) + } + + Debug(pcUpdate, "[IFUPC] pc:%x pcUpdate:%d npc:%x RedValid:%d RedTarget:%x LJL:%d LJTarget:%x LJ:%d snpc:%x bpValid:%d pnpn:%x \n",pc, pcUpdate, npc, io.redirect.valid,io.redirect.target,crosslineJumpLatch,crosslineJumpTarget,crosslineJump,snpc,bp1.io.out.valid,pnpc) + + io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) + io.bpFlush := false.B + + io.imem.req.bits.apply(addr = Cat(pc(VAddrBits-1,1),0.U(1.W)), //cache will treat it as Cat(pc(63,3),0.U(3.W)) + size = "b11".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(brIdx(3,0), npc(VAddrBits-1, 0), pc(VAddrBits-1, 0))) + io.imem.req.valid := io.out.ready + //TODO: add ctrlFlow.exceptionVec + io.imem.resp.ready := io.out.ready || io.flushVec(0) + + io.out.bits := DontCare + //inst path only uses 32bit inst, get the right inst according to pc(2) + + Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x %x %x %x \n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, pbrIdx, brIdx) + Debug(io.out.fire(), "[IFO] pc=%x inst=%x\n", io.out.bits.pc, io.out.bits.instr) + + // io.out.bits.instr := (if 
(XLEN == 64) io.imem.resp.bits.rdata.asTypeOf(Vec(2, UInt(32.W)))(io.out.bits.pc(2)) + // else io.imem.resp.bits.rdata) + io.out.bits.instr := io.imem.resp.bits.rdata + io.imem.resp.bits.user.map{ case x => + io.out.bits.pc := x(VAddrBits-1,0) + io.out.bits.pnpc := x(VAddrBits*2-1,VAddrBits) + io.out.bits.brIdx := x(VAddrBits*2 + 3, VAddrBits*2) + } + io.out.bits.exceptionVec(instrPageFault) := io.ipf + io.out.valid := io.imem.resp.valid && !io.flushVec(0) + + BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") + BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") +} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictCommons.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictCommons.scala new file mode 100644 index 000000000..9d5f75eb8 --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictCommons.scala @@ -0,0 +1,60 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+***************************************************************************************/ + +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class TableAddr(val idxBits: Int) extends NutCoreBundle { + val padLen = if (Settings.get("IsRV32") || !Settings.get("EnableOutOfOrderExec")) 2 else 3 + def tagBits = VAddrBits - padLen - idxBits + + //val res = UInt((AddrBits - VAddrBits).W) + val tag = UInt(tagBits.W) + val idx = UInt(idxBits.W) + val pad = UInt(padLen.W) + + def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this) + def getTag(x: UInt) = fromUInt(x).tag + def getIdx(x: UInt) = fromUInt(x).idx +} + +object BTBtype { + def B = "b00".U // branch + def J = "b01".U // jump + def I = "b10".U // indirect + def R = "b11".U // return + + def apply() = UInt(2.W) +} + +class BPUUpdateReq extends NutCoreBundle { + val valid = Output(Bool()) + val pc = Output(UInt(VAddrBits.W)) + val isMissPredict = Output(Bool()) + val actualTarget = Output(UInt(VAddrBits.W)) + val actualTaken = Output(Bool()) // for branch + val fuOpType = Output(FuOpType()) + val btbType = Output(BTBtype()) + val isRVC = Output(Bool()) // for ras, save PC+2 to stack if is RVC +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDummy.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDummy.scala new file mode 100644 index 000000000..b1ce00d9d --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDummy.scala @@ -0,0 +1,28 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class BranchPredictDummy extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = 
Flipped(Valid((UInt(VAddrBits.W)))) } + val out = new RedirectIO + val valid = Output(Bool()) + val flush = Input(Bool()) + val ignore = Input(Bool()) + val brIdx = Output(Vec(4, Bool())) + }) + // Note: when io.ignore, io.out.valid must be false.B for this pc + // This limitation is for cross instline inst fetch logic + io.valid := io.in.pc.valid // Predictor is returning a result + io.out.valid := false.B // Need redirect + io.out.target := DontCare // Redirect target + io.out.rtype := DontCare // Predictor does not need to care about it + io.brIdx := VecInit(Seq.fill(4)(false.B)) // Which inst triggers jump +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDynamic.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDynamic.scala new file mode 100644 index 000000000..9c5b9e4ce --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDynamic.scala @@ -0,0 +1,143 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +// nextline predictor generates NPC from current NPC in 1 cycle +class BranchPredictDynamic extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } + val out = new RedirectIO + val flush = Input(Bool()) + val brIdx = Output(Vec(4, Bool())) + // val target = Output(Vec(4, UInt(VAddrBits.W))) + // val instValid = Output(UInt(4.W)) // now instValid is generated in IFU + val crosslineJump = Output(Bool()) + }) + + val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) + + // BTB + val NRbtb = 512 + val btbAddr = new TableAddr(log2Up(NRbtb >> 2)) + def btbEntry() = new Bundle { + val tag = UInt(btbAddr.tagBits.W) + val _type = UInt(2.W) + val target = UInt(VAddrBits.W) + val crosslineJump = Bool() + val valid = Bool() + } 
+ + val btb = List.fill(4)(Module(new SRAMTemplate(btbEntry(), set = NRbtb >> 2, shouldReset = true, holdRead = true, singlePort = true))) + // flush BTB when executing fence.i + val flushBTB = WireInit(false.B) + val flushTLB = WireInit(false.B) + BoringUtils.addSink(flushBTB, "MOUFlushICache") + BoringUtils.addSink(flushTLB, "MOUFlushTLB") + (0 to 3).map(i => (btb(i).reset := reset.asBool || (flushBTB || flushTLB))) + + Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) + + (0 to 3).map(i => (btb(i).io.r.req.valid := io.in.pc.valid)) + (0 to 3).map(i => (btb(i).io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits))) + + + val btbRead = Wire(Vec(4, btbEntry())) + (0 to 3).map(i => (btbRead(i) := btb(i).io.r.resp.data(0))) + // since there is one cycle latency to read SyncReadMem, + // we should latch the input pc for one cycle + val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) + val btbHit = Wire(Vec(4, Bool())) + (0 to 3).map(i => btbHit(i) := btbRead(i).valid && btbRead(i).tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb(i).io.r.req.fire(), init = false.B)) + // btbHit will ignore pc(2,0). 
pc(2,0) is used to build brIdx + val crosslineJump = btbRead(3).crosslineJump && btbHit(3) && !io.brIdx(0) && !io.brIdx(1) && !io.brIdx(2) + io.crosslineJump := crosslineJump + // val crosslineJumpLatch = RegNext(crosslineJump) + // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) + + // PHT + val pht = List.fill(4)(Mem(NRbtb >> 2, UInt(2.W))) + val phtTaken = Wire(Vec(4, Bool())) + (0 to 3).map(i => (phtTaken(i) := RegEnable(pht(i).read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid))) + + // RAS + val NRras = 16 + val ras = Mem(NRras, UInt(VAddrBits.W)) + val sp = Counter(NRras) + val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) + + // update + val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) + val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) + BoringUtils.addSink(req, "bpuUpdateReq") + + btbWrite.tag := btbAddr.getTag(req.pc) + btbWrite.target := req.actualTarget + btbWrite._type := req.btbType + btbWrite.crosslineJump := req.pc(2,1)==="h3".U && !req.isRVC // ((pc_offset % 8) == 6) && inst is 32bit in length + btbWrite.valid := true.B + // NOTE: We only update BTB at a miss prediction. + // If a miss prediction is found, the pipeline will be flushed + // in the next cycle. Therefore it is safe to use single-port + // SRAM to implement BTB, since write requests have higher priority + // than read request. Again, since the pipeline will be flushed + // in the next cycle, the read request will be useless. 
+ (0 to 3).map(i => btb(i).io.w.req.valid := req.isMissPredict && req.valid && i.U === req.pc(2,1)) + (0 to 3).map(i => btb(i).io.w.req.bits.setIdx := btbAddr.getIdx(req.pc)) + (0 to 3).map(i => btb(i).io.w.req.bits.data := btbWrite) + + val getpht = LookupTree(req.pc(2,1), List.tabulate(4)(i => (i.U -> pht(i).read(btbAddr.getIdx(req.pc))))) + val cnt = RegNext(getpht) + val reqLatch = RegNext(req) + when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { + val taken = reqLatch.actualTaken + val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) + val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) + when (wen) { + (0 to 3).map(i => when(i.U === reqLatch.pc(2,1)){pht(i).write(btbAddr.getIdx(reqLatch.pc), newCnt)}) + } + } + when (req.valid) { + when (req.fuOpType === ALUOpType.call) { + ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) + sp.value := sp.value + 1.U + } + .elsewhen (req.fuOpType === ALUOpType.ret) { + when(sp.value === 0.U) { + // RAS empty, do nothing + } + sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) + } + } + + def genInstValid(pc: UInt) = LookupTree(pc(2,1), List( + "b00".U -> "b1111".U, + "b01".U -> "b1110".U, + "b10".U -> "b1100".U, + "b11".U -> "b1000".U + )) + + val pcLatchValid = genInstValid(pcLatch) + + val target = Wire(Vec(4, UInt(VAddrBits.W))) + (0 to 3).map(i => target(i) := Mux(btbRead(i)._type === BTBtype.R, rasTarget, btbRead(i).target)) + (0 to 3).map(i => io.brIdx(i) := btbHit(i) && pcLatchValid(i).asBool && Mux(btbRead(i)._type === BTBtype.B, phtTaken(i), true.B) && btbRead(i).valid) + io.out.target := PriorityMux(io.brIdx, target) + io.out.valid := io.brIdx.asUInt.orR + io.out.rtype := 0.U + Debug(io.out.valid, "[BPU] pc %x io.brIdx.asUInt %b phtTaken %x %x %x %x valid %x %x %x %x\n", pcLatch, io.brIdx.asUInt, phtTaken(0), phtTaken(1), phtTaken(2), phtTaken(3), btbRead(0).valid, btbRead(1).valid, btbRead(2).valid, btbRead(3).valid) + + // io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump + // Note: + // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict + // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline + // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) + // by using `instline`, we mean a 64 bit instfetch result from imem + // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictEmbedded.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictEmbedded.scala new file mode 100644 index 000000000..55332837d --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictEmbedded.scala @@ -0,0 +1,97 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class BranchPredictEmbedded extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = Flipped(Valid((UInt(32.W)))) } + val out = new RedirectIO + val flush = Input(Bool()) + }) + + val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) + + // BTB + val NRbtb = 512 + val btbAddr = new TableAddr(log2Up(NRbtb)) + def btbEntry() = new Bundle { + val tag = UInt(btbAddr.tagBits.W) + val _type = UInt(2.W) + val target = UInt(32.W) + } + + val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) + btb.io.r.req.valid := io.in.pc.valid + btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) + + val btbRead = Wire(btbEntry()) + btbRead := 
btb.io.r.resp.data(0) + // since there is one cycle latency to read SyncReadMem, + // we should latch the input pc for one cycle + val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) + val btbHit = btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.ready, init = false.B) + + // PHT + val pht = Mem(NRbtb, UInt(2.W)) + val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) + + // RAS + val NRras = 16 + val ras = Mem(NRras, UInt(32.W)) + val sp = Counter(NRras) + val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) + + // update + val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) + val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) + BoringUtils.addSink(req, "bpuUpdateReq") + + btbWrite.tag := btbAddr.getTag(req.pc) + btbWrite.target := req.actualTarget + btbWrite._type := req.btbType + // NOTE: We only update BTB at a miss prediction. + // If a miss prediction is found, the pipeline will be flushed + // in the next cycle. Therefore it is safe to use single-port + // SRAM to implement BTB, since write requests have higher priority + // than read request. Again, since the pipeline will be flushed + // in the next cycle, the read request will be useless. 
+ btb.io.w.req.valid := req.isMissPredict && req.valid + btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) + btb.io.w.req.bits.data := btbWrite + + val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) + val reqLatch = RegNext(req) + when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { + val taken = reqLatch.actualTaken + val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) + val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) + when (wen) { + pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) + } + } + when (req.valid) { + when (req.fuOpType === ALUOpType.call) { + ras.write(sp.value + 1.U, req.pc + 4.U) + sp.value := sp.value + 1.U + } + .elsewhen (req.fuOpType === ALUOpType.ret) { + sp.value := sp.value - 1.U + } + } + + val flushBTB = WireInit(false.B) + val flushTLB = WireInit(false.B) + BoringUtils.addSink(flushBTB, "MOUFlushICache") + BoringUtils.addSink(flushTLB, "MOUFlushTLB") + + io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) + io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) + io.out.rtype := 0.U +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictLegacy.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictLegacy.scala new file mode 100644 index 000000000..ddc41a6f1 --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictLegacy.scala @@ -0,0 +1,39 @@ +// package nutcore.frontend.instr_fetch.branch_predict + +// import chisel3._ +// import chisel3.util._ +// import chisel3.util.experimental.BoringUtils + +// import nutcore._ + +// import utils._ +// import top.Settings + +//---- Legacy BPUs ---- +/* +class BranchPredictLegacy extends NutCoreModule { + val io = IO(new Bundle { + val in = Flipped(Valid(new CtrlFlowIO)) + val out = new RedirectIO + }) + + val instr = io.in.bits.instr + val immJ = SignExt(Cat(instr(31), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W)), XLEN) + val 
immB = SignExt(Cat(instr(31), instr(7), instr(30, 25), instr(11, 8), 0.U(1.W)), XLEN) + val table = Array( + RV32I_BRUInstr.JAL -> List(immJ, true.B), + RV32I_BRUInstr.BNE -> List(immB, instr(31)), + RV32I_BRUInstr.BEQ -> List(immB, instr(31)), + RV32I_BRUInstr.BLT -> List(immB, instr(31)), + RV32I_BRUInstr.BGE -> List(immB, instr(31)), + RV32I_BRUInstr.BLTU -> List(immB, instr(31)), + RV32I_BRUInstr.BGEU -> List(immB, instr(31)) + ) + val default = List(immB, false.B) + val offset :: predict :: Nil = ListLookup(instr, default, table) + + io.out.target := io.in.bits.pc + offset + io.out.valid := io.in.valid && predict(0) + io.out.rtype := 0.U +} +*/ diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictSequential.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictSequential.scala new file mode 100644 index 000000000..314d83976 --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictSequential.scala @@ -0,0 +1,162 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class BranchPredictSequential extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } + val out = new RedirectIO + val flush = Input(Bool()) + val brIdx = Output(UInt(3.W)) + val crosslineJump = Output(Bool()) + }) + + val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) + + // BTB + val NRbtb = 512 + val btbAddr = new TableAddr(log2Up(NRbtb)) + def btbEntry() = new Bundle { + val tag = UInt(btbAddr.tagBits.W) + val _type = UInt(2.W) + val target = UInt(VAddrBits.W) + val brIdx = UInt(3.W) + val valid = Bool() + } + + val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) + // flush BTB when executing fence.i 
+ val flushBTB = WireInit(false.B) + val flushTLB = WireInit(false.B) + BoringUtils.addSink(flushBTB, "MOUFlushICache") + BoringUtils.addSink(flushTLB, "MOUFlushTLB") + btb.reset := reset.asBool || (flushBTB || flushTLB) + Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) + + btb.io.r.req.valid := io.in.pc.valid + btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) + + + val btbRead = Wire(btbEntry()) + btbRead := btb.io.r.resp.data(0) + // since there is one cycle latency to read SyncReadMem, + // we should latch the input pc for one cycle + val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) + val btbHit = btbRead.valid && btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.fire(), init = false.B) && !(pcLatch(1) && btbRead.brIdx(0)) + // btbHit will ignore pc(1,0). pc(1,0) is used to build brIdx + // !(pcLatch(1) && btbRead.brIdx(0)) is used to deal with the following case: + // ------------------------------------------------- + // 0 jump rvc // marked as "take branch" in BTB + // 2 xxx rvc <-- pc // misrecognize this instr as "btb hit" with target of previous jump instr + // ------------------------------------------------- + val crosslineJump = btbRead.brIdx(2) && btbHit + io.crosslineJump := crosslineJump + // val crosslineJumpLatch = RegNext(crosslineJump) + // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) + Debug(btbHit, "[BTBHT1] %d pc=%x tag=%x,%x index=%x bridx=%x tgt=%x,%x flush %x type:%x\n", GTimer(), pcLatch, btbRead.tag, btbAddr.getTag(pcLatch), btbAddr.getIdx(pcLatch), btbRead.brIdx, btbRead.target, io.out.target, flush,btbRead._type) + Debug(btbHit, "[BTBHT2] btbRead.brIdx %x mask %x\n", btbRead.brIdx, Cat(crosslineJump, Fill(2, io.out.valid))) + // Debug(btbHit, "[BTBHT5] btbReqValid:%d btbReqSetIdx:%x\n",btb.io.r.req.valid, btb.io.r.req.bits.setId) + + // PHT + val pht = Mem(NRbtb, UInt(2.W)) + val phtTaken = 
RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) + + // RAS + + val NRras = 16 + val ras = Mem(NRras, UInt(VAddrBits.W)) + // val raBrIdxs = Mem(NRras, UInt(2.W)) + val sp = Counter(NRras) + val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) + // val rasBrIdx = RegEnable(raBrIdxs.read(sp.value), io.in.pc.valid) + + // update + val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) + val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) + BoringUtils.addSink(req, "bpuUpdateReq") + + Debug(req.valid, "[BTBUP] pc=%x tag=%x index=%x bridx=%x tgt=%x type=%x\n", req.pc, btbAddr.getTag(req.pc), btbAddr.getIdx(req.pc), Cat(req.pc(1), ~req.pc(1)), req.actualTarget, req.btbType) + + //val fflag = req.btbType===3.U && btb.io.w.req.valid && btb.io.w.req.bits.setIdx==="hc9".U + //when(fflag && GTimer()>2888000.U) { + // Debug("%d\n", GTimer()) + // Debug("[BTBHT6] btbWrite.type is BTBtype.R/RET!!! Inpc:%x btbWrite.brIdx:%x setIdx:%x\n", io.in.pc.bits, btbWrite.brIdx, btb.io.w.req.bits.setIdx) + // Debug("[BTBHT6] tag:%x target:%x _type:%x bridx:%x\n", btbWrite.tag,btbWrite.target,btbWrite._type,btbWrite.brIdx) + // Debug(p"[BTBHT6] req:${req} \n") + //} + //Debug("[BTBHT5] tag: target:%x type:%d brIdx:%d\n", req.actualTarget, req.btbType, Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1))) + + btbWrite.tag := btbAddr.getTag(req.pc) + btbWrite.target := req.actualTarget + btbWrite._type := req.btbType + btbWrite.brIdx := Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1)) + btbWrite.valid := true.B + // NOTE: We only update BTB at a miss prediction. + // If a miss prediction is found, the pipeline will be flushed + // in the next cycle. Therefore it is safe to use single-port + // SRAM to implement BTB, since write requests have higher priority + // than read request. Again, since the pipeline will be flushed + // in the next cycle, the read request will be useless. 
+ btb.io.w.req.valid := req.isMissPredict && req.valid + btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) + btb.io.w.req.bits.data := btbWrite + + //Debug(true) { + //when (btb.io.w.req.valid && btbWrite.tag === btbAddr.getTag("hffffffff803541a4".U)) { + // Debug("[BTBWrite] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) + //} + //} + + //when (GTimer() > 77437484.U && btb.io.w.req.valid) { + // Debug("[BTBWrite-ALL] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) + //} + + val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) + val reqLatch = RegNext(req) + when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { + val taken = reqLatch.actualTaken + val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) + val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) + when (wen) { + pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) + //Debug(){ + //Debug("BPUPDATE: pc %x cnt %x\n", reqLatch.pc, newCnt) + //} + } + } + when (req.valid) { + when (req.fuOpType === ALUOpType.call) { + ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) + // raBrIdxs.write(sp.value + 1.U, Mux(req.pc(1), 2.U, 1.U)) + sp.value := sp.value + 1.U + } + .elsewhen (req.fuOpType === ALUOpType.ret) { + when(sp.value === 0.U) { + //Debug("ATTTTT: sp.value is 0.U\n") //TODO: sp.value may equal to 0.U + } + sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) //TODO: sp.value may less than 0.U + } + } + + io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) + // io.out.target := Mux(crosslineJumpLatch && !flush, crosslineJumpTarget, Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target)) + // io.out.brIdx := btbRead.brIdx & Fill(3, io.out.valid) + io.brIdx := btbRead.brIdx & Cat(true.B, crosslineJump, Fill(2, io.out.valid)) + io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B && rasTarget=/=0.U) //TODO: add rasTarget=/=0.U, need fix + io.out.rtype := 0.U + // io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump + // Note: + // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict + // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline + // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) + // by using `instline`, we mean a 64 bit instfetch result from imem + // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. +} + diff --git a/src/main/scala/nutcore/isa/Priviledged.scala b/src/main/scala/nutcore/isa/Priviledged.scala index af483ee18..6c0067592 100644 --- a/src/main/scala/nutcore/isa/Priviledged.scala +++ b/src/main/scala/nutcore/isa/Priviledged.scala @@ -22,17 +22,17 @@ import chisel3.util._ import top.Settings object Priviledged extends HasInstrType { - def ECALL = BitPat("b000000000000_00000_000_00000_1110011") - def EBREAK = BitPat("b000000000001_00000_000_00000_1110011") - def MRET = BitPat("b001100000010_00000_000_00000_1110011") - def SRET = BitPat("b000100000010_00000_000_00000_1110011") - def SFANCE_VMA = BitPat("b0001001_?????_?????_000_00000_1110011") - def FENCE = BitPat("b????????????_?????_000_?????_0001111") - def WFI = BitPat("b0001000_00101_00000_000_00000_1110011") + def ECALL = BitPat("b000000000000_00000_000_00000_1110011") + def EBREAK = BitPat("b000000000001_00000_000_00000_1110011") + def MRET = BitPat("b001100000010_00000_000_00000_1110011") + def SRET = BitPat("b000100000010_00000_000_00000_1110011") + def SFENCE_VMA = BitPat("b0001001_?????_?????_000_00000_1110011") + def FENCE = 
BitPat("b????????????_?????_000_?????_0001111") + def WFI = BitPat("b0001000_00101_00000_000_00000_1110011") val table_s = Array( SRET -> List(InstrI, FuType.csr, CSROpType.jmp), - SFANCE_VMA -> List(InstrR, FuType.mou, MOUOpType.sfence_vma) + SFENCE_VMA -> List(InstrR, FuType.mou, MOUOpType.sfence_vma) ) val table = Array( diff --git a/src/main/scala/nutcore/isa/RVI.scala b/src/main/scala/nutcore/isa/RVI.scala index bdd214b64..e9d748cc7 100644 --- a/src/main/scala/nutcore/isa/RVI.scala +++ b/src/main/scala/nutcore/isa/RVI.scala @@ -19,6 +19,8 @@ package nutcore import chisel3._ import chisel3.util._ +import nutcore.frontend.instr_fetch.branch_predict._ + object RV32I_ALUInstr extends HasInstrType with HasNutCoreParameter { def ADDI = BitPat("b????????????_?????_000_?????_0010011") def SLLI = if (XLEN == 32) BitPat("b0000000?????_?????_001_?????_0010011") diff --git a/src/main/scala/nutcore/mem/Cache.scala b/src/main/scala/nutcore/mem/Cache.scala deleted file mode 100644 index cc02a3635..000000000 --- a/src/main/scala/nutcore/mem/Cache.scala +++ /dev/null @@ -1,681 +0,0 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. 
-***************************************************************************************/ - -package nutcore - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import bus.simplebus._ -import bus.axi4._ -import chisel3.experimental.IO -import utils._ -import top.Settings - -case class CacheConfig ( - ro: Boolean = false, - name: String = "cache", - userBits: Int = 0, - idBits: Int = 0, - cacheLevel: Int = 1, - - totalSize: Int = 32, // Kbytes - ways: Int = 4 -) - -sealed trait HasCacheConst { - implicit val cacheConfig: CacheConfig - - val PAddrBits: Int - val XLEN: Int - - val cacheName = cacheConfig.name - val userBits = cacheConfig.userBits - val idBits = cacheConfig.idBits - - val ro = cacheConfig.ro - val hasCoh = !ro - val hasCohInt = (if (hasCoh) 1 else 0) - val hasPrefetch = cacheName == "l2cache" - - val cacheLevel = cacheConfig.cacheLevel - val TotalSize = cacheConfig.totalSize - val Ways = cacheConfig.ways - val LineSize = XLEN // byte - val LineBeats = LineSize / 8 //DATA WIDTH 64 - val Sets = TotalSize * 1024 / LineSize / Ways - val OffsetBits = log2Up(LineSize) - val IndexBits = log2Up(Sets) - val WordIndexBits = log2Up(LineBeats) - val TagBits = PAddrBits - OffsetBits - IndexBits - - val debug = false - - def addrBundle = new Bundle { - val tag = UInt(TagBits.W) - val index = UInt(IndexBits.W) - val wordIndex = UInt(WordIndexBits.W) - val byteOffset = UInt((if (XLEN == 64) 3 else 2).W) - } - - def CacheMetaArrayReadBus() = new SRAMReadBus(new MetaBundle, set = Sets, way = Ways) - def CacheDataArrayReadBus() = new SRAMReadBus(new DataBundle, set = Sets * LineBeats, way = Ways) - def CacheMetaArrayWriteBus() = new SRAMWriteBus(new MetaBundle, set = Sets, way = Ways) - def CacheDataArrayWriteBus() = new SRAMWriteBus(new DataBundle, set = Sets * LineBeats, way = Ways) - - def getMetaIdx(addr: UInt) = addr.asTypeOf(addrBundle).index - def getDataIdx(addr: UInt) = Cat(addr.asTypeOf(addrBundle).index, 
addr.asTypeOf(addrBundle).wordIndex) - - def isSameWord(a1: UInt, a2: UInt) = ((a1 >> 2) === (a2 >> 2)) - def isSetConflict(a1: UInt, a2: UInt) = (a1.asTypeOf(addrBundle).index === a2.asTypeOf(addrBundle).index) -} - -sealed abstract class CacheBundle(implicit cacheConfig: CacheConfig) extends Bundle with HasNutCoreParameter with HasCacheConst -sealed abstract class CacheModule(implicit cacheConfig: CacheConfig) extends Module with HasNutCoreParameter with HasCacheConst with HasNutCoreLog - -sealed class MetaBundle(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val tag = Output(UInt(TagBits.W)) - val valid = Output(Bool()) - val dirty = Output(Bool()) - - def apply(tag: UInt, valid: Bool, dirty: Bool) = { - this.tag := tag - this.valid := valid - this.dirty := dirty - this - } -} - -sealed class DataBundle(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val data = Output(UInt(DataBits.W)) - - def apply(data: UInt) = { - this.data := data - this - } -} - -sealed class Stage1IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val req = new SimpleBusReqBundle(userBits = userBits, idBits = idBits) -} - -class CacheIO(implicit val cacheConfig: CacheConfig) extends Bundle with HasNutCoreParameter with HasCacheConst { - val in = Flipped(new SimpleBusUC(userBits = userBits, idBits = idBits)) - val flush = Input(UInt(2.W)) - val out = new SimpleBusC - val mmio = new SimpleBusUC - val empty = Output(Bool()) -} -trait HasCacheIO { - implicit val cacheConfig: CacheConfig - val io = IO(new CacheIO) -} - -// meta read -sealed class CacheStage1(implicit val cacheConfig: CacheConfig) extends CacheModule { - class CacheStage1IO extends Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, idBits = idBits))) - val out = Decoupled(new Stage1IO) - val metaReadBus = CacheMetaArrayReadBus() - val dataReadBus = CacheDataArrayReadBus() - } - val io = IO(new CacheStage1IO) - - if (ro) when (io.in.fire()) { 
assert(!io.in.bits.isWrite()) } - Debug(io.in.fire(), "[L1$] cache stage1, addr in: %x, user: %x id: %x\n", io.in.bits.addr, io.in.bits.user.getOrElse(0.U), io.in.bits.id.getOrElse(0.U)) - - // read meta array and data array - val readBusValid = io.in.valid && io.out.ready - io.metaReadBus.apply(valid = readBusValid, setIdx = getMetaIdx(io.in.bits.addr)) - io.dataReadBus.apply(valid = readBusValid, setIdx = getDataIdx(io.in.bits.addr)) - - io.out.bits.req := io.in.bits - io.out.valid := io.in.valid && io.metaReadBus.req.ready && io.dataReadBus.req.ready - io.in.ready := (!io.in.valid || io.out.fire()) && io.metaReadBus.req.ready && io.dataReadBus.req.ready - - Debug("in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, addr = %x, cmd = %x, dataReadBus.req.valid = %d\n", io.in.ready, io.in.valid, io.out.valid, io.out.ready, io.in.bits.addr, io.in.bits.cmd, io.dataReadBus.req.valid) -} - -sealed class Stage2IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val req = new SimpleBusReqBundle(userBits = userBits, idBits = idBits) - val metas = Vec(Ways, new MetaBundle) - val datas = Vec(Ways, new DataBundle) - val hit = Output(Bool()) - val waymask = Output(UInt(Ways.W)) - val mmio = Output(Bool()) - val isForwardData = Output(Bool()) - val forwardData = Output(CacheDataArrayWriteBus().req.bits) -} - -// check -sealed class CacheStage2(implicit val cacheConfig: CacheConfig) extends CacheModule { - class CacheStage2IO extends Bundle { - val in = Flipped(Decoupled(new Stage1IO)) - val out = Decoupled(new Stage2IO) - val metaReadResp = Flipped(Vec(Ways, new MetaBundle)) - val dataReadResp = Flipped(Vec(Ways, new DataBundle)) - val metaWriteBus = Input(CacheMetaArrayWriteBus()) - val dataWriteBus = Input(CacheDataArrayWriteBus()) - } - val io = IO(new CacheStage2IO) - - val req = io.in.bits.req - val addr = req.addr.asTypeOf(addrBundle) - - val isForwardMeta = io.in.valid && io.metaWriteBus.req.valid && io.metaWriteBus.req.bits.setIdx === 
getMetaIdx(req.addr) - val isForwardMetaReg = RegInit(false.B) - when (isForwardMeta) { isForwardMetaReg := true.B } - when (io.in.fire() || !io.in.valid) { isForwardMetaReg := false.B } - val forwardMetaReg = RegEnable(io.metaWriteBus.req.bits, isForwardMeta) - - val metaWay = Wire(Vec(Ways, chiselTypeOf(forwardMetaReg.data))) - val pickForwardMeta = isForwardMetaReg || isForwardMeta - val forwardMeta = Mux(isForwardMeta, io.metaWriteBus.req.bits, forwardMetaReg) - val forwardWaymask = forwardMeta.waymask.getOrElse("1".U).asBools - forwardWaymask.zipWithIndex.map { case (w, i) => - metaWay(i) := Mux(pickForwardMeta && w, forwardMeta.data, io.metaReadResp(i)) - } - - val hitVec = VecInit(metaWay.map(m => m.valid && (m.tag === addr.tag) && io.in.valid)).asUInt - val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U - - val invalidVec = VecInit(metaWay.map(m => !m.valid)).asUInt - val hasInvalidWay = invalidVec.orR - val refillInvalidWaymask = Mux(invalidVec >= 8.U, "b1000".U, - Mux(invalidVec >= 4.U, "b0100".U, - Mux(invalidVec >= 2.U, "b0010".U, "b0001".U))) - - // val waymask = Mux(io.out.bits.hit, hitVec, victimWaymask) - val waymask = Mux(io.out.bits.hit, hitVec, Mux(hasInvalidWay, refillInvalidWaymask, victimWaymask)) - when(PopCount(waymask) > 1.U){ - metaWay.map(m => Debug("[ERROR] metaWay %x metat %x reqt %x\n", m.valid, m.tag, addr.tag)) - io.metaReadResp.map(m => Debug("[ERROR] metaReadResp %x metat %x reqt %x\n", m.valid, m.tag, addr.tag)) - Debug("[ERROR] forwardMetaReg isForwardMetaReg %x %x metat %x wm %b\n", isForwardMetaReg, forwardMetaReg.data.valid, forwardMetaReg.data.tag, forwardMetaReg.waymask.get) - Debug("[ERROR] forwardMeta isForwardMeta %x %x metat %x wm %b\n", isForwardMeta, io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.tag, io.metaWriteBus.req.bits.waymask.get) - } - when(PopCount(waymask) > 1.U){Debug("[ERROR] hit %b wmask %b hitvec %b\n", io.out.bits.hit, 
forwardMeta.waymask.getOrElse("1".U), hitVec)} - assert(!(io.in.valid && PopCount(waymask) > 1.U)) - - io.out.bits.metas := metaWay - io.out.bits.hit := io.in.valid && hitVec.orR - io.out.bits.waymask := waymask - io.out.bits.datas := io.dataReadResp - io.out.bits.mmio := AddressSpace.isMMIO(req.addr) - - val isForwardData = io.in.valid && (io.dataWriteBus.req match { case r => - r.valid && r.bits.setIdx === getDataIdx(req.addr) - }) - val isForwardDataReg = RegInit(false.B) - when (isForwardData) { isForwardDataReg := true.B } - when (io.in.fire() || !io.in.valid) { isForwardDataReg := false.B } - val forwardDataReg = RegEnable(io.dataWriteBus.req.bits, isForwardData) - io.out.bits.isForwardData := isForwardDataReg || isForwardData - io.out.bits.forwardData := Mux(isForwardData, io.dataWriteBus.req.bits, forwardDataReg) - - io.out.bits.req <> req - io.out.valid := io.in.valid - io.in.ready := !io.in.valid || io.out.fire() - - Debug("[isFD:%d isFDreg:%d inFire:%d invalid:%d \n", isForwardData, isForwardDataReg, io.in.fire(), io.in.valid) - Debug("[isFM:%d isFMreg:%d metawreq:%x widx:%x ridx:%x \n", isForwardMeta, isForwardMetaReg, io.metaWriteBus.req.valid, io.metaWriteBus.req.bits.setIdx, getMetaIdx(req.addr)) -} - -// writeback -sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheModule { - class CacheStage3IO extends Bundle { - val in = Flipped(Decoupled(new Stage2IO)) - val out = Decoupled(new SimpleBusRespBundle(userBits = userBits, idBits = idBits)) - val isFinish = Output(Bool()) - val flush = Input(Bool()) - val dataReadBus = CacheDataArrayReadBus() - val dataWriteBus = CacheDataArrayWriteBus() - val metaWriteBus = CacheMetaArrayWriteBus() - - val mem = new SimpleBusUC - val mmio = new SimpleBusUC - val cohResp = Decoupled(new SimpleBusRespBundle) - - // use to distinguish prefetch request and normal request - val dataReadRespToL1 = Output(Bool()) - } - val io = IO(new CacheStage3IO) - - val metaWriteArb = Module(new 
Arbiter(CacheMetaArrayWriteBus().req.bits, 2)) - val dataWriteArb = Module(new Arbiter(CacheDataArrayWriteBus().req.bits, 2)) - - val req = io.in.bits.req - val addr = req.addr.asTypeOf(addrBundle) - val mmio = io.in.valid && io.in.bits.mmio - val hit = io.in.valid && io.in.bits.hit - val miss = io.in.valid && !io.in.bits.hit - val probe = io.in.valid && hasCoh.B && req.isProbe() - val hitReadBurst = hit && req.isReadBurst() - val meta = Mux1H(io.in.bits.waymask, io.in.bits.metas) - assert(!(mmio && hit), "MMIO request should not hit in cache") - - - // this is ugly - if (cacheName == "dcache") { - BoringUtils.addSource(mmio, "lsuMMIO") - } - - val useForwardData = io.in.bits.isForwardData && io.in.bits.waymask === io.in.bits.forwardData.waymask.getOrElse("b1".U) - val dataReadArray = Mux1H(io.in.bits.waymask, io.in.bits.datas).data - val dataRead = Mux(useForwardData, io.in.bits.forwardData.data.data, dataReadArray) - val wordMask = Mux(!ro.B && req.isWrite(), MaskExpand(req.wmask), 0.U(DataBits.W)) - - val writeL2BeatCnt = Counter(LineBeats) - when(io.out.fire() && (req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast())) { - writeL2BeatCnt.inc() - } - - val hitWrite = hit && req.isWrite() - val dataHitWriteBus = Wire(CacheDataArrayWriteBus()).apply( - data = Wire(new DataBundle).apply(MaskData(dataRead, req.wdata, wordMask)), - valid = hitWrite, setIdx = Cat(addr.index, Mux(req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast(), writeL2BeatCnt.value, addr.wordIndex)), waymask = io.in.bits.waymask) - - val metaHitWriteBus = Wire(CacheMetaArrayWriteBus()).apply( - valid = hitWrite && !meta.dirty, setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask, - data = Wire(new MetaBundle).apply(tag = meta.tag, valid = true.B, dirty = (!ro).B) - ) - - val s_idle :: s_memReadReq :: s_memReadResp :: s_memWriteReq :: s_memWriteResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: s_release :: Nil = Enum(9) - val state = RegInit(s_idle) - val needFlush = RegInit(false.B) 
- - when (io.flush && (state =/= s_idle)) { needFlush := true.B } - when (io.out.fire() && needFlush) { needFlush := false.B } - - val readBeatCnt = Counter(LineBeats) - val writeBeatCnt = Counter(LineBeats) - - val s2_idle :: s2_dataReadWait :: s2_dataOK :: Nil = Enum(3) - val state2 = RegInit(s2_idle) - - io.dataReadBus.apply(valid = (state === s_memWriteReq || state === s_release) && (state2 === s2_idle), - setIdx = Cat(addr.index, Mux(state === s_release, readBeatCnt.value, writeBeatCnt.value))) - val dataWay = RegEnable(io.dataReadBus.resp.data, state2 === s2_dataReadWait) - val dataHitWay = Mux1H(io.in.bits.waymask, dataWay).data - - switch (state2) { - is (s2_idle) { when (io.dataReadBus.req.fire()) { state2 := s2_dataReadWait } } - is (s2_dataReadWait) { state2 := s2_dataOK } - is (s2_dataOK) { when (io.mem.req.fire() || io.cohResp.fire() || hitReadBurst && io.out.ready) { state2 := s2_idle } } - } - - // critical word first read - val raddr = (if (XLEN == 64) Cat(req.addr(PAddrBits-1,3), 0.U(3.W)) - else Cat(req.addr(PAddrBits-1,2), 0.U(2.W))) - // dirty block addr - val waddr = Cat(meta.tag, addr.index, 0.U(OffsetBits.W)) - val cmd = Mux(state === s_memReadReq, SimpleBusCmd.readBurst, - Mux((writeBeatCnt.value === (LineBeats - 1).U), SimpleBusCmd.writeLast, SimpleBusCmd.writeBurst)) - io.mem.req.bits.apply(addr = Mux(state === s_memReadReq, raddr, waddr), - cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), - wdata = dataHitWay, wmask = Fill(DataBytes, 1.U)) - - io.mem.resp.ready := true.B - io.mem.req.valid := (state === s_memReadReq) || ((state === s_memWriteReq) && (state2 === s2_dataOK)) - - // mmio - io.mmio.req.bits := req - io.mmio.resp.ready := true.B - io.mmio.req.valid := (state === s_mmioReq) - - val afterFirstRead = RegInit(false.B) - val alreadyOutFire = RegEnable(true.B, init = false.B, io.out.fire()) - val readingFirst = !afterFirstRead && io.mem.resp.fire() && (state === s_memReadResp) - val inRdataRegDemand = RegEnable(Mux(mmio, 
io.mmio.resp.bits.rdata, io.mem.resp.bits.rdata), - Mux(mmio, state === s_mmioResp, readingFirst)) - - // probe - io.cohResp.valid := ((state === s_idle) && probe) || - ((state === s_release) && (state2 === s2_dataOK)) - io.cohResp.bits.rdata := dataHitWay - val releaseLast = Counter(state === s_release && io.cohResp.fire(), LineBeats)._2 - io.cohResp.bits.cmd := Mux(state === s_release, Mux(releaseLast, SimpleBusCmd.readLast, 0.U), - Mux(hit, SimpleBusCmd.probeHit, SimpleBusCmd.probeMiss)) - - val respToL1Fire = hitReadBurst && io.out.ready && state2 === s2_dataOK - val respToL1Last = Counter((state === s_idle || state === s_release && state2 === s2_dataOK) && hitReadBurst && io.out.ready, LineBeats)._2 - - switch (state) { - is (s_idle) { - afterFirstRead := false.B - alreadyOutFire := false.B - - when (probe) { - when (io.cohResp.fire()) { - state := Mux(hit, s_release, s_idle) - readBeatCnt.value := addr.wordIndex - } - } .elsewhen (hitReadBurst && io.out.ready) { - state := s_release - readBeatCnt.value := Mux(addr.wordIndex === (LineBeats - 1).U, 0.U, (addr.wordIndex + 1.U)) - } .elsewhen ((miss || mmio) && !io.flush) { - state := Mux(mmio, s_mmioReq, Mux(!ro.B && meta.dirty, s_memWriteReq, s_memReadReq)) - } - } - - is (s_mmioReq) { when (io.mmio.req.fire()) { state := s_mmioResp } } - is (s_mmioResp) { when (io.mmio.resp.fire()) { state := s_wait_resp } } - - is (s_release) { - when (io.cohResp.fire() || respToL1Fire) { readBeatCnt.inc() } - when (probe && io.cohResp.fire() && releaseLast || respToL1Fire && respToL1Last) { state := s_idle } - } - - is (s_memReadReq) { when (io.mem.req.fire()) { - state := s_memReadResp - readBeatCnt.value := addr.wordIndex - }} - - is (s_memReadResp) { - when (io.mem.resp.fire()) { - afterFirstRead := true.B - readBeatCnt.inc() - when (req.cmd === SimpleBusCmd.writeBurst) { writeL2BeatCnt.value := 0.U } - when (io.mem.resp.bits.isReadLast()) { state := s_wait_resp } - } - } - - is (s_memWriteReq) { - when 
(io.mem.req.fire()) { writeBeatCnt.inc() } - when (io.mem.req.bits.isWriteLast() && io.mem.req.fire()) { state := s_memWriteResp } - } - - is (s_memWriteResp) { when (io.mem.resp.fire()) { state := s_memReadReq } } - is (s_wait_resp) { when (io.out.fire() || needFlush || alreadyOutFire) { state := s_idle } } - } - - val dataRefill = MaskData(io.mem.resp.bits.rdata, req.wdata, Mux(readingFirst, wordMask, 0.U(DataBits.W))) - val dataRefillWriteBus = Wire(CacheDataArrayWriteBus).apply( - valid = (state === s_memReadResp) && io.mem.resp.fire(), setIdx = Cat(addr.index, readBeatCnt.value), - data = Wire(new DataBundle).apply(dataRefill), waymask = io.in.bits.waymask) - - dataWriteArb.io.in(0) <> dataHitWriteBus.req - dataWriteArb.io.in(1) <> dataRefillWriteBus.req - io.dataWriteBus.req <> dataWriteArb.io.out - - val metaRefillWriteBus = Wire(CacheMetaArrayWriteBus()).apply( - valid = (state === s_memReadResp) && io.mem.resp.fire() && io.mem.resp.bits.isReadLast(), - data = Wire(new MetaBundle).apply(valid = true.B, tag = addr.tag, dirty = !ro.B && req.isWrite()), - setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask - ) - - metaWriteArb.io.in(0) <> metaHitWriteBus.req - metaWriteArb.io.in(1) <> metaRefillWriteBus.req - io.metaWriteBus.req <> metaWriteArb.io.out - - if (cacheLevel == 2) { - when ((state === s_memReadResp) && io.mem.resp.fire() && req.isReadBurst()) { - // readBurst request miss - io.out.bits.rdata := dataRefill - io.out.bits.cmd := Mux(io.mem.resp.bits.isReadLast(), SimpleBusCmd.readLast, SimpleBusCmd.readBurst) - }.elsewhen (req.isWriteLast() || req.cmd === SimpleBusCmd.writeBurst) { - // writeBurst/writeLast request, no matter hit or miss - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := DontCare - }.elsewhen (hitReadBurst && state === s_release) { - // readBurst request hit - io.out.bits.rdata := dataHitWay - io.out.bits.cmd := Mux(respToL1Last, SimpleBusCmd.readLast, SimpleBusCmd.readBurst) - }.otherwise { - 
io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := req.cmd - } - } else { - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := Mux(io.in.bits.req.isRead(), SimpleBusCmd.readLast, Mux(io.in.bits.req.isWrite(), SimpleBusCmd.writeResp, DontCare))//DontCare, added by lemover - } - io.out.bits.user.zip(req.user).map { case (o,i) => o := i } - io.out.bits.id.zip(req.id).map { case (o,i) => o := i } - - io.out.valid := io.in.valid && Mux(req.isBurst() && (cacheLevel == 2).B, - Mux(req.isWrite() && (hit || !hit && state === s_wait_resp), true.B, (state === s_memReadResp && io.mem.resp.fire() && req.cmd === SimpleBusCmd.readBurst)) || (respToL1Fire && respToL1Last && state === s_release), - Mux(probe, false.B, Mux(hit, true.B, Mux(req.isWrite() || mmio, state === s_wait_resp, afterFirstRead && !alreadyOutFire))) - ) - - // With critical-word first, the pipeline registers between - // s2 and s3 can not be overwritten before a missing request - // is totally handled. We use io.isFinish to indicate when the - // request really ends. 
- io.isFinish := Mux(probe, io.cohResp.fire() && Mux(miss, state === s_idle, (state === s_release) && releaseLast), - Mux(hit || req.isWrite(), io.out.fire(), (state === s_wait_resp) && (io.out.fire() || alreadyOutFire)) - ) - - io.in.ready := io.out.ready && (state === s_idle && !hitReadBurst) && !miss && !probe - io.dataReadRespToL1 := hitReadBurst && (state === s_idle && io.out.ready || state === s_release && state2 === s2_dataOK) - - assert(!(metaHitWriteBus.req.valid && metaRefillWriteBus.req.valid)) - assert(!(dataHitWriteBus.req.valid && dataRefillWriteBus.req.valid)) - assert(!(!ro.B && io.flush), "only allow to flush icache") - Debug(" metaread idx %x waymask %b metas %x%x:%x %x%x:%x %x%x:%x %x%x:%x %x\n", getMetaIdx(req.addr), io.in.bits.waymask.asUInt, io.in.bits.metas(0).valid, io.in.bits.metas(0).dirty, io.in.bits.metas(0).tag, io.in.bits.metas(1).valid, io.in.bits.metas(1).dirty, io.in.bits.metas(1).tag, io.in.bits.metas(2).valid, io.in.bits.metas(2).dirty, io.in.bits.metas(2).tag, io.in.bits.metas(3).valid, io.in.bits.metas(3).dirty, io.in.bits.metas(3).tag, io.in.bits.datas.asUInt) - Debug(io.metaWriteBus.req.fire(), "%d: [" + cacheName + " S3]: metawrite idx %x wmask %b meta %x%x:%x\n", GTimer(), io.metaWriteBus.req.bits.setIdx, io.metaWriteBus.req.bits.waymask.get, io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.dirty, io.metaWriteBus.req.bits.data.tag) - Debug(" in.ready = %d, in.valid = %d, hit = %x, state = %d, addr = %x cmd:%d probe:%d isFinish:%d\n", io.in.ready, io.in.valid, hit, state, req.addr, req.cmd, probe, io.isFinish) - Debug(" out.valid:%d rdata:%x cmd:%d user:%x id:%x \n", io.out.valid, io.out.bits.rdata, io.out.bits.cmd, io.out.bits.user.getOrElse(0.U), io.out.bits.id.getOrElse(0.U)) - Debug(" DHW: (%d, %d), data:%x setIdx:%x MHW:(%d, %d)\n", dataHitWriteBus.req.valid, dataHitWriteBus.req.ready, dataHitWriteBus.req.bits.data.asUInt, dataHitWriteBus.req.bits.setIdx, metaHitWriteBus.req.valid, 
metaHitWriteBus.req.ready) - Debug(" DreadCache: %x \n", io.in.bits.datas.asUInt) - Debug(" useFD:%d isFD:%d FD:%x DreadArray:%x dataRead:%x inwaymask:%x FDwaymask:%x \n", useForwardData, io.in.bits.isForwardData, io.in.bits.forwardData.data.data, dataReadArray, dataRead, io.in.bits.waymask, io.in.bits.forwardData.waymask.getOrElse("b1".U)) - Debug(io.dataWriteBus.req.fire(), "[WB] waymask: %b data:%x setIdx:%x\n", - io.dataWriteBus.req.bits.waymask.get.asUInt, io.dataWriteBus.req.bits.data.asUInt, io.dataWriteBus.req.bits.setIdx) - Debug((state === s_memWriteReq) && io.mem.req.fire(), "[COUTW] cnt %x addr %x data %x cmd %x size %x wmask %x tag %x idx %x waymask %b \n", writeBeatCnt.value, io.mem.req.bits.addr, io.mem.req.bits.wdata, io.mem.req.bits.cmd, io.mem.req.bits.size, io.mem.req.bits.wmask, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) - Debug((state === s_memReadReq) && io.mem.req.fire(), "[COUTR] addr %x tag %x idx %x waymask %b \n", io.mem.req.bits.addr, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) - Debug((state === s_memReadResp) && io.mem.resp.fire(), "[COUTR] cnt %x data %x tag %x idx %x waymask %b \n", readBeatCnt.value, io.mem.resp.bits.rdata, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) -} - -class Cache(implicit val cacheConfig: CacheConfig) extends CacheModule with HasCacheIO { - // cpu pipeline - val s1 = Module(new CacheStage1) - val s2 = Module(new CacheStage2) - val s3 = Module(new CacheStage3) - val metaArray = Module(new SRAMTemplateWithArbiter(nRead = 1, new MetaBundle, set = Sets, way = Ways, shouldReset = true)) - val dataArray = Module(new SRAMTemplateWithArbiter(nRead = 2, new DataBundle, set = Sets * LineBeats, way = Ways)) - - if (cacheName == "icache") { - // flush icache when executing fence.i - val flushICache = WireInit(false.B) - BoringUtils.addSink(flushICache, "MOUFlushICache") - metaArray.reset := reset.asBool || flushICache - } - - val arb = Module(new Arbiter(new SimpleBusReqBundle(userBits = userBits, 
idBits = idBits), hasCohInt + 1)) - arb.io.in(hasCohInt + 0) <> io.in.req - - s1.io.in <> arb.io.out - /* - val s2BlockByPrefetch = if (cacheLevel == 2) { - s2.io.out.valid && s3.io.in.valid && s3.io.in.bits.req.isPrefetch() && !s3.io.in.ready - } else { false.B } - */ - PipelineConnect(s1.io.out, s2.io.in, s2.io.out.fire(), io.flush(0)) - PipelineConnect(s2.io.out, s3.io.in, s3.io.isFinish, io.flush(1)) - io.in.resp <> s3.io.out - s3.io.flush := io.flush(1) - io.out.mem <> s3.io.mem - io.mmio <> s3.io.mmio - io.empty := !s2.io.in.valid && !s3.io.in.valid - - io.in.resp.valid := Mux(s3.io.out.valid && s3.io.out.bits.isPrefetch(), false.B, s3.io.out.valid || s3.io.dataReadRespToL1) - - if (hasCoh) { - val cohReq = io.out.coh.req.bits - // coh does not have user signal, any better code? - val coh = Wire(new SimpleBusReqBundle(userBits = userBits, idBits = idBits)) - coh.apply(addr = cohReq.addr, cmd = cohReq.cmd, size = cohReq.size, wdata = cohReq.wdata, wmask = cohReq.wmask) - arb.io.in(0).bits := coh - arb.io.in(0).valid := io.out.coh.req.valid - io.out.coh.req.ready := arb.io.in(0).ready - io.out.coh.resp <> s3.io.cohResp - } else { - io.out.coh.req.ready := true.B - io.out.coh.resp := DontCare - io.out.coh.resp.valid := false.B - s3.io.cohResp.ready := true.B - } - - metaArray.io.r(0) <> s1.io.metaReadBus - dataArray.io.r(0) <> s1.io.dataReadBus - dataArray.io.r(1) <> s3.io.dataReadBus - - metaArray.io.w <> s3.io.metaWriteBus - dataArray.io.w <> s3.io.dataWriteBus - - s2.io.metaReadResp := s1.io.metaReadBus.resp.data - s2.io.dataReadResp := s1.io.dataReadBus.resp.data - s2.io.dataWriteBus := s3.io.dataWriteBus - s2.io.metaWriteBus := s3.io.metaWriteBus - - if (EnableOutOfOrderExec) { - BoringUtils.addSource(s3.io.out.fire() && s3.io.in.bits.hit, "perfCntCondM" + cacheName + "Hit") - BoringUtils.addSource(s3.io.in.valid && !s3.io.in.bits.hit, "perfCntCondM" + cacheName + "Loss") - BoringUtils.addSource(s1.io.in.fire(), "perfCntCondM" + cacheName + "Req") - } - // 
io.in.dump(cacheName + ".in") - Debug("InReq(%d, %d) InResp(%d, %d) \n", io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready) - Debug("{IN s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)} {OUT s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)}\n", s1.io.in.valid, s1.io.in.ready, s2.io.in.valid, s2.io.in.ready, s3.io.in.valid, s3.io.in.ready, s1.io.out.valid, s1.io.out.ready, s2.io.out.valid, s2.io.out.ready, s3.io.out.valid, s3.io.out.ready) - when (s1.io.in.valid) { Debug(p"[${cacheName}.S1]: ${s1.io.in.bits}\n") } - when (s2.io.in.valid) { Debug(p"[${cacheName}.S2]: ${s2.io.in.bits.req}\n") } - when (s3.io.in.valid) { Debug(p"[${cacheName}.S3]: ${s3.io.in.bits.req}\n") } - //s3.io.mem.dump(cacheName + ".mem") -} - -class Cache_fake(implicit val cacheConfig: CacheConfig) extends CacheModule with HasCacheIO { - val s_idle :: s_memReq :: s_memResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: Nil = Enum(6) - val state = RegInit(s_idle) - - val ismmio = AddressSpace.isMMIO(io.in.req.bits.addr) - val ismmioRec = RegEnable(ismmio, io.in.req.fire()) - if (cacheConfig.name == "dcache") { - BoringUtils.addSource(ismmio, "lsuMMIO") - } - - val needFlush = RegInit(false.B) - when (io.flush(0) && (state =/= s_idle)) { needFlush := true.B } - when (state === s_idle && needFlush) { needFlush := false.B } - - val alreadyOutFire = RegEnable(true.B, init = false.B, io.in.resp.fire()) - - switch (state) { - is (s_idle) { - alreadyOutFire := false.B - when (io.in.req.fire() && !io.flush(0)) { state := Mux(ismmio, s_mmioReq, s_memReq) } - } - is (s_memReq) { - when (io.out.mem.req.fire()) { state := s_memResp } - } - is (s_memResp) { - when (io.out.mem.resp.fire()) { state := s_wait_resp } - } - is (s_mmioReq) { - when (io.mmio.req.fire()) { state := s_mmioResp } - } - is (s_mmioResp) { - when (io.mmio.resp.fire() || alreadyOutFire) { state := s_wait_resp } - } - is (s_wait_resp) { - when (io.in.resp.fire() || needFlush || alreadyOutFire) { state := s_idle } - } - } - - val reqaddr = 
RegEnable(io.in.req.bits.addr, io.in.req.fire()) - val cmd = RegEnable(io.in.req.bits.cmd, io.in.req.fire()) - val size = RegEnable(io.in.req.bits.size, io.in.req.fire()) - val wdata = RegEnable(io.in.req.bits.wdata, io.in.req.fire()) - val wmask = RegEnable(io.in.req.bits.wmask, io.in.req.fire()) - - io.in.req.ready := (state === s_idle) - io.in.resp.valid := (state === s_wait_resp) && (!needFlush) - - val mmiordata = RegEnable(io.mmio.resp.bits.rdata, io.mmio.resp.fire()) - val mmiocmd = RegEnable(io.mmio.resp.bits.cmd, io.mmio.resp.fire()) - val memrdata = RegEnable(io.out.mem.resp.bits.rdata, io.out.mem.resp.fire()) - val memcmd = RegEnable(io.out.mem.resp.bits.cmd, io.out.mem.resp.fire()) - - io.in.resp.bits.rdata := Mux(ismmioRec, mmiordata, memrdata) - io.in.resp.bits.cmd := Mux(ismmioRec, mmiocmd, memcmd) - - val memuser = RegEnable(io.in.req.bits.user.getOrElse(0.U), io.in.req.fire()) - io.in.resp.bits.user.zip(if (userBits > 0) Some(memuser) else None).map { case (o,i) => o := i } - - io.out.mem.req.bits.apply(addr = reqaddr, - cmd = cmd, size = size, - wdata = wdata, wmask = wmask) - io.out.mem.req.valid := (state === s_memReq) - io.out.mem.resp.ready := true.B - - io.mmio.req.bits.apply(addr = reqaddr, - cmd = cmd, size = size, - wdata = wdata, wmask = wmask) - io.mmio.req.valid := (state === s_mmioReq) - io.mmio.resp.ready := true.B - - io.empty := false.B - io.out.coh := DontCare - - Debug(io.in.req.fire(), p"in.req: ${io.in.req.bits}\n") - Debug(io.out.mem.req.fire(), p"out.mem.req: ${io.out.mem.req.bits}\n") - Debug(io.out.mem.resp.fire(), p"out.mem.resp: ${io.out.mem.resp.bits}\n") - Debug(io.in.resp.fire(), p"in.resp: ${io.in.resp.bits}\n") -} - -class Cache_dummy(implicit val cacheConfig: CacheConfig) extends CacheModule with HasCacheIO { - - val needFlush = RegInit(false.B) - when (io.flush(0)) { - needFlush := true.B - } - when (io.in.req.fire() && !io.flush(0)) { - needFlush := false.B - } - - io.in.req.ready := io.out.mem.req.ready - 
io.in.resp.valid := (io.out.mem.resp.valid && !needFlush) || io.flush(0) - - io.in.resp.bits.rdata := io.out.mem.resp.bits.rdata - io.in.resp.bits.cmd := io.out.mem.resp.bits.cmd - val memuser = RegEnable(io.in.req.bits.user.getOrElse(0.U), io.in.req.fire()) - io.in.resp.bits.user.zip(if (userBits > 0) Some(memuser) else None).map { case (o,i) => o := i } - - io.out.mem.req.bits.apply( - addr = io.in.req.bits.addr, - cmd = io.in.req.bits.cmd, - size = io.in.req.bits.size, - wdata = io.in.req.bits.wdata, - wmask = io.in.req.bits.wmask - ) - io.out.mem.req.valid := io.in.req.valid - io.out.mem.resp.ready := io.in.resp.ready - - io.empty := false.B - io.mmio := DontCare - io.out.coh := DontCare -} - -object Cache { - def apply(in: SimpleBusUC, mmio: Seq[SimpleBusUC], flush: UInt, empty: Bool, enable: Boolean = true)(implicit cacheConfig: CacheConfig) = { - val cache = if (enable) Module(new Cache) - else (if (Settings.get("IsRV32")) - (if (cacheConfig.name == "dcache") Module(new Cache_fake) else Module(new Cache_dummy)) - else - (Module(new Cache_fake))) - cache.io.flush := flush - cache.io.in <> in - mmio(0) <> cache.io.mmio - empty := cache.io.empty - cache.io.out - } -} diff --git a/src/main/scala/nutcore/mem/cache/Cache.scala b/src/main/scala/nutcore/mem/cache/Cache.scala new file mode 100644 index 000000000..0aa528642 --- /dev/null +++ b/src/main/scala/nutcore/mem/cache/Cache.scala @@ -0,0 +1,327 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. 
+* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ + +package nutcore.mem.cache + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import chisel3.experimental.IO +import utils._ +import top.Settings + +case class CacheConfig ( + ro: Boolean = false, + name: String = "cache", + userBits: Int = 0, + idBits: Int = 0, + cacheLevel: Int = 1, + + totalSize: Int = 32, // Kbytes + ways: Int = 4 +) + +trait HasCacheConst { + implicit val cacheConfig: CacheConfig + + val PAddrBits: Int + val XLEN: Int + + val cacheName = cacheConfig.name + val userBits = cacheConfig.userBits + val idBits = cacheConfig.idBits + + val ro = cacheConfig.ro + val hasCoh = !ro + val hasCohInt = (if (hasCoh) 1 else 0) + val hasPrefetch = cacheName == "l2cache" + + val cacheLevel = cacheConfig.cacheLevel + val TotalSize = cacheConfig.totalSize + val Ways = cacheConfig.ways + val LineSize = XLEN // byte + val LineBeats = LineSize / 8 //DATA WIDTH 64 + val Sets = TotalSize * 1024 / LineSize / Ways + val OffsetBits = log2Up(LineSize) + val IndexBits = log2Up(Sets) + val WordIndexBits = log2Up(LineBeats) + val TagBits = PAddrBits - OffsetBits - IndexBits + + val debug = false + + def addrBundle = new Bundle { + val tag = UInt(TagBits.W) + val index = UInt(IndexBits.W) + val wordIndex = UInt(WordIndexBits.W) + val byteOffset = UInt((if (XLEN == 64) 3 else 2).W) + } + + def CacheMetaArrayReadBus() = new SRAMReadBus(new MetaBundle, set = Sets, way = Ways) + def CacheDataArrayReadBus() = new SRAMReadBus(new DataBundle, set = 
Sets * LineBeats, way = Ways) + def CacheMetaArrayWriteBus() = new SRAMWriteBus(new MetaBundle, set = Sets, way = Ways) + def CacheDataArrayWriteBus() = new SRAMWriteBus(new DataBundle, set = Sets * LineBeats, way = Ways) + + def getMetaIdx(addr: UInt) = addr.asTypeOf(addrBundle).index + def getDataIdx(addr: UInt) = Cat(addr.asTypeOf(addrBundle).index, addr.asTypeOf(addrBundle).wordIndex) + + def isSameWord(a1: UInt, a2: UInt) = ((a1 >> 2) === (a2 >> 2)) + def isSetConflict(a1: UInt, a2: UInt) = (a1.asTypeOf(addrBundle).index === a2.asTypeOf(addrBundle).index) +} + +abstract class CacheBundle(implicit cacheConfig: CacheConfig) extends Bundle with HasNutCoreParameter with HasCacheConst +abstract class CacheModule(implicit cacheConfig: CacheConfig) extends Module with HasNutCoreParameter with HasCacheConst with HasNutCoreLog + +class MetaBundle(implicit val cacheConfig: CacheConfig) extends CacheBundle { + val tag = Output(UInt(TagBits.W)) + val valid = Output(Bool()) + val dirty = Output(Bool()) + + def apply(tag: UInt, valid: Bool, dirty: Bool) = { + this.tag := tag + this.valid := valid + this.dirty := dirty + this + } +} + +class DataBundle(implicit val cacheConfig: CacheConfig) extends CacheBundle { + val data = Output(UInt(DataBits.W)) + + def apply(data: UInt) = { + this.data := data + this + } +} + +class CacheIO(implicit val cacheConfig: CacheConfig) extends Bundle with HasNutCoreParameter with HasCacheConst { + val in = Flipped(new SimpleBusUC(userBits = userBits, idBits = idBits)) + val flush = Input(UInt(2.W)) + val out = new SimpleBusC + val mmio = new SimpleBusUC + val empty = Output(Bool()) +} +trait HasCacheIO { + implicit val cacheConfig: CacheConfig + val io = IO(new CacheIO) +} + +class Cache(implicit val cacheConfig: CacheConfig) extends CacheModule with HasCacheIO { + // cpu pipeline + val s1 = Module(new CacheStageMetaRead) + val s2 = Module(new CacheStageCheck) + val s3 = Module(new CacheStageWriteBack) + val metaArray = Module(new 
SRAMTemplateWithArbiter(nRead = 1, new MetaBundle, set = Sets, way = Ways, shouldReset = true)) + val dataArray = Module(new SRAMTemplateWithArbiter(nRead = 2, new DataBundle, set = Sets * LineBeats, way = Ways)) + + if (cacheName == "icache") { + // flush icache when executing fence.i + val flushICache = WireInit(false.B) + BoringUtils.addSink(flushICache, "MOUFlushICache") + metaArray.reset := reset.asBool || flushICache + } + + val arb = Module(new Arbiter(new SimpleBusReqBundle(userBits = userBits, idBits = idBits), hasCohInt + 1)) + arb.io.in(hasCohInt + 0) <> io.in.req + + s1.io.in <> arb.io.out + /* + val s2BlockByPrefetch = if (cacheLevel == 2) { + s2.io.out.valid && s3.io.in.valid && s3.io.in.bits.req.isPrefetch() && !s3.io.in.ready + } else { false.B } + */ + PipelineConnect(s1.io.out, s2.io.in, s2.io.out.fire(), io.flush(0)) + PipelineConnect(s2.io.out, s3.io.in, s3.io.isFinish, io.flush(1)) + io.in.resp <> s3.io.out + s3.io.flush := io.flush(1) + io.out.mem <> s3.io.mem + io.mmio <> s3.io.mmio + io.empty := !s2.io.in.valid && !s3.io.in.valid + + io.in.resp.valid := Mux(s3.io.out.valid && s3.io.out.bits.isPrefetch(), false.B, s3.io.out.valid || s3.io.dataReadRespToL1) + + if (hasCoh) { + val cohReq = io.out.coh.req.bits + // coh does not have user signal, any better code? 
+ val coh = Wire(new SimpleBusReqBundle(userBits = userBits, idBits = idBits)) + coh.apply(addr = cohReq.addr, cmd = cohReq.cmd, size = cohReq.size, wdata = cohReq.wdata, wmask = cohReq.wmask) + arb.io.in(0).bits := coh + arb.io.in(0).valid := io.out.coh.req.valid + io.out.coh.req.ready := arb.io.in(0).ready + io.out.coh.resp <> s3.io.cohResp + } else { + io.out.coh.req.ready := true.B + io.out.coh.resp := DontCare + io.out.coh.resp.valid := false.B + s3.io.cohResp.ready := true.B + } + + metaArray.io.r(0) <> s1.io.metaReadBus + dataArray.io.r(0) <> s1.io.dataReadBus + dataArray.io.r(1) <> s3.io.dataReadBus + + metaArray.io.w <> s3.io.metaWriteBus + dataArray.io.w <> s3.io.dataWriteBus + + s2.io.metaReadResp := s1.io.metaReadBus.resp.data + s2.io.dataReadResp := s1.io.dataReadBus.resp.data + s2.io.dataWriteBus := s3.io.dataWriteBus + s2.io.metaWriteBus := s3.io.metaWriteBus + + if (EnableOutOfOrderExec) { + BoringUtils.addSource(s3.io.out.fire() && s3.io.in.bits.hit, "perfCntCondM" + cacheName + "Hit") + BoringUtils.addSource(s3.io.in.valid && !s3.io.in.bits.hit, "perfCntCondM" + cacheName + "Loss") + BoringUtils.addSource(s1.io.in.fire(), "perfCntCondM" + cacheName + "Req") + } + // io.in.dump(cacheName + ".in") + Debug("InReq(%d, %d) InResp(%d, %d) \n", io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready) + Debug("{IN s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)} {OUT s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)}\n", s1.io.in.valid, s1.io.in.ready, s2.io.in.valid, s2.io.in.ready, s3.io.in.valid, s3.io.in.ready, s1.io.out.valid, s1.io.out.ready, s2.io.out.valid, s2.io.out.ready, s3.io.out.valid, s3.io.out.ready) + when (s1.io.in.valid) { Debug(p"[${cacheName}.S1]: ${s1.io.in.bits}\n") } + when (s2.io.in.valid) { Debug(p"[${cacheName}.S2]: ${s2.io.in.bits.req}\n") } + when (s3.io.in.valid) { Debug(p"[${cacheName}.S3]: ${s3.io.in.bits.req}\n") } + //s3.io.mem.dump(cacheName + ".mem") +} + +class Cache_fake(implicit val cacheConfig: CacheConfig) extends CacheModule with 
HasCacheIO { + val s_idle :: s_memReq :: s_memResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: Nil = Enum(6) + val state = RegInit(s_idle) + + val ismmio = AddressSpace.isMMIO(io.in.req.bits.addr) + val ismmioRec = RegEnable(ismmio, io.in.req.fire()) + if (cacheConfig.name == "dcache") { + BoringUtils.addSource(ismmio, "lsuMMIO") + } + + val needFlush = RegInit(false.B) + when (io.flush(0) && (state =/= s_idle)) { needFlush := true.B } + when (state === s_idle && needFlush) { needFlush := false.B } + + val alreadyOutFire = RegEnable(true.B, init = false.B, io.in.resp.fire()) + + switch (state) { + is (s_idle) { + alreadyOutFire := false.B + when (io.in.req.fire() && !io.flush(0)) { state := Mux(ismmio, s_mmioReq, s_memReq) } + } + is (s_memReq) { + when (io.out.mem.req.fire()) { state := s_memResp } + } + is (s_memResp) { + when (io.out.mem.resp.fire()) { state := s_wait_resp } + } + is (s_mmioReq) { + when (io.mmio.req.fire()) { state := s_mmioResp } + } + is (s_mmioResp) { + when (io.mmio.resp.fire() || alreadyOutFire) { state := s_wait_resp } + } + is (s_wait_resp) { + when (io.in.resp.fire() || needFlush || alreadyOutFire) { state := s_idle } + } + } + + val reqaddr = RegEnable(io.in.req.bits.addr, io.in.req.fire()) + val cmd = RegEnable(io.in.req.bits.cmd, io.in.req.fire()) + val size = RegEnable(io.in.req.bits.size, io.in.req.fire()) + val wdata = RegEnable(io.in.req.bits.wdata, io.in.req.fire()) + val wmask = RegEnable(io.in.req.bits.wmask, io.in.req.fire()) + + io.in.req.ready := (state === s_idle) + io.in.resp.valid := (state === s_wait_resp) && (!needFlush) + + val mmiordata = RegEnable(io.mmio.resp.bits.rdata, io.mmio.resp.fire()) + val mmiocmd = RegEnable(io.mmio.resp.bits.cmd, io.mmio.resp.fire()) + val memrdata = RegEnable(io.out.mem.resp.bits.rdata, io.out.mem.resp.fire()) + val memcmd = RegEnable(io.out.mem.resp.bits.cmd, io.out.mem.resp.fire()) + + io.in.resp.bits.rdata := Mux(ismmioRec, mmiordata, memrdata) + io.in.resp.bits.cmd := Mux(ismmioRec, 
mmiocmd, memcmd) + + val memuser = RegEnable(io.in.req.bits.user.getOrElse(0.U), io.in.req.fire()) + io.in.resp.bits.user.zip(if (userBits > 0) Some(memuser) else None).map { case (o,i) => o := i } + + io.out.mem.req.bits.apply(addr = reqaddr, + cmd = cmd, size = size, + wdata = wdata, wmask = wmask) + io.out.mem.req.valid := (state === s_memReq) + io.out.mem.resp.ready := true.B + + io.mmio.req.bits.apply(addr = reqaddr, + cmd = cmd, size = size, + wdata = wdata, wmask = wmask) + io.mmio.req.valid := (state === s_mmioReq) + io.mmio.resp.ready := true.B + + io.empty := false.B + io.out.coh := DontCare + + Debug(io.in.req.fire(), p"in.req: ${io.in.req.bits}\n") + Debug(io.out.mem.req.fire(), p"out.mem.req: ${io.out.mem.req.bits}\n") + Debug(io.out.mem.resp.fire(), p"out.mem.resp: ${io.out.mem.resp.bits}\n") + Debug(io.in.resp.fire(), p"in.resp: ${io.in.resp.bits}\n") +} + +class Cache_dummy(implicit val cacheConfig: CacheConfig) extends CacheModule with HasCacheIO { + + val needFlush = RegInit(false.B) + when (io.flush(0)) { + needFlush := true.B + } + when (io.in.req.fire() && !io.flush(0)) { + needFlush := false.B + } + + io.in.req.ready := io.out.mem.req.ready + io.in.resp.valid := (io.out.mem.resp.valid && !needFlush) || io.flush(0) + + io.in.resp.bits.rdata := io.out.mem.resp.bits.rdata + io.in.resp.bits.cmd := io.out.mem.resp.bits.cmd + val memuser = RegEnable(io.in.req.bits.user.getOrElse(0.U), io.in.req.fire()) + io.in.resp.bits.user.zip(if (userBits > 0) Some(memuser) else None).map { case (o,i) => o := i } + + io.out.mem.req.bits.apply( + addr = io.in.req.bits.addr, + cmd = io.in.req.bits.cmd, + size = io.in.req.bits.size, + wdata = io.in.req.bits.wdata, + wmask = io.in.req.bits.wmask + ) + io.out.mem.req.valid := io.in.req.valid + io.out.mem.resp.ready := io.in.resp.ready + + io.empty := false.B + io.mmio := DontCare + io.out.coh := DontCare +} + +object Cache { + def apply(in: SimpleBusUC, mmio: Seq[SimpleBusUC], flush: UInt, empty: Bool, enable: Boolean 
= true)(implicit cacheConfig: CacheConfig) = {
+    val cache = if (enable) Module(new Cache)
+                else (if (Settings.get("IsRV32"))
+                        (if (cacheConfig.name == "dcache") Module(new Cache_fake) else Module(new Cache_dummy))
+                      else
+                        (Module(new Cache_fake)))
+    cache.io.flush := flush
+    cache.io.in <> in
+    mmio(0) <> cache.io.mmio
+    empty := cache.io.empty
+    cache.io.out
+  }
+}
diff --git a/src/main/scala/nutcore/mem/cache/CacheCheck.scala b/src/main/scala/nutcore/mem/cache/CacheCheck.scala
new file mode 100644
index 000000000..26bb942d4
--- /dev/null
+++ b/src/main/scala/nutcore/mem/cache/CacheCheck.scala
@@ -0,0 +1,99 @@
+
+package nutcore.mem.cache
+
+import chisel3._
+import chisel3.util._
+import chisel3.util.experimental.BoringUtils
+
+import nutcore._
+
+import bus.simplebus._
+import bus.axi4._
+import chisel3.experimental.IO
+import utils._
+import top.Settings
+
+
+/** Second cache pipeline stage: tag check and way selection.
+  *
+  * Compares the request tag with the meta of every way (substituting a same-set
+  * meta write when one is seen), derives the hit flag and the waymask handed to
+  * the next stage, and passes the data read response plus any matching
+  * data-array write along with the request.
+  */
+class CacheStageCheck(implicit val cacheConfig: CacheConfig) extends CacheModule {
+  class CacheStageCheckIO extends Bundle {
+    val in = Flipped(Decoupled(new Stage1IO))
+    val out = Decoupled(new Stage2IO)
+    val metaReadResp = Flipped(Vec(Ways, new MetaBundle))
+    val dataReadResp = Flipped(Vec(Ways, new DataBundle))
+    val metaWriteBus = Input(CacheMetaArrayWriteBus())
+    val dataWriteBus = Input(CacheDataArrayWriteBus())
+  }
+  val io = IO(new CacheStageCheckIO)
+
+  val req = io.in.bits.req
+  val addr = req.addr.asTypeOf(addrBundle)
+
+  // Meta forwarding: a meta write to the set being checked replaces the array read
+  // response, and is remembered until the request fires or the stage becomes empty.
+  val isForwardMeta = io.in.valid && io.metaWriteBus.req.valid && io.metaWriteBus.req.bits.setIdx === getMetaIdx(req.addr)
+  val isForwardMetaReg = RegInit(false.B)
+  when (isForwardMeta) { isForwardMetaReg := true.B }
+  when (io.in.fire() || !io.in.valid) { isForwardMetaReg := false.B }
+  val forwardMetaReg = RegEnable(io.metaWriteBus.req.bits, isForwardMeta)
+
+  val metaWay = Wire(Vec(Ways, chiselTypeOf(forwardMetaReg.data)))
+  val pickForwardMeta = isForwardMetaReg || isForwardMeta
+  val forwardMeta = Mux(isForwardMeta, io.metaWriteBus.req.bits, forwardMetaReg)
+  val forwardWaymask = forwardMeta.waymask.getOrElse("1".U).asBools
+  forwardWaymask.zipWithIndex.foreach { case (w, i) =>
+    metaWay(i) := Mux(pickForwardMeta && w, forwardMeta.data, io.metaReadResp(i))
+  }
+
+  val hitVec = VecInit(metaWay.map(m => m.valid && (m.tag === addr.tag) && io.in.valid)).asUInt
+  val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U
+
+  val invalidVec = VecInit(metaWay.map(m => !m.valid)).asUInt
+  val hasInvalidWay = invalidVec.orR
+  // One-hot mask of the highest-numbered invalid way; only consumed when hasInvalidWay is set.
+  // Derived from invalidVec itself so it is correct for any Ways, not just for four ways.
+  val refillInvalidWaymask = Reverse(PriorityEncoderOH(Reverse(invalidVec)))
+
+  // On a hit use the matching way; on a miss prefer an invalid way, else the LFSR-chosen victim.
+  val waymask = Mux(io.out.bits.hit, hitVec, Mux(hasInvalidWay, refillInvalidWaymask, victimWaymask))
+  // More than one selected way is an inconsistency: dump the inputs, then trip the assert below.
+  when(PopCount(waymask) > 1.U){
+    metaWay.foreach(m => Debug("[ERROR] metaWay %x metat %x reqt %x\n", m.valid, m.tag, addr.tag))
+    io.metaReadResp.foreach(m => Debug("[ERROR] metaReadResp %x metat %x reqt %x\n", m.valid, m.tag, addr.tag))
+    // waymask is an Option on the write bus (see getOrElse above), so never call .get on it.
+    Debug("[ERROR] forwardMetaReg isForwardMetaReg %x %x metat %x wm %b\n", isForwardMetaReg, forwardMetaReg.data.valid, forwardMetaReg.data.tag, forwardMetaReg.waymask.getOrElse("1".U))
+    Debug("[ERROR] forwardMeta isForwardMeta %x %x metat %x wm %b\n", isForwardMeta, io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.tag, io.metaWriteBus.req.bits.waymask.getOrElse("1".U))
+  }
+  when(PopCount(waymask) > 1.U){Debug("[ERROR] hit %b wmask %b hitvec %b\n", io.out.bits.hit, forwardMeta.waymask.getOrElse("1".U), hitVec)}
+  assert(!(io.in.valid && PopCount(waymask) > 1.U))
+
+  io.out.bits.metas := metaWay
+  io.out.bits.hit := io.in.valid && hitVec.orR
+  io.out.bits.waymask := waymask
+  io.out.bits.datas := io.dataReadResp
+  io.out.bits.mmio := AddressSpace.isMMIO(req.addr)
+
+  // Data forwarding: a data-array write to the word being read is captured and passed on.
+  val isForwardData = io.in.valid && (io.dataWriteBus.req match { case r =>
+    r.valid && r.bits.setIdx === getDataIdx(req.addr)
+  })
+  val isForwardDataReg = RegInit(false.B)
+  when (isForwardData) { isForwardDataReg := true.B }
+  when (io.in.fire() || !io.in.valid) { isForwardDataReg := false.B }
+  val forwardDataReg = RegEnable(io.dataWriteBus.req.bits, isForwardData)
+  io.out.bits.isForwardData := isForwardDataReg || isForwardData
+  io.out.bits.forwardData := Mux(isForwardData, io.dataWriteBus.req.bits, forwardDataReg)
+
+  io.out.bits.req <> req
+  io.out.valid := io.in.valid
+  io.in.ready := !io.in.valid || io.out.fire()
+
+  Debug("[isFD:%d isFDreg:%d inFire:%d invalid:%d \n", isForwardData, isForwardDataReg, io.in.fire(), io.in.valid)
+  Debug("[isFM:%d isFMreg:%d metawreq:%x widx:%x ridx:%x \n", isForwardMeta, isForwardMetaReg, io.metaWriteBus.req.valid, io.metaWriteBus.req.bits.setIdx, getMetaIdx(req.addr))
+}
\ No newline at end of file
diff --git a/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala b/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala
new file mode 100644
index 000000000..43583ed8d
--- /dev/null
+++ b/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala
@@ -0,0 +1,54 @@
+
+package nutcore.mem.cache
+
+import chisel3._
+import chisel3.util._
+import chisel3.util.experimental.BoringUtils
+
+import nutcore._
+
+import bus.simplebus._
+import bus.axi4._
+import chisel3.experimental.IO
+import utils._
+import top.Settings
+
+
+class Stage1IO(implicit val cacheConfig: CacheConfig) extends CacheBundle {
+  val req = new SimpleBusReqBundle(userBits = userBits, idBits = idBits)
+}
+
+class CacheStageMetaRead(implicit val cacheConfig: CacheConfig) extends CacheModule {
+  class CacheStageMetaReadIO extends Bundle {
+    val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, idBits = idBits)))
+    val out = Decoupled(new Stage1IO)
+    val metaReadBus = CacheMetaArrayReadBus()
+    val dataReadBus = CacheDataArrayReadBus()
+  }
+  val io = IO(new CacheStageMetaReadIO)
+
+  if (ro) when (io.in.fire()) { assert(!io.in.bits.isWrite()) }
+  Debug(io.in.fire(), "[L1$] cache stage1, addr in: %x, user: %x id: %x\n", io.in.bits.addr, 
io.in.bits.user.getOrElse(0.U), io.in.bits.id.getOrElse(0.U)) + + // read meta array and data array + val readBusValid = io.in.valid && io.out.ready + io.metaReadBus.apply(valid = readBusValid, setIdx = getMetaIdx(io.in.bits.addr)) + io.dataReadBus.apply(valid = readBusValid, setIdx = getDataIdx(io.in.bits.addr)) + + io.out.bits.req := io.in.bits + io.out.valid := io.in.valid && io.metaReadBus.req.ready && io.dataReadBus.req.ready + io.in.ready := (!io.in.valid || io.out.fire()) && io.metaReadBus.req.ready && io.dataReadBus.req.ready + + Debug("in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, addr = %x, cmd = %x, dataReadBus.req.valid = %d\n", io.in.ready, io.in.valid, io.out.valid, io.out.ready, io.in.bits.addr, io.in.bits.cmd, io.dataReadBus.req.valid) +} + +class Stage2IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { + val req = new SimpleBusReqBundle(userBits = userBits, idBits = idBits) + val metas = Vec(Ways, new MetaBundle) + val datas = Vec(Ways, new DataBundle) + val hit = Output(Bool()) + val waymask = Output(UInt(Ways.W)) + val mmio = Output(Bool()) + val isForwardData = Output(Bool()) + val forwardData = Output(CacheDataArrayWriteBus().req.bits) +} diff --git a/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala b/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala new file mode 100644 index 000000000..e571604b7 --- /dev/null +++ b/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala @@ -0,0 +1,258 @@ + +package nutcore.mem.cache + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import chisel3.experimental.IO +import utils._ +import top.Settings + +// writeback +class CacheStageWriteBack(implicit val cacheConfig: CacheConfig) extends CacheModule { + class CacheStageWriteBackIO extends Bundle { + val in = Flipped(Decoupled(new Stage2IO)) + val out = Decoupled(new SimpleBusRespBundle(userBits = userBits, idBits = 
idBits)) + val isFinish = Output(Bool()) + val flush = Input(Bool()) + val dataReadBus = CacheDataArrayReadBus() + val dataWriteBus = CacheDataArrayWriteBus() + val metaWriteBus = CacheMetaArrayWriteBus() + + val mem = new SimpleBusUC + val mmio = new SimpleBusUC + val cohResp = Decoupled(new SimpleBusRespBundle) + + // use to distinguish prefetch request and normal request + val dataReadRespToL1 = Output(Bool()) + } + val io = IO(new CacheStageWriteBackIO) + + val metaWriteArb = Module(new Arbiter(CacheMetaArrayWriteBus().req.bits, 2)) + val dataWriteArb = Module(new Arbiter(CacheDataArrayWriteBus().req.bits, 2)) + + val req = io.in.bits.req + val addr = req.addr.asTypeOf(addrBundle) + val mmio = io.in.valid && io.in.bits.mmio + val hit = io.in.valid && io.in.bits.hit + val miss = io.in.valid && !io.in.bits.hit + val probe = io.in.valid && hasCoh.B && req.isProbe() + val hitReadBurst = hit && req.isReadBurst() + val meta = Mux1H(io.in.bits.waymask, io.in.bits.metas) + assert(!(mmio && hit), "MMIO request should not hit in cache") + + + // this is ugly + if (cacheName == "dcache") { + BoringUtils.addSource(mmio, "lsuMMIO") + } + + val useForwardData = io.in.bits.isForwardData && io.in.bits.waymask === io.in.bits.forwardData.waymask.getOrElse("b1".U) + val dataReadArray = Mux1H(io.in.bits.waymask, io.in.bits.datas).data + val dataRead = Mux(useForwardData, io.in.bits.forwardData.data.data, dataReadArray) + val wordMask = Mux(!ro.B && req.isWrite(), MaskExpand(req.wmask), 0.U(DataBits.W)) + + val writeL2BeatCnt = Counter(LineBeats) + when(io.out.fire() && (req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast())) { + writeL2BeatCnt.inc() + } + + val hitWrite = hit && req.isWrite() + val dataHitWriteBus = Wire(CacheDataArrayWriteBus()).apply( + data = Wire(new DataBundle).apply(MaskData(dataRead, req.wdata, wordMask)), + valid = hitWrite, setIdx = Cat(addr.index, Mux(req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast(), writeL2BeatCnt.value, addr.wordIndex)), 
waymask = io.in.bits.waymask) + + val metaHitWriteBus = Wire(CacheMetaArrayWriteBus()).apply( + valid = hitWrite && !meta.dirty, setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask, + data = Wire(new MetaBundle).apply(tag = meta.tag, valid = true.B, dirty = (!ro).B) + ) + + val s_idle :: s_memReadReq :: s_memReadResp :: s_memWriteReq :: s_memWriteResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: s_release :: Nil = Enum(9) + val state = RegInit(s_idle) + val needFlush = RegInit(false.B) + + when (io.flush && (state =/= s_idle)) { needFlush := true.B } + when (io.out.fire() && needFlush) { needFlush := false.B } + + val readBeatCnt = Counter(LineBeats) + val writeBeatCnt = Counter(LineBeats) + + val s2_idle :: s2_dataReadWait :: s2_dataOK :: Nil = Enum(3) + val state2 = RegInit(s2_idle) + + io.dataReadBus.apply(valid = (state === s_memWriteReq || state === s_release) && (state2 === s2_idle), + setIdx = Cat(addr.index, Mux(state === s_release, readBeatCnt.value, writeBeatCnt.value))) + val dataWay = RegEnable(io.dataReadBus.resp.data, state2 === s2_dataReadWait) + val dataHitWay = Mux1H(io.in.bits.waymask, dataWay).data + + switch (state2) { + is (s2_idle) { when (io.dataReadBus.req.fire()) { state2 := s2_dataReadWait } } + is (s2_dataReadWait) { state2 := s2_dataOK } + is (s2_dataOK) { when (io.mem.req.fire() || io.cohResp.fire() || hitReadBurst && io.out.ready) { state2 := s2_idle } } + } + + // critical word first read + val raddr = (if (XLEN == 64) Cat(req.addr(PAddrBits-1,3), 0.U(3.W)) + else Cat(req.addr(PAddrBits-1,2), 0.U(2.W))) + // dirty block addr + val waddr = Cat(meta.tag, addr.index, 0.U(OffsetBits.W)) + val cmd = Mux(state === s_memReadReq, SimpleBusCmd.readBurst, + Mux((writeBeatCnt.value === (LineBeats - 1).U), SimpleBusCmd.writeLast, SimpleBusCmd.writeBurst)) + io.mem.req.bits.apply(addr = Mux(state === s_memReadReq, raddr, waddr), + cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), + wdata = dataHitWay, wmask = Fill(DataBytes, 1.U)) 
+ + io.mem.resp.ready := true.B + io.mem.req.valid := (state === s_memReadReq) || ((state === s_memWriteReq) && (state2 === s2_dataOK)) + + // mmio + io.mmio.req.bits := req + io.mmio.resp.ready := true.B + io.mmio.req.valid := (state === s_mmioReq) + + val afterFirstRead = RegInit(false.B) + val alreadyOutFire = RegEnable(true.B, init = false.B, io.out.fire()) + val readingFirst = !afterFirstRead && io.mem.resp.fire() && (state === s_memReadResp) + val inRdataRegDemand = RegEnable(Mux(mmio, io.mmio.resp.bits.rdata, io.mem.resp.bits.rdata), + Mux(mmio, state === s_mmioResp, readingFirst)) + + // probe + io.cohResp.valid := ((state === s_idle) && probe) || + ((state === s_release) && (state2 === s2_dataOK)) + io.cohResp.bits.rdata := dataHitWay + val releaseLast = Counter(state === s_release && io.cohResp.fire(), LineBeats)._2 + io.cohResp.bits.cmd := Mux(state === s_release, Mux(releaseLast, SimpleBusCmd.readLast, 0.U), + Mux(hit, SimpleBusCmd.probeHit, SimpleBusCmd.probeMiss)) + + val respToL1Fire = hitReadBurst && io.out.ready && state2 === s2_dataOK + val respToL1Last = Counter((state === s_idle || state === s_release && state2 === s2_dataOK) && hitReadBurst && io.out.ready, LineBeats)._2 + + switch (state) { + is (s_idle) { + afterFirstRead := false.B + alreadyOutFire := false.B + + when (probe) { + when (io.cohResp.fire()) { + state := Mux(hit, s_release, s_idle) + readBeatCnt.value := addr.wordIndex + } + } .elsewhen (hitReadBurst && io.out.ready) { + state := s_release + readBeatCnt.value := Mux(addr.wordIndex === (LineBeats - 1).U, 0.U, (addr.wordIndex + 1.U)) + } .elsewhen ((miss || mmio) && !io.flush) { + state := Mux(mmio, s_mmioReq, Mux(!ro.B && meta.dirty, s_memWriteReq, s_memReadReq)) + } + } + + is (s_mmioReq) { when (io.mmio.req.fire()) { state := s_mmioResp } } + is (s_mmioResp) { when (io.mmio.resp.fire()) { state := s_wait_resp } } + + is (s_release) { + when (io.cohResp.fire() || respToL1Fire) { readBeatCnt.inc() } + when (probe && 
io.cohResp.fire() && releaseLast || respToL1Fire && respToL1Last) { state := s_idle } + } + + is (s_memReadReq) { when (io.mem.req.fire()) { + state := s_memReadResp + readBeatCnt.value := addr.wordIndex + }} + + is (s_memReadResp) { + when (io.mem.resp.fire()) { + afterFirstRead := true.B + readBeatCnt.inc() + when (req.cmd === SimpleBusCmd.writeBurst) { writeL2BeatCnt.value := 0.U } + when (io.mem.resp.bits.isReadLast()) { state := s_wait_resp } + } + } + + is (s_memWriteReq) { + when (io.mem.req.fire()) { writeBeatCnt.inc() } + when (io.mem.req.bits.isWriteLast() && io.mem.req.fire()) { state := s_memWriteResp } + } + + is (s_memWriteResp) { when (io.mem.resp.fire()) { state := s_memReadReq } } + is (s_wait_resp) { when (io.out.fire() || needFlush || alreadyOutFire) { state := s_idle } } + } + + val dataRefill = MaskData(io.mem.resp.bits.rdata, req.wdata, Mux(readingFirst, wordMask, 0.U(DataBits.W))) + val dataRefillWriteBus = Wire(CacheDataArrayWriteBus).apply( + valid = (state === s_memReadResp) && io.mem.resp.fire(), setIdx = Cat(addr.index, readBeatCnt.value), + data = Wire(new DataBundle).apply(dataRefill), waymask = io.in.bits.waymask) + + dataWriteArb.io.in(0) <> dataHitWriteBus.req + dataWriteArb.io.in(1) <> dataRefillWriteBus.req + io.dataWriteBus.req <> dataWriteArb.io.out + + val metaRefillWriteBus = Wire(CacheMetaArrayWriteBus()).apply( + valid = (state === s_memReadResp) && io.mem.resp.fire() && io.mem.resp.bits.isReadLast(), + data = Wire(new MetaBundle).apply(valid = true.B, tag = addr.tag, dirty = !ro.B && req.isWrite()), + setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask + ) + + metaWriteArb.io.in(0) <> metaHitWriteBus.req + metaWriteArb.io.in(1) <> metaRefillWriteBus.req + io.metaWriteBus.req <> metaWriteArb.io.out + + if (cacheLevel == 2) { + when ((state === s_memReadResp) && io.mem.resp.fire() && req.isReadBurst()) { + // readBurst request miss + io.out.bits.rdata := dataRefill + io.out.bits.cmd := 
Mux(io.mem.resp.bits.isReadLast(), SimpleBusCmd.readLast, SimpleBusCmd.readBurst) + }.elsewhen (req.isWriteLast() || req.cmd === SimpleBusCmd.writeBurst) { + // writeBurst/writeLast request, no matter hit or miss + io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) + io.out.bits.cmd := DontCare + }.elsewhen (hitReadBurst && state === s_release) { + // readBurst request hit + io.out.bits.rdata := dataHitWay + io.out.bits.cmd := Mux(respToL1Last, SimpleBusCmd.readLast, SimpleBusCmd.readBurst) + }.otherwise { + io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) + io.out.bits.cmd := req.cmd + } + } else { + io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) + io.out.bits.cmd := Mux(io.in.bits.req.isRead(), SimpleBusCmd.readLast, Mux(io.in.bits.req.isWrite(), SimpleBusCmd.writeResp, DontCare))//DontCare, added by lemover + } + io.out.bits.user.zip(req.user).map { case (o,i) => o := i } + io.out.bits.id.zip(req.id).map { case (o,i) => o := i } + + io.out.valid := io.in.valid && Mux(req.isBurst() && (cacheLevel == 2).B, + Mux(req.isWrite() && (hit || !hit && state === s_wait_resp), true.B, (state === s_memReadResp && io.mem.resp.fire() && req.cmd === SimpleBusCmd.readBurst)) || (respToL1Fire && respToL1Last && state === s_release), + Mux(probe, false.B, Mux(hit, true.B, Mux(req.isWrite() || mmio, state === s_wait_resp, afterFirstRead && !alreadyOutFire))) + ) + + // With critical-word first, the pipeline registers between + // s2 and s3 can not be overwritten before a missing request + // is totally handled. We use io.isFinish to indicate when the + // request really ends. 
+  io.isFinish := Mux(probe, io.cohResp.fire() && Mux(miss, state === s_idle, (state === s_release) && releaseLast),
+    Mux(hit || req.isWrite(), io.out.fire(), (state === s_wait_resp) && (io.out.fire() || alreadyOutFire))
+  )
+  // Take a new request only when idle and the held entry is not a miss, a probe or a hit read-burst.
+  io.in.ready := io.out.ready && (state === s_idle && !hitReadBurst) && !miss && !probe
+  io.dataReadRespToL1 := hitReadBurst && (state === s_idle && io.out.ready || state === s_release && state2 === s2_dataOK)
+  // The hit-path and refill-path array writes must never be valid in the same cycle.
+  assert(!(metaHitWriteBus.req.valid && metaRefillWriteBus.req.valid))
+  assert(!(dataHitWriteBus.req.valid && dataRefillWriteBus.req.valid))
+  assert(!(!ro.B && io.flush), "only allow to flush icache")
+  val metasPrintable = io.in.bits.metas.map(m => p"${Hexadecimal(m.valid)}${Hexadecimal(m.dirty)}:${Hexadecimal(m.tag)} ").reduce(_ + _) // one "valid dirty:tag" group per way, for any Ways
+  Debug(p" metaread idx ${Hexadecimal(getMetaIdx(req.addr))} waymask ${Binary(io.in.bits.waymask.asUInt)} metas " + metasPrintable + p"${Hexadecimal(io.in.bits.datas.asUInt)}\n")
+  Debug(io.metaWriteBus.req.fire(), "%d: [" + cacheName + " S3]: metawrite idx %x wmask %b meta %x%x:%x\n", GTimer(), io.metaWriteBus.req.bits.setIdx, io.metaWriteBus.req.bits.waymask.getOrElse("b1".U), io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.dirty, io.metaWriteBus.req.bits.data.tag) // waymask is an Option: no .get
+  Debug(" in.ready = %d, in.valid = %d, hit = %x, state = %d, addr = %x cmd:%d probe:%d isFinish:%d\n", io.in.ready, io.in.valid, hit, state, req.addr, req.cmd, probe, io.isFinish)
+  Debug(" out.valid:%d rdata:%x cmd:%d user:%x id:%x \n", io.out.valid, io.out.bits.rdata, io.out.bits.cmd, io.out.bits.user.getOrElse(0.U), io.out.bits.id.getOrElse(0.U))
+  Debug(" DHW: (%d, %d), data:%x setIdx:%x MHW:(%d, %d)\n", dataHitWriteBus.req.valid, dataHitWriteBus.req.ready, dataHitWriteBus.req.bits.data.asUInt, dataHitWriteBus.req.bits.setIdx, metaHitWriteBus.req.valid, metaHitWriteBus.req.ready)
+  Debug(" DreadCache: %x \n", io.in.bits.datas.asUInt)
+  Debug(" useFD:%d isFD:%d FD:%x DreadArray:%x dataRead:%x inwaymask:%x FDwaymask:%x \n", useForwardData, io.in.bits.isForwardData, io.in.bits.forwardData.data.data, dataReadArray, dataRead, io.in.bits.waymask, io.in.bits.forwardData.waymask.getOrElse("b1".U))
+  Debug(io.dataWriteBus.req.fire(), "[WB] waymask: %b data:%x setIdx:%x\n", io.dataWriteBus.req.bits.waymask.getOrElse("b1".U), io.dataWriteBus.req.bits.data.asUInt, io.dataWriteBus.req.bits.setIdx)
+  Debug((state === s_memWriteReq) && io.mem.req.fire(), "[COUTW] cnt %x addr %x data %x cmd %x size %x wmask %x tag %x idx %x waymask %b \n", writeBeatCnt.value, io.mem.req.bits.addr, io.mem.req.bits.wdata, io.mem.req.bits.cmd, io.mem.req.bits.size, io.mem.req.bits.wmask, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask)
+  Debug((state === s_memReadReq) && io.mem.req.fire(), "[COUTR] addr %x tag %x idx %x waymask %b \n", io.mem.req.bits.addr, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask)
+  Debug((state === s_memReadResp) && io.mem.resp.fire(), "[COUTR] cnt %x data %x tag %x idx %x waymask %b \n", readBeatCnt.value, io.mem.resp.bits.rdata, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask)
+}
diff --git a/src/main/scala/nutcore/mem/tlb/Embedded.scala b/src/main/scala/nutcore/mem/tlb/Embedded.scala
new file mode 100644
index 000000000..6c520a303
--- /dev/null
+++ b/src/main/scala/nutcore/mem/tlb/Embedded.scala
@@ -0,0 +1,211 @@
+/**************************************************************************************
+* Copyright (c) 2020 Institute of Computing Technology, CAS
+* Copyright (c) 2020 University of Chinese Academy of Sciences
+*
+* NutShell is licensed under Mulan PSL v2.
+* You can use this software according to the terms and conditions of the Mulan PSL v2.
+* You may obtain a copy of Mulan PSL v2 at:
+* http://license.coscl.org.cn/MulanPSL2
+*
+* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+* FIT FOR A PARTICULAR PURPOSE.
+*
+* See the Mulan PSL v2 for more details.
+***************************************************************************************/
+
+package nutcore.mem.tlb
+
+import chisel3._
+import chisel3.util._
+import chisel3.util.experimental.BoringUtils
+
+import nutcore._
+
+import bus.simplebus._
+import bus.axi4._
+import chisel3.experimental.IO
+import utils._
+import top.Settings
+
+trait HasTLBIO extends HasNutCoreParameter with HasTlbConst with HasCSRConst { // mixes the common TLB port list `io` into a module
+  class TLBIO extends Bundle {
+    val in = Flipped(new SimpleBusUC(userBits = userBits, addrBits = VAddrBits)) // incoming bus; addresses are VAddrBits wide
+    val out = new SimpleBusUC(userBits = userBits) // outgoing bus the requests are forwarded on
+
+    val mem = new SimpleBusUC() // bus EmbeddedTLB connects to tlbExec.io.mem
+    val flush = Input(Bool())
+    val csrMMU = new MMUIO // MMUIO bundle: priviledgeMode is read, loadPF/storePF/addr are driven by the TLB modules in this file
+    val cacheEmpty = Input(Bool()) // EmbeddedTLB waits for this before answering an instruction page fault
+    val ipf = Output(Bool()) // instruction page fault flag; false unless the itlb path of EmbeddedTLB raises it
+  }
+  val io = IO(new TLBIO)
+}
+
+// EmbeddedTLBMD below duplicates TLBMD; consider eliminating one of the two.
+class EmbeddedTLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { // TLB entry storage: Sets rows of Ways entries, zeroed by a sweep after reset
+  val io = IO(new Bundle {
+    val tlbmd = Output(Vec(Ways, UInt(tlbLen.W))) // the row selected by rindex
+    val write = Flipped(new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen))
+    val rindex = Input(UInt(IndexBits.W))
+    val ready = Output(Bool()) // low while the reset sweep is running
+  })
+
+  //val tlbmd = Reg(Vec(Ways, UInt(tlbLen.W)))
+  val tlbmd = Mem(Sets, Vec(Ways, UInt(tlbLen.W)))
+  io.tlbmd := tlbmd(io.rindex)
+
+  //val reset = WireInit(false.B)
+  val resetState = RegInit(true.B)//RegEnable(true.B, init = true.B, reset)
+  val (resetSet, resetFinish) = Counter(resetState, Sets) // walks every set index while resetState is high
+  when (resetFinish) { resetState := false.B }
+
+  val writeWen = io.write.wen//WireInit(false.B)
+  val writeSetIdx = io.write.windex
+  val writeWayMask = io.write.waymask
+  val writeData = io.write.wdata
+  // During the reset sweep the write port is taken over: all ways of set resetSet are written with 0.
+  val wen = Mux(resetState, true.B, writeWen)
+  val setIdx = Mux(resetState, resetSet, writeSetIdx)
+  val waymask = Mux(resetState, Fill(Ways, "b1".U), writeWayMask)
+  val dataword = Mux(resetState, 0.U, writeData)
+  val wdata = VecInit(Seq.fill(Ways)(dataword)) // the same word is offered to every way; waymask picks the ways written
+
+  when (wen) { tlbmd.write(setIdx, wdata, waymask.asBools) }
+
+  io.ready := !resetState
+  def rready() = !resetState
+  def wready() = !resetState
+}
+
+class EmbeddedTLB(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO {
+  // TLB wrapper: forwards requests untranslated while translation is off, otherwise routes them through EmbeddedTLBExec.
+  val satp = WireInit(0.U(XLEN.W))
+  BoringUtils.addSink(satp, "CSRSATP")
+
+  // sub-modules: translation engine, pass-through stage and entry storage
+  val tlbExec = Module(new EmbeddedTLBExec)
+  val tlbEmpty = Module(new EmbeddedTLBEmpty)
+  val mdTLB = Module(new EmbeddedTLBMD)
+  val mdUpdate = Wire(Bool()) // driven by PipelineConnectTLB below
+
+  tlbExec.io.flush := io.flush
+  tlbExec.io.satp := satp
+  tlbExec.io.mem <> io.mem
+  tlbExec.io.pf <> io.csrMMU
+  tlbExec.io.md <> RegEnable(mdTLB.io.tlbmd, mdUpdate) // entry row captured together with the request
+  tlbExec.io.mdReady := mdTLB.io.ready
+  mdTLB.io.rindex := getIndex(io.in.req.bits.addr)
+  mdTLB.io.write <> tlbExec.io.mdWrite
+
+  io.ipf := false.B // default; overridden in the itlb block further down
+
+  // meta reset: the "MOUFlushTLB" signal resets mdTLB, which restarts its zeroing sweep
+  val flushTLB = WireInit(false.B)
+  BoringUtils.addSink(flushTLB, "MOUFlushTLB")
+  mdTLB.reset := reset.asBool || flushTLB
+
+  // translation is enabled when satp.mode is 8 and the privilege mode is below ModeM
+  val vmEnable = satp.asTypeOf(satpBundle).mode === 8.U && (io.csrMMU.priviledgeMode < ModeM)
+  // Register stage between left and right whose valid bit is only set while vmEnable holds; `update` pulses when the register is loaded.
+  def PipelineConnectTLB[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], update: Bool, rightOutFire: Bool, isFlush: Bool, vmEnable: Bool) = {
+    val valid = RegInit(false.B)
+    when (rightOutFire) { valid := false.B }
+    when (left.valid && right.ready && vmEnable) { valid := true.B }
+    when (isFlush) { valid := false.B } // last assignment, so a flush wins over the two above
+
+    left.ready := right.ready
+    right.bits <> RegEnable(left.bits, left.valid && right.ready)
+    right.valid := valid //&& !isFlush
+
+    update := left.valid && right.ready
+  }
+
+  tlbEmpty.io.in <> DontCare
+  tlbEmpty.io.out.ready := DontCare
+  PipelineConnectTLB(io.in.req, tlbExec.io.in, mdUpdate, tlbExec.io.isFinish, io.flush, vmEnable)
+  if(tlbname == "dtlb") {
+    PipelineConnect(tlbExec.io.out, tlbEmpty.io.in, tlbEmpty.io.out.fire(), io.flush)
+  }
+  when(!vmEnable) {
+    tlbExec.io.out.ready := true.B // let a request that is already inside drain
+    if( tlbname == "dtlb") { tlbEmpty.io.out.ready := true.B }
+    io.out.req.valid := io.in.req.valid
+    io.in.req.ready := io.out.req.ready
+    io.out.req.bits.addr := io.in.req.bits.addr(PAddrBits-1, 0) // bypass: keep only the low PAddrBits of the address
+    io.out.req.bits.size := io.in.req.bits.size
+    io.out.req.bits.cmd := io.in.req.bits.cmd
+    io.out.req.bits.wmask := io.in.req.bits.wmask
+    io.out.req.bits.wdata := io.in.req.bits.wdata
+    io.out.req.bits.user.map(_ := io.in.req.bits.user.getOrElse(0.U))
+  }.otherwise {
+    if (tlbname == "dtlb") { io.out.req <> tlbEmpty.io.out}
+    else { io.out.req <> tlbExec.io.out }
+  }
+  io.out.resp <> io.in.resp
+
+  // signals the LSU needs from the dtlb, exported through BoringUtils
+  if(tlbname == "dtlb") {
+    val alreadyOutFinish = RegEnable(true.B, init=false.B, tlbExec.io.out.valid && !tlbExec.io.out.ready)
+    when(alreadyOutFinish && tlbExec.io.out.fire()) { alreadyOutFinish := false.B}
+    val tlbFinish = (tlbExec.io.out.valid && !alreadyOutFinish) || tlbExec.io.pf.isPF()
+    BoringUtils.addSource(tlbFinish, "DTLBFINISH")
+    BoringUtils.addSource(io.csrMMU.isPF(), "DTLBPF")
+    BoringUtils.addSource(vmEnable, "DTLBENABLE")
+  }
+
+  // instruction page fault: block the outgoing request and, once the cache is empty, answer the fetch here with ipf set
+  if (tlbname == "itlb") {
+    when (tlbExec.io.ipf && vmEnable) {
+      tlbExec.io.out.ready := io.cacheEmpty && io.in.resp.ready
+      io.out.req.valid := false.B
+    }
+
+    when (tlbExec.io.ipf && vmEnable && io.cacheEmpty) {
+      io.in.resp.valid := true.B
+      io.in.resp.bits.rdata := 0.U
+      io.in.resp.bits.cmd := SimpleBusCmd.readLast
+      io.in.resp.bits.user.map(_ := tlbExec.io.in.bits.user.getOrElse(0.U))
+      io.ipf := tlbExec.io.ipf
+    }
+  }
+
+  Debug("InReq(%d, %d) InResp(%d, %d) OutReq(%d, %d) OutResp(%d, %d) vmEnable:%d mode:%d\n", io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready, io.out.req.valid, io.out.req.ready, io.out.resp.valid, io.out.resp.ready, vmEnable, io.csrMMU.priviledgeMode)
+  Debug("InReq: addr:%x cmd:%d wdata:%x OutReq: addr:%x cmd:%x wdata:%x\n", io.in.req.bits.addr, io.in.req.bits.cmd, io.in.req.bits.wdata, io.out.req.bits.addr, io.out.req.bits.cmd, io.out.req.bits.wdata)
+  Debug("OutResp: rdata:%x cmd:%x Inresp: rdata:%x cmd:%x\n", io.out.resp.bits.rdata, io.out.resp.bits.cmd, io.in.resp.bits.rdata, io.in.resp.bits.cmd)
+  Debug("satp:%x flush:%d cacheEmpty:%d instrPF:%d loadPF:%d storePF:%d \n", satp, io.flush, io.cacheEmpty, io.ipf, io.csrMMU.loadPF, io.csrMMU.storePF)
+}
+
+class EmbeddedTLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule {
+  val io = IO(new Bundle {
+    val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits)))
+    val out = Decoupled(new SimpleBusReqBundle(userBits = userBits))
+  })
+  // Pure pass-through: the output is wired straight to the input.
+  io.out <> io.in
+}
+
+class EmbeddedTLB_fake(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO { // stand-in with the same ports that performs no translation
+  io.mem <> DontCare
+  io.out <> io.in // requests and responses pass through unchanged
+  io.csrMMU.loadPF := false.B
+  io.csrMMU.storePF := false.B
+  io.csrMMU.addr := io.in.req.bits.addr
+  io.ipf := false.B // never reports a page fault
+}
+
+
+object EmbeddedTLB {
+  def apply(in: SimpleBusUC, mem: 
SimpleBusUC, flush: Bool, csrMMU: MMUIO, enable: Boolean = true)(implicit tlbConfig: TLBConfig) = { + val tlb = if (enable) { + Module(new EmbeddedTLB) + } else { + Module(new EmbeddedTLB_fake) + } + tlb.io.in <> in + tlb.io.mem <> mem + tlb.io.flush := flush + tlb.io.csrMMU <> csrMMU + tlb + } +} \ No newline at end of file diff --git a/src/main/scala/nutcore/mem/EmbeddedTLB.scala b/src/main/scala/nutcore/mem/tlb/EmbeddedExec.scala similarity index 59% rename from src/main/scala/nutcore/mem/EmbeddedTLB.scala rename to src/main/scala/nutcore/mem/tlb/EmbeddedExec.scala index e54aa64a1..fef3f02a0 100644 --- a/src/main/scala/nutcore/mem/EmbeddedTLB.scala +++ b/src/main/scala/nutcore/mem/tlb/EmbeddedExec.scala @@ -1,177 +1,18 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. 
-***************************************************************************************/ -package nutcore +package nutcore.mem.tlb import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils + +import nutcore._ + import bus.simplebus._ import bus.axi4._ import chisel3.experimental.IO import utils._ import top.Settings -trait HasTLBIO extends HasNutCoreParameter with HasTlbConst with HasCSRConst { - class TLBIO extends Bundle { - val in = Flipped(new SimpleBusUC(userBits = userBits, addrBits = VAddrBits)) - val out = new SimpleBusUC(userBits = userBits) - - val mem = new SimpleBusUC() - val flush = Input(Bool()) - val csrMMU = new MMUIO - val cacheEmpty = Input(Bool()) - val ipf = Output(Bool()) - } - val io = IO(new TLBIO) -} - -class EmbeddedTLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { - val io = IO(new Bundle { - val tlbmd = Output(Vec(Ways, UInt(tlbLen.W))) - val write = Flipped(new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen)) - val rindex = Input(UInt(IndexBits.W)) - val ready = Output(Bool()) - }) - - //val tlbmd = Reg(Vec(Ways, UInt(tlbLen.W))) - val tlbmd = Mem(Sets, Vec(Ways, UInt(tlbLen.W))) - io.tlbmd := tlbmd(io.rindex) - - //val reset = WireInit(false.B) - val resetState = RegInit(true.B)//RegEnable(true.B, init = true.B, reset) - val (resetSet, resetFinish) = Counter(resetState, Sets) - when (resetFinish) { resetState := false.B } - - val writeWen = io.write.wen//WireInit(false.B) - val writeSetIdx = io.write.windex - val writeWayMask = io.write.waymask - val writeData = io.write.wdata - - val wen = Mux(resetState, true.B, writeWen) - val setIdx = Mux(resetState, resetSet, writeSetIdx) - val waymask = Mux(resetState, Fill(Ways, "b1".U), writeWayMask) - val dataword = Mux(resetState, 0.U, writeData) - val wdata = VecInit(Seq.fill(Ways)(dataword)) - - when (wen) { tlbmd.write(setIdx, wdata, waymask.asBools) } - - io.ready := !resetState - def rready() = !resetState - def wready() = 
!resetState -} - -class EmbeddedTLB(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO { - - val satp = WireInit(0.U(XLEN.W)) - BoringUtils.addSink(satp, "CSRSATP") - - // tlb exec - val tlbExec = Module(new EmbeddedTLBExec) - val tlbEmpty = Module(new EmbeddedTLBEmpty) - val mdTLB = Module(new EmbeddedTLBMD) - val mdUpdate = Wire(Bool()) - - tlbExec.io.flush := io.flush - tlbExec.io.satp := satp - tlbExec.io.mem <> io.mem - tlbExec.io.pf <> io.csrMMU - tlbExec.io.md <> RegEnable(mdTLB.io.tlbmd, mdUpdate) - tlbExec.io.mdReady := mdTLB.io.ready - mdTLB.io.rindex := getIndex(io.in.req.bits.addr) - mdTLB.io.write <> tlbExec.io.mdWrite - - io.ipf := false.B - - // meta reset - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - mdTLB.reset := reset.asBool || flushTLB - - // VM enable && io - val vmEnable = satp.asTypeOf(satpBundle).mode === 8.U && (io.csrMMU.priviledgeMode < ModeM) - - def PipelineConnectTLB[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], update: Bool, rightOutFire: Bool, isFlush: Bool, vmEnable: Bool) = { - val valid = RegInit(false.B) - when (rightOutFire) { valid := false.B } - when (left.valid && right.ready && vmEnable) { valid := true.B } - when (isFlush) { valid := false.B } - - left.ready := right.ready - right.bits <> RegEnable(left.bits, left.valid && right.ready) - right.valid := valid //&& !isFlush - - update := left.valid && right.ready - } - - tlbEmpty.io.in <> DontCare - tlbEmpty.io.out.ready := DontCare - PipelineConnectTLB(io.in.req, tlbExec.io.in, mdUpdate, tlbExec.io.isFinish, io.flush, vmEnable) - if(tlbname == "dtlb") { - PipelineConnect(tlbExec.io.out, tlbEmpty.io.in, tlbEmpty.io.out.fire(), io.flush) - } - when(!vmEnable) { - tlbExec.io.out.ready := true.B // let existed request go out - if( tlbname == "dtlb") { tlbEmpty.io.out.ready := true.B } - io.out.req.valid := io.in.req.valid - io.in.req.ready := io.out.req.ready - io.out.req.bits.addr := 
io.in.req.bits.addr(PAddrBits-1, 0) - io.out.req.bits.size := io.in.req.bits.size - io.out.req.bits.cmd := io.in.req.bits.cmd - io.out.req.bits.wmask := io.in.req.bits.wmask - io.out.req.bits.wdata := io.in.req.bits.wdata - io.out.req.bits.user.map(_ := io.in.req.bits.user.getOrElse(0.U)) - }.otherwise { - if (tlbname == "dtlb") { io.out.req <> tlbEmpty.io.out} - else { io.out.req <> tlbExec.io.out } - } - io.out.resp <> io.in.resp - - // lsu need dtlb signals - if(tlbname == "dtlb") { - val alreadyOutFinish = RegEnable(true.B, init=false.B, tlbExec.io.out.valid && !tlbExec.io.out.ready) - when(alreadyOutFinish && tlbExec.io.out.fire()) { alreadyOutFinish := false.B} - val tlbFinish = (tlbExec.io.out.valid && !alreadyOutFinish) || tlbExec.io.pf.isPF() - BoringUtils.addSource(tlbFinish, "DTLBFINISH") - BoringUtils.addSource(io.csrMMU.isPF(), "DTLBPF") - BoringUtils.addSource(vmEnable, "DTLBENABLE") - } - - // instruction page fault - if (tlbname == "itlb") { - when (tlbExec.io.ipf && vmEnable) { - tlbExec.io.out.ready := io.cacheEmpty && io.in.resp.ready - io.out.req.valid := false.B - } - - when (tlbExec.io.ipf && vmEnable && io.cacheEmpty) { - io.in.resp.valid := true.B - io.in.resp.bits.rdata := 0.U - io.in.resp.bits.cmd := SimpleBusCmd.readLast - io.in.resp.bits.user.map(_ := tlbExec.io.in.bits.user.getOrElse(0.U)) - io.ipf := tlbExec.io.ipf - } - } - - Debug("InReq(%d, %d) InResp(%d, %d) OutReq(%d, %d) OutResp(%d, %d) vmEnable:%d mode:%d\n", io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready, io.out.req.valid, io.out.req.ready, io.out.resp.valid, io.out.resp.ready, vmEnable, io.csrMMU.priviledgeMode) - Debug("InReq: addr:%x cmd:%d wdata:%x OutReq: addr:%x cmd:%x wdata:%x\n", io.in.req.bits.addr, io.in.req.bits.cmd, io.in.req.bits.wdata, io.out.req.bits.addr, io.out.req.bits.cmd, io.out.req.bits.wdata) - Debug("OutResp: rdata:%x cmd:%x Inresp: rdata:%x cmd:%x\n", io.out.resp.bits.rdata, io.out.resp.bits.cmd, io.in.resp.bits.rdata, 
io.in.resp.bits.cmd) - Debug("satp:%x flush:%d cacheEmpty:%d instrPF:%d loadPF:%d storePF:%d \n", satp, io.flush, io.cacheEmpty, io.ipf, io.csrMMU.loadPF, io.csrMMU.storePF) -} - class EmbeddedTLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ val io = IO(new Bundle { val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, addrBits = VAddrBits))) @@ -395,38 +236,4 @@ class EmbeddedTLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ Debug("md: wen:%d windex:%x waymask:%x vpn:%x asid:%x mask:%x flag:%x asid:%x ppn:%x pteaddr:%x\n", io.mdWrite.wen, io.mdWrite.windex, io.mdWrite.waymask, io.mdWrite.wdata.asTypeOf(tlbBundle).vpn, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).mask, io.mdWrite.wdata.asTypeOf(tlbBundle).flag, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).ppn, io.mdWrite.wdata.asTypeOf(tlbBundle).pteaddr) Debug("MemReq(%d, %d) MemResp(%d, %d) addr:%x cmd:%d rdata:%x cmd:%d\n", io.mem.req.valid, io.mem.req.ready, io.mem.resp.valid, io.mem.resp.ready, io.mem.req.bits.addr, io.mem.req.bits.cmd, io.mem.resp.bits.rdata, io.mem.resp.bits.cmd) Debug("io.ipf:%d hitinstrPF:%d missIPF:%d pf.loadPF:%d pf.storePF:%d loadPF:%d storePF:%d\n", io.ipf, hitinstrPF, missIPF, io.pf.loadPF, io.pf.storePF, loadPF, storePF) -} - -class EmbeddedTLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits))) - val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) - }) - - io.out <> io.in -} - -class EmbeddedTLB_fake(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO { - io.mem <> DontCare - io.out <> io.in - io.csrMMU.loadPF := false.B - io.csrMMU.storePF := false.B - io.csrMMU.addr := io.in.req.bits.addr - io.ipf := false.B -} - - -object EmbeddedTLB { - def apply(in: SimpleBusUC, mem: SimpleBusUC, flush: Bool, csrMMU: MMUIO, enable: Boolean = 
true)(implicit tlbConfig: TLBConfig) = { - val tlb = if (enable) { - Module(new EmbeddedTLB) - } else { - Module(new EmbeddedTLB_fake) - } - tlb.io.in <> in - tlb.io.mem <> mem - tlb.io.flush := flush - tlb.io.csrMMU <> csrMMU - tlb - } } \ No newline at end of file diff --git a/src/main/scala/nutcore/mem/TLB.scala b/src/main/scala/nutcore/mem/tlb/TLB.scala similarity index 50% rename from src/main/scala/nutcore/mem/TLB.scala rename to src/main/scala/nutcore/mem/tlb/TLB.scala index bcd452174..5d1c71564 100644 --- a/src/main/scala/nutcore/mem/TLB.scala +++ b/src/main/scala/nutcore/mem/tlb/TLB.scala @@ -14,18 +14,20 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ -package nutcore +package nutcore.mem.tlb import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import bus.simplebus._ import bus.axi4._ import utils._ import top.Settings -sealed trait Sv39Const extends HasNutCoreParameter{ +trait Sv39Const extends HasNutCoreParameter{ val Level = 3 val offLen = 12 val ppn0Len = 9 @@ -133,7 +135,7 @@ sealed trait Sv39Const extends HasNutCoreParameter{ } -sealed case class TLBConfig ( +case class TLBConfig ( name: String = "tlb", userBits: Int = 0, @@ -204,7 +206,7 @@ trait HasTlbConst extends Sv39Const{ abstract class TlbBundle(implicit tlbConfig: TLBConfig) extends NutCoreBundle with HasNutCoreParameter with HasTlbConst with Sv39Const abstract class TlbModule(implicit tlbConfig: TLBConfig) extends NutCoreModule with HasNutCoreParameter with HasTlbConst with Sv39Const with HasCSRConst -sealed class TLBMDWriteBundle (val IndexBits: Int, val Ways: Int, val tlbLen: Int) extends Bundle with HasNutCoreParameter with Sv39Const { +class TLBMDWriteBundle (val IndexBits: Int, val Ways: Int, val tlbLen: Int) extends Bundle with HasNutCoreParameter with Sv39Const { val wen = Output(Bool()) val windex = Output(UInt(IndexBits.W)) val waymask = 
Output(UInt(Ways.W)) @@ -218,7 +220,7 @@ sealed class TLBMDWriteBundle (val IndexBits: Int, val Ways: Int, val tlbLen: In } } -sealed class TLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { +class TLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { class TLBMDIO extends Bundle { val tlbmd = Output(Vec(Ways, UInt(tlbLen.W))) val write = Flipped(new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen)) @@ -361,247 +363,8 @@ class TLB(implicit val tlbConfig: TLBConfig) extends TlbModule{ Debug("satp:%x flush:%d cacheEmpty:%d instrPF:%d loadPF:%d storePF:%d \n", satp, io.flush, io.cacheEmpty, io.ipf, io.csrMMU.loadPF, io.csrMMU.storePF) } -sealed class TLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ - class TLBExecIO extends Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, addrBits = VAddrBits))) - val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) - - val md = Input(Vec(Ways, UInt(tlbLen.W))) - val mdWrite = new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen) - val mdReady = Input(Bool()) - - val mem = new SimpleBusUC(userBits = userBits) - val flush = Input(Bool()) - val satp = Input(UInt(XLEN.W)) - val pf = new MMUIO - val ipf = Output(Bool()) - val isFinish = Output(Bool()) - } - val io = IO(new TLBExecIO) - - val md = io.md//RegEnable(mdTLB.io.tlbmd, io.in.ready) - - // lazy renaming - val req = io.in.bits - val vpn = req.addr.asTypeOf(vaBundle2).vpn.asTypeOf(vpnBundle) - val pf = io.pf - val satp = io.satp.asTypeOf(satpBundle) - val ifecth = if(tlbname == "itlb") true.B else false.B - - // pf init - pf.loadPF := false.B - pf.storePF := false.B - pf.addr := req.addr - - // check hit or miss - val hitVec = VecInit(md.map(m => m.asTypeOf(tlbBundle).flag.asTypeOf(flagBundle).v && (m.asTypeOf(tlbBundle).asid === satp.asid) && MaskEQ(m.asTypeOf(tlbBundle).mask, m.asTypeOf(tlbBundle).vpn, vpn.asUInt))).asUInt - val hit = io.in.valid && hitVec.orR - val 
miss = io.in.valid && !hitVec.orR - - val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U - val waymask = Mux(hit, hitVec, victimWaymask) - - val loadPF = WireInit(false.B) - val storePF = WireInit(false.B) - - // hit - val hitMeta = Mux1H(waymask, md).asTypeOf(tlbBundle2).meta.asTypeOf(metaBundle) - val hitData = Mux1H(waymask, md).asTypeOf(tlbBundle2).data.asTypeOf(dataBundle) - val hitFlag = hitMeta.flag.asTypeOf(flagBundle) - val hitMask = hitMeta.mask - // hit write back pte.flag - val hitinstrPF = WireInit(false.B) - val hitWB = hit && (!hitFlag.a || !hitFlag.d && req.isWrite()) && !hitinstrPF && !(loadPF || storePF || io.pf.isPF()) - val hitRefillFlag = Cat(req.isWrite().asUInt, 1.U(1.W), 0.U(6.W)) | hitFlag.asUInt - val hitWBStore = RegEnable(Cat(0.U(10.W), hitData.ppn, 0.U(2.W), hitRefillFlag), hitWB) - - // hit permission check - val hitCheck = hit /*&& hitFlag.v */&& !(pf.priviledgeMode === ModeU && !hitFlag.u) && !(pf.priviledgeMode === ModeS && hitFlag.u && (!pf.status_sum || ifecth)) - val hitExec = hitCheck && hitFlag.x - val hitLoad = hitCheck && (hitFlag.r || pf.status_mxr && hitFlag.x) - val hitStore = hitCheck && hitFlag.w - - io.pf.loadPF := loadPF //RegNext(loadPF, init =false.B) - io.pf.storePF := storePF //RegNext(storePF, init = false.B) - - if (tlbname == "itlb") { hitinstrPF := !hitExec && hit} - if (tlbname == "dtlb") { - loadPF := !hitLoad && req.isRead() && hit - storePF := (!hitStore && req.isWrite() && hit) - // AMO pagefault type will be fixed in LSU - } - - // miss - val s_idle :: s_memReadReq :: s_memReadResp :: s_write_pte :: s_wait_resp :: s_miss_slpf :: Nil = Enum(6) - val state = RegInit(s_idle) - val level = RegInit(Level.U(log2Up(Level).W)) - - val memRespStore = Reg(UInt(XLEN.W)) - val missMask = WireInit("h3ffff".U(maskLen.W)) - val missMaskStore = Reg(UInt(maskLen.W)) - val missMetaRefill = WireInit(false.B) - val missRefillFlag = WireInit(0.U(8.W)) - val memRdata = 
io.mem.resp.bits.rdata.asTypeOf(pteBundle) - val raddr = Reg(UInt(PAddrBits.W)) - val alreadyOutFire = RegEnable(true.B, init = false.B, if(tlbname == "itlb") io.out.fire else io.out.valid) - - //handle flush - val needFlush = RegInit(false.B) - val ioFlush = io.flush - val isFlush = needFlush || ioFlush - when (ioFlush && (state =/= s_idle)) { needFlush := true.B} - if(tlbname == "itlb"){ - when (io.out.fire() && needFlush) { needFlush := false.B} - } - if(tlbname == "dtlb"){ - when (io.out.valid && needFlush) { needFlush := false.B} - } - - val missIPF = RegInit(false.B) - - // state machine to handle miss(ptw) and pte-writing-back - switch (state) { - is (s_idle) { - when (!ioFlush && hitWB) { - state := s_write_pte - needFlush := false.B - alreadyOutFire := false.B - }.elsewhen (miss && !ioFlush) { - state := s_memReadReq - raddr := paddrApply(satp.ppn, vpn.vpn2) // - level := Level.U - needFlush := false.B - alreadyOutFire := false.B - } - } - - is (s_memReadReq) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (io.mem.req.fire()) { state := s_memReadResp} - } - - is (s_memReadResp) { - val missflag = memRdata.flag.asTypeOf(flagBundle) - when (io.mem.resp.fire()) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (!(missflag.r || missflag.x) && (level===3.U || level===2.U)) { - when(!missflag.v || (!missflag.r && missflag.w)) { //TODO: fix needflush - if(tlbname == "itlb") { state := s_wait_resp } else { state := s_miss_slpf } - if(tlbname == "itlb") { missIPF := true.B } - if(tlbname == "dtlb") { - loadPF := req.isRead() - storePF := req.isWrite() - } - Debug("tlbException!!! 
") - Debug(false, p" req:${req} Memreq:${io.mem.req} MemResp:${io.mem.resp}") - Debug(false, " level:%d",level) - Debug(false, "\n") - }.otherwise { - state := s_memReadReq - raddr := paddrApply(memRdata.ppn, Mux(level === 3.U, vpn.vpn1, vpn.vpn0)) - } - }.elsewhen (level =/= 0.U) { //TODO: fix needFlush - val permCheck = missflag.v && !(pf.priviledgeMode === ModeU && !missflag.u) && !(pf.priviledgeMode === ModeS && missflag.u && (!pf.status_sum || ifecth)) - val permExec = permCheck && missflag.x - val permLoad = permCheck && (missflag.r || pf.status_mxr && missflag.x) - val permStore = permCheck && missflag.w - val updateAD = if (Settings.get("FPGAPlatform")) !missflag.a || (!missflag.d && req.isWrite()) else false.B - val updateData = Cat( 0.U(56.W), req.isWrite(), 1.U(1.W), 0.U(6.W) ) - missRefillFlag := Cat(req.isWrite(), 1.U(1.W), 0.U(6.W)) | missflag.asUInt - memRespStore := io.mem.resp.bits.rdata | updateData - if(tlbname == "itlb") { - when (!permExec) { missIPF := true.B ; state := s_wait_resp} - .otherwise { - state := Mux(updateAD, s_write_pte, s_wait_resp) - missMetaRefill := true.B - } - } - if(tlbname == "dtlb") { - when((!permLoad && req.isRead()) || (!permStore && req.isWrite())) { - state := s_miss_slpf - loadPF := req.isRead() - storePF := req.isWrite() - }.otherwise { - state := Mux(updateAD, s_write_pte, s_wait_resp) - missMetaRefill := true.B - } - } - missMask := Mux(level===3.U, 0.U(maskLen.W), Mux(level===2.U, "h3fe00".U(maskLen.W), "h3ffff".U(maskLen.W))) - missMaskStore := missMask - } - level := level - 1.U - } - } - - is (s_write_pte) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (io.mem.req.fire()) { state := s_wait_resp } - } - - is (s_wait_resp) { - if(tlbname == "itlb"){ - when (io.out.fire() || ioFlush || alreadyOutFire){ - state := s_idle - missIPF := false.B - alreadyOutFire := false.B - } - } - if(tlbname == "dtlb"){ - state := s_idle - missIPF := false.B - alreadyOutFire := false.B - } - } - - is 
(s_miss_slpf) { - state := s_idle - } - } - - // mem - val cmd = Mux(state === s_write_pte, SimpleBusCmd.write, SimpleBusCmd.read) - io.mem.req.bits.apply(addr = Mux(hitWB, hitData.pteaddr, raddr), cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), wdata = Mux( hitWB, hitWBStore, memRespStore), wmask = 0xff.U) - io.mem.req.valid := ((state === s_memReadReq || state === s_write_pte) && !isFlush) - io.mem.resp.ready := true.B - - // tlb refill - io.mdWrite.apply(wen = RegNext((missMetaRefill && !isFlush) || (hitWB && state === s_idle && !isFlush), init = false.B), - windex = RegNext(getIndex(req.addr)), waymask = RegNext(waymask), vpn = RegNext(vpn.asUInt), - asid = RegNext(Mux(hitWB, hitMeta.asid, satp.asid)), mask = RegNext(Mux(hitWB, hitMask, missMask)), - flag = RegNext(Mux(hitWB, hitRefillFlag, missRefillFlag)), ppn = RegNext(Mux(hitWB, hitData.ppn, memRdata.ppn)), - pteaddr = RegNext((Mux(hitWB, hitData.pteaddr, raddr)))) - - // io - io.out.bits := req - io.out.bits.addr := Mux(hit, maskPaddr(hitData.ppn, req.addr(PAddrBits-1, 0), hitMask), maskPaddr(memRespStore.asTypeOf(pteBundle).ppn, req.addr(PAddrBits-1, 0), missMaskStore)) - io.out.valid := io.in.valid && Mux(hit && !hitWB, !(io.pf.isPF() || loadPF || storePF), state === s_wait_resp)// && !alreadyOutFire - - io.in.ready := io.out.ready && (state === s_idle) && !miss && !hitWB && io.mdReady && (!io.pf.isPF() && !loadPF && !storePF)//maybe be optimized - - io.ipf := Mux(hit, hitinstrPF, missIPF) - io.isFinish := io.out.fire() || io.pf.isPF() - - if(tlbname == "dtlb") { - io.isFinish := io.out.valid || io.pf.isPF() - io.out.valid := io.in.valid && (Mux(hit && !hitWB, true.B, state === s_wait_resp) || loadPF || storePF)// && !alreadyOutFire - } - Debug("In(%d, %d) Out(%d, %d) InAddr:%x OutAddr:%x cmd:%d \n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, req.addr, io.out.bits.addr, req.cmd) - Debug("io.Flush:%d needFlush:%d alreadyOutFire:%d isFinish:%d\n", io.flush, needFlush, alreadyOutFire, 
io.isFinish) - Debug("hit:%d hitWB:%d hitVPN:%x hitFlag:%x hitPPN:%x hitRefillFlag:%x hitWBStore:%x hitCheck:%d hitExec:%d hitLoad:%d hitStore:%d\n", hit, hitWB, hitMeta.vpn, hitFlag.asUInt, hitData.ppn, hitRefillFlag, hitWBStore, hitCheck, hitExec, hitLoad, hitStore) - Debug("miss:%d state:%d level:%d raddr:%x memRdata:%x missMask:%x missRefillFlag:%x missMetaRefill:%d\n", miss, state, level, raddr, memRdata.asUInt, missMask, missRefillFlag, missMetaRefill) - Debug("meta/data: (0)%x|%b|%x (1)%x|%b|%x (2)%x|%b|%x (3)%x|%b|%x rread:%d\n", md(0).asTypeOf(tlbBundle).vpn, md(0).asTypeOf(tlbBundle).flag, md(0).asTypeOf(tlbBundle).ppn, md(1).asTypeOf(tlbBundle).vpn, md(1).asTypeOf(tlbBundle).flag, md(1).asTypeOf(tlbBundle).ppn, md(2).asTypeOf(tlbBundle).vpn, md(2).asTypeOf(tlbBundle).flag, md(2).asTypeOf(tlbBundle).ppn, md(3).asTypeOf(tlbBundle).vpn, md(3).asTypeOf(tlbBundle).flag, md(3).asTypeOf(tlbBundle).ppn, io.mdReady) - Debug("md: wen:%d windex:%x waymask:%x vpn:%x asid:%x mask:%x flag:%x asid:%x ppn:%x pteaddr:%x\n", io.mdWrite.wen, io.mdWrite.windex, io.mdWrite.waymask, io.mdWrite.wdata.asTypeOf(tlbBundle).vpn, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).mask, io.mdWrite.wdata.asTypeOf(tlbBundle).flag, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).ppn, io.mdWrite.wdata.asTypeOf(tlbBundle).pteaddr) - Debug("MemReq(%d, %d) MemResp(%d, %d) addr:%x cmd:%d rdata:%x cmd:%d\n", io.mem.req.valid, io.mem.req.ready, io.mem.resp.valid, io.mem.resp.ready, io.mem.req.bits.addr, io.mem.req.bits.cmd, io.mem.resp.bits.rdata, io.mem.resp.bits.cmd) - Debug("io.ipf:%d hitinstrPF:%d missIPF:%d pf.loadPF:%d pf.storePF:%d loadPF:%d storePF:%d\n", io.ipf, hitinstrPF, missIPF, io.pf.loadPF, io.pf.storePF, loadPF, storePF) -} -sealed class TLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { +class TLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { class TLBEmptyIO extends Bundle { val in = 
Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits))) val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) diff --git a/src/main/scala/nutcore/mem/tlb/TLBExec.scala b/src/main/scala/nutcore/mem/tlb/TLBExec.scala new file mode 100644 index 000000000..b0ae01427 --- /dev/null +++ b/src/main/scala/nutcore/mem/tlb/TLBExec.scala @@ -0,0 +1,254 @@ + +package nutcore.mem.tlb + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import utils._ +import top.Settings + + +class TLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ + class TLBExecIO extends Bundle { + val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, addrBits = VAddrBits))) + val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) + + val md = Input(Vec(Ways, UInt(tlbLen.W))) + val mdWrite = new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen) + val mdReady = Input(Bool()) + + val mem = new SimpleBusUC(userBits = userBits) + val flush = Input(Bool()) + val satp = Input(UInt(XLEN.W)) + val pf = new MMUIO + val ipf = Output(Bool()) + val isFinish = Output(Bool()) + } + val io = IO(new TLBExecIO) + + val md = io.md//RegEnable(mdTLB.io.tlbmd, io.in.ready) + + // lazy renaming + val req = io.in.bits + val vpn = req.addr.asTypeOf(vaBundle2).vpn.asTypeOf(vpnBundle) + val pf = io.pf + val satp = io.satp.asTypeOf(satpBundle) + val ifecth = if(tlbname == "itlb") true.B else false.B + + // pf init + pf.loadPF := false.B + pf.storePF := false.B + pf.addr := req.addr + + // check hit or miss + val hitVec = VecInit(md.map(m => m.asTypeOf(tlbBundle).flag.asTypeOf(flagBundle).v && (m.asTypeOf(tlbBundle).asid === satp.asid) && MaskEQ(m.asTypeOf(tlbBundle).mask, m.asTypeOf(tlbBundle).vpn, vpn.asUInt))).asUInt + val hit = io.in.valid && hitVec.orR + val miss = io.in.valid && !hitVec.orR + + val victimWaymask = if (Ways > 1) (1.U << 
LFSR64()(log2Up(Ways)-1,0)) else "b1".U + val waymask = Mux(hit, hitVec, victimWaymask) + + val loadPF = WireInit(false.B) + val storePF = WireInit(false.B) + + // hit + val hitMeta = Mux1H(waymask, md).asTypeOf(tlbBundle2).meta.asTypeOf(metaBundle) + val hitData = Mux1H(waymask, md).asTypeOf(tlbBundle2).data.asTypeOf(dataBundle) + val hitFlag = hitMeta.flag.asTypeOf(flagBundle) + val hitMask = hitMeta.mask + // hit write back pte.flag + val hitinstrPF = WireInit(false.B) + val hitWB = hit && (!hitFlag.a || !hitFlag.d && req.isWrite()) && !hitinstrPF && !(loadPF || storePF || io.pf.isPF()) + val hitRefillFlag = Cat(req.isWrite().asUInt, 1.U(1.W), 0.U(6.W)) | hitFlag.asUInt + val hitWBStore = RegEnable(Cat(0.U(10.W), hitData.ppn, 0.U(2.W), hitRefillFlag), hitWB) + + // hit permission check + val hitCheck = hit /*&& hitFlag.v */&& !(pf.priviledgeMode === ModeU && !hitFlag.u) && !(pf.priviledgeMode === ModeS && hitFlag.u && (!pf.status_sum || ifecth)) + val hitExec = hitCheck && hitFlag.x + val hitLoad = hitCheck && (hitFlag.r || pf.status_mxr && hitFlag.x) + val hitStore = hitCheck && hitFlag.w + + io.pf.loadPF := loadPF //RegNext(loadPF, init =false.B) + io.pf.storePF := storePF //RegNext(storePF, init = false.B) + + if (tlbname == "itlb") { hitinstrPF := !hitExec && hit} + if (tlbname == "dtlb") { + loadPF := !hitLoad && req.isRead() && hit + storePF := (!hitStore && req.isWrite() && hit) + // AMO pagefault type will be fixed in LSU + } + + // miss + val s_idle :: s_memReadReq :: s_memReadResp :: s_write_pte :: s_wait_resp :: s_miss_slpf :: Nil = Enum(6) + val state = RegInit(s_idle) + val level = RegInit(Level.U(log2Up(Level).W)) + + val memRespStore = Reg(UInt(XLEN.W)) + val missMask = WireInit("h3ffff".U(maskLen.W)) + val missMaskStore = Reg(UInt(maskLen.W)) + val missMetaRefill = WireInit(false.B) + val missRefillFlag = WireInit(0.U(8.W)) + val memRdata = io.mem.resp.bits.rdata.asTypeOf(pteBundle) + val raddr = Reg(UInt(PAddrBits.W)) + val alreadyOutFire = 
RegEnable(true.B, init = false.B, if(tlbname == "itlb") io.out.fire else io.out.valid) + + //handle flush + val needFlush = RegInit(false.B) + val ioFlush = io.flush + val isFlush = needFlush || ioFlush + when (ioFlush && (state =/= s_idle)) { needFlush := true.B} + if(tlbname == "itlb"){ + when (io.out.fire() && needFlush) { needFlush := false.B} + } + if(tlbname == "dtlb"){ + when (io.out.valid && needFlush) { needFlush := false.B} + } + + val missIPF = RegInit(false.B) + + // state machine to handle miss(ptw) and pte-writing-back + switch (state) { + is (s_idle) { + when (!ioFlush && hitWB) { + state := s_write_pte + needFlush := false.B + alreadyOutFire := false.B + }.elsewhen (miss && !ioFlush) { + state := s_memReadReq + raddr := paddrApply(satp.ppn, vpn.vpn2) // + level := Level.U + needFlush := false.B + alreadyOutFire := false.B + } + } + + is (s_memReadReq) { + when (isFlush) { + state := s_idle + needFlush := false.B + }.elsewhen (io.mem.req.fire()) { state := s_memReadResp} + } + + is (s_memReadResp) { + val missflag = memRdata.flag.asTypeOf(flagBundle) + when (io.mem.resp.fire()) { + when (isFlush) { + state := s_idle + needFlush := false.B + }.elsewhen (!(missflag.r || missflag.x) && (level===3.U || level===2.U)) { + when(!missflag.v || (!missflag.r && missflag.w)) { //TODO: fix needflush + if(tlbname == "itlb") { state := s_wait_resp } else { state := s_miss_slpf } + if(tlbname == "itlb") { missIPF := true.B } + if(tlbname == "dtlb") { + loadPF := req.isRead() + storePF := req.isWrite() + } + Debug("tlbException!!! 
") + Debug(false, p" req:${req} Memreq:${io.mem.req} MemResp:${io.mem.resp}") + Debug(false, " level:%d",level) + Debug(false, "\n") + }.otherwise { + state := s_memReadReq + raddr := paddrApply(memRdata.ppn, Mux(level === 3.U, vpn.vpn1, vpn.vpn0)) + } + }.elsewhen (level =/= 0.U) { //TODO: fix needFlush + val permCheck = missflag.v && !(pf.priviledgeMode === ModeU && !missflag.u) && !(pf.priviledgeMode === ModeS && missflag.u && (!pf.status_sum || ifecth)) + val permExec = permCheck && missflag.x + val permLoad = permCheck && (missflag.r || pf.status_mxr && missflag.x) + val permStore = permCheck && missflag.w + val updateAD = if (Settings.get("FPGAPlatform")) !missflag.a || (!missflag.d && req.isWrite()) else false.B + val updateData = Cat( 0.U(56.W), req.isWrite(), 1.U(1.W), 0.U(6.W) ) + missRefillFlag := Cat(req.isWrite(), 1.U(1.W), 0.U(6.W)) | missflag.asUInt + memRespStore := io.mem.resp.bits.rdata | updateData + if(tlbname == "itlb") { + when (!permExec) { missIPF := true.B ; state := s_wait_resp} + .otherwise { + state := Mux(updateAD, s_write_pte, s_wait_resp) + missMetaRefill := true.B + } + } + if(tlbname == "dtlb") { + when((!permLoad && req.isRead()) || (!permStore && req.isWrite())) { + state := s_miss_slpf + loadPF := req.isRead() + storePF := req.isWrite() + }.otherwise { + state := Mux(updateAD, s_write_pte, s_wait_resp) + missMetaRefill := true.B + } + } + missMask := Mux(level===3.U, 0.U(maskLen.W), Mux(level===2.U, "h3fe00".U(maskLen.W), "h3ffff".U(maskLen.W))) + missMaskStore := missMask + } + level := level - 1.U + } + } + + is (s_write_pte) { + when (isFlush) { + state := s_idle + needFlush := false.B + }.elsewhen (io.mem.req.fire()) { state := s_wait_resp } + } + + is (s_wait_resp) { + if(tlbname == "itlb"){ + when (io.out.fire() || ioFlush || alreadyOutFire){ + state := s_idle + missIPF := false.B + alreadyOutFire := false.B + } + } + if(tlbname == "dtlb"){ + state := s_idle + missIPF := false.B + alreadyOutFire := false.B + } + } + + is 
(s_miss_slpf) { + state := s_idle + } + } + + // mem + val cmd = Mux(state === s_write_pte, SimpleBusCmd.write, SimpleBusCmd.read) + io.mem.req.bits.apply(addr = Mux(hitWB, hitData.pteaddr, raddr), cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), wdata = Mux( hitWB, hitWBStore, memRespStore), wmask = 0xff.U) + io.mem.req.valid := ((state === s_memReadReq || state === s_write_pte) && !isFlush) + io.mem.resp.ready := true.B + + // tlb refill + io.mdWrite.apply(wen = RegNext((missMetaRefill && !isFlush) || (hitWB && state === s_idle && !isFlush), init = false.B), + windex = RegNext(getIndex(req.addr)), waymask = RegNext(waymask), vpn = RegNext(vpn.asUInt), + asid = RegNext(Mux(hitWB, hitMeta.asid, satp.asid)), mask = RegNext(Mux(hitWB, hitMask, missMask)), + flag = RegNext(Mux(hitWB, hitRefillFlag, missRefillFlag)), ppn = RegNext(Mux(hitWB, hitData.ppn, memRdata.ppn)), + pteaddr = RegNext((Mux(hitWB, hitData.pteaddr, raddr)))) + + // io + io.out.bits := req + io.out.bits.addr := Mux(hit, maskPaddr(hitData.ppn, req.addr(PAddrBits-1, 0), hitMask), maskPaddr(memRespStore.asTypeOf(pteBundle).ppn, req.addr(PAddrBits-1, 0), missMaskStore)) + io.out.valid := io.in.valid && Mux(hit && !hitWB, !(io.pf.isPF() || loadPF || storePF), state === s_wait_resp)// && !alreadyOutFire + + io.in.ready := io.out.ready && (state === s_idle) && !miss && !hitWB && io.mdReady && (!io.pf.isPF() && !loadPF && !storePF)//maybe be optimized + + io.ipf := Mux(hit, hitinstrPF, missIPF) + io.isFinish := io.out.fire() || io.pf.isPF() + + if(tlbname == "dtlb") { + io.isFinish := io.out.valid || io.pf.isPF() + io.out.valid := io.in.valid && (Mux(hit && !hitWB, true.B, state === s_wait_resp) || loadPF || storePF)// && !alreadyOutFire + } + Debug("In(%d, %d) Out(%d, %d) InAddr:%x OutAddr:%x cmd:%d \n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, req.addr, io.out.bits.addr, req.cmd) + Debug("io.Flush:%d needFlush:%d alreadyOutFire:%d isFinish:%d\n", io.flush, needFlush, alreadyOutFire, 
io.isFinish) + Debug("hit:%d hitWB:%d hitVPN:%x hitFlag:%x hitPPN:%x hitRefillFlag:%x hitWBStore:%x hitCheck:%d hitExec:%d hitLoad:%d hitStore:%d\n", hit, hitWB, hitMeta.vpn, hitFlag.asUInt, hitData.ppn, hitRefillFlag, hitWBStore, hitCheck, hitExec, hitLoad, hitStore) + Debug("miss:%d state:%d level:%d raddr:%x memRdata:%x missMask:%x missRefillFlag:%x missMetaRefill:%d\n", miss, state, level, raddr, memRdata.asUInt, missMask, missRefillFlag, missMetaRefill) + Debug("meta/data: (0)%x|%b|%x (1)%x|%b|%x (2)%x|%b|%x (3)%x|%b|%x rread:%d\n", md(0).asTypeOf(tlbBundle).vpn, md(0).asTypeOf(tlbBundle).flag, md(0).asTypeOf(tlbBundle).ppn, md(1).asTypeOf(tlbBundle).vpn, md(1).asTypeOf(tlbBundle).flag, md(1).asTypeOf(tlbBundle).ppn, md(2).asTypeOf(tlbBundle).vpn, md(2).asTypeOf(tlbBundle).flag, md(2).asTypeOf(tlbBundle).ppn, md(3).asTypeOf(tlbBundle).vpn, md(3).asTypeOf(tlbBundle).flag, md(3).asTypeOf(tlbBundle).ppn, io.mdReady) + Debug("md: wen:%d windex:%x waymask:%x vpn:%x asid:%x mask:%x flag:%x asid:%x ppn:%x pteaddr:%x\n", io.mdWrite.wen, io.mdWrite.windex, io.mdWrite.waymask, io.mdWrite.wdata.asTypeOf(tlbBundle).vpn, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).mask, io.mdWrite.wdata.asTypeOf(tlbBundle).flag, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).ppn, io.mdWrite.wdata.asTypeOf(tlbBundle).pteaddr) + Debug("MemReq(%d, %d) MemResp(%d, %d) addr:%x cmd:%d rdata:%x cmd:%d\n", io.mem.req.valid, io.mem.req.ready, io.mem.resp.valid, io.mem.resp.ready, io.mem.req.bits.addr, io.mem.req.bits.cmd, io.mem.resp.bits.rdata, io.mem.resp.bits.cmd) + Debug("io.ipf:%d hitinstrPF:%d missIPF:%d pf.loadPF:%d pf.storePF:%d loadPF:%d storePF:%d\n", io.ipf, hitinstrPF, missIPF, io.pf.loadPF, io.pf.storePF, loadPF, storePF) +} diff --git a/src/main/scala/nutcore/utils/WritebackDelayer.scala b/src/main/scala/nutcore/utils/WritebackDelayer.scala index 1d1c3f3a3..fe7b78310 100644 --- 
a/src/main/scala/nutcore/utils/WritebackDelayer.scala +++ b/src/main/scala/nutcore/utils/WritebackDelayer.scala @@ -19,6 +19,8 @@ package nutcore import chisel3._ import chisel3.util._ +import nutcore.backend._ + import utils._ class WritebackDelayer(bru: Boolean = false, name: String = "unnamedDelayer") extends NutCoreModule with HasRSConst with HasBackendConst { diff --git a/src/main/scala/system/NutShell.scala b/src/main/scala/system/NutShell.scala index 4afa16e32..0566f7b2a 100644 --- a/src/main/scala/system/NutShell.scala +++ b/src/main/scala/system/NutShell.scala @@ -17,6 +17,8 @@ package system import nutcore._ +import nutcore.mem.cache._ + import bus.axi4.{AXI4, AXI4Lite} import bus.simplebus._ import device.{AXI4CLINT, AXI4PLIC} diff --git a/src/main/scala/system/Prefetcher.scala b/src/main/scala/system/Prefetcher.scala index a66b1f6a0..9b0778845 100644 --- a/src/main/scala/system/Prefetcher.scala +++ b/src/main/scala/system/Prefetcher.scala @@ -16,7 +16,9 @@ package system -import nutcore.{NutCore, NutCoreConfig, HasNutCoreParameter, AddressSpace, Cache, CacheConfig} +import nutcore.{NutCore, NutCoreConfig, HasNutCoreParameter, AddressSpace} +import nutcore.mem.cache._ + import bus.axi4.{AXI4, AXI4Lite} import bus.simplebus._ import utils._ diff --git a/src/main/scala/utils/Debug.scala b/src/main/scala/utils/Debug.scala index 5384ca707..318938ce0 100644 --- a/src/main/scala/utils/Debug.scala +++ b/src/main/scala/utils/Debug.scala @@ -59,7 +59,7 @@ object LogUtil { } } -sealed abstract class LogHelper(val logLevel: LogLevel) { +abstract class LogHelper(val logLevel: LogLevel) { def apply(cond: Bool, fmt: String, data: Bits*)(implicit name: String): Any = apply(cond, Printable.pack(fmt, data:_*))