diff --git a/Utility b/Utility index 575aff86..627ced70 160000 --- a/Utility +++ b/Utility @@ -1 +1 @@ -Subproject commit 575aff86cc7463193671542e6adad565c2437c6e +Subproject commit 627ced700e866d8c36a6c904347a18368db7565c diff --git a/src/main/scala/huancun/Common.scala b/src/main/scala/huancun/Common.scala index d1aa19ca..9ed63049 100644 --- a/src/main/scala/huancun/Common.scala +++ b/src/main/scala/huancun/Common.scala @@ -68,7 +68,7 @@ class SourceDReq(implicit p: Parameters) extends InnerTask with HasChannelBits { val sinkId = UInt(mshrBits.W) val bypassPut = Bool() val dirty = Bool() - val isHit = Bool() + val hitLevelL3toL2 = UInt(2.W) } class SourceAReq(implicit p: Parameters) extends HuanCunBundle { @@ -116,7 +116,6 @@ class SinkDResp(implicit p: Parameters) extends HuanCunBundle { val last = Bool() // last beat val denied = Bool() val dirty = Bool() - // val isHit = Bool() val bufIdx = UInt(bufIdxBits.W) } class SinkEResp(implicit p: Parameters) extends HuanCunBundle { @@ -155,11 +154,12 @@ class MSHRRequest(implicit p: Parameters) extends HuanCunBundle with HasChannelB val alias = aliasBitsOpt.map(_ => UInt(aliasBitsOpt.get.W)) val preferCache = Bool() val dirty = Bool() - val isHit = Bool() val fromProbeHelper = Bool() val fromCmoHelper = Bool() val needProbeAckData = if (cacheParams.inclusive) None else Some(Bool()) val reqSource = UInt(MemReqSource.reqSourceBits.W) + val hitLevelL3toL2 = UInt(2.W) + } class MSHRStatus(implicit p: Parameters) extends HuanCunBundle with HasChannelBits { diff --git a/src/main/scala/huancun/HCCacheParameters.scala b/src/main/scala/huancun/HCCacheParameters.scala index 98026d16..1984f9db 100644 --- a/src/main/scala/huancun/HCCacheParameters.scala +++ b/src/main/scala/huancun/HCCacheParameters.scala @@ -60,11 +60,14 @@ case object PreferCacheKey extends ControlKey[Bool](name = "preferCache") case class PreferCacheField() extends BundleField[Bool](PreferCacheKey, Output(Bool()), _ := false.B) -// indicate whether this block 
is granted from L3 or not (only used when grantData to L2) -// now it only works for non-inclusive cache (ignored in inclusive cache) -case object IsHitKey extends ControlKey[Bool](name = "isHitInL3") - -case class IsHitField() extends BundleField[Bool](IsHitKey, Output(Bool()), _ := true.B) +// indicate where this granted block comes from (only used when handling Grant/GrantData) +// now it only works for non-inclusive cache (ignored in inclusive cache) + // 0:isHitinMem or default + // 1:isHitinL3 + // 2:isHitinAnotherCore + // 3:isHitinCork +case object HitLevelKey extends ControlKey[UInt](name = "hitlevel") +case class HitLevelField(width: Int) extends BundleField[UInt](HitLevelKey, Output(UInt(width.W)), _ := 0.U(width.W)) // indicate whether this block is dirty or not (only used in handle Release/ReleaseData) // now it only works for non-inclusive cache (ignored in inclusive cache) @@ -108,7 +111,7 @@ case class HCCacheParameters reqField: Seq[BundleFieldBase] = Nil, // master respKey: Seq[BundleKeyBase] = Nil, reqKey: Seq[BundleKeyBase] = Seq(PrefetchKey, PreferCacheKey, AliasKey, ReqSourceKey), // slave - respField: Seq[BundleFieldBase] = Nil, + respField: Seq[BundleFieldBase] = Seq(HitLevelField(2)), ctrl: Option[CacheCtrl] = None, sramClkDivBy2: Boolean = false, sramDepthDiv: Int = 1, diff --git a/src/main/scala/huancun/SinkA.scala b/src/main/scala/huancun/SinkA.scala index d0943a9d..22e8e57b 100644 --- a/src/main/scala/huancun/SinkA.scala +++ b/src/main/scala/huancun/SinkA.scala @@ -109,7 +109,7 @@ class SinkA(implicit p: Parameters) extends HuanCunModule { allocInfo.preferCache := Mux((a.bits.opcode === TLMessages.Get || a.bits.opcode(2,1) === 0.U), true.B, a.bits.user.lift(PreferCacheKey).getOrElse(true.B)) } allocInfo.dirty := false.B // ignored - allocInfo.isHit := true.B // ignored + allocInfo.hitLevelL3toL2 := 0.U // ignored allocInfo.fromProbeHelper := false.B allocInfo.fromCmoHelper := false.B allocInfo.needProbeAckData.foreach(_ := false.B) diff --git 
a/src/main/scala/huancun/SinkB.scala b/src/main/scala/huancun/SinkB.scala index 8554a06a..cda90ea1 100644 --- a/src/main/scala/huancun/SinkB.scala +++ b/src/main/scala/huancun/SinkB.scala @@ -52,7 +52,7 @@ class SinkB(edge: TLEdgeOut)(implicit p: Parameters) extends HuanCunModule { io.alloc.bits.alias.foreach(_ := 0.U) io.alloc.bits.preferCache := true.B io.alloc.bits.dirty := false.B // ignored - io.alloc.bits.isHit := true.B // ignored + io.alloc.bits.hitLevelL3toL2 := 0.U // ignored io.alloc.bits.fromProbeHelper := false.B io.alloc.bits.fromCmoHelper := false.B io.alloc.bits.needProbeAckData.foreach(_ := io.b.bits.data(0)) diff --git a/src/main/scala/huancun/Slice.scala b/src/main/scala/huancun/Slice.scala index 8e22b61d..025c86c7 100644 --- a/src/main/scala/huancun/Slice.scala +++ b/src/main/scala/huancun/Slice.scala @@ -635,7 +635,7 @@ class Slice()(implicit p: Parameters) extends HuanCunModule { mshrReq.bits.fromCmoHelper := false.B mshrReq.bits.bufIdx := DontCare mshrReq.bits.dirty := false.B - mshrReq.bits.isHit := true.B + mshrReq.bits.hitLevelL3toL2 := 0.U mshrReq.bits.needProbeAckData.foreach(_ := false.B) mshrReq.bits.reqSource := pftReq.bits.pfSource pftReq.ready := mshrReq.ready diff --git a/src/main/scala/huancun/SourceC.scala b/src/main/scala/huancun/SourceC.scala index cdadf21a..ccbd6dd2 100644 --- a/src/main/scala/huancun/SourceC.scala +++ b/src/main/scala/huancun/SourceC.scala @@ -102,7 +102,7 @@ class SourceC(edge: TLEdgeOut)(implicit p: Parameters) extends HuanCunModule { queue.io.enq.bits.user.lift(PreferCacheKey).foreach(_ := true.B) queue.io.enq.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.NoWhere.id.U) queue.io.enq.bits.echo.lift(DirtyKey).foreach(_ := pipeOut.bits.task.dirty) - queue.io.enq.bits.user.lift(IsHitKey).foreach(_ := true.B) + queue.io.enq.bits.user.lift(HitLevelKey).foreach(_ := 0.U) io.c <> queue.io.deq } diff --git a/src/main/scala/huancun/SourceD.scala b/src/main/scala/huancun/SourceD.scala index 34ac5a2b..79307568 
100644 --- a/src/main/scala/huancun/SourceD.scala +++ b/src/main/scala/huancun/SourceD.scala @@ -161,7 +161,7 @@ class SourceD(implicit p: Parameters) extends HuanCunModule { s2_d.bits.data := s1_queue.io.deq.bits.data s2_d.bits.corrupt := s2_d.bits.denied s2_d.bits.echo.lift(DirtyKey).foreach(_ := s2_req.dirty) - s2_d.bits.user.lift(IsHitKey).foreach(_ := s2_req.isHit) + s2_d.bits.user.lift(HitLevelKey).foreach(_ := s2_req.hitLevelL3toL2) dontTouch(s2_d.bits.user) val s2_can_go = Mux(s2_d.valid, s2_d.ready, s3_ready && (!s2_valid_pb || pb_ready)) @@ -225,7 +225,7 @@ class SourceD(implicit p: Parameters) extends HuanCunModule { s3_d.bits.corrupt := s3_req.denied || (s3_req.opcode =/= TLMessages.AccessAck && s3_req.opcode =/= TLMessages.Grant && s3_queue.io.deq.bits.corrupt) s3_d.bits.echo.lift(DirtyKey).foreach(_ := s3_req.dirty) - s3_d.bits.user.lift(IsHitKey).foreach(_ := s3_req.isHit) + s3_d.bits.user.lift(HitLevelKey).foreach(_ := s3_req.hitLevelL3toL2) dontTouch(s3_d.bits.user) s3_queue.io.enq.valid := RegNextN( diff --git a/src/main/scala/huancun/noninclusive/Directory.scala b/src/main/scala/huancun/noninclusive/Directory.scala index 34505911..08dae6cc 100644 --- a/src/main/scala/huancun/noninclusive/Directory.scala +++ b/src/main/scala/huancun/noninclusive/Directory.scala @@ -317,33 +317,33 @@ class Directory(implicit p: Parameters) assert(dirReadPorts == 1) val req_r = RegEnable(req.bits, req.fire) XSPerfAccumulate(cacheParams, "selfdir_A_req", req_r.replacerInfo.channel(0) && resp.valid) - XSPerfAccumulate(cacheParams, "selfdir_A_hit", req_r.replacerInfo.channel(0) && resp.valid && resp.bits.self.hit) + XSPerfAccumulate(cacheParams, "selfdir_A_hit", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit) XSPerfAccumulate(cacheParams, "selfdir_B_req", req_r.replacerInfo.channel(1) && resp.valid) - XSPerfAccumulate(cacheParams, "selfdir_B_hit", req_r.replacerInfo.channel(1) && resp.valid && resp.bits.self.hit) + 
XSPerfAccumulate(cacheParams, "selfdir_B_hit", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit) XSPerfAccumulate(cacheParams, "selfdir_C_req", req_r.replacerInfo.channel(2) && resp.valid) - XSPerfAccumulate(cacheParams, "selfdir_C_hit", req_r.replacerInfo.channel(2) && resp.valid && resp.bits.self.hit) + XSPerfAccumulate(cacheParams, "selfdir_C_hit", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit) - XSPerfAccumulate(cacheParams, "selfdir_dirty", resp.valid && resp.bits.self.dirty) - XSPerfAccumulate(cacheParams, "selfdir_TIP", resp.valid && resp.bits.self.state === TIP) - XSPerfAccumulate(cacheParams, "selfdir_BRANCH", resp.valid && resp.bits.self.state === BRANCH) - XSPerfAccumulate(cacheParams, "selfdir_TRUNK", resp.valid && resp.bits.self.state === TRUNK) - XSPerfAccumulate(cacheParams, "selfdir_INVALID", resp.valid && resp.bits.self.state === INVALID) + XSPerfAccumulate(cacheParams, "selfdir_dirty", RegNext(resp.valid) && resp.bits.self.dirty) + XSPerfAccumulate(cacheParams, "selfdir_TIP", RegNext(resp.valid) && resp.bits.self.state === TIP) + XSPerfAccumulate(cacheParams, "selfdir_BRANCH", RegNext(resp.valid) && resp.bits.self.state === BRANCH) + XSPerfAccumulate(cacheParams, "selfdir_TRUNK", RegNext(resp.valid) && resp.bits.self.state === TRUNK) + XSPerfAccumulate(cacheParams, "selfdir_INVALID", RegNext(resp.valid) && resp.bits.self.state === INVALID) //val perfinfo = IO(new Bundle(){ // val perfEvents = Output(new PerfEventsBundle(numPCntHcDir)) //}) val perfinfo = IO(Output(Vec(numPCntHcDir, (UInt(6.W))))) val perfEvents = Seq( ("selfdir_A_req ", req_r.replacerInfo.channel(0) && resp.valid ), - ("selfdir_A_hit ", req_r.replacerInfo.channel(0) && resp.valid && resp.bits.self.hit), + ("selfdir_A_hit ", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit), ("selfdir_B_req ", req_r.replacerInfo.channel(1) && resp.valid ), - ("selfdir_B_hit ", req_r.replacerInfo.channel(1) && 
resp.valid && resp.bits.self.hit), + ("selfdir_B_hit ", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit), ("selfdir_C_req ", req_r.replacerInfo.channel(2) && resp.valid ), - ("selfdir_C_hit ", req_r.replacerInfo.channel(2) && resp.valid && resp.bits.self.hit), - ("selfdir_dirty ", resp.valid && resp.bits.self.dirty ), - ("selfdir_TIP ", resp.valid && resp.bits.self.state === TIP ), - ("selfdir_BRANCH ", resp.valid && resp.bits.self.state === BRANCH ), - ("selfdir_TRUNK ", resp.valid && resp.bits.self.state === TRUNK ), - ("selfdir_INVALID ", resp.valid && resp.bits.self.state === INVALID ), + ("selfdir_C_hit ", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit), + ("selfdir_dirty ", RegNext(resp.valid) && resp.bits.self.dirty ), + ("selfdir_TIP ", RegNext(resp.valid) && resp.bits.self.state === TIP ), + ("selfdir_BRANCH ", RegNext(resp.valid) && resp.bits.self.state === BRANCH ), + ("selfdir_TRUNK ", RegNext(resp.valid) && resp.bits.self.state === TRUNK ), + ("selfdir_INVALID ", RegNext(resp.valid) && resp.bits.self.state === INVALID ), ) for (((perf_out,(perf_name,perf)),i) <- perfinfo.zip(perfEvents).zipWithIndex) { diff --git a/src/main/scala/huancun/noninclusive/MSHR.scala b/src/main/scala/huancun/noninclusive/MSHR.scala index c29455b4..ed72d99f 100644 --- a/src/main/scala/huancun/noninclusive/MSHR.scala +++ b/src/main/scala/huancun/noninclusive/MSHR.scala @@ -555,6 +555,16 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S val w_sinkcack = RegInit(true.B) val acquire_flag = RegInit(false.B) + // hold part of s_acquire: + // send acquire to memory when receiving an acquire from L2 + // held until the Directory is read next time + val s_acquire_hold_for_acquire = RegInit(false.B) + // hold part of s_probe: + // send probe to another L2 when receiving an acquire from L2 + // held until the Directory is read next time + val s_probe_hold_for_acquire = RegInit(false.B) + + // val sourceD_valid_hold = 
RegInit(false.B) def reset_all_flags(): Unit = { // Default value @@ -595,10 +605,16 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S nested_c_hit_reg := false.B gotDirty := false.B acquire_flag := false.B + s_acquire_hold_for_acquire := false.B + s_probe_hold_for_acquire := false.B + // sourceD_valid_hold := false.B a_do_release := false.B a_do_probe := false.B } when(!s_acquire) { acquire_flag := acquire_flag | true.B } + + when(!s_acquire && req_acquire) { s_acquire_hold_for_acquire := s_acquire_hold_for_acquire | true.B } + when(!s_probe && req_acquire) { s_probe_hold_for_acquire := s_probe_hold_for_acquire | true.B } def x_schedule(): Unit = { // TODO // Do probe to maintain coherence @@ -960,6 +976,10 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S io.tasks.prefetch_train.foreach(_.valid := !s_triggerprefetch.get) io.tasks.prefetch_resp.foreach(_.valid := !s_prefetchack.get && w_grantfirst) + // when(io.tasks.source_d.valid){ + // sourceD_valid_hold := sourceD_valid_hold | true.B + // } + val oa = io.tasks.source_a.bits val ob = io.tasks.source_b.bits val oc = io.tasks.source_c.bits @@ -990,6 +1010,9 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S // full overwrite, we can always acquire perm, no need to acquire block val acquire_perm_NtoT = req.opcode === AcquirePerm && req.param === NtoT + // will hit in cork if the acquire from L2 needs to acquire downwards from L3 and the state transfer is BtoT + val acquire_BtoT = req.fromA && req_acquire && req.param === BtoT + val acquire_opcode = if (cacheParams.name == "L2") { Mux(req.opcode === AcquirePerm && req.param === BtoT, AcquirePerm, Mux(req.opcode === Hint, AcquireBlock, req.opcode)) } else { @@ -1174,10 +1197,28 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S ), false.B ) - od.isHit := self_meta.hit od.bufIdx := req.bufIdx od.bypassPut := bypassPut_latch +// 
hitLevelL3toL2 makes sense when sourceD_valid is high + val hitFromCork = req.fromA && s_acquire_hold_for_acquire && acquire_BtoT + val hitFromMem = req.fromA && s_acquire_hold_for_acquire && !hitFromCork + val hitFromAnotherCore = req.fromA && !s_acquire_hold_for_acquire && s_probe_hold_for_acquire + val hitFromL3 = req.fromA && req_acquire && !s_acquire_hold_for_acquire && !s_probe_hold_for_acquire && self_meta.hit + + when(hitFromCork){ + od.hitLevelL3toL2 := 3.U + }.elsewhen(hitFromMem){ + od.hitLevelL3toL2 := 0.U + }.elsewhen(hitFromAnotherCore){ + od.hitLevelL3toL2 := 2.U + }.elsewhen(hitFromL3){ + od.hitLevelL3toL2 := 1.U + }.otherwise{ + od.hitLevelL3toL2 := 0.U + } + + oe.sink := sink ia.sourceId := req.source diff --git a/src/main/scala/huancun/noninclusive/ProbeHelper.scala b/src/main/scala/huancun/noninclusive/ProbeHelper.scala index 171607d1..6c995836 100644 --- a/src/main/scala/huancun/noninclusive/ProbeHelper.scala +++ b/src/main/scala/huancun/noninclusive/ProbeHelper.scala @@ -47,7 +47,7 @@ class ProbeHelper(entries: Int = 5, enqDelay: Int = 1)(implicit p: Parameters) req.alias.foreach(_ := 0.U) req.preferCache := true.B req.dirty := false.B // ignored - req.isHit := true.B // ignored + req.hitLevelL3toL2 := 0.U // ignored req.needProbeAckData.foreach(_ := false.B) req.fromCmoHelper := false.B req.reqSource := MemReqSource.NoWhere.id.U diff --git a/src/main/scala/huancun/noninclusive/SinkC.scala b/src/main/scala/huancun/noninclusive/SinkC.scala index 9fd96efa..03f7a6f2 100644 --- a/src/main/scala/huancun/noninclusive/SinkC.scala +++ b/src/main/scala/huancun/noninclusive/SinkC.scala @@ -72,7 +72,7 @@ class SinkC(implicit p: Parameters) extends BaseSinkC { io.alloc.bits.isBop.foreach(_ := false.B) io.alloc.bits.alias.foreach(_ := 0.U) io.alloc.bits.preferCache := true.B - io.alloc.bits.isHit := true.B + io.alloc.bits.hitLevelL3toL2 := 0.U io.alloc.bits.dirty := c.bits.echo.lift(DirtyKey).getOrElse(true.B) io.alloc.bits.fromProbeHelper := false.B 
io.alloc.bits.fromCmoHelper := false.B diff --git a/src/main/scala/huancun/noninclusive/SliceCtrl.scala b/src/main/scala/huancun/noninclusive/SliceCtrl.scala index 8aa5c337..26068b83 100644 --- a/src/main/scala/huancun/noninclusive/SliceCtrl.scala +++ b/src/main/scala/huancun/noninclusive/SliceCtrl.scala @@ -225,7 +225,7 @@ class SliceCtrl()(implicit p: Parameters) extends HuanCunModule { io.cmo_req.bits.alias.foreach(_ := false.B) io.cmo_req.bits.preferCache := false.B io.cmo_req.bits.dirty := false.B - io.cmo_req.bits.isHit := true.B + io.cmo_req.bits.hitLevelL3toL2 := 0.U io.cmo_req.bits.fromProbeHelper := false.B io.cmo_req.bits.fromCmoHelper := true.B io.cmo_req.bits.needProbeAckData.foreach(_ := false.B) diff --git a/src/test/scala/huancun/FakeClient.scala b/src/test/scala/huancun/FakeClient.scala index cd4cec53..fdcf224f 100644 --- a/src/test/scala/huancun/FakeClient.scala +++ b/src/test/scala/huancun/FakeClient.scala @@ -28,6 +28,7 @@ abstract class BaseFakeClient(name: String, nBanks: Int, probe: Boolean = true)( echoFields = cacheParams.echoField, requestFields = Seq(PrefetchField(), PreferCacheField(), DirtyField()), responseKeys = cacheParams.respKey + // responseKeys = Seq(HitLevelKey) ) }) } diff --git a/src/test/scala/huancun/TestTop.scala b/src/test/scala/huancun/TestTop.scala index 7ba4fda2..f56d997a 100644 --- a/src/test/scala/huancun/TestTop.scala +++ b/src/test/scala/huancun/TestTop.scala @@ -207,7 +207,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule { minLatency = 1, echoFields = Seq(DirtyField()), requestFields = Seq(PrefetchField(), PreferCacheField(), DirtyField(), AliasField(2)), - responseKeys = Seq(IsHitKey) + responseKeys = Seq(HitLevelKey) ) )) masterNode @@ -227,7 +227,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule { prefetch = Some(huancun.prefetch.BOPParameters()), reqField = Seq(PreferCacheField()), echoField = Seq(DirtyField()), - respKey = Seq(IsHitKey) + respKey = Seq(HitLevelKey) ) 
}))).node) @@ -238,7 +238,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule { inclusive = false, clientCaches = Seq(CacheParameters(sets = 32, ways = 8, blockGranularity = 5, name = "L3")), echoField = Seq(DirtyField()), - respField = Seq(IsHitField()), + respField = Seq(HitLevelField(2)), simulation = true ) }))) @@ -346,7 +346,7 @@ class TestTop_FullSys()(implicit p: Parameters) extends LazyModule { clientCaches = Seq(CacheParameters("dcache", sets = 32, ways = 8, blockGranularity = 5)), reqField = Seq(PreferCacheField()), echoField = Seq(DirtyField()), - respKey = Seq(IsHitKey), + respKey = Seq(HitLevelKey), prefetch = Some(huancun.prefetch.BOPParameters()), sramDepthDiv = 2, simulation = true