Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MSHR & SourceD: add hitLevelL3toL2 function in L3 #137

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
6 changes: 3 additions & 3 deletions src/main/scala/huancun/Common.scala
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class SourceDReq(implicit p: Parameters) extends InnerTask with HasChannelBits {
val sinkId = UInt(mshrBits.W)
val bypassPut = Bool()
val dirty = Bool()
val isHit = Bool()
val hitLevelL3toL2 = UInt(2.W)
}

class SourceAReq(implicit p: Parameters) extends HuanCunBundle {
Expand Down Expand Up @@ -116,7 +116,6 @@ class SinkDResp(implicit p: Parameters) extends HuanCunBundle {
val last = Bool() // last beat
val denied = Bool()
val dirty = Bool()
// val isHit = Bool()
val bufIdx = UInt(bufIdxBits.W)
}
class SinkEResp(implicit p: Parameters) extends HuanCunBundle {
Expand Down Expand Up @@ -155,11 +154,12 @@ class MSHRRequest(implicit p: Parameters) extends HuanCunBundle with HasChannelB
val alias = aliasBitsOpt.map(_ => UInt(aliasBitsOpt.get.W))
val preferCache = Bool()
val dirty = Bool()
val isHit = Bool()
val fromProbeHelper = Bool()
val fromCmoHelper = Bool()
val needProbeAckData = if (cacheParams.inclusive) None else Some(Bool())
val reqSource = UInt(MemReqSource.reqSourceBits.W)
val hitLevelL3toL2 = UInt(2.W)

}

class MSHRStatus(implicit p: Parameters) extends HuanCunBundle with HasChannelBits {
Expand Down
15 changes: 9 additions & 6 deletions src/main/scala/huancun/HCCacheParameters.scala
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,14 @@ case object PreferCacheKey extends ControlKey[Bool](name = "preferCache")

case class PreferCacheField() extends BundleField[Bool](PreferCacheKey, Output(Bool()), _ := false.B)

// indicate whether this block is granted from L3 or not (only used when grantData to L2)
// now it only works for non-inclusive cache (ignored in inclusive cache)
case object IsHitKey extends ControlKey[Bool](name = "isHitInL3")

case class IsHitField() extends BundleField[Bool](IsHitKey, Output(Bool()), _ := true.B)
// indicates where the granted block comes from (only used when handling Grant/GrantData)
// now it only works for non-inclusive cache (ignored in inclusive cache)
// 0: hit in memory, or default
// 1: hit in L3
// 2: hit in another core
// 3: hit in cork
case object HitLevelKey extends ControlKey[UInt](name = "hitlevel")
case class HitLevelField(width: Int) extends BundleField[UInt](HitLevelKey, Output(UInt(width.W)), _ := 0.U(width.W))

// indicate whether this block is dirty or not (only used in handle Release/ReleaseData)
// now it only works for non-inclusive cache (ignored in inclusive cache)
Expand Down Expand Up @@ -108,7 +111,7 @@ case class HCCacheParameters
reqField: Seq[BundleFieldBase] = Nil, // master
respKey: Seq[BundleKeyBase] = Nil,
reqKey: Seq[BundleKeyBase] = Seq(PrefetchKey, PreferCacheKey, AliasKey, ReqSourceKey), // slave
respField: Seq[BundleFieldBase] = Nil,
respField: Seq[BundleFieldBase] = Seq(HitLevelField(2)),
ctrl: Option[CacheCtrl] = None,
sramClkDivBy2: Boolean = false,
sramDepthDiv: Int = 1,
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/SinkA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class SinkA(implicit p: Parameters) extends HuanCunModule {
allocInfo.preferCache := Mux((a.bits.opcode === TLMessages.Get || a.bits.opcode(2,1) === 0.U), true.B, a.bits.user.lift(PreferCacheKey).getOrElse(true.B))
}
allocInfo.dirty := false.B // ignored
allocInfo.isHit := true.B // ignored
allocInfo.hitLevelL3toL2 := 0.U // ignored
allocInfo.fromProbeHelper := false.B
allocInfo.fromCmoHelper := false.B
allocInfo.needProbeAckData.foreach(_ := false.B)
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/SinkB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class SinkB(edge: TLEdgeOut)(implicit p: Parameters) extends HuanCunModule {
io.alloc.bits.alias.foreach(_ := 0.U)
io.alloc.bits.preferCache := true.B
io.alloc.bits.dirty := false.B // ignored
io.alloc.bits.isHit := true.B // ignored
io.alloc.bits.hitLevelL3toL2 := 0.U // ignored
io.alloc.bits.fromProbeHelper := false.B
io.alloc.bits.fromCmoHelper := false.B
io.alloc.bits.needProbeAckData.foreach(_ := io.b.bits.data(0))
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/Slice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ class Slice()(implicit p: Parameters) extends HuanCunModule {
mshrReq.bits.fromCmoHelper := false.B
mshrReq.bits.bufIdx := DontCare
mshrReq.bits.dirty := false.B
mshrReq.bits.isHit := true.B
mshrReq.bits.hitLevelL3toL2 := 0.U
mshrReq.bits.needProbeAckData.foreach(_ := false.B)
mshrReq.bits.reqSource := pftReq.bits.pfSource
pftReq.ready := mshrReq.ready
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/SourceC.scala
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class SourceC(edge: TLEdgeOut)(implicit p: Parameters) extends HuanCunModule {
queue.io.enq.bits.user.lift(PreferCacheKey).foreach(_ := true.B)
queue.io.enq.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.NoWhere.id.U)
queue.io.enq.bits.echo.lift(DirtyKey).foreach(_ := pipeOut.bits.task.dirty)
queue.io.enq.bits.user.lift(IsHitKey).foreach(_ := true.B)
queue.io.enq.bits.user.lift(HitLevelKey).foreach(_ := 0.U)

io.c <> queue.io.deq
}
4 changes: 2 additions & 2 deletions src/main/scala/huancun/SourceD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
s2_d.bits.data := s1_queue.io.deq.bits.data
s2_d.bits.corrupt := s2_d.bits.denied
s2_d.bits.echo.lift(DirtyKey).foreach(_ := s2_req.dirty)
s2_d.bits.user.lift(IsHitKey).foreach(_ := s2_req.isHit)
s2_d.bits.user.lift(HitLevelKey).foreach(_ := s2_req.hitLevelL3toL2)
dontTouch(s2_d.bits.user)

val s2_can_go = Mux(s2_d.valid, s2_d.ready, s3_ready && (!s2_valid_pb || pb_ready))
Expand Down Expand Up @@ -225,7 +225,7 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
s3_d.bits.corrupt := s3_req.denied ||
(s3_req.opcode =/= TLMessages.AccessAck && s3_req.opcode =/= TLMessages.Grant && s3_queue.io.deq.bits.corrupt)
s3_d.bits.echo.lift(DirtyKey).foreach(_ := s3_req.dirty)
s3_d.bits.user.lift(IsHitKey).foreach(_ := s3_req.isHit)
s3_d.bits.user.lift(HitLevelKey).foreach(_ := s3_req.hitLevelL3toL2)
dontTouch(s3_d.bits.user)

s3_queue.io.enq.valid := RegNextN(
Expand Down
32 changes: 16 additions & 16 deletions src/main/scala/huancun/noninclusive/Directory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -317,33 +317,33 @@ class Directory(implicit p: Parameters)
assert(dirReadPorts == 1)
val req_r = RegEnable(req.bits, req.fire)
XSPerfAccumulate(cacheParams, "selfdir_A_req", req_r.replacerInfo.channel(0) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_A_hit", req_r.replacerInfo.channel(0) && resp.valid && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_A_hit", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_B_req", req_r.replacerInfo.channel(1) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_B_hit", req_r.replacerInfo.channel(1) && resp.valid && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_B_hit", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_C_req", req_r.replacerInfo.channel(2) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_C_hit", req_r.replacerInfo.channel(2) && resp.valid && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_C_hit", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit)

XSPerfAccumulate(cacheParams, "selfdir_dirty", resp.valid && resp.bits.self.dirty)
XSPerfAccumulate(cacheParams, "selfdir_TIP", resp.valid && resp.bits.self.state === TIP)
XSPerfAccumulate(cacheParams, "selfdir_BRANCH", resp.valid && resp.bits.self.state === BRANCH)
XSPerfAccumulate(cacheParams, "selfdir_TRUNK", resp.valid && resp.bits.self.state === TRUNK)
XSPerfAccumulate(cacheParams, "selfdir_INVALID", resp.valid && resp.bits.self.state === INVALID)
XSPerfAccumulate(cacheParams, "selfdir_dirty", RegNext(resp.valid) && resp.bits.self.dirty)
XSPerfAccumulate(cacheParams, "selfdir_TIP", RegNext(resp.valid) && resp.bits.self.state === TIP)
XSPerfAccumulate(cacheParams, "selfdir_BRANCH", RegNext(resp.valid) && resp.bits.self.state === BRANCH)
XSPerfAccumulate(cacheParams, "selfdir_TRUNK", RegNext(resp.valid) && resp.bits.self.state === TRUNK)
XSPerfAccumulate(cacheParams, "selfdir_INVALID", RegNext(resp.valid) && resp.bits.self.state === INVALID)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcDir))
//})
val perfinfo = IO(Output(Vec(numPCntHcDir, (UInt(6.W)))))
val perfEvents = Seq(
("selfdir_A_req ", req_r.replacerInfo.channel(0) && resp.valid ),
("selfdir_A_hit ", req_r.replacerInfo.channel(0) && resp.valid && resp.bits.self.hit),
("selfdir_A_hit ", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit),
("selfdir_B_req ", req_r.replacerInfo.channel(1) && resp.valid ),
("selfdir_B_hit ", req_r.replacerInfo.channel(1) && resp.valid && resp.bits.self.hit),
("selfdir_B_hit ", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit),
("selfdir_C_req ", req_r.replacerInfo.channel(2) && resp.valid ),
("selfdir_C_hit ", req_r.replacerInfo.channel(2) && resp.valid && resp.bits.self.hit),
("selfdir_dirty ", resp.valid && resp.bits.self.dirty ),
("selfdir_TIP ", resp.valid && resp.bits.self.state === TIP ),
("selfdir_BRANCH ", resp.valid && resp.bits.self.state === BRANCH ),
("selfdir_TRUNK ", resp.valid && resp.bits.self.state === TRUNK ),
("selfdir_INVALID ", resp.valid && resp.bits.self.state === INVALID ),
("selfdir_C_hit ", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit),
("selfdir_dirty ", RegNext(resp.valid) && resp.bits.self.dirty ),
("selfdir_TIP ", RegNext(resp.valid) && resp.bits.self.state === TIP ),
("selfdir_BRANCH ", RegNext(resp.valid) && resp.bits.self.state === BRANCH ),
("selfdir_TRUNK ", RegNext(resp.valid) && resp.bits.self.state === TRUNK ),
("selfdir_INVALID ", RegNext(resp.valid) && resp.bits.self.state === INVALID ),
)

for (((perf_out,(perf_name,perf)),i) <- perfinfo.zip(perfEvents).zipWithIndex) {
Expand Down
43 changes: 42 additions & 1 deletion src/main/scala/huancun/noninclusive/MSHR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,16 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
val w_sinkcack = RegInit(true.B)

val acquire_flag = RegInit(false.B)
// held (sticky) version of s_acquire:
// set when an acquire must be sent to memory in response to an acquire from L2
// held until the Directory is read next time
val s_acquire_hold_for_acquire = RegInit(false.B)
// held (sticky) version of s_probe:
// set when a probe must be sent to another L2 in response to an acquire from L2
// held until the Directory is read next time
val s_probe_hold_for_acquire = RegInit(false.B)

// val sourceD_valid_hold = RegInit(false.B)

def reset_all_flags(): Unit = {
// Default value
Expand Down Expand Up @@ -595,10 +605,16 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
nested_c_hit_reg := false.B
gotDirty := false.B
acquire_flag := false.B
s_acquire_hold_for_acquire := false.B
s_probe_hold_for_acquire := false.B
// sourceD_valid_hold := false.B
a_do_release := false.B
a_do_probe := false.B
}
when(!s_acquire) { acquire_flag := acquire_flag | true.B }

when(!s_acquire && req_acquire) { s_acquire_hold_for_acquire := s_acquire_hold_for_acquire | true.B }
when(!s_probe && req_acquire) { s_probe_hold_for_acquire := s_probe_hold_for_acquire | true.B }

def x_schedule(): Unit = { // TODO
// Do probe to maintain coherence
Expand Down Expand Up @@ -960,6 +976,10 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
io.tasks.prefetch_train.foreach(_.valid := !s_triggerprefetch.get)
io.tasks.prefetch_resp.foreach(_.valid := !s_prefetchack.get && w_grantfirst)

// when(io.tasks.source_d.valid){
// sourceD_valid_hold := sourceD_valid_hold | true.B
// }

val oa = io.tasks.source_a.bits
val ob = io.tasks.source_b.bits
val oc = io.tasks.source_c.bits
Expand Down Expand Up @@ -990,6 +1010,9 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
// full overwrite, we can always acquire perm, no need to acquire block
val acquire_perm_NtoT = req.opcode === AcquirePerm && req.param === NtoT

// the request will hit in cork if the acquire from L2 requires L3 to acquire downwards and the state transition is BtoT
val acquire_BtoT = req.fromA && req_acquire && req.param === BtoT

val acquire_opcode = if (cacheParams.name == "L2") {
Mux(req.opcode === AcquirePerm && req.param === BtoT, AcquirePerm, Mux(req.opcode === Hint, AcquireBlock, req.opcode))
} else {
Expand Down Expand Up @@ -1174,10 +1197,28 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
),
false.B
)
od.isHit := self_meta.hit
od.bufIdx := req.bufIdx
od.bypassPut := bypassPut_latch

// hitLevelL3toL2 is only meaningful while sourceD_valid is high
// Classify where the block returned on channel D came from. Every flag requires a channel-A request.
// cork: an acquire was issued downwards for this request and the request itself is a BtoT acquire
val hitFromCork = req.fromA && s_acquire_hold_for_acquire && acquire_BtoT
// memory: an acquire was issued downwards, but it is not the cork case above
val hitFromMem = req.fromA && s_acquire_hold_for_acquire && !hitFromCork
// another core: no acquire was issued downwards, but a probe was issued for this acquire
val hitFromAnotherCore = req.fromA && !s_acquire_hold_for_acquire && s_probe_hold_for_acquire
// L3: neither an acquire nor a probe was issued, and the self directory reported a hit
val hitFromL3 = req.fromA && req_acquire && !s_acquire_hold_for_acquire && !s_probe_hold_for_acquire && self_meta.hit

// Encode the source level (3 = cork, 0 = memory/default, 2 = another core, 1 = L3).
// The elsewhen order gives the priority cork > memory > another core > L3;
// the memory branch and the fall-through default both produce 0.
when(hitFromCork){
od.hitLevelL3toL2 := 3.U
}.elsewhen(hitFromMem){
od.hitLevelL3toL2 := 0.U
}.elsewhen(hitFromAnotherCore){
od.hitLevelL3toL2 := 2.U
}.elsewhen(hitFromL3){
od.hitLevelL3toL2 := 1.U
}.otherwise{
od.hitLevelL3toL2 := 0.U
}


oe.sink := sink

ia.sourceId := req.source
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/noninclusive/ProbeHelper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ProbeHelper(entries: Int = 5, enqDelay: Int = 1)(implicit p: Parameters)
req.alias.foreach(_ := 0.U)
req.preferCache := true.B
req.dirty := false.B // ignored
req.isHit := true.B // ignored
req.hitLevelL3toL2 := 0.U // ignored
req.needProbeAckData.foreach(_ := false.B)
req.fromCmoHelper := false.B
req.reqSource := MemReqSource.NoWhere.id.U
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/noninclusive/SinkC.scala
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class SinkC(implicit p: Parameters) extends BaseSinkC {
io.alloc.bits.isBop.foreach(_ := false.B)
io.alloc.bits.alias.foreach(_ := 0.U)
io.alloc.bits.preferCache := true.B
io.alloc.bits.isHit := true.B
io.alloc.bits.hitLevelL3toL2 := 0.U
io.alloc.bits.dirty := c.bits.echo.lift(DirtyKey).getOrElse(true.B)
io.alloc.bits.fromProbeHelper := false.B
io.alloc.bits.fromCmoHelper := false.B
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/noninclusive/SliceCtrl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ class SliceCtrl()(implicit p: Parameters) extends HuanCunModule {
io.cmo_req.bits.alias.foreach(_ := false.B)
io.cmo_req.bits.preferCache := false.B
io.cmo_req.bits.dirty := false.B
io.cmo_req.bits.isHit := true.B
io.cmo_req.bits.hitLevelL3toL2 := 0.U
io.cmo_req.bits.fromProbeHelper := false.B
io.cmo_req.bits.fromCmoHelper := true.B
io.cmo_req.bits.needProbeAckData.foreach(_ := false.B)
Expand Down
1 change: 1 addition & 0 deletions src/test/scala/huancun/FakeClient.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ abstract class BaseFakeClient(name: String, nBanks: Int, probe: Boolean = true)(
echoFields = cacheParams.echoField,
requestFields = Seq(PrefetchField(), PreferCacheField(), DirtyField()),
responseKeys = cacheParams.respKey
// responseKeys = Seq(HitLevelKey)
)
})
}
Expand Down
8 changes: 4 additions & 4 deletions src/test/scala/huancun/TestTop.scala
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule {
minLatency = 1,
echoFields = Seq(DirtyField()),
requestFields = Seq(PrefetchField(), PreferCacheField(), DirtyField(), AliasField(2)),
responseKeys = Seq(IsHitKey)
responseKeys = Seq(HitLevelKey)
)
))
masterNode
Expand All @@ -227,7 +227,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule {
prefetch = Some(huancun.prefetch.BOPParameters()),
reqField = Seq(PreferCacheField()),
echoField = Seq(DirtyField()),
respKey = Seq(IsHitKey)
respKey = Seq(HitLevelKey)
)
}))).node)

Expand All @@ -238,7 +238,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule {
inclusive = false,
clientCaches = Seq(CacheParameters(sets = 32, ways = 8, blockGranularity = 5, name = "L3")),
echoField = Seq(DirtyField()),
respField = Seq(IsHitField()),
respField = Seq(HitLevelField(2)),
simulation = true
)
})))
Expand Down Expand Up @@ -346,7 +346,7 @@ class TestTop_FullSys()(implicit p: Parameters) extends LazyModule {
clientCaches = Seq(CacheParameters("dcache", sets = 32, ways = 8, blockGranularity = 5)),
reqField = Seq(PreferCacheField()),
echoField = Seq(DirtyField()),
respKey = Seq(IsHitKey),
respKey = Seq(HitLevelKey),
prefetch = Some(huancun.prefetch.BOPParameters()),
sramDepthDiv = 2,
simulation = true
Expand Down
Loading