From 959bea2d7b773a78acba4f721bb057710895a8cf Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Tue, 9 Jan 2018 21:46:13 -0800 Subject: [PATCH 1/5] Some speedups from lists instead of sets. --- .../org/geneontology/rules/engine/ReteNodes.scala | 12 ++++-------- .../org/geneontology/rules/engine/RuleEngine.scala | 9 +++++---- .../geneontology/rules/engine/WorkingMemory.scala | 8 ++++---- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala index 1cb2092..11d65bd 100644 --- a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala +++ b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala @@ -2,9 +2,6 @@ package org.geneontology.rules.engine import scala.collection.mutable -import scalaz._ -import scalaz.Scalaz._ - final class AlphaNode(val pattern: TriplePattern) { var children: List[JoinNode] = Nil @@ -18,10 +15,9 @@ final class AlphaNode(val pattern: TriplePattern) { def activate(triple: Triple, memory: WorkingMemory): Unit = { val alphaMem = memory.alpha.getOrElseUpdate(pattern, new AlphaMemory(pattern)) alphaMem.triples = triple :: alphaMem.triples - alphaMem.tripleIndexS = alphaMem.tripleIndexS |+| Map(triple.s -> Set(triple)) - alphaMem.tripleIndexP = alphaMem.tripleIndexP |+| Map(triple.p -> Set(triple)) - alphaMem.tripleIndexO = alphaMem.tripleIndexO |+| Map(triple.o -> Set(triple)) - //children.foreach(_.rightActivate(triple, memory)) + alphaMem.tripleIndexS += triple.s -> (triple :: alphaMem.tripleIndexS.getOrElse(triple.s, Nil)) + alphaMem.tripleIndexP += triple.p -> (triple :: alphaMem.tripleIndexP.getOrElse(triple.p, Nil)) + alphaMem.tripleIndexO += triple.o -> (triple :: alphaMem.tripleIndexO.getOrElse(triple.o, Nil)) alphaMem.linkedChildren.foreach(_.rightActivate(triple, memory)) } @@ -94,7 +90,7 @@ final class JoinNode(val leftParent: BetaNode with BetaParent, rightParent: Alph betaMem.checkLeftLink = true } var valid = true - var possibleTriples: List[Set[Triple]] = Nil + var possibleTriples: List[List[Triple]] = Nil if (thisPattern.s.isInstanceOf[Variable]) { val v = thisPattern.s.asInstanceOf[Variable] if (parentBoundVariables(v)) { diff --git a/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala b/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala index 6c0f794..d2c3462 100644 --- a/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala +++ b/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala @@ -73,16 +73,17 @@ final class RuleEngine(inputRules: Iterable[Rule], val storeDerivations: Boolean } protected[engine] def processDerivedTriple(triple: Triple, derivation: Derivation, memory: WorkingMemory) = { - if (!memory.facts(triple)) { - memory.facts += triple + if (memory.facts.add(triple)) { //if (memory.facts.size % 100000 == 0) println(memory.facts.size) - memory.derivations = memory.derivations |+| Map(triple -> List(derivation)) + //memory.derivations = memory.derivations |+| Map(triple -> List(derivation)) + memory.derivations += triple -> (derivation :: memory.derivations.getOrElse(triple, Nil)) memory.agenda = memory.agenda.enqueue(triple) } } private def injectTriple(triple: Triple, memory: WorkingMemory): Unit = { - val patterns = List(DegeneratePattern, + val patterns = List( + DegeneratePattern, TriplePattern(AnyNode, AnyNode, triple.o), TriplePattern(AnyNode, triple.p, AnyNode), TriplePattern(AnyNode, triple.p, triple.o), diff --git a/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala b/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala index 664ac74..eb0edbd 100644 --- a/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala +++ b/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala @@ -7,7 +7,7 @@ import scala.collection.mutable.AnyRefMap final class WorkingMemory(var asserted: Set[Triple]) { var agenda: Queue[Triple] = Queue.empty - var facts: Set[Triple] = asserted + val facts: mutable.Set[Triple] = mutable.Set.empty ++ asserted var derivations: Map[Triple, List[Derivation]] = Map.empty val alpha: mutable.Map[TriplePattern, AlphaMemory] = AnyRefMap.empty @@ -48,9 +48,9 @@ final class WorkingMemory(var asserted: Set[Triple]) { final class AlphaMemory(pattern: TriplePattern) { var triples: List[Triple] = Nil - var tripleIndexS: Map[ConcreteNode, Set[Triple]] = Map.empty - var tripleIndexP: Map[ConcreteNode, Set[Triple]] = Map.empty - var tripleIndexO: Map[ConcreteNode, Set[Triple]] = Map.empty + var tripleIndexS: Map[ConcreteNode, List[Triple]] = Map.empty + var tripleIndexP: Map[ConcreteNode, List[Triple]] = Map.empty + var tripleIndexO: Map[ConcreteNode, List[Triple]] = Map.empty var linkedChildren: List[JoinNode] = Nil } From 14b07c190bb8be93d558febb0f766e1035375007 Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Wed, 10 Jan 2018 10:41:01 -0800 Subject: [PATCH 2/5] Cache hashcode for join node spec: performance improvement. --- .../geneontology/rules/engine/ReteNodes.scala | 18 ++++++++++++------ .../geneontology/rules/engine/RuleEngine.scala | 2 +- .../rules/engine/WorkingMemory.scala | 4 ++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala index 11d65bd..86dcc22 100644 --- a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala +++ b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala @@ -25,7 +25,7 @@ final class AlphaNode(val pattern: TriplePattern) { sealed trait BetaNode { - def spec: List[TriplePattern] + def spec: JoinNodeSpec def addChild(node: BetaNode): Unit @@ -43,7 +43,7 @@ final object BetaRoot extends BetaNode with BetaParent { def leftActivate(token: Token, memory: WorkingMemory): Unit = () def addChild(node: BetaNode): Unit = () - val spec: List[TriplePattern] = Nil + val spec: JoinNodeSpec = JoinNodeSpec(Nil) val memory: BetaMemory = new BetaMemory(spec, Nil) val children = Nil memory.tokens = Token(Map.empty, Nil) :: memory.tokens @@ -67,10 +67,10 @@ final case class Token(bindings: Map[Variable, ConcreteNode], triples: List[Trip } -final class JoinNode(val leftParent: BetaNode with BetaParent, rightParent: AlphaNode, val spec: List[TriplePattern]) extends BetaNode with BetaParent { +final class JoinNode(val leftParent: BetaNode with BetaParent, rightParent: AlphaNode, val spec: JoinNodeSpec) extends BetaNode with BetaParent { - private val thisPattern = spec.head - private val parentBoundVariables = spec.drop(1).flatMap(_.variables).toSet + private val thisPattern = spec.pattern.head + private val parentBoundVariables = spec.pattern.drop(1).flatMap(_.variables).toSet private val thisPatternVariables = thisPattern.variables private val matchVariables = parentBoundVariables intersect thisPatternVariables private val rightParentPattern = rightParent.pattern @@ -238,6 +238,12 @@ final class ProductionNode(rule: Rule, parent: BetaNode, engine: RuleEngine) ext def addChild(node: BetaNode): Unit = () - val spec: List[TriplePattern] = Nil + val spec: JoinNodeSpec = JoinNodeSpec(Nil) } + +final case class JoinNodeSpec(pattern: List[TriplePattern]) { + + override val hashCode: Int = pattern.hashCode + +} \ No newline at end of file diff --git a/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala b/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala index d2c3462..af33c3a 100644 --- a/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala +++ b/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala @@ -20,7 +20,7 @@ final class RuleEngine(inputRules: Iterable[Rule], val storeDerivations: Boolean val blankPattern = pattern.blankVariables val alphaNode = alphaNodeIndex.getOrElseUpdate(blankPattern, new AlphaNode(blankPattern)) val thisPatternSequence = pattern :: parentPatterns - val joinNode = joinIndex.getOrElseUpdate(thisPatternSequence, new JoinNode(parent, alphaNode, thisPatternSequence)) + val joinNode = joinIndex.getOrElseUpdate(thisPatternSequence, new JoinNode(parent, alphaNode, JoinNodeSpec(thisPatternSequence))) parent.addChild(joinNode) alphaNode.addChild(joinNode) if (parent == BetaRoot) topJoinNodes += joinNode diff --git a/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala b/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala index eb0edbd..8ce0495 100644 --- a/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala +++ b/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala @@ -11,7 +11,7 @@ final class WorkingMemory(var asserted: Set[Triple]) { var derivations: Map[Triple, List[Derivation]] = Map.empty val alpha: mutable.Map[TriplePattern, AlphaMemory] = AnyRefMap.empty - val beta: mutable.Map[List[TriplePattern], BetaMemory] = AnyRefMap.empty + val beta: mutable.Map[JoinNodeSpec, BetaMemory] = AnyRefMap.empty beta += (BetaRoot.spec -> BetaRoot.memory) def explain(triple: Triple): Set[Explanation] = explainAll(Set(triple)) @@ -55,7 +55,7 @@ final class AlphaMemory(pattern: TriplePattern) { } -final class BetaMemory(val spec: List[TriplePattern], initialLinkedChildren: List[BetaNode]) { +final class BetaMemory(val spec: JoinNodeSpec, initialLinkedChildren: List[BetaNode]) { var tokens: List[Token] = Nil var checkRightLink: Boolean = true From 728b6c51349f4c8690c3ae5b3153e280300e5107 Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Wed, 10 Jan 2018 10:55:05 -0800 Subject: [PATCH 3/5] Faster triple storage when not saving derivations. --- .../scala/org/geneontology/rules/engine/RuleEngine.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala b/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala index af33c3a..dfb6597 100644 --- a/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala +++ b/src/main/scala/org/geneontology/rules/engine/RuleEngine.scala @@ -65,17 +65,13 @@ final class RuleEngine(inputRules: Iterable[Rule], val storeDerivations: Boolean private val DegeneratePattern = TriplePattern(AnyNode, AnyNode, AnyNode) protected[engine] def processTriple(triple: Triple, memory: WorkingMemory): Unit = { - if (!memory.facts(triple)) { - memory.facts += triple + if (memory.facts.add(triple)) { memory.agenda = memory.agenda.enqueue(triple) } - } protected[engine] def processDerivedTriple(triple: Triple, derivation: Derivation, memory: WorkingMemory) = { if (memory.facts.add(triple)) { - //if (memory.facts.size % 100000 == 0) println(memory.facts.size) - //memory.derivations = memory.derivations |+| Map(triple -> List(derivation)) memory.derivations += triple -> (derivation :: memory.derivations.getOrElse(triple, Nil)) memory.agenda = memory.agenda.enqueue(triple) } From ef26f3bd6ca607de4dd1b9ba5f6961a0674fd869 Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Wed, 10 Jan 2018 11:33:40 -0800 Subject: [PATCH 4/5] Cleanup. --- src/main/scala/org/geneontology/rules/engine/ReteNodes.scala | 3 +-- .../scala/org/geneontology/rules/engine/WorkingMemory.scala | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala index 86dcc22..5e242f0 100644 --- a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala +++ b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala @@ -220,7 +220,6 @@ final class ProductionNode(rule: Rule, parent: BetaNode, engine: RuleEngine) ext for { pattern <- rule.head } { - //FIXME get rid of casting val newTriple = Triple( produceNode(pattern.s, token).asInstanceOf[Resource], produceNode(pattern.p, token).asInstanceOf[URI], @@ -233,7 +232,7 @@ final class ProductionNode(rule: Rule, parent: BetaNode, engine: RuleEngine) ext private def produceNode(node: Node, token: Token): ConcreteNode = node match { case c: ConcreteNode => c case v: Variable => token.bindings(v) - //case AnyNode => error + case AnyNode => throw new RuntimeException("Invalid rule head containing AnyNode") } def addChild(node: BetaNode): Unit = () diff --git a/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala b/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala index 8ce0495..3794cc1 100644 --- a/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala +++ b/src/main/scala/org/geneontology/rules/engine/WorkingMemory.scala @@ -4,7 +4,7 @@ import scala.collection.immutable.Queue import scala.collection.mutable import scala.collection.mutable.AnyRefMap -final class WorkingMemory(var asserted: Set[Triple]) { +final class WorkingMemory(val asserted: Set[Triple]) { var agenda: Queue[Triple] = Queue.empty val facts: mutable.Set[Triple] = mutable.Set.empty ++ asserted @@ -60,7 +60,6 @@ final class BetaMemory(val spec: JoinNodeSpec, initialLinkedChildren: List[BetaN var tokens: List[Token] = Nil var checkRightLink: Boolean = true var checkLeftLink: Boolean = false - //val tokenIndex: mutable.Map[(Variable, ConcreteNode), mutable.Set[Token]] = AnyRefMap.empty val tokenIndex: mutable.Map[Variable, mutable.Map[ConcreteNode, List[Token]]] = AnyRefMap.empty var linkedChildren: List[BetaNode] = initialLinkedChildren From a115d263cfaff39bb98a04999ee288de5c01d462 Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Tue, 16 Jan 2018 21:49:29 -0600 Subject: [PATCH 5/5] Cleanup. --- src/main/scala/org/geneontology/rules/engine/ReteNodes.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala index 5e242f0..6344c0e 100644 --- a/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala +++ b/src/main/scala/org/geneontology/rules/engine/ReteNodes.scala @@ -137,8 +137,6 @@ final class JoinNode(val leftParent: BetaNode with BetaParent, rightParent: Alph _ = tokensToSend = newToken :: tokensToSend (bindingVar, bindingValue) <- newToken.bindings } { - //betaMem.tokenIndex.getOrElseUpdate(binding, mutable.Set.empty).add(newToken) - //betaMem.tokenIndex.getOrElseUpdate(bindingVar, mutable.AnyRefMap.empty).getOrElseUpdate(bindingValue, mutable.Set.empty).add(newToken) val currentMap = betaMem.tokenIndex.getOrElseUpdate(bindingVar, mutable.AnyRefMap.empty) val currentList = currentMap.getOrElse(bindingValue, Nil) currentMap(bindingValue) = newToken :: currentList