Skip to content

Commit

Permalink
[SYCL][Graph] Fix issues with sycl_ext_oneapi_graph subgraphs (#10822)
Browse files Browse the repository at this point in the history
This PR addresses some issues with the
[sycl_ext_oneapi_graph](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc)
extension regarding sub-graphs. A sub-graph is when an executable
`command_graph` is added as a node in another graph.

Currently, adding an executable graph object as a subgraph permanently
connects the child graph's root nodes to the parent graph. This affects
subsequent independent submissions of the executable child graph object
and later additions as a subgraph node. This is incorrect behaviour, as
the previous uses as a sub-graph should have no effect on the state of
the executable graph object.

Fixed in this PR by duplicating the nodes of a subgraph when added to a
parent, enabling multiple parents. As a result, nodes of the initial
subgraph and the one of the main graph are no longer the exact same
(shared_pointer) but two different nodes with similar content.

## Authors

Co-authored-by: Maxime France-Pillois
<[email protected]>

---------

Co-authored-by: Maxime France-Pillois <[email protected]>
Co-authored-by: Ben Tracy <[email protected]>
  • Loading branch information
3 people authored Aug 25, 2023
1 parent 5c30815 commit 92ddf8d
Show file tree
Hide file tree
Showing 10 changed files with 299 additions and 53 deletions.
53 changes: 50 additions & 3 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ bool checkForRequirement(sycl::detail::AccessorImplHost *Req,
}
return SuccessorAddedDep;
}

void duplicateNode(const std::shared_ptr<node_impl> Node,
std::shared_ptr<node_impl> &NodeCopy) {
if (Node->MCGType == sycl::detail::CG::None) {
NodeCopy = std::make_shared<node_impl>();
NodeCopy->MCGType = sycl::detail::CG::None;
} else {
NodeCopy = std::make_shared<node_impl>(Node->MCGType, Node->getCGCopy());
}
}

} // anonymous namespace

void exec_graph_impl::schedule() {
Expand All @@ -81,7 +92,7 @@ void exec_graph_impl::schedule() {
}
}

std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
std::shared_ptr<node_impl> graph_impl::addNodesToExits(
const std::list<std::shared_ptr<node_impl>> &NodeList) {
// Find all input and output nodes from the node list
std::vector<std::shared_ptr<node_impl>> Inputs;
Expand All @@ -104,6 +115,36 @@ std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
return this->add(Outputs);
}

std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
const std::shared_ptr<exec_graph_impl> &SubGraphExec) {
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>> NodesMap;

std::list<std::shared_ptr<node_impl>> NodesList = SubGraphExec->getSchedule();
std::list<std::shared_ptr<node_impl>> NewNodesList{NodesList.size()};

// Duplication of nodes
for (auto NodeIt = NodesList.end(), NewNodesIt = NewNodesList.end();
NodeIt != NodesList.begin();) {
--NodeIt;
--NewNodesIt;
auto Node = *NodeIt;
std::shared_ptr<node_impl> NodeCopy;
duplicateNode(Node, NodeCopy);
*NewNodesIt = NodeCopy;
NodesMap.insert({Node, NodeCopy});
for (auto &NextNode : Node->MSuccessors) {
if (NodesMap.find(NextNode) != NodesMap.end()) {
auto Successor = NodesMap[NextNode];
NodeCopy->registerSuccessor(Successor, NodeCopy);
} else {
assert("Node duplication failed. A duplicated node is missing.");
}
}
}

return addNodesToExits(NewNodesList);
}

void graph_impl::addRoot(const std::shared_ptr<node_impl> &Root) {
MRoots.insert(Root);
}
Expand Down Expand Up @@ -313,6 +354,11 @@ void exec_graph_impl::createCommandBuffers(sycl::device Device) {

// TODO extract kernel bundle logic from enqueueImpKernel
for (const auto &Node : MSchedule) {
// Empty nodes are not processed as other nodes, but only their
// dependencies are propagated in findRealDeps
if (Node->isEmpty())
continue;

sycl::detail::CG::CGTYPE type = Node->MCGType;
// If the node is a kernel with no special requirements we can enqueue it
// directly.
Expand Down Expand Up @@ -453,8 +499,9 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
"Error during emulated graph command group submission.");
}
ScheduledEvents.push_back(NewEvent);
} else {

} else if (!NodeImpl->isEmpty()) {
// Empty nodes are node processed as other nodes, but only their
// dependencies are propagated in findRealDeps
sycl::detail::EventImplPtr EventImpl =
sycl::detail::Scheduler::getInstance().addCG(NodeImpl->getCGCopy(),
Queue);
Expand Down
84 changes: 64 additions & 20 deletions sycl/source/detail/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,51 @@ class node_impl {
std::unique_ptr<sycl::detail::CG> &&CommandGroup)
: MCGType(CGType), MCommandGroup(std::move(CommandGroup)) {}

/// Tests if two nodes have the same content,
/// i.e. same command group
/// This function should only be used for internal purposes.
/// A true return from this operator is not a guarantee that the nodes are
/// equals according to the Common reference semantics. But this function is
/// an helper to verify that two nodes contain equivalent Command Groups.
/// @param Node node to compare with
/// @return true if two nodes have equivament command groups. false otherwise.
bool operator==(const node_impl &Node) {
if (MCGType != Node.MCGType)
return false;

switch (MCGType) {
case sycl::detail::CG::CGTYPE::Kernel: {
sycl::detail::CGExecKernel *ExecKernelA =
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
sycl::detail::CGExecKernel *ExecKernelB =
static_cast<sycl::detail::CGExecKernel *>(Node.MCommandGroup.get());
return ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) == 0;
}
case sycl::detail::CG::CGTYPE::CopyUSM: {
sycl::detail::CGCopyUSM *CopyA =
static_cast<sycl::detail::CGCopyUSM *>(MCommandGroup.get());
sycl::detail::CGCopyUSM *CopyB =
static_cast<sycl::detail::CGCopyUSM *>(MCommandGroup.get());
return (CopyA->getSrc() == CopyB->getSrc()) &&
(CopyA->getDst() == CopyB->getDst()) &&
(CopyA->getLength() == CopyB->getLength());
}
case sycl::detail::CG::CGTYPE::CopyAccToAcc:
case sycl::detail::CG::CGTYPE::CopyAccToPtr:
case sycl::detail::CG::CGTYPE::CopyPtrToAcc: {
sycl::detail::CGCopy *CopyA =
static_cast<sycl::detail::CGCopy *>(MCommandGroup.get());
sycl::detail::CGCopy *CopyB =
static_cast<sycl::detail::CGCopy *>(MCommandGroup.get());
return (CopyA->getSrc() == CopyB->getSrc()) &&
(CopyA->getDst() == CopyB->getDst());
}
default:
assert(false && "Unexpected command group type!");
return false;
}
}

/// Recursively add nodes to execution stack.
/// @param NodeImpl Node to schedule.
/// @param Schedule Execution ordering to add node to.
Expand All @@ -83,10 +128,8 @@ class node_impl {
if (std::find(Schedule.begin(), Schedule.end(), Next) == Schedule.end())
Next->sortTopological(Next, Schedule);
}
// We don't need to schedule empty nodes as they are only used when
// calculating dependencies
if (!NodeImpl->isEmpty())
Schedule.push_front(NodeImpl);

Schedule.push_front(NodeImpl);
}

/// Checks if this node has a given requirement.
Expand Down Expand Up @@ -171,26 +214,16 @@ class node_impl {
/// Tests is the caller is similar to Node
/// @return True if the two nodes are similar
bool isSimilar(std::shared_ptr<node_impl> Node) {
if (MCGType != Node->MCGType)
return false;

if (MSuccessors.size() != Node->MSuccessors.size())
return false;

if (MPredecessors.size() != Node->MPredecessors.size())
return false;

if ((MCGType == sycl::detail::CG::CGTYPE::Kernel) &&
(Node->MCGType == sycl::detail::CG::CGTYPE::Kernel)) {
sycl::detail::CGExecKernel *ExecKernelA =
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
sycl::detail::CGExecKernel *ExecKernelB =
static_cast<sycl::detail::CGExecKernel *>(Node->MCommandGroup.get());
if (*this == *Node.get())
return true;

if (ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) != 0)
return false;
}
return true;
return false;
}

/// Recursive traversal of successor nodes checking for
Expand Down Expand Up @@ -336,11 +369,12 @@ class graph_impl {
"No event has been recorded for the specified graph node");
}

/// Adds sub-graph nodes from an executable graph to this graph.
/// @param NodeList List of nodes from sub-graph in schedule order.
/// Duplicates and Adds sub-graph nodes from an executable graph to this
/// graph.
/// @param SubGraphExec sub-graph to add to the parent.
/// @return An empty node is used to schedule dependencies on this sub-graph.
std::shared_ptr<node_impl>
addSubgraphNodes(const std::list<std::shared_ptr<node_impl>> &NodeList);
addSubgraphNodes(const std::shared_ptr<exec_graph_impl> &SubGraphExec);

/// Query for the context tied to this graph.
/// @return Context associated with graph.
Expand Down Expand Up @@ -486,6 +520,12 @@ class graph_impl {
/// Insert node into list of root nodes.
/// @param Root Node to add to list of root nodes.
void addRoot(const std::shared_ptr<node_impl> &Root);

/// Adds nodes to the exit nodes of this graph.
/// @param NodeList List of nodes from sub-graph in schedule order.
/// @return An empty node is used to schedule dependencies on this sub-graph.
std::shared_ptr<node_impl>
addNodesToExits(const std::list<std::shared_ptr<node_impl>> &NodeList);
};

/// Class representing the implementation of command_graph<executable>.
Expand Down Expand Up @@ -537,6 +577,10 @@ class exec_graph_impl {
return MSchedule;
}

/// Query the graph_impl.
/// @return pointer to the graph_impl MGraphImpl
const std::shared_ptr<graph_impl> &getGraphImpl() const { return MGraphImpl; }

private:
/// Create a command-group for the node and add it to command-buffer by going
/// through the scheduler.
Expand Down
4 changes: 3 additions & 1 deletion sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,9 @@ void handler::ext_oneapi_graph(
}
// Store the node representing the subgraph in the handler so that we can
// return it to the user later.
MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl->getSchedule());
// The nodes of the subgraph are duplicated when added to its parents.
// This avoids changing properties of the graph added as a subgraph.
MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl);

// If we are recording an in-order queue remember the subgraph node, so it
// can be used as a dependency for any more nodes recorded from this queue.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL:*
// Submit a graph as a subgraph more than once doesn't yet work.

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/sub_graph_multiple_submission.cpp"
3 changes: 0 additions & 3 deletions sycl/test-e2e/Graph/Explicit/sub_graph_two_parent_graphs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL: *
// Subgraph doesn't work properly in second parent graph

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/sub_graph_two_parent_graphs.cpp"
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ int main() {
Queue.memcpy(Output.data(), X, N * sizeof(float), E).wait();

for (size_t i = 0; i < N; i++) {
assert(Output[i] == -6.25f);
assert(Output[i] == -4.5f);
}

sycl::free(X, Queue);
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Graph/Inputs/sub_graph_two_parent_graphs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ int main() {
auto ExecGraphA = GraphA.finalize();

auto B1 = add_node(GraphB, Queue, [&](handler &CGH) {
CGH.parallel_for(N, [=](id<1> it) { X[it] = static_cast<float>(i); });
CGH.parallel_for(N, [=](id<1> it) { X[it] = static_cast<float>(it); });
});

auto B2 = add_node(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL:*
// Submit a graph as a subgraph more than once doesn't yet work.

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/sub_graph_multiple_submission.cpp"
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL: *
// Subgraph doesn't work properly in second parent graph

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/sub_graph_two_parent_graphs.cpp"
Loading

0 comments on commit 92ddf8d

Please sign in to comment.