diff --git a/docs/assets/images/MSTProperty3.png b/docs/assets/images/MSTProperty3.png new file mode 100644 index 0000000..b6c8201 Binary files /dev/null and b/docs/assets/images/MSTProperty3.png differ diff --git a/docs/assets/images/MSTProperty4.png b/docs/assets/images/MSTProperty4.png new file mode 100644 index 0000000..fb4a303 Binary files /dev/null and b/docs/assets/images/MSTProperty4.png differ diff --git a/src/main/java/algorithms/minimumSpanningTree/README.md b/src/main/java/algorithms/minimumSpanningTree/README.md new file mode 100644 index 0000000..e788bd1 --- /dev/null +++ b/src/main/java/algorithms/minimumSpanningTree/README.md @@ -0,0 +1,53 @@ +# Minimum Spanning Tree Algorithms + +## Background + +Minimum Spanning Tree (MST) algorithms are used to find the minimum spanning tree of a weighted, connected graph. A +spanning tree of a graph is a connected, acyclic subgraph that includes all the vertices of the original graph. An MST +is a spanning tree with the minimum possible total edge weight. + +### 4 Properties of MST +1. An MST should not have any cycles +2. If you cut an MST at any single edge, the two pieces will also be MSTs +3. **Cycle Property:** For every cycle, the maximum weight edge is not in the MST + +![MST Property 3](../../../../../docs/assets/images/MSTProperty3.png) + +Image Source: CS2040S 22/23 Sem 2 Lecture Slides + +4. **Cut Property:** For every partition of the nodes, the minimum weight edge across the cut is in the MST + +![MST Property 4](../../../../../docs/assets/images/MSTProperty4.png) + +Image Source: CS2040S 22/23 Sem 2 Lecture Slides + +Note that the other edges across the partition may or may not be in the MST. + +## Prim's Algorithm and Kruskal's Algorithm + +We will discuss more implementation-specific details and complexity analysis in the respective folders. In short, +1. 
[Prim's Algorithm](prim) is a greedy algorithm that finds the minimum spanning tree of a graph by starting from an +arbitrary node (vertex) and adding the edge with the minimum weight that connects the current tree to a new node, adding +the node to the current tree, until all nodes are included in the tree. +2. [Kruskal's Algorithm](kruskal) is a greedy algorithm that finds the minimum spanning tree of a graph by sorting the +edges by weight and adding the edge with the minimum weight that does not form a cycle into the current tree. + +## Notes + +### Difference between Minimum Spanning Tree and Shortest Path +It is important to note that a Minimum Spanning Tree of a graph does not represent the shortest path between all the +nodes. See below for an example: + +The below graph is a weighted, connected graph with 5 nodes and 6 edges: +![original graph img](../../../../../docs/assets/images/originalGraph.jpg) + +The following is the Minimum Spanning Tree of the above graph: +![MST img](../../../../../docs/assets/images/MST.jpg) + +Taking node A and D into consideration, the shortest path between them is A -> D, with a total weight of 4. +![SPOriginal img](../../../../../docs/assets/images/SPOriginal.jpg) + +However, the shortest path between A and D in the Minimum Spanning Tree is A -> C -> D, with a total weight of 5, which +is not the shortest path in the original graph. 
+![SPMST img](../../../../../docs/assets/images/SPMST.jpg) diff --git a/src/main/java/algorithms/minimumSpanningTree/kruskal/Kruskal.java b/src/main/java/algorithms/minimumSpanningTree/kruskal/Kruskal.java new file mode 100644 index 0000000..23f2196 --- /dev/null +++ b/src/main/java/algorithms/minimumSpanningTree/kruskal/Kruskal.java @@ -0,0 +1,134 @@ +package algorithms.minimumSpanningTree.kruskal; + +import java.util.ArrayList; +import java.util.List; + +import dataStructures.disjointSet.weightedUnion.DisjointSet; + +/** + * Implementation of Kruskal's Algorithm to find MSTs + * Idea: + * Sort all edges by weight in non-decreasing order. Consider the edges in this order. If an edge does not form a cycle + * with the edges already in the MST, add it to the MST. Repeat until all nodes are in the MST. + * Actual implementation: + * An Edge class is implemented for easier sorting of edges by weight and for identifying the source and destination. + * A Node class is implemented for easier tracking of nodes in the graph for the disjoint set. + * A DisjointSet class is used to track the nodes in the graph and to determine if adding an edge will form a cycle. 
+ */ +public class Kruskal { + public static int[][] getKruskalMST(Node[] nodes, int[][] adjacencyMatrix) { + int numOfNodes = nodes.length; + List edges = new ArrayList<>(); + + // Convert adjacency matrix to list of edges + for (int i = 0; i < numOfNodes; i++) { + for (int j = i + 1; j < numOfNodes; j++) { + if (adjacencyMatrix[i][j] != Integer.MAX_VALUE) { + edges.add(new Edge(nodes[i], nodes[j], adjacencyMatrix[i][j])); + } + } + } + + // Sort edges by weight + edges.sort(Edge::compareTo); + + // Initialize Disjoint Set for vertex tracking + DisjointSet ds = new DisjointSet<>(nodes); + + // MST adjacency matrix to be returned + int[][] mstMatrix = new int[numOfNodes][numOfNodes]; + + // Initialize the MST matrix to represent no edges with Integer.MAX_VALUE and 0 for self loops + for (int i = 0; i < nodes.length; i++) { + for (int j = 0; j < nodes.length; j++) { + mstMatrix[i][j] = (i == j) ? 0 : Integer.MAX_VALUE; + } + } + + // Process edges to build MST + for (Edge edge : edges) { + Node source = edge.getSource(); + Node destination = edge.getDestination(); + if (!ds.find(source, destination)) { + mstMatrix[source.getIndex()][destination.getIndex()] = edge.getWeight(); + mstMatrix[destination.getIndex()][source.getIndex()] = edge.getWeight(); + ds.union(source, destination); + } + } + + return mstMatrix; + } + + /** + * Node class to represent a node in the graph + * Note: In our Node class, we do not allow the currMinWeight to be updated after initialization to prevent any + * reference issues in the PriorityQueue. 
+ */ + static class Node { + private final int index; // Index of this node in the adjacency matrix + private final String identifier; + + /** + * Constructor for Node + * @param identifier + * @param index + */ + public Node(String identifier, int index) { + this.identifier = identifier; + this.index = index; + } + + /** + * Getter for identifier + * @return identifier + */ + public String getIdentifier() { + return identifier; + } + + public int getIndex() { + return index; + } + + @Override + public String toString() { + return "Node{" + "identifier='" + identifier + '\'' + ", index=" + index + '}'; + } + } + + /** + * Edge class to represent an edge in the graph + */ + static class Edge implements Comparable { + private final Node source; + private final Node destination; + private final int weight; + + /** + * Constructor for Edge + */ + public Edge(Node source, Node destination, int weight) { + this.source = source; + this.destination = destination; + this.weight = weight; + } + + public int getWeight() { + return weight; + } + + public Node getSource() { + return source; + } + + public Node getDestination() { + return destination; + } + + @Override + public int compareTo(Edge other) { + return Integer.compare(this.weight, other.weight); + } + } +} + diff --git a/src/main/java/algorithms/minimumSpanningTree/kruskal/README.md b/src/main/java/algorithms/minimumSpanningTree/kruskal/README.md new file mode 100644 index 0000000..811e5d6 --- /dev/null +++ b/src/main/java/algorithms/minimumSpanningTree/kruskal/README.md @@ -0,0 +1,23 @@ +# Kruskal's Algorithm + +## Background +Kruskal's Algorithm is a greedy algorithm used to find the minimum spanning tree (MST) of a connected, weighted graph. +It works by sorting all the edges in the graph by their weight in non-decreasing order and then adding the smallest edge +to the MST, provided it does not form a cycle with the already included edges. This is repeated until all vertices are +included in the MST. 
+ +## Implementation Details +Kruskal's Algorithm uses a simple `ArrayList` to sort the edges by weight. + +A [`DisjointSet`](/dataStructures/disjointSet/weightedUnion) data structure is also used to keep track of the +connectivity of vertices and detect cycles. + +## Complexity Analysis + +**Time Complexity:** +Sorting the edges by weight: O(E log E) = O(E log V), where V and E are the numbers of vertices and edges respectively. +Union-Find operations: O(E α(V)), where α is the inverse Ackermann function. +Overall complexity: O(E log V) + +**Space Complexity:** +O(V + E) for the storage of vertices in the disjoint set and edges in the sorted list. \ No newline at end of file diff --git a/src/main/java/algorithms/minimumSpanningTree/prim/Prim.java b/src/main/java/algorithms/minimumSpanningTree/prim/Prim.java new file mode 100644 index 0000000..e5baad9 --- /dev/null +++ b/src/main/java/algorithms/minimumSpanningTree/prim/Prim.java @@ -0,0 +1,135 @@ +package algorithms.minimumSpanningTree.prim; + +import java.util.Arrays; +import java.util.PriorityQueue; + +/** + * Implementation of Prim's Algorithm to find MSTs + * Idea: + * Starting from any source (this will be the first node to be in the MST), pick the lightest outgoing edge, and + * include the node at the other end as part of a set of nodes S. Now repeatedly do the above by picking the lightest + * outgoing edge adjacent to any node in the MST (ensure the other end of the node is not already in the MST). + * Repeat until S contains all nodes in the graph. S is the MST. + * Actual implementation: + * No Edge class was implemented. Instead, the weights of the edges are stored in a 2D array adjacency matrix. An + * adjacency list may be used instead + * A Node class is implemented to encapsulate the current minimum weight to reach the node. 
/**
 * Prim's algorithm for building a minimum spanning tree (MST) of an undirected, weighted graph.
 *
 * <p>Idea: grow the tree from a start node. Repeatedly take the lightest edge that leaves the current tree and
 * add the node on its far end, until no reachable node is left outside the tree.
 *
 * <p>Implementation notes:
 * <ul>
 *   <li>Edge weights are read directly from the adjacency matrix; there is no Edge class.</li>
 *   <li>{@link Node} carries the weight of the edge through which it was queued.</li>
 *   <li>No decrease-key is used: a cheaper route to a node is queued as a new {@link Node}, and stale queue
 *       entries are skipped through the {@code visited} array.</li>
 * </ul>
 */
public class Prim {
    /**
     * Computes an MST of the graph described by {@code adjacencyMatrix}.
     *
     * <p>{@code Integer.MAX_VALUE} marks "no edge". Row/column {@code i} of the matrix is assumed to belong to
     * {@code nodes[i]}, whose {@link Node#getIndex()} is expected to be {@code i}.
     *
     * @param nodes           the graph's nodes, normally created with the two-argument constructor so that
     *                        every node starts with a current minimum weight of {@code Integer.MAX_VALUE}
     * @param adjacencyMatrix square, symmetric matrix of edge weights
     * @return a symmetric adjacency matrix of the MST with {@code 0} on the diagonal and
     *         {@code Integer.MAX_VALUE} wherever the MST has no edge
     */
    public static int[][] getPrimsMST(Node[] nodes, int[][] adjacencyMatrix) {
        int numOfNodes = nodes.length;

        // Min heap on currMinWeight. Integer.compare instead of subtraction: "a - b" overflows when a
        // negative edge weight is compared with the initial Integer.MAX_VALUE.
        PriorityQueue<Node> pq = new PriorityQueue<>(
                (a, b) -> Integer.compare(a.getCurrMinWeight(), b.getCurrMinWeight()));
        int[][] mstMatrix = new int[numOfNodes][numOfNodes]; // MST adjacency matrix

        int[] parent = new int[numOfNodes]; // parent[i] = node through which i joins the MST
        Arrays.fill(parent, -1); // -1 means "no parent" (start node or unreachable node)

        boolean[] visited = new boolean[numOfNodes]; // true once a node has been added to the MST

        // Lightest edge found so far from the tree to each node. Node objects are immutable and the entries
        // of nodes[] are never replaced, so the best weight has to be tracked separately; comparing against
        // nodes[i].getCurrMinWeight() would always compare against the initial value.
        int[] bestWeight = new int[numOfNodes];
        for (int i = 0; i < numOfNodes; i++) {
            bestWeight[i] = nodes[i].getCurrMinWeight();
        }

        // Initialize the MST matrix to represent no edges with Integer.MAX_VALUE and 0 for self loops
        for (int i = 0; i < numOfNodes; i++) {
            for (int j = 0; j < numOfNodes; j++) {
                mstMatrix[i][j] = (i == j) ? 0 : Integer.MAX_VALUE;
            }
        }

        // Queue every node with its initial weight so that each one is eventually polled.
        pq.addAll(Arrays.asList(nodes));

        while (!pq.isEmpty()) {
            Node current = pq.poll();
            int currentIndex = current.getIndex();

            if (visited[currentIndex]) { // Stale duplicate of a node that is already in the MST
                continue;
            }
            visited[currentIndex] = true;

            for (int i = 0; i < numOfNodes; i++) {
                int weight = adjacencyMatrix[currentIndex][i];
                if (weight == Integer.MAX_VALUE || visited[nodes[i].getIndex()]) {
                    continue; // No edge, or neighbour already in the MST
                }

                // Record the edge only if it is at least as light as the best one known for node i.
                // "<=" (not "<") lets the most recently seen edge win a tie, which keeps the tree shape
                // this method has always produced for equal-weight edges.
                if (weight <= bestWeight[i]) {
                    bestWeight[i] = weight;
                    parent[i] = currentIndex; // Set current node as parent of adjacent node
                    pq.add(new Node(nodes[i].getIdentifier(), nodes[i].getIndex(), weight));
                }
            }
        }

        // Build MST matrix from the parent array. Every index is inspected; roots simply have parent -1.
        for (int i = 0; i < numOfNodes; i++) {
            int p = parent[i];
            if (p != -1) {
                int weight = adjacencyMatrix[p][i];
                mstMatrix[p][i] = weight;
                mstMatrix[i][p] = weight; // For undirected graphs
            }
        }

        return mstMatrix;
    }

    /**
     * Immutable node of the graph.
     * Note: currMinWeight cannot be changed after construction. A cheaper route to a node is represented by
     * a new Node instance, so objects already inside the PriorityQueue never change their ordering key.
     */
    static class Node {
        private final int currMinWeight; // Current minimum weight to get to this node
        private final int index; // Index of this node in the adjacency matrix
        private final String identifier;

        /**
         * Constructor for Node.
         *
         * @param identifier    label of the node
         * @param index         row/column of this node in the adjacency matrix
         * @param currMinWeight weight of the edge through which this node is queued
         */
        public Node(String identifier, int index, int currMinWeight) {
            this.identifier = identifier;
            this.index = index;
            this.currMinWeight = currMinWeight;
        }

        /**
         * Constructor for Node with default currMinWeight of Integer.MAX_VALUE.
         *
         * @param identifier label of the node
         * @param index      row/column of this node in the adjacency matrix
         */
        public Node(String identifier, int index) {
            this.identifier = identifier;
            this.index = index;
            this.currMinWeight = Integer.MAX_VALUE;
        }

        /**
         * Getter for currMinWeight.
         *
         * @return currMinWeight
         */
        public int getCurrMinWeight() {
            return currMinWeight;
        }

        /**
         * Getter for identifier.
         *
         * @return identifier
         */
        public String getIdentifier() {
            return identifier;
        }

        /**
         * Getter for index.
         *
         * @return index of this node in the adjacency matrix
         */
        public int getIndex() {
            return index;
        }

        @Override
        public String toString() {
            return "Node{" + "identifier='" + identifier + '\'' + ", index=" + index + '}';
        }
    }
}
the tree. + +### Implementation Details + +A `PriorityQueue` (binary heap) is utilised to keep track of the minimum weight edge that connects the current tree to +an unexplored node. In an ideal scenario, the minimum weight edge to each node in the priority queue should be updated each +time a lighter edge is found to maintain a single unique node in the priority queue. This means that a decrease key +operation is required. + +**Decrease Key Operation:** + +However, we know that the decrease key operation of a binary heap implementation of a priority +queue will take O(V) time (the node must first be located in the heap, which requires a linear scan), which will result in a larger time complexity for the entire algorithm compared to using only +O(log V) operations for each edge. Hence, in our implementation, to avoid the use of a decrease key operation, we will simply insert duplicate nodes with +their new minimum weight edge, which will take O(log E) = O(log V) given an upper bound of E = V^2, into the queue, +while leaving the old node in the queue. Additionally, we will track if a node has already been added into the MST to +avoid adding duplicate nodes. + +**Priority Queue Implementation:** + +Note that a priority queue is an abstract data type that can be implemented using different data structures. In this +implementation, the default Java `PriorityQueue` is used, which is a binary heap. By implementing the priority queue +with an AVL tree, a decrease key operation that has a time complexity of O(log V) can also be achieved. + +## Complexity Analysis + +**Time Complexity:** +- O(V^2 log V) for the basic version with an adjacency matrix, where V is the number of vertices. +- O(E log V) with a binary heap and adjacency list, where V and E are the numbers of vertices and edges +respectively. + +**Space Complexity:** +- O(V^2) for the adjacency matrix representation. +- O(V + E) for the adjacency list representation. 
+ +## Notes + +### Difference between Prim's Algorithm and Dijkstra's Algorithm + +| | Prim's Algorithm | Dijkstra's Algorithm | +|-------------------------------------|---------------------------------------------------------------------------------|----------------------------------------------------------| +| Purpose | Finds MST - minimum sum of edge weights that includes all vertices in the graph | Finds shortest path from a single source to all vertices | +| Property Compared in Priority Queue | Minimum weight of incoming edge to a vertex | Minimum distance from source vertex to current vertex | + diff --git a/src/test/java/algorithms/minimumSpanningTree/kruskal/KruskalTest.java b/src/test/java/algorithms/minimumSpanningTree/kruskal/KruskalTest.java new file mode 100644 index 0000000..fdd18e3 --- /dev/null +++ b/src/test/java/algorithms/minimumSpanningTree/kruskal/KruskalTest.java @@ -0,0 +1,84 @@ +package algorithms.minimumSpanningTree.kruskal; + +import static org.junit.Assert.assertArrayEquals; + +import org.junit.Test; + +public class KruskalTest { + @Test + public void test_simpleGraph() { + // Graph setup (Adjacency Matrix) + // B + // / \ + // 1 1 + // / \ + // A - 1 - C + int[][] adjacencyMatrix = { + {0, 1, 1}, // A: A-B, A-C + {1, 0, 1}, // B: B-A, B-C + {1, 1, 0} // C: C-A, C-B + }; + + Kruskal.Node[] nodes = { + new Kruskal.Node("A", 0), + new Kruskal.Node("B", 1), + new Kruskal.Node("C", 2) + }; + + // Run Kruskal's algorithm + int[][] actualMST = Kruskal.getKruskalMST(nodes, adjacencyMatrix); + + // Expected MST + // A -1- B -1- C + int[][] expectedMST = { + {0, 1, 1}, // A: A-B, A-C + {1, 0, Integer.MAX_VALUE}, // B: B-A + {1, Integer.MAX_VALUE, 0} // C: C-A + }; + + // Assertion + assertArrayEquals(expectedMST, actualMST); + } + + @Test + public void test_complexGraph() { + // Graph setup + // A + // / | \ + // 1 4 3 + /// | \ + //B --3-- D + // \ | / + // 2 4 1 + // \|/ + // C + int[][] adjacencyMatrix = { + {0, 1, 4, 3}, // A: A-B, A-C, A-D + 
{1, 0, 2, 3}, // B: B-A, B-C, B-D + {4, 2, 0, 1}, // C: C-A, C-B, C-D + {3, 3, 1, 0} // D: D-A, D-B, D-C + }; + + Kruskal.Node[] nodes = { + new Kruskal.Node("A", 0), + new Kruskal.Node("B", 1), + new Kruskal.Node("C", 2), + new Kruskal.Node("D", 3) + }; + + // Run Prim's algorithm + int[][] actualMST = Kruskal.getKruskalMST(nodes, adjacencyMatrix); + + // Expected MST + // Based on the graph, assuming the MST is correctly computed + int[][] expectedMST = { + {0, 1, Integer.MAX_VALUE, Integer.MAX_VALUE}, // A: A-B + {1, 0, 2, Integer.MAX_VALUE}, // B: B-A, B-C + {Integer.MAX_VALUE, 2, 0, 1}, // C: C-B, C-D + {Integer.MAX_VALUE, Integer.MAX_VALUE, 1, 0} // D: D-C + }; + + // Assertion + assertArrayEquals(expectedMST, actualMST); + } +} diff --git a/src/test/java/algorithms/minimumSpanningTree/prim/PrimTest.java b/src/test/java/algorithms/minimumSpanningTree/prim/PrimTest.java new file mode 100644 index 0000000..bb0012d --- /dev/null +++ b/src/test/java/algorithms/minimumSpanningTree/prim/PrimTest.java @@ -0,0 +1,85 @@ +package algorithms.minimumSpanningTree.prim; + +import static org.junit.Assert.assertArrayEquals; + +import org.junit.Test; + +public class PrimTest { + + @Test + public void test_simpleGraph() { + // Graph setup (Adjacency Matrix) + // B + // / \ + // 1 1 + // / \ + // A - 1 - C + int[][] adjacencyMatrix = { + {0, 1, 1}, // A: A-B, A-C + {1, 0, 1}, // B: B-A, B-C + {1, 1, 0} // C: C-A, C-B + }; + + Prim.Node[] nodes = { + new Prim.Node("A", 0), + new Prim.Node("B", 1), + new Prim.Node("C", 2) + }; + + // Run Prim's algorithm + int[][] actualMST = Prim.getPrimsMST(nodes, adjacencyMatrix); + + // Expected MST + // A -1- B -1- C + int[][] expectedMST = { + {0, 1, Integer.MAX_VALUE}, // A: A-B + {1, 0, 1}, // B: B-A, B-C + {Integer.MAX_VALUE, 1, 0} // C: C-B + }; + + // Assertion + assertArrayEquals(expectedMST, actualMST); + } + + @Test + public void test_complexGraph() { + // Graph setup + // A + // / | \ + // 1 4 3 + /// | \ + //B --3-- D + // \ | / + 
// 2 4 1 + // \|/ + // C + int[][] adjacencyMatrix = { + {0, 1, 4, 3}, // A: A-B, A-C, A-D + {1, 0, 2, 3}, // B: B-A, B-C, B-D + {4, 2, 0, 1}, // C: C-A, C-B, C-D + {3, 3, 1, 0} // D: D-A, D-B, D-C + }; + + Prim.Node[] nodes = { + new Prim.Node("A", 0), + new Prim.Node("B", 1), + new Prim.Node("C", 2), + new Prim.Node("D", 3) + }; + + // Run Prim's algorithm + int[][] actualMST = Prim.getPrimsMST(nodes, adjacencyMatrix); + + // Expected MST + // Based on the graph, assuming the MST is correctly computed + int[][] expectedMST = { + {0, 1, Integer.MAX_VALUE, Integer.MAX_VALUE}, // A: A-B + {1, 0, 2, Integer.MAX_VALUE}, // B: B-A, B-C + {Integer.MAX_VALUE, 2, 0, 1}, // C: C-B, C-D + {Integer.MAX_VALUE, Integer.MAX_VALUE, 1, 0} // D: D-C + }; + + // Assertion + assertArrayEquals(expectedMST, actualMST); + } +}