Merge pull request #84 from Suf42/lruCache
Add LRU Cache
Suf42 authored Aug 20, 2024
2 parents 19dabf2 + ba44980 commit 67bd654
Showing 3 changed files with 117 additions and 95 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -28,7 +28,7 @@ Gradle is used for development.
 - [Heap](src/main/java/dataStructures/heap)
   * Max heap implementation
 - [Linked List](src/main/java/dataStructures/linkedList)
-- LRU Cache
+- [LRU Cache](src/main/java/dataStructures/lruCache)
 - Minimum Spanning Tree
   * Kruskal
   * Prim's
160 changes: 66 additions & 94 deletions src/main/java/dataStructures/lruCache/LRU.java
@@ -1,7 +1,6 @@
 package dataStructures.lruCache;
 
 import java.util.HashMap;
-import java.util.Map;
 
 /**
  * Implementation of Least Recently Used (LRU) Cache
@@ -13,126 +12,99 @@
  * Client methods:
  * get(K key)
  * put(K key, V value)
- * Both methods above run in O(1) average time complexity
+ * Both methods above run in expected O(1) time complexity
  */
-public class LRU<K, V> {
-    private final int cap;
-    private final Map<K, Node<K, V>> map;
-    private final Node<K, V> left; // dummy left node to point to the left end
-    private final Node<K, V> right; // dummy right node to point to the right end
-
+class LRU<K, V> {
     /**
-     * Helper node class that encapsulates key-value pair and act as linked list to neighbour nodes.
+     * Helper node class that implements doubly linked list
      */
-    private class Node<K, V> {
-        private final K key;
+    private class doublyLinkedListNode<K, V> {
+        private K key;
         private V val;
-        private Node<K, V> next;
-        private Node<K, V> prev;
-
-        Node(K key, V value) {
-            this.key = key;
-            this.val = value;
-            this.next = null;
-            this.prev = null;
-        }
+        private doublyLinkedListNode<K, V> next;
+        private doublyLinkedListNode<K, V> prev;
     }
 
+    private doublyLinkedListNode<K, V> dllHead;
+    private doublyLinkedListNode<K, V> dllTail;
+    private HashMap<K, doublyLinkedListNode<K, V>> keyToNode = new HashMap<>();
+    private int capacity;
+    private int lengthOfList = 0;
+
     /**
-     * Constructs an instance of Least Recently Used Cache.
+     * Constructs an instance of Least Recently Used Cache
      *
-     * @param capacity the maximum capacity of the cache.
+     * @param capacity the maximum capacity of the cache
     */
     public LRU(int capacity) {
-        this.cap = capacity;
-        this.map = new HashMap<>();
-        this.left = new Node<>(null, null);
-        this.right = new Node<>(null, null);
-        this.left.next = this.right;
-        this.right.prev = this.left;
-    }
+        this.capacity = capacity;
 
-    /**
-     * Helper method to remove the specified node from the doubly linked list
-     *
-     * @param node to be removed from the linked list
-     */
-    private void remove(Node<K, V> node) {
-        Node<K, V> prev = node.prev;
-        Node<K, V> nxt = node.next;
-        prev.next = nxt;
-        nxt.prev = prev;
+        dllHead = new doublyLinkedListNode<>();
+        dllTail = new doublyLinkedListNode<>();
+        dllHead.next = dllTail;
+        dllTail.prev = dllHead;
     }
 
     /**
-     * Helper method to insert a node to the right end of the double linked list (Most Recently Used)
+     * Return the value of the key if it exists or return null
      *
-     * @param node to be inserted
-     */
-    private void insert(Node<K, V> node) {
-        Node<K, V> prev = this.right.prev;
-        prev.next = node;
-        node.prev = prev;
-        node.next = this.right;
-        this.right.prev = node;
-    }
-
-    /**
-     * return the value of the key if it exists; otherwise null
-     *
-     * @param key whose value, if exists, to be obtained
+     * @param key key of the value to be obtained from LRU cache
      */
     public V get(K key) {
-        if (this.map.containsKey(key)) {
-            Node<K, V> node = this.map.get(key);
-            this.remove(node);
-            this.insert(node);
-            return node.val;
-        }
-        return null;
+        if (!keyToNode.containsKey(key)) {
+            return null;
+        }
+
+        doublyLinkedListNode<K, V> temp = keyToNode.get(key);
+        temp.prev.next = temp.next;
+        temp.next.prev = temp.prev;
+
+        temp.next = dllHead.next;
+        dllHead.next.prev = temp;
+        temp.prev = dllHead;
+        dllHead.next = temp;
+
+        return keyToNode.get(key).val;
     }
 
     /**
-     * Update the value of the key if the key exists.
-     * Otherwise, add the key-value pair to the cache.
-     * If the number of keys exceeds the capacity from this operation, evict the least recently used key
+     * Insert key-value pair to LRU cache
      *
-     * @param key the key
-     * @param val the associated value
+     * @param key key of the value to be inserted to LRU cache
+     * @param value value to be inserted to LRU cache
     */
-    public void update(K key, V val) {
-        if (this.map.containsKey(key)) {
-            Node<K, V> node = this.map.get(key);
-            this.remove(node);
-            node.val = val;
-            this.insert(node); // make most recently used
+    public void put(K key, V value) {
+        boolean addingNewNode = true;
+
+        doublyLinkedListNode<K, V> newlyCached;
+
+        if (!keyToNode.containsKey(key)) {
+            newlyCached = new doublyLinkedListNode<>();
+            newlyCached.key = key;
+            newlyCached.val = value;
+            keyToNode.put(key, newlyCached);
         } else {
-            Node<K, V> node = new Node<>(key, val);
-            this.map.put(node.key, node);
-            this.insert(node);
-        }
+            newlyCached = keyToNode.get(key);
+            newlyCached.val = value;
+            addingNewNode = false;
 
-        if (this.map.size() > this.cap) { // evict LRU since capacity exceeded
-            Node<K, V> toRemove = this.left.next;
-            this.map.remove(toRemove.key);
-            this.remove(toRemove);
-        }
-    }
+            newlyCached.prev.next = newlyCached.next;
+            newlyCached.next.prev = newlyCached.prev;
+        }
 
-    /**
-     * Custom print for testing
-     * prints from LRU to MRU (Most recently used)
-     */
-    public void print() {
-        Node<K, V> trav = this.left.next;
-        System.out.print("Dummy");
-        while (trav != this.right) {
-            System.out.print(" ->");
-            System.out.print(trav.key);
-            System.out.print(",");
-            System.out.print(trav.val);
-            trav = trav.next;
-        }
-        System.out.println();
-    }
+        newlyCached.next = dllHead.next;
+        dllHead.next.prev = newlyCached;
+        newlyCached.prev = dllHead;
+        dllHead.next = newlyCached;
+
+        if (addingNewNode) {
+            if (lengthOfList == capacity) {
+                keyToNode.remove(dllTail.prev.key);
+                dllTail.prev.prev.next = dllTail;
+                dllTail.prev = dllTail.prev.prev;
+            } else {
+                lengthOfList++;
+            }
+        }
+    }
 }
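
For illustration, a minimal usage sketch of the merged class. The driver below is hypothetical, not part of this commit, and sits in the same package because LRU is package-private:

    package dataStructures.lruCache;

    public class LruDemo {
        public static void main(String[] args) {
            LRU<String, Integer> cache = new LRU<>(2);

            cache.put("a", 1);
            cache.put("a", 10);                 // existing key: value updated in place, no eviction
            System.out.println(cache.get("a")); // 10
            System.out.println(cache.get("b")); // null: "b" was never inserted

            cache.put("b", 2);
            cache.put("c", 3);                  // capacity exceeded: evicts "a", the least recently used key
            System.out.println(cache.get("a")); // null
        }
    }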
50 changes: 50 additions & 0 deletions src/main/java/dataStructures/lruCache/README.md
@@ -0,0 +1,50 @@
# LRU Cache

## Background

Programs built with well-structured design patterns tend to reuse data and instructions they have accessed recently (temporal locality) and to access data elements that are stored close together in memory (spatial locality).

### Temporal Locality

A Least Recently Used (LRU) cache operates on the principle that the most recently accessed data is likely to be accessed again in the near future (temporal locality). By evicting the least recently accessed items first, an LRU cache ensures that the most relevant data remains available in the cache.

### Applications

<ol>
<li>Operating systems: Operating systems use LRU caching for memory management in page-replacement algorithms. When a program requires more memory pages than are available in physical memory, the operating system decides which pages to evict to disk based on recency of use, ensuring that the most recently accessed pages remain in memory.</li>
<li>Web browsers: Web browsers use LRU cache to store frequently accessed web pages. This allows users to quickly revisit pages without the need to fetch the entire content from the server.</li>
<li>Databases: Databases use LRU cache to store frequent query results. This reduces the need to access the underlying storage system for repeated queries.</li>
</ol>

### Data Structures

Implementing an LRU cache typically involves using a combination of data structures. A common approach is to use a doubly-linked list to maintain the order of items based on access recency and a hash map to achieve constant-time access to any item in the cache. This combination effectively creates a data structure that supports the operations required for LRU cache. As nodes are connected in a doubly-linked list fashion, updating neighbours when rearranging recently cached items is as simple as redirecting the next and previous pointers of affected nodes.

<img src = "https://cdn.hashnode.com/res/hashnode/image/upload/v1655812960691/pqAZ20NyS.png?auto=compress,format&format=webp" alt = "Hash Map">
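
For comparison (not part of this PR), Java's standard library can supply both structures at once: `LinkedHashMap` maintains exactly such a doubly-linked list over its hash table entries when constructed in access order. A minimal sketch, with the class name `LruViaLinkedHashMap` chosen here purely for illustration:

```java
import java.util.LinkedHashMap;
import java.util.Map;

// LRU cache built on LinkedHashMap's access-order mode: the map itself
// maintains the doubly-linked list of entries described above.
class LruViaLinkedHashMap<K, V> extends LinkedHashMap<K, V> {
    private final int capacity;

    LruViaLinkedHashMap(int capacity) {
        super(16, 0.75f, true); // accessOrder = true: get() moves an entry to the MRU position
        this.capacity = capacity;
    }

    @Override
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > capacity; // consulted after each put; returning true evicts the LRU entry
    }
}
```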

### Cache Key

The hash map values are accessed through cache keys, which are unique references to the cached items in an LRU cache. Storing key-value pairs of cache keys and their corresponding nodes, which encapsulate the cached items, in a hash map allows us to avoid O(n) sequential access to cached items.

### Eviction

When the cache is full and a new item needs to be added, the eviction process is triggered. The item at the back of the list, which represents the least recently used data, is removed from both the list and the hash map. The new item is then added to the front of the list, and the cache key is stored in the hash map along with its corresponding cache value.

However, if a cached item is accessed through a read-only operation, we still move it to the front of the list without any eviction. Any form of interaction with a key therefore moves its corresponding node to the front of the doubly-linked list without eviction being triggered; eviction applies only to write operations on a full cache.
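
A short trace of this behaviour, sketched against the `LRU` class from this commit (the driver class is hypothetical and assumes it sits in the same package, since `LRU` is package-private):

```java
package dataStructures.lruCache;

public class EvictionDemo { // hypothetical driver, not part of this PR
    public static void main(String[] args) {
        LRU<String, Integer> cache = new LRU<>(2);
        cache.put("x", 1);
        cache.put("y", 2);

        cache.get("x");    // read-only access still moves "x" to the front, no eviction
        cache.put("z", 3); // cache is full: this write evicts "y", not "x"

        System.out.println(cache.get("y")); // null: "y" was the least recently used key
        System.out.println(cache.get("x")); // 1: the earlier read refreshed "x"
    }
}
```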

## Complexity Analysis

**Time**: **expected** O(1) complexity

The get and put operations of the LRU cache rely on basic hash map operations to insert, access, and delete cache nodes. Insertion, lookup, and deletion in a well-designed hash map take O(1) time on average, and the doubly-linked list supports insertion and removal of nodes in O(1) time, so both client methods run in expected O(1) time.

**Space**: O(cache capacity)

## Notes

<ol>
<li>Cache hit/miss ratio: A simple metric for measuring the effectiveness of the cache is the cache hit ratio: the percentage of requests served from the cache without needing to access the original data store. For most applications, a hit ratio of 95-99% is ideal.</li>
<li>Outdated cached data: A cached item that is accessed frequently enough to remain in the cache for a long time may become stale relative to the underlying data store.</li>
<li>Thread safety: When multiple threads access the cache at the same time, careful consideration is required; even a read-only get reorders the recency list. Thread-safe caching mechanisms may involve the proper use of mutex locks; a minimal sketch follows this list.</li>
<li>Other caching algorithms: First-In-First-Out (FIFO) cache, Least Frequently Used (LFU) cache, Most Recently Used (MRU) cache, and Random Replacement (RR) cache. The performance of different caching algorithms depends entirely on the application. LRU caching provides a good balance between performance and memory usage, making it suitable for a wide range of applications as most applications obey recency of data access (we often do reuse the same data in many applications). However, in the event that access patterns are random or even anti-recent, random replacement may perform better as it has less overhead when compared to LRU due to lack of bookkeeping.</li>
</ol>
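
As a sketch of the thread-safety point above (an illustration, not part of this PR): a wrapper whose single intrinsic lock serializes both client methods. Even `get` must hold the lock, because it relinks nodes in the recency list. The wrapper class is hypothetical and assumes it sits in the same package as `LRU`.

```java
package dataStructures.lruCache;

// Hypothetical thread-safe wrapper: one mutex (the object's intrinsic lock)
// serializes get and put, both of which mutate the doubly-linked list.
public class SynchronizedLru<K, V> {
    private final LRU<K, V> cache;

    public SynchronizedLru(int capacity) {
        this.cache = new LRU<>(capacity);
    }

    public synchronized V get(K key) {
        return cache.get(key); // get() also relinks nodes, so it needs the lock
    }

    public synchronized void put(K key, V value) {
        cache.put(key, value);
    }
}
```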
