[SPARK-50030][PYTHON][CONNECT] API compatibility check for Window
### What changes were proposed in this pull request?

This PR proposes to add an API compatibility check for the Spark SQL Window functions.

### Why are the changes needed?

To guarantee the same behavior between Spark Classic and Spark Connect.
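In practice, such a check compares the public API surface of the Classic and Connect `Window` / `WindowSpec` classes by reflection. The snippet below is only an illustrative sketch of that idea, not the project's actual `check_compatibility` helper (which lives in `ConnectCompatibilityTestsMixin` and whose implementation is not shown in this diff); the comparison logic here is an assumption:

```python
# Illustrative sketch only -- not the actual check_compatibility helper.
import inspect

from pyspark.sql.window import Window as ClassicWindow
from pyspark.sql.connect.window import Window as ConnectWindow


def public_methods(cls):
    """Map the public callable names of a class to their signatures."""
    return {
        name: inspect.signature(member)
        for name, member in inspect.getmembers(cls, callable)
        if not name.startswith("_")
    }


classic = public_methods(ClassicWindow)
connect = public_methods(ConnectWindow)

# Both sides must expose the same set of public methods ...
assert classic.keys() == connect.keys(), "method sets differ"
# ... and each method's parameters and return annotation must line up.
for name in classic:
    assert str(classic[name]) == str(connect[name]), f"signature mismatch: {name}"
```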

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Added UTs

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #48541 from itholic/SPARK-50030.

Authored-by: Haejoon Lee <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
itholic authored and HyukjinKwon committed Oct 19, 2024
1 parent f8d9222 commit 14ed86e
Showing 2 changed files with 44 additions and 10 deletions.
python/pyspark/sql/connect/window.py (18 changes: 8 additions & 10 deletions)
@@ -84,23 +84,21 @@ def __init__(
         self._orderSpec = orderSpec
         self._frame = frame

-    def partitionBy(
-        self, *cols: Union["ColumnOrName", Sequence["ColumnOrName"]]
-    ) -> ParentWindowSpec:
+    def partitionBy(self, *cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> "WindowSpec":
         return WindowSpec(
             partitionSpec=[c._expr for c in _to_cols(cols)],  # type: ignore[misc]
             orderSpec=self._orderSpec,
             frame=self._frame,
         )

-    def orderBy(self, *cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> ParentWindowSpec:
+    def orderBy(self, *cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> "WindowSpec":
         return WindowSpec(
             partitionSpec=self._partitionSpec,
             orderSpec=[cast(SortOrder, F._sort_col(c)._expr) for c in _to_cols(cols)],
             frame=self._frame,
         )

-    def rowsBetween(self, start: int, end: int) -> ParentWindowSpec:
+    def rowsBetween(self, start: int, end: int) -> "WindowSpec":
         if start <= Window._PRECEDING_THRESHOLD:
             start = Window.unboundedPreceding
         if end >= Window._FOLLOWING_THRESHOLD:
@@ -112,7 +110,7 @@ def rowsBetween(self, start: int, end: int) -> ParentWindowSpec:
             frame=WindowFrame(isRowFrame=True, start=start, end=end),
         )

-    def rangeBetween(self, start: int, end: int) -> ParentWindowSpec:
+    def rangeBetween(self, start: int, end: int) -> "WindowSpec":
         if start <= Window._PRECEDING_THRESHOLD:
             start = Window.unboundedPreceding
         if end >= Window._FOLLOWING_THRESHOLD:
@@ -141,19 +139,19 @@ class Window(ParentWindow):
     _spec = WindowSpec(partitionSpec=[], orderSpec=[], frame=None)

     @staticmethod
-    def partitionBy(*cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> ParentWindowSpec:
+    def partitionBy(*cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> "WindowSpec":
         return Window._spec.partitionBy(*cols)

     @staticmethod
-    def orderBy(*cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> ParentWindowSpec:
+    def orderBy(*cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> "WindowSpec":
         return Window._spec.orderBy(*cols)

     @staticmethod
-    def rowsBetween(start: int, end: int) -> ParentWindowSpec:
+    def rowsBetween(start: int, end: int) -> "WindowSpec":
         return Window._spec.rowsBetween(start, end)

     @staticmethod
-    def rangeBetween(start: int, end: int) -> ParentWindowSpec:
+    def rangeBetween(start: int, end: int) -> "WindowSpec":
         return Window._spec.rangeBetween(start, end)

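The annotation changes above only align the Spark Connect signatures with their Spark Classic counterparts (which already annotate these methods as returning `"WindowSpec"`), so user-facing code stays the same on both backends. A small usage sketch, assuming an already-created SparkSession bound to the name `spark` and illustrative data; under Spark Connect, the analogous builder class is `pyspark.sql.connect.window.Window`:

```python
# Usage sketch: the same chained Window API, unchanged by this patch.
# Assumes `spark` is an existing SparkSession; data and column names are illustrative.
from pyspark.sql import functions as F
from pyspark.sql.window import Window  # on Connect: pyspark.sql.connect.window.Window

df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3)], ["key", "value"])
w = (
    Window.partitionBy("key")
    .orderBy("value")
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)
df.withColumn("running_sum", F.sum("value").over(w)).show()
```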
python/pyspark/sql/tests/test_connect_compatibility.py (36 changes: 36 additions & 0 deletions)
@@ -28,6 +28,8 @@
 from pyspark.sql.readwriter import DataFrameReader as ClassicDataFrameReader
 from pyspark.sql.readwriter import DataFrameWriter as ClassicDataFrameWriter
 from pyspark.sql.readwriter import DataFrameWriterV2 as ClassicDataFrameWriterV2
+from pyspark.sql.window import Window as ClassicWindow
+from pyspark.sql.window import WindowSpec as ClassicWindowSpec

 if should_test_connect:
     from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
@@ -37,6 +39,8 @@
     from pyspark.sql.connect.readwriter import DataFrameReader as ConnectDataFrameReader
     from pyspark.sql.connect.readwriter import DataFrameWriter as ConnectDataFrameWriter
     from pyspark.sql.connect.readwriter import DataFrameWriterV2 as ConnectDataFrameWriterV2
+    from pyspark.sql.connect.window import Window as ConnectWindow
+    from pyspark.sql.connect.window import WindowSpec as ConnectWindowSpec


 class ConnectCompatibilityTestsMixin:
@@ -303,6 +307,38 @@ def test_dataframe_writer_v2_compatibility(self):
             expected_missing_classic_methods,
         )

+    def test_window_compatibility(self):
+        """Test Window compatibility between classic and connect."""
+        expected_missing_connect_properties = set()
+        expected_missing_classic_properties = set()
+        expected_missing_connect_methods = set()
+        expected_missing_classic_methods = set()
+        self.check_compatibility(
+            ClassicWindow,
+            ConnectWindow,
+            "Window",
+            expected_missing_connect_properties,
+            expected_missing_classic_properties,
+            expected_missing_connect_methods,
+            expected_missing_classic_methods,
+        )
+
+    def test_window_spec_compatibility(self):
+        """Test WindowSpec compatibility between classic and connect."""
+        expected_missing_connect_properties = set()
+        expected_missing_classic_properties = set()
+        expected_missing_connect_methods = set()
+        expected_missing_classic_methods = set()
+        self.check_compatibility(
+            ClassicWindowSpec,
+            ConnectWindowSpec,
+            "WindowSpec",
+            expected_missing_connect_properties,
+            expected_missing_classic_properties,
+            expected_missing_connect_methods,
+            expected_missing_classic_methods,
+        )
+

 @unittest.skipIf(not should_test_connect, connect_requirement_message)
 class ConnectCompatibilityTests(ConnectCompatibilityTestsMixin, ReusedSQLTestCase):
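As a usage note, the new cases can presumably be run like the rest of this module through Spark's Python test runner, e.g. `python/run-tests --testnames 'pyspark.sql.tests.test_connect_compatibility'` from a built checkout; when the Spark Connect dependencies are unavailable, `should_test_connect` makes the `@unittest.skipIf` guard above skip the whole suite.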
