Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add BPQP example #1863

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added examples/portfolio/BPQP/BPQP.pdf
Binary file not shown.
57 changes: 57 additions & 0 deletions examples/portfolio/BPQP/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# BPQP
The implementation of the paper: "BPQP: A Differentiable Convex Optimization Framework for Efficient End-to-End Learning" [TODO: arXiv Hyperlink]

![avatar](frame.png)

# Data & Environment
* Install Python 3.7, 3.8, or 3.9.
* Install PyTorch (1.12.0 in our experiments).
* Install the requirements in [requirements.txt](requirements.txt).
* Install the quantitative investment platform Qlib and download the data from Qlib:
```
# install Qlib from source
pip install --upgrade cython
git clone https://github.com/microsoft/qlib.git && cd qlib
python setup.py install

# Download the stock features of Alpha158 from Qlib
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn --version v2
```
* Run [dataset/prepare_dataset.py](dataset/prepare_dataset.py) to generate the train/valid/test datasets
```
python prepare_dataset.py
```
# Reproduce our BPQP in large-scale QPs and LPs experiment

![avatar](speed.png)

[Large scale QPs and LPs experiment.ipynb](Large%20scale%20QPs%20and%20LPs%20experiment.ipynb)

# Reproduce our BPQP for SOCP experiment

[SOCP_exp.ipynb](SOCP_exp.ipynb)

# Reproduce our BPQP for end-to-end portfolio optimization
```
python main.py --market CN --loss e2e --predictor mlp --solver bpqp
```

# Reproduce benchmark
* Two-Stage
```
python main.py --market CN --loss mse --predictor mlp --solver bpqp
```

* DC3
```
python main.py --market CN --loss e2e --predictor mlp --solver dc3
```

# About the analysis & results
The results in the paper are located in the directory [./analysis/](./analysis/). They are named after the corresponding table or figure name in the paper.

For example, to obtain the results of Table 1:
- The required data is located in [./analysis/data/Table1/](./analysis/data/Table1/).
- The script that generates the results is [./analysis/Table1.py](./analysis/Table1.py).

Similarly, for the rest.
110 changes: 110 additions & 0 deletions examples/portfolio/BPQP/analysis/Table1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Render Table 1 of the BPQP paper (QP/LP/SOCP runtime benchmarks) as LaTeX."""
import re
import sys
from pathlib import Path

import numpy as np
import pandas as pd

DIRNAME = Path(__file__).absolute().resolve().parent

# NOTE: this must run *before* importing the sibling module `data_loader`,
# otherwise the import fails whenever the script is launched from another
# directory.  (The original code appended to sys.path after the import.)
sys.path.append(str(DIRNAME))

from data_loader import OURM, load_lp_qp_data, load_socp_data

# Canonical ordering of solver methods and of forward/backward passes,
# used as the sort key when laying out table rows and columns.
method_order = [OURM, f"{OURM}+Exact", f"{OURM}[O.F.]", "DC3", "Qpth", "QPTH", 'CVXPY', 'Exact']
pass_order = ['F.', "B.", 'A.']
ppname = "BPQP"  # display name substituted for the internal method label


def print_latex_table(df_mean, df_std, method_grp):
    """Print one benchmark table (mean ± std runtimes) as LaTeX on stdout.

    Args:
        df_mean: DataFrame of mean runtimes whose index/columns carry the
            'method', 'pass' and 'size' labels produced by the data loaders.
        df_std: DataFrame of matching standard deviations (same layout).
        method_grp: problem-family label (e.g. "QP", "LP", "SOCP"); shown in
            the banner and used as the outer 'dataset' level of the table.
    """
    print("=" * 50, method_grp, "=" * 50)

    def get_key(x):
        # Sort key: known method/pass labels sort by their position in
        # method_order/pass_order; "<n>x<m>"-style size labels sort
        # numerically by their first dimension.
        if hasattr(x, "to_series"):  # in case x is an Index rather than a Series
            x = x.to_series()

        cat_list = method_order + pass_order
        try:
            x = x.apply(cat_list.index)
            return x
        except ValueError:
            pass
        # Not a known label: parse sizes such as "10x5", "10*5" or "10×5".
        m = re.match(r"\d+(x|\*|×)\d+", x.iloc[0])
        if m is not None:
            sep = m.groups()[0]
            x = x.apply(lambda z: int(z.split(sep)[0]))
            return x

    def rename(x):
        # Map internal labels to the display names used in the paper.
        if OURM in x:
            return x.replace(OURM, ppname)
        mp = {
            "Qpth": "qpth/OptNet",
            "QPTH": "qpth/OptNet",
            "F.": "Forward",
            "B.": "Backward",
            "A.": "Total(Forward + Backward)",
        }
        return mp.get(x, x)

    def tex_rename(x):
        # Use the multiplication sign in size labels for the LaTeX output.
        if 'x' in x:
            return x.replace("x", r"×")
        return x

    def get_tex_df(df):
        # Reshape the raw table into rows of methods and a (pass, size)
        # column MultiIndex, sorted canonically; rows are reversed to match
        # the paper's ordering.
        text_df2 = df
        text_df2 = text_df2.stack(dropna=False).to_frame("time").reset_index()
        text_df2 = text_df2.sort_values(['size', 'method', 'pass'], key=get_key)
        tex_df = text_df2.set_index(['pass', 'method',
                                     'size'])['time'].unstack('size').stack().unstack('pass').unstack().sort_index(
                                         key=get_key).sort_index(axis=1, key=get_key).iloc[
                                             ::-1,
                                         ]
        tex_df = tex_df.rename(rename, axis=0).rename(rename, axis=1)
        return tex_df

    def add_level(df, name, idx_name):
        # Prepend a constant outer index level `name` (named `idx_name`).
        names = [idx_name] + list(df.index.names)
        df = pd.concat({name: df})
        df.index.names = names
        return df

    tex_df = get_tex_df(df_mean)
    tex_df_std = get_tex_df(df_std)

    # Common power-of-ten scale so every cell prints as a small number.
    scale = 10**np.ceil(np.log10(tex_df.min().min()))

    def cbf(s1, s2):
        # Format one cell as "mean(±std)"; NaN marks a missing entry.
        if np.isnan(s1):
            return "-"
        return "{:.1f}(±{:.1f})".format(s1, s2)

    def cbf2(s1, s2):
        return s1.combine(s2, cbf)

    tex_df_all = (tex_df / scale).combine((tex_df_std / scale), cbf2)

    # combine() may reorder labels; restore the canonical row/column order.
    tex_df_all = tex_df_all.reindex(tex_df.index, axis=0).reindex(tex_df.columns, axis=1)

    print("(scale {:.1e})".format(scale))

    # Drop the forward-pass columns and emit the final LaTeX table.
    print(
        add_level(add_level(tex_df_all.drop("Forward", axis=1, level=0), 'abs. time', 'metric'), method_grp,
                  'dataset').rename(columns=tex_rename).to_latex())


# Render the QP and LP runtime tables from their raw-result CSVs, then the
# SOCP table from its dedicated loader.
_RAW_RESULT_FILES = (
    "BPQP_QP_raw_results.csv",  # selected QP results
    "BPQP_LP_raw_results.csv",  # selected LP results
)

for fname in _RAW_RESULT_FILES:
    # The problem family ("QP" or "LP") is encoded at a fixed position in
    # the file name.
    method_grp = fname[5:7]
    df_mean, df_std = load_lp_qp_data(fname)
    # Align the std table with the mean table's row order.
    df_std = df_std.reindex(df_mean.index)
    print_latex_table(df_mean, df_std, method_grp)

# SOCP results.
df_mean, df_std = load_socp_data()
print_latex_table(df_mean, df_std, "SOCP")
123 changes: 123 additions & 0 deletions examples/portfolio/BPQP/analysis/Table2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Build Table 2 of the BPQP paper (solver accuracy comparison) as LaTeX."""
# All imports grouped at the top per PEP 8 (the original interleaved them
# with code); `data_path` is only consumed later, so behavior is unchanged.
import re
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from e2eutils import rename, add_level, get_key, tex_rename

# Directory holding the raw result CSVs for Table 2.
DIRNAME = Path(__file__).absolute().resolve().parent
data_path = DIRNAME / "data" / "Table2"

def load_qp_acc():
    """Load QP accuracy results and pivot them into paper-table layout.

    Reads ``BPQP_QP_results.csv`` (rows labeled like
    ``"Accuracy <method> <pass> ndim:<n> neq=nineq=<m>"`` with values
    formatted ``"<err>(<std>)"``) and returns two DataFrames — mean error
    and its std — indexed by (pass, method) with one column per problem
    scale ``"<n>×<m>"``.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(final_df, final_df_std)``.
    """
    acc = pd.read_csv(data_path / 'BPQP_QP_results.csv')

    # Each cell of column "0" looks like "<err>(<std>)".  Split both parts in
    # a single vectorized pass (the original applied the same regex twice
    # per row).
    err_std = acc.loc[:, "0"].str.extract(r"(?P<err>.*)\((?P<std>.*)\)")
    acc['Err.'] = err_std['err']
    acc['Std.'] = err_std['std']

    acc = acc.set_index('avg')

    # Keep only accuracy rows, excluding forward-pass accuracy.
    acc = acc.loc[acc.index.str.startswith("Acc"), :]
    acc = acc.loc[~acc.index.str.startswith("Accuracy Forward"), :]

    from collections import defaultdict

    # Parse the structured row labels into separate columns; compile the
    # pattern once and iterate the index directly (iterrows was only used
    # for the index).
    row_pat = re.compile(r"Accuracy (?P<method>\w+) (?P<pass>\w+) ndim:(?P<var_n>\d+) neq=nineq=(?P<con_n>\d+)")
    new_col = defaultdict(list)
    for label in acc.index:
        gd = row_pat.match(label).groupdict()
        gd['scale'] = f"{gd['var_n']}x{gd['con_n']}"
        for k, v in gd.items():
            new_col[k].append(v)

    for col, values in new_col.items():
        acc[col] = values

    acc = acc.loc[:, ['scale', 'method', 'pass', 'Err.', 'Std.']]

    def _pivot(value_col):
        # scale -> columns; (pass, method) -> row MultiIndex.
        return acc.set_index([
            'scale',
            'method',
            'pass',
        ]).loc[:, value_col].unstack('scale').swaplevel()

    final_df = _pivot('Err.').rename(tex_rename, axis=1)
    final_df_std = _pivot('Std.').rename(tex_rename, axis=1)

    # The QP backward pass is reported simply as "QP" in the paper.
    final_df = final_df.rename({"Backward": "QP"})
    final_df_std = final_df_std.rename({"Backward": "QP"})
    return final_df, final_df_std


# Load the QP accuracy tables, then append the SOCP accuracy results.
final_df, final_df_std = load_qp_acc()

# Load the SOCP solver accuracies (CVXPY vs. OSQP/BPQP) side by side.
df = pd.concat(
    {
        "CVXPY": pd.read_csv(data_path / 'cp_acc.csv', index_col=0),
        "OSQP": pd.read_csv(data_path / 'bpqp_acc.csv', index_col=0),
    },
    axis=1,
).rename(columns=rename)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent (same row-wise concatenation semantics).
final_df = pd.concat([final_df, pd.concat({
    "SOCP": df.mean().unstack(),
}, axis=0)])

final_df_std = pd.concat([final_df_std, pd.concat({
    "SOCP": df.std().unstack(),
}, axis=0)])


def show_table(final_df, final_df_std):
    """Print the aggregated accuracy table (geometric-mean error ± std) as LaTeX."""

    def _aggregate(table):
        # Canonically order rows/columns, prettify labels, then reduce each
        # row to the geometric mean of its error across problem scales.
        from scipy.stats import gmean

        table = table.astype("float")
        table = table.sort_index(key=get_key).sort_index(key=get_key, axis=1)
        table = table.rename(rename, axis=0)
        table.columns.name = 'scale'
        table = table.iloc[::-1]
        table = table.dropna(axis=1)
        return table.apply(gmean, axis=1).to_frame("Avg. Err.")

    mean_tbl = _aggregate(final_df)
    std_tbl = _aggregate(final_df_std)

    def _fmt_cell(mean_val, std_val):
        # NaN marks a configuration that was not run.
        if np.isnan(mean_val):
            return "-"
        return "{:.2e}(±{:.2e})".format(mean_val, std_val)

    def _fmt_series(s1, s2):
        return s1.combine(s2, _fmt_cell)

    combined = mean_tbl.combine(std_tbl, _fmt_series)
    combined = combined.T.sort_index(key=get_key, axis=1)
    combined.columns.names = ['', "method"]

    print(combined.to_latex())


show_table(final_df, final_df_std)
Loading
Loading