Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add BPQP example #1863

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added examples/portfolio/BPQP/BPQP.pdf
Binary file not shown.
57 changes: 57 additions & 0 deletions examples/portfolio/BPQP/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# BPQP
The implementation of the paper: "BPQP: A Differentiable Convex Optimization Framework for Efficient End-to-End Learning" [TODO: arXiv Hyperlink]

![avatar](frame.png)

# Data & Environment
* Install Python 3.7, 3.8, or 3.9.
* Install PyTorch (1.12.0 in our experiments).
* Install the requirements in [requirements.txt](requirements.txt).
* Install the quantitative investment platform Qlib and download the data from Qlib:
```
# install Qlib from source
pip install --upgrade cython
git clone https://github.com/microsoft/qlib.git && cd qlib
python setup.py install

# Download the stock features of Alpha158 from Qlib
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn --version v2
```
* Run [dataset/prepare_dataset.py](dataset/prepare_dataset.py) to generate the train/valid/test datasets
```
python prepare_dataset.py
```
# Reproduce our BPQP in large-scale QPs and LPs experiment

![avatar](speed.png)

[Large scale QPs and LPs experiment.ipynb](Large%20scale%20QPs%20and%20LPs%20experiment.ipynb)

# Reproduce our BPQP for SOCP experiment

[SOCP_exp.ipynb](SOCP_exp.ipynb)

# Reproduce our BPQP for end-to-end portfolio optimization
```
python main.py --market CN --loss e2e --predictor mlp --solver bpqp
```

# Reproduce benchmark
* Two-Stage
```
python main.py --market CN --loss mse --predictor mlp --solver bpqp
```

* DC3
```
python main.py --market CN --loss e2e --predictor mlp --solver dc3
```

# About the analysis & results
The results in the paper are located in the directory [./analysis/](./analysis/). They are named after the corresponding table or figure name in the paper.

For example, to obtain the results of Table 1:
- The required data is located in [./analysis/data/Table1/](./analysis/data/Table1/).
- The script that generates the results is [./analysis/Table1.py](./analysis/Table1.py).

Similarly, for the rest.
110 changes: 110 additions & 0 deletions examples/portfolio/BPQP/analysis/Table1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Render Table 1 of the BPQP paper (QP/LP/SOCP runtime benchmarks) as LaTeX."""
import re
import sys
from pathlib import Path

import numpy as np
import pandas as pd

DIRNAME = Path(__file__).absolute().resolve().parent

# NOTE: this must run *before* importing the sibling module `data_loader`,
# otherwise the import fails whenever the script is launched from another
# directory.  (The original code appended to sys.path after the import.)
sys.path.append(str(DIRNAME))

from data_loader import OURM, load_lp_qp_data, load_socp_data

# Canonical ordering of solver methods and of forward/backward passes,
# used as the sort key when laying out table rows and columns.
method_order = [OURM, f"{OURM}+Exact", f"{OURM}[O.F.]", "DC3", "Qpth", "QPTH", 'CVXPY', 'Exact']
pass_order = ['F.', "B.", 'A.']
ppname = "BPQP"  # display name substituted for the internal method label


def print_latex_table(df_mean, df_std, method_grp):
    """Print one benchmark table (mean ± std runtimes) as LaTeX on stdout.

    Args:
        df_mean: DataFrame of mean runtimes whose index/columns carry the
            'method', 'pass' and 'size' labels produced by the data loaders.
        df_std: DataFrame of matching standard deviations (same layout).
        method_grp: problem-family label (e.g. "QP", "LP", "SOCP"); shown in
            the banner and used as the outer 'dataset' level of the table.
    """
    print("=" * 50, method_grp, "=" * 50)

    def get_key(x):
        # Sort key: known method/pass labels sort by their position in
        # method_order/pass_order; "<n>x<m>"-style size labels sort
        # numerically by their first dimension.
        if hasattr(x, "to_series"):  # in case x is an Index rather than a Series
            x = x.to_series()

        cat_list = method_order + pass_order
        try:
            x = x.apply(cat_list.index)
            return x
        except ValueError:
            pass
        # Not a known label: parse sizes such as "10x5", "10*5" or "10×5".
        m = re.match(r"\d+(x|\*|×)\d+", x.iloc[0])
        if m is not None:
            sep = m.groups()[0]
            x = x.apply(lambda z: int(z.split(sep)[0]))
            return x

    def rename(x):
        # Map internal labels to the display names used in the paper.
        if OURM in x:
            return x.replace(OURM, ppname)
        mp = {
            "Qpth": "qpth/OptNet",
            "QPTH": "qpth/OptNet",
            "F.": "Forward",
            "B.": "Backward",
            "A.": "Total(Forward + Backward)",
        }
        return mp.get(x, x)

    def tex_rename(x):
        # Use the multiplication sign in size labels for the LaTeX output.
        if 'x' in x:
            return x.replace("x", r"×")
        return x

    def get_tex_df(df):
        # Reshape the raw table into rows of methods and a (pass, size)
        # column MultiIndex, sorted canonically; rows are reversed to match
        # the paper's ordering.
        text_df2 = df
        text_df2 = text_df2.stack(dropna=False).to_frame("time").reset_index()
        text_df2 = text_df2.sort_values(['size', 'method', 'pass'], key=get_key)
        tex_df = text_df2.set_index(['pass', 'method',
                                     'size'])['time'].unstack('size').stack().unstack('pass').unstack().sort_index(
                                         key=get_key).sort_index(axis=1, key=get_key).iloc[
                                             ::-1,
                                         ]
        tex_df = tex_df.rename(rename, axis=0).rename(rename, axis=1)
        return tex_df

    def add_level(df, name, idx_name):
        # Prepend a constant outer index level `name` (named `idx_name`).
        names = [idx_name] + list(df.index.names)
        df = pd.concat({name: df})
        df.index.names = names
        return df

    tex_df = get_tex_df(df_mean)
    tex_df_std = get_tex_df(df_std)

    # Common power-of-ten scale so every cell prints as a small number.
    scale = 10**np.ceil(np.log10(tex_df.min().min()))

    def cbf(s1, s2):
        # Format one cell as "mean(±std)"; NaN marks a missing entry.
        if np.isnan(s1):
            return "-"
        return "{:.1f}(±{:.1f})".format(s1, s2)

    def cbf2(s1, s2):
        return s1.combine(s2, cbf)

    tex_df_all = (tex_df / scale).combine((tex_df_std / scale), cbf2)

    # combine() may reorder labels; restore the canonical row/column order.
    tex_df_all = tex_df_all.reindex(tex_df.index, axis=0).reindex(tex_df.columns, axis=1)

    print("(scale {:.1e})".format(scale))

    # Drop the forward-pass columns and emit the final LaTeX table.
    print(
        add_level(add_level(tex_df_all.drop("Forward", axis=1, level=0), 'abs. time', 'metric'), method_grp,
                  'dataset').rename(columns=tex_rename).to_latex())


# Render the QP and LP runtime tables from their raw-result CSVs, then the
# SOCP table from its dedicated loader.
_RAW_RESULT_FILES = (
    "BPQP_QP_raw_results.csv",  # selected QP results
    "BPQP_LP_raw_results.csv",  # selected LP results
)

for fname in _RAW_RESULT_FILES:
    # The problem family ("QP" or "LP") is encoded at a fixed position in
    # the file name.
    method_grp = fname[5:7]
    df_mean, df_std = load_lp_qp_data(fname)
    # Align the std table with the mean table's row order.
    df_std = df_std.reindex(df_mean.index)
    print_latex_table(df_mean, df_std, method_grp)

# SOCP results.
df_mean, df_std = load_socp_data()
print_latex_table(df_mean, df_std, "SOCP")
123 changes: 123 additions & 0 deletions examples/portfolio/BPQP/analysis/Table2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Build Table 2 of the BPQP paper (solver accuracy comparison) as LaTeX."""
# All imports grouped at the top per PEP 8 (the original interleaved them
# with code); `data_path` is only consumed later, so behavior is unchanged.
import re
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from e2eutils import rename, add_level, get_key, tex_rename

# Directory holding the raw result CSVs for Table 2.
DIRNAME = Path(__file__).absolute().resolve().parent
data_path = DIRNAME / "data" / "Table2"

def load_qp_acc():
    """Load QP accuracy results and pivot them into paper-table layout.

    Reads ``BPQP_QP_results.csv`` (rows labeled like
    ``"Accuracy <method> <pass> ndim:<n> neq=nineq=<m>"`` with values
    formatted ``"<err>(<std>)"``) and returns two DataFrames — mean error
    and its std — indexed by (pass, method) with one column per problem
    scale ``"<n>×<m>"``.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(final_df, final_df_std)``.
    """
    acc = pd.read_csv(data_path / 'BPQP_QP_results.csv')

    # Each cell of column "0" looks like "<err>(<std>)".  Split both parts in
    # a single vectorized pass (the original applied the same regex twice
    # per row).
    err_std = acc.loc[:, "0"].str.extract(r"(?P<err>.*)\((?P<std>.*)\)")
    acc['Err.'] = err_std['err']
    acc['Std.'] = err_std['std']

    acc = acc.set_index('avg')

    # Keep only accuracy rows, excluding forward-pass accuracy.
    acc = acc.loc[acc.index.str.startswith("Acc"), :]
    acc = acc.loc[~acc.index.str.startswith("Accuracy Forward"), :]

    from collections import defaultdict

    # Parse the structured row labels into separate columns; compile the
    # pattern once and iterate the index directly (iterrows was only used
    # for the index).
    row_pat = re.compile(r"Accuracy (?P<method>\w+) (?P<pass>\w+) ndim:(?P<var_n>\d+) neq=nineq=(?P<con_n>\d+)")
    new_col = defaultdict(list)
    for label in acc.index:
        gd = row_pat.match(label).groupdict()
        gd['scale'] = f"{gd['var_n']}x{gd['con_n']}"
        for k, v in gd.items():
            new_col[k].append(v)

    for col, values in new_col.items():
        acc[col] = values

    acc = acc.loc[:, ['scale', 'method', 'pass', 'Err.', 'Std.']]

    def _pivot(value_col):
        # scale -> columns; (pass, method) -> row MultiIndex.
        return acc.set_index([
            'scale',
            'method',
            'pass',
        ]).loc[:, value_col].unstack('scale').swaplevel()

    final_df = _pivot('Err.').rename(tex_rename, axis=1)
    final_df_std = _pivot('Std.').rename(tex_rename, axis=1)

    # The QP backward pass is reported simply as "QP" in the paper.
    final_df = final_df.rename({"Backward": "QP"})
    final_df_std = final_df_std.rename({"Backward": "QP"})
    return final_df, final_df_std


# Load the QP accuracy tables, then append the SOCP accuracy results.
final_df, final_df_std = load_qp_acc()

# Load the SOCP solver accuracies (CVXPY vs. OSQP/BPQP) side by side.
df = pd.concat(
    {
        "CVXPY": pd.read_csv(data_path / 'cp_acc.csv', index_col=0),
        "OSQP": pd.read_csv(data_path / 'bpqp_acc.csv', index_col=0),
    },
    axis=1,
).rename(columns=rename)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent (same row-wise concatenation semantics).
final_df = pd.concat([final_df, pd.concat({
    "SOCP": df.mean().unstack(),
}, axis=0)])

final_df_std = pd.concat([final_df_std, pd.concat({
    "SOCP": df.std().unstack(),
}, axis=0)])


def show_table(final_df, final_df_std):
    """Print the aggregated accuracy table (geometric-mean error ± std) as LaTeX."""

    def _aggregate(table):
        # Canonically order rows/columns, prettify labels, then reduce each
        # row to the geometric mean of its error across problem scales.
        from scipy.stats import gmean

        table = table.astype("float")
        table = table.sort_index(key=get_key).sort_index(key=get_key, axis=1)
        table = table.rename(rename, axis=0)
        table.columns.name = 'scale'
        table = table.iloc[::-1]
        table = table.dropna(axis=1)
        return table.apply(gmean, axis=1).to_frame("Avg. Err.")

    mean_tbl = _aggregate(final_df)
    std_tbl = _aggregate(final_df_std)

    def _fmt_cell(mean_val, std_val):
        # NaN marks a configuration that was not run.
        if np.isnan(mean_val):
            return "-"
        return "{:.2e}(±{:.2e})".format(mean_val, std_val)

    def _fmt_series(s1, s2):
        return s1.combine(s2, _fmt_cell)

    combined = mean_tbl.combine(std_tbl, _fmt_series)
    combined = combined.T.sort_index(key=get_key, axis=1)
    combined.columns.names = ['', "method"]

    print(combined.to_latex())


show_table(final_df, final_df_std)
Loading
Loading