-
Notifications
You must be signed in to change notification settings - Fork 2.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Small fix for local storage #1556
base: main
Are you sure you want to change the base?
Changes from all commits
223252c
60edbc4
d607d81
5579b8a
c0b60e9
07083ae
655e666
0a0d7dc
a0d1450
70b5c9f
58f73de
ba2df87
1e9140d
194ac59
a656648
706138c
fab4e0a
9a0291f
d9936c4
6cefe4a
a65fca8
9e990e5
e5df276
ee50f7c
9864038
2d0162d
e2019f8
42ba746
b624ddf
e9fbb4f
10e27d5
b300af7
8e446aa
8bcf09e
97c6799
265fdc9
f3ce11a
68e2640
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
[build-system] | ||
requires = ["setuptools", "numpy", "Cython"] | ||
requires = ["setuptools", "numpy", "Cython"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,6 +80,7 @@ def __init__(self, freq: str, future: bool, provider_uri: dict = None, **kwargs) | |
self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri) | ||
self.enable_read_cache = True # TODO: make it configurable | ||
self.region = C["region"] | ||
self.uri.parent.mkdir(parents=True, exist_ok=True) | ||
|
||
@property | ||
def file_name(self) -> str: | ||
|
@@ -90,7 +91,7 @@ def _freq_file(self) -> str: | |
"""the freq to read from file""" | ||
if not hasattr(self, "_freq_file_cache"): | ||
freq = Freq(self.freq) | ||
if freq not in self.support_freq: | ||
if self.support_freq and freq not in self.support_freq: | ||
# NOTE: uri | ||
# 1. If `uri` does not exist | ||
# - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri` | ||
|
@@ -199,6 +200,7 @@ def __init__(self, market: str, freq: str, provider_uri: dict = None, **kwargs): | |
super(FileInstrumentStorage, self).__init__(market, freq, **kwargs) | ||
self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri) | ||
self.file_name = f"{market.lower()}.txt" | ||
self.uri.parent.mkdir(parents=True, exist_ok=True) | ||
|
||
def _read_instrument(self) -> Dict[InstKT, InstVT]: | ||
if not self.uri.exists(): | ||
|
@@ -233,7 +235,6 @@ def _write_instrument(self, data: Dict[InstKT, InstVT] = None) -> None: | |
df.loc[:, [self.SYMBOL_FIELD_NAME, self.INSTRUMENT_START_FIELD, self.INSTRUMENT_END_FIELD]].to_csv( | ||
self.uri, header=False, sep=self.INSTRUMENT_SEP, index=False | ||
) | ||
df.to_csv(self.uri, sep="\t", encoding="utf-8", header=False, index=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why remove this to_csv method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previous line already have |
||
|
||
def clear(self) -> None: | ||
self._write_instrument(data={}) | ||
|
@@ -287,6 +288,7 @@ def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict = | |
super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs) | ||
self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri) | ||
self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin" | ||
self.uri.parent.mkdir(parents=True, exist_ok=True) | ||
|
||
def clear(self): | ||
with self.uri.open("wb") as _: | ||
|
@@ -318,15 +320,15 @@ def write(self, data_array: Union[List, np.ndarray], index: int = None) -> None: | |
# rewrite | ||
with self.uri.open("rb+") as fp: | ||
_old_data = np.fromfile(fp, dtype="<f") | ||
_old_index = _old_data[0] | ||
_old_index = int(_old_data[0]) | ||
_old_df = pd.DataFrame( | ||
_old_data[1:], index=range(_old_index, _old_index + len(_old_data) - 1), columns=["old"] | ||
) | ||
fp.seek(0) | ||
_new_df = pd.DataFrame(data_array, index=range(index, index + len(data_array)), columns=["new"]) | ||
_df = pd.concat([_old_df, _new_df], sort=False, axis=1) | ||
_df = _df.reindex(range(_df.index.min(), _df.index.max() + 1)) | ||
_df["new"].fillna(_df["old"]).values.astype("<f").tofile(fp) | ||
np.hstack([_old_index, _df["new"].fillna(_df["old"]).values]).astype("<f").tofile(fp) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why fill the missing values in np.hstack([_old_index, _df["new"]) instead of _df["new"]? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The first item But I think current version still have bug when new |
||
|
||
@property | ||
def start_index(self) -> Union[int, None]: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The data structure of the score returned by get_signal is defined as Union[pd.Series, pd.DataFrame, None]:
qlib/qlib/contrib/strategy/signal_strategy.py
Line 353 in 05d67b3
Is it appropriate to restrict the score to be only a DataFrame?