From 6ce01a828deba5283fd7493b9ab35154d5bf133a Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Sat, 9 Nov 2024 17:04:25 +0200 Subject: [PATCH] Handle two warnings that pollute the output of sssom-py CLI (#561) See commits for a more detailed description of the changes. This PR should not introduce any functional changes. --- src/sssom/cli.py | 4 ++++ src/sssom/parsers.py | 4 +--- src/sssom/util.py | 16 ++++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/sssom/cli.py b/src/sssom/cli.py index 9762d700..3bd8b72a 100644 --- a/src/sssom/cli.py +++ b/src/sssom/cli.py @@ -54,6 +54,7 @@ filter_redundant_rows, invert_mappings, merge_msdf, + pandas_set_no_silent_downcasting, reconcile_prefix_and_data, remove_unmatched, sort_df_rows_columns, @@ -126,6 +127,9 @@ def main(verbose: int, quiet: bool): """Run the SSSOM CLI.""" logger = _logging.getLogger() + + pandas_set_no_silent_downcasting() + if verbose >= 2: logger.setLevel(level=_logging.DEBUG) elif verbose == 1: diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py index c2348fe3..28f96b43 100644 --- a/src/sssom/parsers.py +++ b/src/sssom/parsers.py @@ -424,9 +424,7 @@ def from_sssom_dataframe( # Need to revisit this solution. 
# This is to address: A value is trying to be set on a copy of a slice from a DataFrame if CONFIDENCE in df.columns: - df2 = df.copy() - df2[CONFIDENCE].replace(r"^\s*$", np.nan, regex=True, inplace=True) - df = df2 + df.replace({CONFIDENCE: r"^\s*$"}, np.nan, regex=True, inplace=True) mapping_set = _get_mapping_set_from_df(df=df, meta=meta) doc = MappingSetDocument(mapping_set=mapping_set, converter=converter) diff --git a/src/sssom/util.py b/src/sssom/util.py index f506c44a..6684f9a1 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -158,14 +158,9 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr df = pd.DataFrame(get_dict_from_mapping(mapping) for mapping in doc.mapping_set.mappings) meta = _extract_global_metadata(doc) - if pandas_version >= (2, 0, 0): - # For pandas >= 2.0.0, use the 'copy' parameter - df = df.infer_objects(copy=False) - else: - # For pandas < 2.0.0, call 'infer_objects()' without any parameters - df = df.infer_objects() # remove columns where all values are blank. df.replace("", np.nan, inplace=True) + df = df.infer_objects() df.dropna(axis=1, how="all", inplace=True) # remove columns with all row = 'None'-s. slots = _get_sssom_schema_object().dict["slots"] @@ -1493,3 +1488,12 @@ def safe_compress(uri: str, converter: Converter) -> str: :return: A CURIE """ return converter.compress_or_standardize(uri, strict=True) + + +def pandas_set_no_silent_downcasting(no_silent_downcasting=True): + """Set pandas future.no_silent_downcasting option. Context https://github.com/pandas-dev/pandas/issues/57734.""" + try: + pd.set_option("future.no_silent_downcasting", no_silent_downcasting) + except KeyError: + # Option does not exist in this version of pandas + pass