From df2b5e83fc20edf0d7ca7a1dc471aa926e2ebfde Mon Sep 17 00:00:00 2001 From: Jonas Hoersch Date: Sun, 17 Dec 2017 18:57:24 +0100 Subject: [PATCH] io: Refactor importers and exporters in preparation for netcdf support --- pypsa/io.py | 544 ++++++++++++++++++++++++++-------------------------- 1 file changed, 274 insertions(+), 270 deletions(-) diff --git a/pypsa/io.py b/pypsa/io.py index f8ae69955..de8d4c144 100644 --- a/pypsa/io.py +++ b/pypsa/io.py @@ -16,159 +16,320 @@ """Functions for importing and exporting data. """ - # make the code as Python 3 compatible as possible from __future__ import division, absolute_import -from six import iteritems +from six import iteritems, iterkeys, string_types from six.moves import filter, range - __author__ = "Tom Brown (FIAS), Jonas Hoersch (FIAS)" __copyright__ = "Copyright 2015-2017 Tom Brown (FIAS), Jonas Hoersch (FIAS), GNU GPL 3" import logging logger = logging.getLogger(__name__) -from textwrap import dedent import os +from textwrap import dedent +from glob import glob import pandas as pd import pypsa import numpy as np - - -def export_to_csv_folder(network, csv_folder_name, encoding=None, export_standard_types=False): +class ImpExper(object): + ds = None + + def __enter__(self): + if self.ds is not None: + self.ds = self.ds.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.finish() + ds = self.ds + if ds is not None: + ds.__exit__(exc_type, exc_val, exc_tb) + + def finish(self): + pass + +class Exporter(ImpExper): + def remove_static(self, list_name): + pass + + def remove_series(self, list_name, attr): + pass + +class Importer(ImpExper): + pass + +class ImporterCSV(Importer): + def __init__(self, csv_folder_name, encoding): + self.csv_folder_name = csv_folder_name + self.encoding = encoding + + assert os.path.isdir(csv_folder_name), "Directory {} does not exist.".format(csv_folder_name) + + def get_attributes(self): + fn = os.path.join(self.csv_folder_name, "network.csv") + if not os.path.isfile(fn): return None + return dict(pd.read_csv(fn, encoding=self.encoding).iloc[0]) + + def get_snapshots(self): + fn = os.path.join(self.csv_folder_name, "snapshots.csv") + return pd.read_csv(fn, index_col=0, encoding=self.encoding, parse_dates=True) + + def get_static(self, list_name): + fn = os.path.join(self.csv_folder_name, list_name + ".csv") + return (pd.read_csv(fn, index_col=0, encoding=self.encoding) + if os.path.isfile(fn) else None) + + def get_series(self, list_name): + for fn in os.listdir(self.csv_folder_name): + if fn.startswith(list_name+"-") and fn.endswith(".csv"): + attr = fn[len(list_name)+1:-4] + df = pd.read_csv(os.path.join(self.csv_folder_name, fn), + index_col=0, encoding=self.encoding, parse_dates=True) + yield attr, df + +class ExporterCSV(Exporter): + def __init__(self, csv_folder_name, encoding): + self.csv_folder_name = csv_folder_name + self.encoding = encoding + + #make sure directory exists + if not os.path.isdir(csv_folder_name): + logger.warning("Directory {} does not exist, creating it" + .format(csv_folder_name)) + os.mkdir(csv_folder_name) + + def save_attributes(self, attrs): + name = attrs.pop('name') + df = pd.DataFrame(attrs, index=pd.Index([name], name='name')) + fn = os.path.join(self.csv_folder_name, "network.csv") + df.to_csv(fn, encoding=self.encoding) + + def save_snapshots(self, snapshots): + fn = os.path.join(self.csv_folder_name, "snapshots.csv") + snapshots.to_csv(fn, encoding=self.encoding) + + def save_static(self, list_name, df): + fn = os.path.join(self.csv_folder_name, list_name + ".csv") + df.to_csv(fn, encoding=self.encoding) + + def save_series(self, list_name, attr, df): + fn = os.path.join(self.csv_folder_name, list_name + "-" + attr + ".csv") + df.to_csv(fn, encoding=self.encoding) + + def remove_static(self, list_name): + fns = glob(os.path.join(self.csv_folder_name, list_name) + "*.csv") + if fns: + for fn in fns: os.unlink(fn) + logger.warning("Stale csv file(s) {} removed".format(', '.join(fns))) + + def remove_series(self, list_name, attr): + fn = os.path.join(self.csv_folder_name, list_name + "-" + attr + ".csv") + if os.path.exists(fn): + os.unlink(fn) + +class ImporterHDF5(Importer): + def __init__(self, path): + self.ds = pd.HDFStore(path) + + def get_attributes(self): + return dict(self.ds["/network"].reset_index().iloc[0]) + + def get_snapshots(self): + return self.ds["/snapshots"] if "/snapshots" in self.ds else None + + def get_static(self, list_name): + return (self.ds["/" + list_name] + if "/" + list_name in self.ds else None) + + def get_series(self, list_name): + for tab in self.ds: + if tab.startswith('/' + list_name + '_t/'): + attr = tab[len('/' + list_name + '_t/'):] + yield attr, self.ds[tab] + +class ExporterHDF5(Exporter): + def __init__(self, path, **kwargs): + self.ds = pd.HDFStore(path, mode='w', **kwargs) + + def save_attributes(self, attrs): + name = attrs.pop('name') + self.ds.put('/network', + pd.DataFrame(attrs, index=pd.Index([name], name='name')), + format='table', index=False) + + def save_snapshots(self, snapshots): + self.ds.put('/snapshots', snapshots, format='table', index=False) + + def save_static(self, list_name, df): + self.ds.put('/' + list_name, df, format='table', index=False) + + def save_series(self, list_name, attr, df): + self.ds.put('/' + list_name + '_t/' + attr, df, format='table', index=False) + +def _export_to_exporter(network, exporter, basename, export_standard_types=False): """ - Export network and components to a folder of CSVs. + Export to exporter. Both static and series attributes of components are exported, but only if they have non-default values. - If csv_folder_name does not already exist, it is created. - Parameters ---------- - csv_folder_name : string - Name of folder to which to export. - encoding : str, default None - Encoding to use for UTF when reading (ex. 'utf-8'). `List of Python - standard encodings - `_ + exporter : Exporter + Initialized exporter instance + basename : str + Basename, used for logging export_standard_types : boolean, default False If True, then standard types are exported too (upon reimporting you should then set "ignore_standard_types" when initialising the netowrk). - - Examples - -------- - >>> export_to_csv(network,csv_folder_name) - OR - >>> network.export_to_csv(csv_folder_name) """ - #exportable component types #what about None???? - nan is float? allowed_types = [float,int,str,bool] + list(np.typeDict.values()) - #make sure directory exists - if not os.path.isdir(csv_folder_name): - logger.warning("Directory {} does not exist, creating it".format(csv_folder_name)) - os.mkdir(csv_folder_name) - - #first export network properties - - columns = [attr for attr in dir(network) if type(getattr(network,attr)) in allowed_types and attr != "name" and attr[:2] != "__"] - index = [network.name] - df = pd.DataFrame(index=index,columns=columns,data = [[getattr(network,col) for col in columns]]) - df.index.name = "name" - - df.to_csv(os.path.join(csv_folder_name,"network.csv"),encoding=encoding) + attrs = dict((attr, getattr(network, attr)) + for attr in dir(network) + if (attr[:2] != "__" and + type(getattr(network,attr)) in allowed_types)) + exporter.save_attributes(attrs) #now export snapshots - - df = pd.DataFrame(index=network.snapshots) - df["weightings"] = network.snapshot_weightings - df.index.name = "name" - - df.to_csv(os.path.join(csv_folder_name,"snapshots.csv"),encoding=encoding) - - #now export all other components + snapshots = pd.DataFrame(dict(weightings=network.snapshot_weightings), + index=pd.Index(network.snapshots, name="name")) + exporter.save_snapshots(snapshots) exported_components = [] - for component in pypsa.components.all_components - {"SubNetwork"}: list_name = network.components[component]["list_name"] attrs = network.components[component]["attrs"] + df = network.df(component) pnl = network.pnl(component) - if not export_standard_types and component in pypsa.components.standard_types: df = df.drop(network.components[component]["standard_types"].index) - - #first do static attributes - filename = os.path.join(csv_folder_name,list_name+".csv") + # first do static attributes df.index.name = "name" if df.empty: - if os.path.exists(filename): - os.unlink(filename) - - fns = [os.path.basename(filename)] - for attr in attrs.index[attrs.varying]: - fn = os.path.join(csv_folder_name,list_name+'-'+attr+'.csv') - if os.path.exists(fn): - os.unlink(fn) - fns.append(os.path.basename(fn)) - - logger.warning("Stale csv file(s) {} removed".format(', '.join(fns))) - + exporter.remove_static(list_name) continue col_export = [] for col in df.columns: - #do not export derived attributes - if col in ["sub_network","r_pu","x_pu","g_pu","b_pu"]: + # do not export derived attributes + if col in ["sub_network", "r_pu", "x_pu", "g_pu", "b_pu"]: continue - if col in attrs.index and pd.isnull(attrs.at[col,"default"]) and pd.isnull(df[col]).all(): + if col in attrs.index and pd.isnull(attrs.at[col, "default"]) and pd.isnull(df[col]).all(): continue if (col in attrs.index and df[col].dtype == attrs.at[col, 'dtype'] - and (df[col] == attrs.at[col,"default"]).all()): + and (df[col] == attrs.at[col, "default"]).all()): continue col_export.append(col) - df[col_export].to_csv(filename,encoding=encoding) - + exporter.save_static(list_name, df[col_export]) #now do varying attributes for attr in pnl: if attr not in attrs.index: col_export = pnl[attr].columns else: - default = attrs.at[attr,"default"] + default = attrs.at[attr, "default"] if pd.isnull(default): col_export = pnl[attr].columns[(~pd.isnull(pnl[attr])).any()] else: col_export = pnl[attr].columns[(pnl[attr] != default).any()] - filename = os.path.join(csv_folder_name,list_name+"-" + attr + ".csv") if len(col_export) > 0: - pnl[attr].loc[:,col_export].to_csv(filename,encoding=encoding) + df = pnl[attr][col_export] + exporter.save_series(list_name, attr, df) else: - if os.path.exists(filename): - os.unlink(filename) - logger.warning("Stale csv file {} removed" - .format(os.path.basename(filename))) + exporter.remove_series(list_name, attr) exported_components.append(list_name) - logger.info("Exported network {} has {}".format(os.path.basename(csv_folder_name), ", ".join(exported_components))) + logger.info("Exported network {} has {}".format(basename, ", ".join(exported_components))) + +def import_from_csv_folder(network, csv_folder_name, encoding=None, skip_time=False): + """ + Import network data from CSVs in a folder. + + The CSVs must follow the standard form, see pypsa/examples. + + Parameters + ---------- + csv_folder_name : string + Name of folder + encoding : str, default None + Encoding to use for UTF when reading (ex. 'utf-8'). `List of Python + standard encodings + `_ + skip_time : bool, default False + Skip reading in time dependent attributes + """ + + basename = os.path.basename(csv_folder_name) + with ImporterCSV(csv_folder_name, encoding=encoding) as importer: + _import_from_importer(network, importer, basename=basename, skip_time=skip_time) + +def export_to_csv_folder(network, csv_folder_name, encoding=None, export_standard_types=False): + """ + Export network and components to a folder of CSVs. + + Both static and series attributes of components are exported, but only + if they have non-default values. + + If csv_folder_name does not already exist, it is created. + + Parameters + ---------- + csv_folder_name : string + Name of folder to which to export. + encoding : str, default None + Encoding to use for UTF when reading (ex. 'utf-8'). `List of Python + standard encodings + `_ + export_standard_types : boolean, default False + If True, then standard types are exported too (upon reimporting you + should then set "ignore_standard_types" when initialising the netowrk). + + Examples + -------- + >>> export_to_csv(network,csv_folder_name) + OR + >>> network.export_to_csv(csv_folder_name) + """ + + basename = os.path.basename(csv_folder_name) + with ExporterCSV(csv_folder_name=csv_folder_name, encoding=encoding) as exporter: + _export_to_exporter(network, exporter, basename=basename, + export_standard_types=export_standard_types) + +def import_from_hdf5(network, path, skip_time=False): + """ + Import network data from HDF5 store at `path`. + + Parameters + ---------- + path : string + Name of HDF5 store + skip_time : bool, default False + Skip reading in time dependent attributes + """ + + basename = os.path.basename(path) + with ImporterHDF5(path) as importer: + _import_from_importer(network, importer, basename=basename, skip_time=skip_time) def export_to_hdf5(network, path, export_standard_types=False, **kwargs): """ @@ -196,105 +357,41 @@ def export_to_hdf5(network, path, export_standard_types=False, **kwargs): kwargs.setdefault('complevel', 4) - with pd.HDFStore(path, mode='w', **kwargs) as store: - #first export network properties - - #exportable component types - #what about None???? - nan is float? - allowed_types = [float,int,str,bool] + list(np.typeDict.values()) - - columns = [attr for attr in dir(network) - if (attr != "name" and attr[:2] != "__" and - type(getattr(network,attr)) in allowed_types)] - index = pd.Index([network.name], name="name") - store.put('/network', - pd.DataFrame(index=index, columns=columns, - data=[[getattr(network, col) for col in columns]]), - format='table', index=False) + basename = os.path.basename(path) + with ExporterHDF5(path, **kwargs) as exporter: + _export_to_exporter(network, exporter, basename=basename, + export_standard_types=export_standard_types) - #now export snapshots - store.put('/snapshots', - pd.DataFrame(dict(weightings=network.snapshot_weightings), - index=pd.Index(network.snapshots, name="name")), - format='table', index=False) - #now export all other components - exported_components = [] - for component in pypsa.components.all_components - {"SubNetwork"}: - list_name = network.components[component]["list_name"] - attrs = network.components[component]["attrs"] - df = network.df(component) - pnl = network.pnl(component) - if not export_standard_types and component in pypsa.components.standard_types: - df = df.drop(network.components[component]["standard_types"].index) - #first do static attributes - df.index.name = "name" - if df.empty: - continue - col_export = [] - for col in df.columns: - #do not export derived attributes - if col in ["sub_network", "r_pu", "x_pu", "g_pu", "b_pu"]: - continue - if col in attrs.index and pd.isnull(attrs.at[col, "default"]) and pd.isnull(df[col]).all(): - continue - if (col in attrs.index - and df[col].dtype == attrs.at[col, 'dtype'] - and (df[col] == attrs.at[col, "default"]).all()): - continue - - col_export.append(col) - - store.put('/' + list_name, df[col_export], format='table', index=False) - - #now do varying attributes - for attr in pnl: - if attr not in attrs.index: - col_export = pnl[attr].columns - else: - default = attrs.at[attr, "default"] - if pd.isnull(default): - col_export = pnl[attr].columns[(~pd.isnull(pnl[attr])).any()] - else: - col_export = pnl[attr].columns[(pnl[attr] != default).any()] - df = pnl[attr][col_export] - if not df.empty: - store.put('/' + list_name + '_t/' + attr, df, format='table', index=False) - exported_components.append(list_name) - logger.info("Exported network {} has {}".format(os.path.basename(path), ", ".join(exported_components))) - -def import_from_hdf5(network, path, skip_time=False): +def _import_from_importer(network, importer, basename, skip_time=False): """ - Import network data from HDF5 store at `path`. + Import network data from importer. Parameters ---------- - path : string - Name of HDF5 store + skip_time : bool + Skip importing time """ - with pd.HDFStore(path, mode='r') as store: - df = store['/network'] - logger.debug("/network") - logger.debug(df) - network.name = df.index[0] + attrs = importer.get_attributes() + if attrs is not None: + network.name = attrs.pop('name') ##https://docs.python.org/3/tutorial/datastructures.html#comparing-sequences-and-other-types current_pypsa_version = [int(s) for s in network.pypsa_version.split(".")] try: - pypsa_version = [int(s) for s in df.at[network.name, 'pypsa_version'].split(".")] - df = df.drop('pypsa_version', axis=1) + pypsa_version = [int(s) for s in attrs.pop("pypsa_version").split(".")] except KeyError: pypsa_version = None @@ -305,45 +402,41 @@ def import_from_hdf5(network, path, skip_time=False): carefully to prepare your network for import. """).format(network.pypsa_version)) - for col in df.columns: - setattr(network, col, df[col][network.name]) + for attr, val in iteritems(attrs): + setattr(network, attr, val) - #if there is snapshots.csv, read in snapshot data - - if '/snapshots' in store: - df = store['/snapshots'] - - network.set_snapshots(df.index) - if "weightings" in df.columns: - network.snapshot_weightings = df["weightings"].reindex(network.snapshots) + # if there is snapshots.csv, read in snapshot data + df = importer.get_snapshots() + if df is not None: + network.set_snapshots(df.index) + if "weightings" in df.columns: + network.snapshot_weightings = df["weightings"].reindex(network.snapshots) - imported_components = [] + imported_components = [] - #now read in other components; make sure buses and carriers come first - for component in ["Bus", "Carrier"] + sorted(pypsa.components.all_components - {"Bus", "Carrier", "SubNetwork"}): - list_name = network.components[component]["list_name"] + # now read in other components; make sure buses and carriers come first + for component in ["Bus", "Carrier"] + sorted(pypsa.components.all_components - {"Bus", "Carrier", "SubNetwork"}): + list_name = network.components[component]["list_name"] - if '/' + list_name not in store: - if component == "Bus": - logger.error("Error, no buses found") - return - else: - continue + df = importer.get_static(list_name) + if df is None: + if component == "Bus": + logger.error("Error, no buses found") + return + else: + continue - df = store['/' + list_name] - import_components_from_dataframe(network, df, component) + import_components_from_dataframe(network, df, component) - if not skip_time: - for attr in store: - if attr.startswith('/' + list_name + '_t/'): - attr_name = attr[len('/' + list_name + '_t/'):] - import_series_from_dataframe(network, store[attr], component, attr_name) + if not skip_time: + for attr, df in importer.get_series(list_name): + import_series_from_dataframe(network, df, component, attr) - logger.debug(getattr(network,list_name)) + logger.debug(getattr(network,list_name)) - imported_components.append(list_name) + imported_components.append(list_name) - logger.info("Imported network {} has {}".format(os.path.basename(path), ", ".join(imported_components))) + logger.info("Imported network{} has {}".format(" " + basename, ", ".join(imported_components))) def import_components_from_dataframe(network, dataframe, cls_name): """ @@ -467,95 +560,6 @@ def import_series_from_dataframe(network, dataframe, cls_name, attr): -def import_from_csv_folder(network, csv_folder_name, encoding=None, skip_time=False): - """ - Import network data from CSVs in a folder. - - The CSVs must follow the standard form, see pypsa/examples. - - Parameters - ---------- - csv_folder_name : string - Name of folder - encoding : str, default None - Encoding to use for UTF when reading (ex. 'utf-8'). `List of Python - standard encodings - `_ - skip_time : bool, default False - Skip reading in time dependent attributes - """ - - if not os.path.isdir(csv_folder_name): - logger.error("Directory {} does not exist.".format(csv_folder_name)) - return - - #if there is network.csv, read in network data - - file_name = os.path.join(csv_folder_name,"network.csv") - - if os.path.isfile(file_name): - df = pd.read_csv(file_name,index_col=0,encoding=encoding) - logger.debug("networks.csv:") - logger.debug(df) - network.name = df.index[0] - - ##https://docs.python.org/3/tutorial/datastructures.html#comparing-sequences-and-other-types - current_pypsa_version = [int(s) for s in network.pypsa_version.split(".")] - pypsa_version = None - for col in df.columns: - if col == "pypsa_version": - pypsa_version = [int(s) for s in df.at[network.name,"pypsa_version"].split(".")] - else: - setattr(network,col,df[col][network.name]) - - if pypsa_version is None or pypsa_version < current_pypsa_version: - logger.warning("Importing PyPSA from older version of PyPSA than current version {}.\n\ - Please read the release notes at https://pypsa.org/doc/release_notes.html\n\ - carefully to prepare your network for import.".format(network.pypsa_version)) - - #if there is snapshots.csv, read in snapshot data - - file_name = os.path.join(csv_folder_name,"snapshots.csv") - - if os.path.isfile(file_name): - df = pd.read_csv(file_name, index_col=0, encoding=encoding, parse_dates=True) - network.set_snapshots(df.index) - if "weightings" in df.columns: - network.snapshot_weightings = df["weightings"].reindex(network.snapshots) - - imported_components = [] - - #now read in other components; make sure buses and carriers come first - for component in ["Bus", "Carrier"] + sorted(pypsa.components.all_components - {"Bus","Carrier","SubNetwork"}): - - list_name = network.components[component]["list_name"] - - file_name = os.path.join(csv_folder_name,list_name+".csv") - - if not os.path.isfile(file_name): - if component == "Bus": - logger.error("Error, no buses found") - return - else: - continue - - df = pd.read_csv(file_name,index_col=0,encoding=encoding) - - import_components_from_dataframe(network,df,component) - - if not skip_time: - file_attrs = [n for n in os.listdir(csv_folder_name) if n.startswith(list_name+"-") and n.endswith(".csv")] - - for file_name in file_attrs: - df = pd.read_csv(os.path.join(csv_folder_name,file_name), index_col=0, encoding=encoding, parse_dates=True) - import_series_from_dataframe(network,df,component,file_name[len(list_name)+1:-4]) - - logger.debug(getattr(network,list_name)) - - imported_components.append(list_name) - - logger.info("Imported network {} has {}".format(os.path.basename(csv_folder_name), ", ".join(imported_components))) - def import_from_pypower_ppc(network, ppc, overwrite_zero_s_nom=None): """ Import network from PYPOWER PPC dictionary format version 2.