Skip to content
This repository has been archived by the owner on Jun 2, 2023. It is now read-only.

Commit

Permalink
Merge pull request #16 from amsnyder/separate_data_exp_out
Browse files Browse the repository at this point in the history
separate gap analysis csv and plot outputs
  • Loading branch information
amsnyder authored Jan 18, 2022
2 parents 325c450 + fc91d29 commit a7a3fa5
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
2 changes: 1 addition & 1 deletion 02_munge/src/munge_usgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def process_data_to_csv(raw_datafile, params_to_process, params_df, flags_to_dro
df.drop(col, axis=1, inplace=True)

# drop any columns with no data
- df.dropna(axis=1, inplace=True)
+ df.dropna(axis=1, how='all', inplace=True)

# process parameter codes to names
df = param_code_to_name(df, params_df)
Expand Down
14 changes: 11 additions & 3 deletions data_exploration/src/gap_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@ def compile_data(nwis_var_names, source):
return var_dfs

def gap_analysis_calc(source, var_dfs):
+ # make output directory if it doesn't exist
+ os.makedirs(os.path.join('data_exploration', 'out', 'gap_analysis_csvs'), exist_ok = True)
# define metric names that we will calculate
metrics = ['p_coverage', 'n_gaps', 'gap_median_days', 'gap_max_days']
gap_template_df = pd.DataFrame(columns=metrics)
metric_dfs = {}
for var, df in var_dfs.items():
+ print(f'calculating metrics for {var}')
df.dropna(axis=0, how='all', inplace=True)
if df.empty:
continue
Expand All @@ -49,12 +53,15 @@ def gap_analysis_calc(source, var_dfs):
var_site_gap_df.loc[year, 'n_gaps'] = len(gaps)
var_site_gap_df.loc[year, 'gap_median_days'] = gaps.median().days if pd.notna(gaps.median().days) else 0
var_site_gap_df.loc[year, 'gap_max_days'] = gaps.max().days if pd.notna(gaps.max().days) else 0
- var_site_gap_df.to_csv(os.path.join('data_exploration', 'out', f'{source}_{var}_{site}_gap_analysis.csv'))
+ var_site_gap_df.to_csv(os.path.join('data_exploration', 'out', 'gap_analysis_csvs', f'{source}_{var}_{site}_gap_analysis.csv'))
metric_dfs[var][site]= var_site_gap_df
return metric_dfs, metrics

def plot_gap_analysis(source, metric_dfs, metrics, site_colors):
+ # make output directory if it doesn't exist
+ os.makedirs(os.path.join('data_exploration', 'out', 'gap_analysis_plots'), exist_ok = True)
for var, data_by_site in metric_dfs.items():
+ print(f'plotting metrics for {var}')
plot_df = pd.DataFrame()
fig, axs = plt.subplots(4, sharex=True, figsize=(8,8))
i=0
Expand All @@ -68,7 +75,7 @@ def plot_gap_analysis(source, metric_dfs, metrics, site_colors):
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, bbox_to_anchor=(1.15,0.9), loc='upper right')
fig.suptitle(var)
- save_path = os.path.join('data_exploration', 'out', f'{source}_{var}_gap_analysis_plot.png')
+ save_path = os.path.join('data_exploration', 'out', 'gap_analysis_plots', f'{source}_{var}_gap_analysis_plot.png')
fig.savefig(save_path, bbox_inches = 'tight')

def main():
Expand All @@ -85,7 +92,8 @@ def main():
config = yaml.safe_load(stream)['gap_analysis.py']
# read in data source we want to do gap analysis for
source = config['source']
- os.makdirs('data_exploration/out/', exist_ok = True)
+ # make output directory if it doesn't exist
+ os.makedirs(os.path.join('data_exploration', 'out'), exist_ok = True)
# fetch site data and compile into nested dictionary of dataframes
var_dfs = compile_data(var_names, source)

Expand Down

0 comments on commit a7a3fa5

Please sign in to comment.