Source code for nygrid.preprocessing

"""
Preprocessing functions for NY grid data.

Created: 2023-12-26, by Bo Yuan (Cornell University)
Last modified: 2023-12-26, by Bo Yuan (Cornell University)
"""

import os
import numpy as np
import pandas as pd
from typing import List, Tuple, Optional


ZONE_NAME2ID = {
    'WEST': 'A', 
    'GENESEE': 'B', 
    'CENTRAL': 'C', 
    'NORTH': 'D',
    'MOHAWK VALLEY': 'E', 
    'CAPITAL': 'F', 
    'HUDSON VALLEY': 'G',
    'MILLWOOD': 'H', 
    'DUNWOODIE': 'I', 
    'NYC': 'J', 
    'L ISLAND': 'K'
}


[docs]def agg_demand_county2bus(demand_inc_county: pd.DataFrame, county2bus: pd.DataFrame ) -> pd.DataFrame: """ County-level consumption to bus-level consumption. Parameters ---------- demand_inc_county : pd.DataFrame County-level consumption county2bus : pd.DataFrame County to bus mapping Returns ------- demand_inc_bus : pd.DataFrame Bus-level consumption """ demand_inc_county_erie = demand_inc_county['Erie'] demand_inc_county_westchester = demand_inc_county['Westchester'] demand_inc_county_rest = demand_inc_county.drop( columns=['Erie', 'Westchester']) county2bus_erie = county2bus[county2bus['NAME'] == 'Erie'] county2bus_westchester = county2bus[county2bus['NAME'] == 'Westchester'] county2bus_rest = county2bus[(county2bus['NAME'] != 'Erie') & (county2bus['NAME'] != 'Westchester')] demand_inc_bus_erie = demand_inc_county_erie.to_frame() demand_inc_bus_erie['55'] = demand_inc_bus_erie['Erie'] * 0.5 demand_inc_bus_erie['57'] = demand_inc_bus_erie['Erie'] * 0.125 demand_inc_bus_erie['59'] = demand_inc_bus_erie['Erie'] * 0.375 demand_inc_bus_erie = demand_inc_bus_erie.drop(columns=['Erie']) demand_inc_bus_erie.columns = demand_inc_bus_erie.columns.astype(int) demand_inc_bus_westchester = demand_inc_county_westchester.to_frame() demand_inc_bus_westchester['74'] = demand_inc_bus_westchester['Westchester'] * 0.5 demand_inc_bus_westchester['78'] = demand_inc_bus_westchester['Westchester'] * 0.5 demand_inc_bus_westchester = demand_inc_bus_westchester.drop(columns=[ 'Westchester']) demand_inc_bus_westchester.columns = demand_inc_bus_westchester.columns.astype(int) county_bus_alloc_rest = county2bus_rest.set_index('NAME').to_dict()['busIdx'] demand_inc_bus_rest = demand_inc_county_rest.T.groupby( county_bus_alloc_rest).sum().T demand_inc_bus = demand_inc_bus_rest.add(demand_inc_bus_erie, fill_value=0) demand_inc_bus = demand_inc_bus.add(demand_inc_bus_westchester, fill_value=0) demand_inc_bus.columns = demand_inc_bus.columns.astype(int) return demand_inc_bus
[docs]def get_building_load_change_county(county_id: str, upgrade_id: int, bldg_type_list: List[str], bldg_proc_dir: str ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: """ Read building timeseries data aggregated by county and building type. Parameters ---------- county_id : str County ID upgrade_id : int Upgrade ID bldg_type_list : list List of building types bldg_proc_dir : str Directory for processed building data Returns ------- df_county_base: pd.DataFrame Dataframe with baseline energy consumption df_county_future: pd.DataFrame Dataframe with future energy consumption df_county_saving: pd.DataFrame Dataframe with energy savings """ # Read building timeseries data aggregated by county and building type first_df = True for bldg_type in bldg_type_list: filename = os.path.join(bldg_proc_dir, f"{county_id.lower()}_{bldg_type.replace(' ', '_').lower()}.parquet") if os.path.isfile(filename): # Future df_county_bldg_type_future = pd.read_parquet( filename, engine='pyarrow') col_total_cons = [col for col in df_county_bldg_type_future.columns if col.endswith( '.energy_consumption') and 'total' in col] df_county_bldg_type_future = df_county_bldg_type_future[col_total_cons] # Baseline df_county_bldg_type_base = pd.read_parquet(filename.replace( f'upgrade={upgrade_id}', 'upgrade=0'), engine='pyarrow') df_county_bldg_type_base = df_county_bldg_type_base[col_total_cons] # Savings = Baseline - Future df_county_bldg_type_saving = df_county_bldg_type_base - df_county_bldg_type_future # Add to county saving dataframe if first_df: df_county_base = df_county_bldg_type_base df_county_future = df_county_bldg_type_future df_county_saving = df_county_bldg_type_saving first_df = False else: df_county_base = df_county_base + df_county_bldg_type_base df_county_future = df_county_future + df_county_bldg_type_future df_county_saving = df_county_saving + df_county_bldg_type_saving else: print( f'Building load data is not available for county {county_id} {bldg_type}. Skipping...') continue # Rename columns col_rename = {col: col.split('.')[1] for col in df_county_base.columns} df_county_base = df_county_base.rename(columns=col_rename) df_county_future = df_county_future.rename(columns=col_rename) df_county_saving = df_county_saving.rename(columns=col_rename) return df_county_base, df_county_future, df_county_saving
[docs]def add_load_weighted(hourly_load_zonal: pd.DataFrame, bus_info: pd.DataFrame, ) -> pd.DataFrame: """ Distribute zonal load to individual buses based on load distribution ratio. Parameters ---------- hourly_load_zonal : pd.DataFrame Zonal load timeseries bus_info : pd.DataFrame Bus information Returns ------- bus_wload : pd.DataFrame Bus-level load timeseries """ # Subset of bus in NY control area nys_bus = bus_info[~bus_info['zone'].isnull()] # Subset of bus with load nys_bus_wload = nys_bus[nys_bus['sumLoadP0'] > 0] # Load bus and ratio calculation zone_ids = nys_bus['zone'].unique() load_bus_zone = np.empty(11, dtype=object) load_ratio_zone = np.empty(11, dtype=object) num_load_bus_zone = np.zeros(11, dtype=int) # Calculate zonal load distribution ratio for i, zone_id in enumerate(zone_ids): load_bus_table = nys_bus_wload[nys_bus_wload['zone'] == zone_id] load_bus_zone[i] = load_bus_table['idx'].values if load_bus_table.shape[0] > 0: load_ratio_zone[i] = (load_bus_table['sumLoadP0'] / load_bus_table['sumLoadP0'].sum()).values else: load_bus_zone[i] = nys_bus[nys_bus['zone'] == zone_id]['idx'].values load_ratio_zone[i] = np.ones( len(load_bus_zone[i])) / len(load_bus_zone[i]) num_load_bus_zone[i] = len(load_bus_zone[i]) num_load_bus_tot = sum(num_load_bus_zone) # Distribute zonal load to individual buses num_hours = hourly_load_zonal.shape[0] zone_load_bus = np.empty(len(zone_ids), dtype=object) load_bus_idx = np.zeros(num_load_bus_tot) load_bus_load = np.zeros((num_load_bus_tot, num_hours)) n = 0 for i, zone_id in enumerate(zone_ids): zone_load_tot = hourly_load_zonal[zone_id].values zone_load_bus[i] = np.outer(load_ratio_zone[i], zone_load_tot) load_bus_idx[n:n+num_load_bus_zone[i]] = load_bus_zone[i] load_bus_load[n:n+num_load_bus_zone[i], :] = zone_load_bus[i] n += num_load_bus_zone[i] bus_wload = pd.DataFrame(load_bus_load.T, columns=load_bus_idx, index=hourly_load_zonal.index) bus_wload = bus_wload.sort_index(axis=1) bus_wload.columns = bus_wload.columns.astype(int) return bus_wload