feat: improve imports fuel costs #13

Merged · 17 commits · Aug 28, 2024
17 changes: 11 additions & 6 deletions src/r2x/parser/parser_helpers.py
@@ -87,20 +87,25 @@ def fill_missing_timestamps(data_file: pl.DataFrame, date_time_column: list[str]
            .alias("timestamp")
        ).with_columns(pl.col("timestamp").dt.cast_time_unit("ns"))

-    complete_timestamps_df = pl.from_pandas(pd.DataFrame({"timestamp": date_time_column}))
-    missing_timestamps_df = complete_timestamps_df.join(data_file, on="timestamp", how="anti")
+    data_file = data_file.with_columns(
+        pl.col("year").cast(pl.Int32),
+        pl.col("month").cast(pl.Int8),
+        pl.col("day").cast(pl.Int8),
+        pl.col("hour").cast(pl.Int8),
+    )

-    missing_timestamps_df = missing_timestamps_df.with_columns(
+    complete_timestamps = pl.from_pandas(pd.DataFrame({"timestamp": date_time_column}))
+    missing_timestamps = complete_timestamps.join(data_file, on="timestamp", how="anti")
+
+    missing_timestamps = missing_timestamps.with_columns(
        pl.col("timestamp").dt.year().alias("year"),
        pl.col("timestamp").dt.month().alias("month"),
        pl.col("timestamp").dt.day().alias("day"),
        pl.col("timestamp").dt.hour().alias("hour"),
        pl.lit(None).alias("value"),
    ).select(["year", "month", "day", "hour", "value", "timestamp"])

-    complete_df = (
-        pl.concat([data_file, missing_timestamps_df]).sort("timestamp").fill_null(strategy="forward")
-    )
+    complete_df = pl.concat([data_file, missing_timestamps]).sort("timestamp").fill_null(strategy="forward")
    complete_df.drop_in_place("timestamp")
    return complete_df
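The hunk above pairs an anti-join (to find timestamps missing from the data) with a forward fill. A self-contained toy sketch of that pattern follows; the frame contents and variable names are illustrative, not code from the repository:

```python
import pandas as pd
import polars as pl

# Hourly index covering the full horizon; two hours are missing from the observed data.
full_index = pd.date_range("2030-01-01", periods=6, freq="h")
observed = pd.DataFrame({"timestamp": full_index[[0, 1, 4, 5]], "value": [1.0, 2.0, 5.0, 6.0]})
data = pl.from_pandas(observed)

# Anti-join keeps the timestamps that appear in the complete index but not in the data.
complete = pl.from_pandas(pd.DataFrame({"timestamp": full_index}))
missing = complete.join(data, on="timestamp", how="anti").with_columns(
    pl.lit(None, dtype=pl.Float64).alias("value")
)

# Concatenate, sort, and forward-fill so each gap inherits the last observed value.
filled = pl.concat([data, missing]).sort("timestamp").fill_null(strategy="forward")
print(filled)  # values 1, 2, 2, 2, 5, 6
```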

111 changes: 74 additions & 37 deletions src/r2x/parser/plexos.py
@@ -54,6 +54,7 @@
PROPERTY_TS_COLUMNS_BASIC = ["year", "month", "day", "period", "value"]
PROPERTY_TS_COLUMNS_MULTIZONE = ["year", "month", "day", "period"]
PROPERTY_TS_COLUMNS_PIVOT = ["year", "month", "day"]
+PROPERTY_TS_COLUMNS_YM = ["year", "month"]
PROPERTY_TS_COLUMNS_MDP = ["month", "day", "period"]
PROPERTY_TS_COLUMNS_MONTH_PIVOT = [
    "name",
@@ -192,8 +193,6 @@ def build_system(self) -> System:
        self._add_battery_reserves()

        self._construct_load_profiles()
-        # self._construct_areas()
-        # self._construct_transformers()
        return self.system

    def _collect_horizon_data(self, model_name: str) -> dict[str, float]:
@@ -254,6 +253,37 @@ def _reconcile_timeseries(self, data_file):

        return data_file

+    def _get_fuel_prices(self):
+        logger.debug("Creating fuel representation")
+        system_fuels = (pl.col("child_class_name") == ClassEnum.Fuel.value) & (
+            pl.col("parent_class_name") == ClassEnum.System.value
+        )
+        fuels = self._get_model_data(system_fuels)
+        fuel_prices = {}
+        for fuel_name, fuel_data in fuels.group_by("name"):
+            fuel_name = fuel_name[0]
+            property_records = fuel_data[
+                [
+                    "band",
+                    "property_name",
+                    "property_value",
+                    "property_unit",
+                    "data_file",
+                    "variable",
+                    "action",
+                    "variable_tag",
+                ]
+            ].to_dicts()
+
+            logger.debug("Parsing fuel = {}", fuel_name)
+            for property in property_records:
+                property.update({"property_unit": "$/MMBtu"})
+
+            mapped_records, multi_band_records = self._parse_property_data(property_records, fuel_name)
+            mapped_records["name"] = fuel_name
+            fuel_prices[fuel_name] = mapped_records["Price"]
+        return fuel_prices
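A toy illustration of the grouping pattern used by `_get_fuel_prices` above: `group_by("name")` yields the key as a tuple, hence the `fuel_name[0]` unwrap. The frame and the stand-in for `_parse_property_data` are invented for the example:

```python
import polars as pl

# Toy stand-in for the model-data frame; columns and values are illustrative only.
fuels = pl.DataFrame(
    {
        "name": ["gas", "gas", "coal"],
        "property_name": ["Price", "Price", "Price"],
        "property_value": [3.2, 3.4, 1.9],
    }
)

fuel_prices = {}
for fuel_name, fuel_data in fuels.group_by("name"):
    # group_by returns the key as a tuple, so unwrap the single element.
    fuel_name = fuel_name[0]
    records = fuel_data.to_dicts()
    # Stand-in for _parse_property_data: take the last banded value per fuel.
    fuel_prices[fuel_name] = records[-1]["property_value"]

print(fuel_prices)  # e.g. {'gas': 3.4, 'coal': 1.9}
```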

    def _construct_load_zones(self, default_model=LoadZone) -> None:
        """Create LoadZone representation.

@@ -393,21 +423,10 @@ def _construct_branches(self, default_model=MonitoredLine):
        )
        for line in lines_pivot.iter_rows(named=True):
            line_properties_mapped = {self.property_map.get(key, key): value for key, value in line.items()}
-            valid_fields = {
-                k: v
-                for k, v in line_properties_mapped.items()
-                if k in default_model.model_fields
-                if v is not None
-            }
            line_properties_mapped["rating_up"] = line_properties_mapped.pop("max_power_flow", None)
            line_properties_mapped["rating_down"] = line_properties_mapped.pop("max_power_flow", None)

-            ext_data = {
-                k: v
-                for k, v in line_properties_mapped.items()
-                if k not in default_model.model_fields
-                if v is not None
-            }
-            if ext_data:
-                valid_fields["ext"] = ext_data
+            valid_fields, ext_data = self._field_filter(line_properties_mapped, default_model.model_fields)

            from_bus_name = next(
                membership
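`_field_filter` itself is not shown in this diff. Based on the two dict comprehensions it replaces, a plausible sketch of the split it performs (name and behavior inferred here, not confirmed by the source):

```python
def _field_filter(mapped_records: dict, model_fields) -> tuple[dict, dict]:
    """Split mapped properties into model-native fields and leftover 'ext' data.

    Sketch inferred from the removed comprehensions above; the real helper
    in r2x may differ.
    """
    valid = {k: v for k, v in mapped_records.items() if k in model_fields and v is not None}
    ext = {k: v for k, v in mapped_records.items() if k not in model_fields and v is not None}
    return valid, ext


# Hypothetical usage: anything the model doesn't declare lands in ext, Nones are dropped.
valid, ext = _field_filter({"rating_up": 10.0, "custom_tag": "A", "x": None}, {"rating_up": ...})
print(valid, ext)  # {'rating_up': 10.0} {'custom_tag': 'A'}
```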
@@ -497,6 +516,7 @@ def _construct_generators(self):
"""
generator_fuel = self.db.query(fuel_query)
generator_fuel_map = {key: value for key, value in generator_fuel}
fuel_prices = self._get_fuel_prices()

# Iterate over properties for generator
for generator_name, generator_data in system_generators.group_by("name"):
@@ -560,6 +580,7 @@
                continue  # Pass if not available

            mapped_records = self._construct_value_curves(mapped_records, generator_name)
+            mapped_records["fuel_price"] = fuel_prices.get(generator_fuel_map.get(generator_name))

            valid_fields, ext_data = self._field_filter(mapped_records, model_map.model_fields)
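The chained `.get` calls in the added line degrade gracefully: a generator with no fuel membership, or a fuel with no parsed price, produces `None` rather than a `KeyError`. A toy illustration with invented names:

```python
fuel_prices = {"gas": 3.4, "coal": 1.9}
generator_fuel_map = {"gas_ct_1": "gas", "nuclear_1": "uranium"}

for name in ["gas_ct_1", "nuclear_1", "wind_7"]:
    # Unknown generator -> inner .get returns None; fuel_prices.get(None) is also None.
    price = fuel_prices.get(generator_fuel_map.get(name))
    print(name, price)  # gas_ct_1 3.4 / nuclear_1 None / wind_7 None
```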

@@ -928,14 +949,14 @@ def _set_unit_availability(self, records):
        rating_factor = records.get("Rating Factor", 100)
        rating_factor = self._apply_action(np.divide, rating_factor, 100)
        rating = records.get("rating", None)
-        max_capacity = records.get("Max Capacity", None) or records.get("Firm Capacity", None)
+        base_power = records.get("base_power", None)

        if rating is not None:
            units = rating.units
            val = rating_factor * rating.magnitude
-        elif max_capacity is not None:
-            units = max_capacity.units
-            val = self._apply_action(np.multiply, rating_factor, max_capacity.magnitude)
+        elif base_power is not None:
+            units = base_power.units
+            val = self._apply_action(np.multiply, rating_factor, base_power.magnitude)
        else:
            return records
        val = self._apply_unit(val, units)
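The `.units`/`.magnitude` access implies pint-style quantities. A minimal sketch of the availability arithmetic under that assumption (the 80% rating factor and 100 MW base power are made up):

```python
import numpy as np
import pint

ureg = pint.UnitRegistry()

base_power = 100 * ureg.MW          # hypothetical base_power record
rating_factor = np.divide(80, 100)  # "Rating Factor" of 80 -> 0.8 after the divide action

# Derived rating keeps the original units: 0.8 * 100 MW = 80 MW.
val = np.multiply(rating_factor, base_power.magnitude) * base_power.units
print(val)  # 80.0 megawatt
```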
@@ -975,17 +996,20 @@ def _get_model_data(self, data_filter) -> pl.DataFrame:

        base_case_filter = pl.col("scenario").is_null()
        if scenario_specific_data.is_empty():
-            return self.plexos_data.filter(data_filter & base_case_filter)
-
-        combined_key_base = pl.col("name") + "_" + pl.col("property_name")
-        combined_key_scenario = scenario_specific_data["name"] + "_" + scenario_specific_data["property_name"]
+            system_data = self.plexos_data.filter(data_filter & base_case_filter)
+        else:
+            # include both scenario specific and basecase data
+            combined_key_base = pl.col("name") + "_" + pl.col("property_name")
+            combined_key_scenario = (
+                scenario_specific_data["name"] + "_" + scenario_specific_data["property_name"]
+            )

-        base_case_filter = base_case_filter & (
-            ~combined_key_base.is_in(combined_key_scenario) | pl.col("property_name").is_null()
-        )
-        base_case_data = self.plexos_data.filter(data_filter & base_case_filter)
+            base_case_filter = base_case_filter & (
+                ~combined_key_base.is_in(combined_key_scenario) | pl.col("property_name").is_null()
+            )
+            base_case_data = self.plexos_data.filter(data_filter & base_case_filter)

-        system_data = pl.concat([scenario_specific_data, base_case_data])
+            system_data = pl.concat([scenario_specific_data, base_case_data])
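The restructured block gives scenario rows precedence over base-case rows that share the same `name_property` key. A self-contained toy version of that anti-membership filter (data invented):

```python
import polars as pl

# Toy model data: one property overridden by a scenario, one only in the base case.
plexos_data = pl.DataFrame(
    {
        "name": ["gen1", "gen1", "gen2"],
        "property_name": ["Max Capacity", "Max Capacity", "Max Capacity"],
        "property_value": [100.0, 120.0, 50.0],
        "scenario": [None, "HighDemand", None],
    }
)

scenario_data = plexos_data.filter(pl.col("scenario") == "HighDemand")

# Base-case rows survive only when no scenario row overrides the same (name, property) key.
combined_key_base = pl.col("name") + "_" + pl.col("property_name")
combined_key_scenario = scenario_data["name"] + "_" + scenario_data["property_name"]
base_case = plexos_data.filter(
    pl.col("scenario").is_null() & ~combined_key_base.is_in(combined_key_scenario)
)

print(pl.concat([scenario_data, base_case]))  # gen1 -> 120 (scenario), gen2 -> 50 (base)
```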

        # get system variables
        variable_filter = (
@@ -1127,12 +1151,13 @@ def _csv_file_handler(self, property_name, property_data):
        time_series_data = self._retrieve_time_series_data(property_name, data_file)
        if time_series_data is not None:
            return time_series_data
-        logger.warning("Property {} not supported. Skipping it.", property_name)
+        logger.warning("Data file {} not supported yet. Skipping it.", relative_path)
+        logger.warning("Columns not supported: {}", data_file.columns)
+        logger.debug("Skipped file {}", relative_path)

    def _retrieve_single_value_data(self, property_name, data_file):
-        if all(column in data_file.columns for column in [property_name.lower(), "year"]):
+        if (
+            all(column in data_file.columns for column in [property_name.lower(), "year"])
+            and "month" not in data_file.columns
+        ):
            data_file = data_file.filter(pl.col("year") == self.study_year)
            return data_file[property_name.lower()][0]
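The added `"month" not in data_file.columns` guard keeps a (year, month) table from being misread as a single annual value. A toy check of both shapes (column names and values invented):

```python
import polars as pl

study_year = 2030

# Annual single value: one row per year -> safe to return a scalar.
annual = pl.DataFrame({"year": [2028, 2030], "price": [2.9, 3.4]})
# Monthly table: the same columns plus "month" -> must be treated as a time series.
monthly = pl.DataFrame({"year": [2030] * 2, "month": [1, 2], "price": [3.1, 3.6]})

for df in (annual, monthly):
    if all(c in df.columns for c in ["price", "year"]) and "month" not in df.columns:
        print("single value:", df.filter(pl.col("year") == study_year)["price"][0])
    else:
        print("defer to time-series handling")
```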

@@ -1193,6 +1218,14 @@ def _retrieve_time_series_data(self, property_name, data_file):
                data_file = data_file.rename({"period": "hour"})
                data_file = data_file.select(output_columns)

+            case columns if all(
+                column in columns for column in [*PROPERTY_TS_COLUMNS_YM, property_name.lower()]
+            ):
+                data_file = data_file.filter(pl.col("year") == self.study_year)
+                data_file = data_file.rename({property_name.lower(): "value"})
+                data_file = data_file.with_columns(day=pl.lit(1), hour=pl.lit(0))
+                data_file = data_file.select(output_columns)
+
            case columns if all(column in columns for column in PROPERTY_TS_COLUMNS_BASIC):
                data_file = data_file.rename({"period": "hour"})
                data_file = data_file.filter(pl.col("year") == self.study_year)
@@ -1213,11 +1246,15 @@ def _retrieve_time_series_data(self, property_name, data_file):
                data_file = data_file.melt(id_vars=PROPERTY_TS_COLUMNS_PIVOT, variable_name="hour")

            case _:
-                if data_file.is_empty():
-                    logger.warning("Weather year doesn't exist in {}. Skipping it.", property_name)
-                    return
                logger.warning("Data file columns not supported. Skipping it.")
                logger.warning("Datafile Columns: {}", data_file.columns)
                return

+        if data_file.is_empty():
+            logger.warning("Weather year doesn't exist in {}. Skipping it.", property_name)
+            return
+
-        assert not data_file.is_empty()
+        # assert not data_file.is_empty()
        # Format to SingleTimeSeries
        if data_file.columns == output_columns:
            resolution = timedelta(hours=1)
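The new `PROPERTY_TS_COLUMNS_YM` branch normalizes monthly tables into the same (year, month, day, hour, value) shape the other branches emit, anchoring each month at day 1, hour 0. A standalone sketch of that normalization with invented data:

```python
import polars as pl

study_year = 2030
output_columns = ["year", "month", "day", "hour", "value"]

# Monthly fuel-price table: one value per (year, month).
monthly = pl.DataFrame({"year": [2030, 2030], "month": [1, 2], "price": [3.1, 3.6]})

# Same normalization as the new case branch: anchor each month at day 1, hour 0.
ts = (
    monthly.filter(pl.col("year") == study_year)
    .rename({"price": "value"})
    .with_columns(day=pl.lit(1), hour=pl.lit(0))
    .select(output_columns)
)
print(ts)
```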