13 changes: 12 additions & 1 deletion benchmarks/benchmark_io_performance.py
@@ -142,7 +142,18 @@ def run_io_benchmarks(

print('\n2. Clustering and solving...')
fs_clustered = fs.transform.cluster(n_clusters=n_clusters, cluster_duration='1D')
fs_clustered.optimize(fx.solvers.GurobiSolver())

# Try Gurobi first, fall back to HiGHS if not available
try:
solver = fx.solvers.GurobiSolver()
fs_clustered.optimize(solver)
except Exception as e:
if 'gurobi' in str(e).lower() or 'license' in str(e).lower():
print(f' Gurobi not available ({e}), falling back to HiGHS...')
solver = fx.solvers.HighsSolver()
fs_clustered.optimize(solver)
else:
raise

print('\n3. Expanding...')
fs_expanded = fs_clustered.transform.expand()
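The same try/except pattern can be factored into a hypothetical helper for other benchmark scripts. A minimal sketch, assuming the usual `import flixopt as fx` and only the solver classes already used above:

import flixopt as fx

def optimize_with_fallback(flow_system) -> None:
    """Optimize with Gurobi when available, otherwise fall back to HiGHS."""
    try:
        flow_system.optimize(fx.solvers.GurobiSolver())
    except Exception as e:
        # Only treat missing-Gurobi / license errors as a reason to fall back
        if 'gurobi' in str(e).lower() or 'license' in str(e).lower():
            print(f'Gurobi not available ({e}), falling back to HiGHS...')
            flow_system.optimize(fx.solvers.HighsSolver())
        else:
            raise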
29 changes: 21 additions & 8 deletions flixopt/clustering/base.py
@@ -1113,12 +1113,17 @@ def _create_reference_structure(self, include_original_data: bool = True) -> tup
original_data_refs = None
if include_original_data and self.original_data is not None:
original_data_refs = []
for name, da in self.original_data.data_vars.items():
# Use variables for faster access (avoids _construct_dataarray overhead)
variables = self.original_data.variables
for name in self.original_data.data_vars:
var = variables[name]
ref_name = f'original_data|{name}'
# Rename time dim to avoid xarray alignment issues
if 'time' in da.dims:
da = da.rename({'time': 'original_time'})
arrays[ref_name] = da
if 'time' in var.dims:
new_dims = tuple('original_time' if d == 'time' else d for d in var.dims)
arrays[ref_name] = xr.Variable(new_dims, var.values, attrs=var.attrs)
else:
arrays[ref_name] = var
original_data_refs.append(f':::{ref_name}')

# NOTE: aggregated_data is NOT serialized - it's identical to the FlowSystem's
@@ -1129,9 +1134,11 @@
metrics_refs = None
if self._metrics is not None:
metrics_refs = []
for name, da in self._metrics.data_vars.items():
# Use variables for faster access (avoids _construct_dataarray overhead)
metrics_vars = self._metrics.variables
for name in self._metrics.data_vars:
ref_name = f'metrics|{name}'
arrays[ref_name] = da
arrays[ref_name] = metrics_vars[name]
metrics_refs.append(f':::{ref_name}')

reference = {
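Both hunks above replace `ds.data_vars.items()` with lookups in `ds.variables`: indexing a Dataset (or iterating its data_vars values) builds a full DataArray with coordinates attached on every access, while `ds.variables[name]` returns the underlying xr.Variable directly. A self-contained sketch of the pattern with toy data (not flixopt's):

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'load': (('time',), np.random.rand(24))},
    coords={'time': np.arange(24)},
)

slow = ds['load']            # DataArray: coordinates re-attached on each access
fast = ds.variables['load']  # xr.Variable: raw dims/values/attrs, no coord handling

# Renaming a dimension without DataArray.rename(): rebuild the Variable with new
# dimension names, keeping values and attrs (as the first hunk does for 'time').
new_dims = tuple('original_time' if d == 'time' else d for d in fast.dims)
renamed = xr.Variable(new_dims, fast.values, attrs=fast.attrs)
assert renamed.dims == ('original_time',)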
@@ -1415,9 +1422,15 @@ def compare(

if kind == 'duration_curve':
sorted_vars = {}
# Use variables for faster access (avoids _construct_dataarray overhead)
variables = ds.variables
rep_values = ds.coords['representation'].values
rep_idx = {rep: i for i, rep in enumerate(rep_values)}
for var in ds.data_vars:
for rep in ds.coords['representation'].values:
values = np.sort(ds[var].sel(representation=rep).values.flatten())[::-1]
data = variables[var].values
for rep in rep_values:
# Direct numpy indexing instead of .sel()
values = np.sort(data[rep_idx[rep]].flatten())[::-1]
sorted_vars[(var, rep)] = values
# Get length from first sorted array
n = len(next(iter(sorted_vars.values())))
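The duration-curve hunk swaps repeated `.sel(representation=...)` calls for direct numpy indexing; note that `data[rep_idx[rep]]` indexes axis 0, so it relies on `representation` being the leading dimension of each variable. A toy sketch of the same computation (example data only):

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'heat_demand': (('representation', 'time'), np.random.rand(2, 168))},
    coords={'representation': ['original', 'clustered']},
)

variables = ds.variables
rep_values = ds.coords['representation'].values
rep_idx = {rep: i for i, rep in enumerate(rep_values)}

sorted_vars = {}
for var in ds.data_vars:
    data = variables[var].values
    for rep in rep_values:
        # Sort descending to obtain the duration curve for this representation
        sorted_vars[(var, rep)] = np.sort(data[rep_idx[rep]].flatten())[::-1]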
6 changes: 3 additions & 3 deletions flixopt/components.py
@@ -1505,11 +1505,11 @@ def _add_linking_constraints(

# Apply self-discharge decay factor (1-loss)^hours to soc_before per Eq. 5
# relative_loss_per_hour is per-hour, so we need total hours per cluster
# Use sum over time to handle both regular and segmented systems
# Use sum over time to get total duration (handles both regular and segmented systems)
# Keep as DataArray to respect per-period/scenario values
rel_loss = _scalar_safe_reduce(self.element.relative_loss_per_hour, 'time', 'mean')
hours_per_cluster = _scalar_safe_reduce(self._model.timestep_duration, 'time', 'mean')
decay_n = (1 - rel_loss) ** hours_per_cluster
total_hours_per_cluster = _scalar_safe_reduce(self._model.timestep_duration, 'time', 'sum')
decay_n = (1 - rel_loss) ** total_hours_per_cluster

lhs = soc_after - soc_before * decay_n - delta_soc_ordered
self.add_constraints(lhs == 0, short_name='link')
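The change from `mean` to `sum` matters numerically: per Eq. 5 the self-discharge over a cluster is (1 - loss_per_hour) ** total_hours, and reducing the timestep durations with `mean` applies only a single timestep's decay to the whole cluster. A small worked example with illustrative numbers (not taken from the PR):

# A cluster of 24 one-hour timesteps with 1 % self-discharge per hour.
rel_loss = 0.01
timestep_hours = [1.0] * 24

decay_sum = (1 - rel_loss) ** sum(timestep_hours)                           # ~0.786
decay_mean = (1 - rel_loss) ** (sum(timestep_hours) / len(timestep_hours))  # 0.99

# The old 'mean' reduction kept 99 % of the stored energy across the cluster;
# the corrected 'sum' keeps about 78.6 %, i.e. a full 24 hours of decay.
print(decay_sum, decay_mean)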
17 changes: 12 additions & 5 deletions flixopt/core.py
@@ -629,17 +629,24 @@ def drop_constant_arrays(
Dataset with constant variables removed.
"""
drop_vars = []
# Use ds.variables for faster access (avoids _construct_dataarray overhead)
variables = ds.variables

for name, da in ds.data_vars.items():
for name in ds.data_vars:
var = variables[name]
# Skip variables without the dimension
if dim not in da.dims:
if dim not in var.dims:
if drop_arrays_without_dim:
drop_vars.append(name)
continue

# Check if variable is constant along the dimension (ptp < atol)
ptp = da.max(dim, skipna=True) - da.min(dim, skipna=True)
if (ptp < atol).all().item():
# Check if variable is constant along the dimension using numpy (ptp < atol)
axis = var.dims.index(dim)
data = var.values
# Use numpy operations directly for speed
with np.errstate(invalid='ignore'): # Ignore NaN warnings
ptp = np.nanmax(data, axis=axis) - np.nanmin(data, axis=axis)
if np.all(ptp < atol):
drop_vars.append(name)

if drop_vars:
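The rewritten constancy check operates on the raw numpy array: peak-to-peak along the chosen axis via np.nanmax/np.nanmin, compared against atol. A self-contained sketch of the test with toy data:

import numpy as np
import xarray as xr

var = xr.Variable(('time', 'scenario'), np.array([[1.0, 2.0], [1.0, 2.0], [1.0, np.nan]]))
atol = 1e-12
axis = var.dims.index('time')

with np.errstate(invalid='ignore'):  # as in the hunk above, silence invalid-value FP warnings
    ptp = np.nanmax(var.values, axis=axis) - np.nanmin(var.values, axis=axis)

is_constant = bool(np.all(ptp < atol))
print(ptp, is_constant)  # [0. 0.] True -> 'time' carries no information here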
6 changes: 6 additions & 0 deletions flixopt/flow_system.py
@@ -214,6 +214,12 @@ def __init__(
elif computed_timestep_duration is not None:
self.timestep_duration = self.fit_to_model_coords('timestep_duration', computed_timestep_duration)
else:
# RangeIndex (segmented systems) requires explicit timestep_duration
if isinstance(self.timesteps, pd.RangeIndex):
raise ValueError(
'timestep_duration is required when using RangeIndex timesteps (segmented systems). '
'Provide timestep_duration explicitly or use DatetimeIndex timesteps.'
)
self.timestep_duration = None

# Cluster weight for cluster() optimization (default 1.0)
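The new guard exists because a plain RangeIndex carries no time information from which durations could be inferred, whereas a DatetimeIndex does. A standalone illustration (not flixopt API):

import numpy as np
import pandas as pd

dt_index = pd.date_range('2024-01-01 00:00', '2024-01-01 03:00', periods=4)
print(np.diff(dt_index) / np.timedelta64(1, 'h'))  # [1. 1. 1.] -> durations can be inferred

rng_index = pd.RangeIndex(4)
print(np.diff(rng_index))  # [1 1 1] -> unitless steps, timestep_duration must be given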
81 changes: 53 additions & 28 deletions flixopt/io.py
@@ -561,14 +561,18 @@ def save_dataset_to_netcdf(
ds.attrs = {'attrs': json.dumps(ds.attrs)}

# Convert all DataArray attrs to JSON strings
for var_name, data_var in ds.data_vars.items():
if data_var.attrs: # Only if there are attrs
ds[var_name].attrs = {'attrs': json.dumps(data_var.attrs)}
# Use ds.variables to avoid slow _construct_dataarray calls
variables = ds.variables
for var_name in ds.data_vars:
var = variables[var_name]
if var.attrs: # Only if there are attrs
var.attrs = {'attrs': json.dumps(var.attrs)}

# Also handle coordinate attrs if they exist
for coord_name, coord_var in ds.coords.items():
if hasattr(coord_var, 'attrs') and coord_var.attrs:
ds[coord_name].attrs = {'attrs': json.dumps(coord_var.attrs)}
for coord_name in ds.coords:
var = variables[coord_name]
if var.attrs:
var.attrs = {'attrs': json.dumps(var.attrs)}

# Suppress numpy binary compatibility warnings from netCDF4 (numpy 1->2 transition)
with warnings.catch_warnings():
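This in-place rewrite works because ds.variables hands out the Dataset's own xr.Variable objects, so assigning to var.attrs updates the Dataset without the ds[var_name] round-trip. A toy sketch of the JSON attrs round-trip used by save and load (example data, not flixopt's):

import json
import numpy as np
import xarray as xr

ds = xr.Dataset({'price': (('time',), np.random.rand(24), {'unit': 'EUR/MWh'})})

# Encode (as save_dataset_to_netcdf does): mutate the Variables directly.
for name in ds.data_vars:
    var = ds.variables[name]
    if var.attrs:
        var.attrs = {'attrs': json.dumps(var.attrs)}

# Decode (as load_dataset_from_netcdf does on the way back in).
for name in ds.data_vars:
    var = ds.variables[name]
    if 'attrs' in var.attrs:
        var.attrs = json.loads(var.attrs['attrs'])

assert ds['price'].attrs == {'unit': 'EUR/MWh'}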
@@ -602,25 +606,38 @@ def _reduce_constant_arrays(ds: xr.Dataset) -> xr.Dataset:
Dataset with constant dimensions reduced.
"""
new_data_vars = {}
variables = ds.variables

for name in ds.data_vars:
var = variables[name]
dims = var.dims
data = var.values

for name, da in ds.data_vars.items():
if not da.dims or da.size == 0:
new_data_vars[name] = da
if not dims or data.size == 0:
new_data_vars[name] = var
continue

# Try to reduce each dimension
reduced = da
for dim in list(da.dims):
if dim not in reduced.dims:
# Try to reduce each dimension using numpy operations
reduced_data = data
reduced_dims = list(dims)

for _axis, dim in enumerate(dims):
if dim not in reduced_dims:
continue # Already removed
# Check if constant along this dimension
first_slice = reduced.isel({dim: 0})
is_constant = (reduced == first_slice).all()

current_axis = reduced_dims.index(dim)
# Check if constant along this axis using numpy
first_slice = np.take(reduced_data, 0, axis=current_axis)
# Broadcast first_slice to compare
expanded = np.expand_dims(first_slice, axis=current_axis)
is_constant = np.allclose(reduced_data, expanded, equal_nan=True)

if is_constant:
# Remove this dimension by taking first slice
reduced = first_slice
reduced_data = first_slice
reduced_dims.pop(current_axis)

new_data_vars[name] = reduced
new_data_vars[name] = xr.Variable(tuple(reduced_dims), reduced_data, attrs=var.attrs)

return xr.Dataset(new_data_vars, coords=ds.coords, attrs=ds.attrs)
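The reduction now compares each axis against its first slice in plain numpy: take slice 0, broadcast it back with expand_dims, and drop the axis when np.allclose (NaN-tolerant) holds. A toy sketch of reducing one constant dimension:

import numpy as np

data = np.tile(np.arange(4.0), (3, 1))  # shape (scenario=3, time=4), identical rows
dims = ['scenario', 'time']

axis = dims.index('scenario')
first_slice = np.take(data, 0, axis=axis)          # shape (4,)
expanded = np.expand_dims(first_slice, axis=axis)  # shape (1, 4), broadcastable
if np.allclose(data, expanded, equal_nan=True):
    data = first_slice                             # constant along 'scenario' -> keep one slice
    dims.pop(axis)

print(dims, data.shape)  # ['time'] (4,)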

@@ -754,14 +771,18 @@ def load_dataset_from_netcdf(path: str | pathlib.Path) -> xr.Dataset:
ds.attrs = json.loads(ds.attrs['attrs'])

# Restore DataArray attrs (before unstacking, as stacked vars have no individual attrs)
for var_name, data_var in ds.data_vars.items():
if 'attrs' in data_var.attrs:
ds[var_name].attrs = json.loads(data_var.attrs['attrs'])
# Use ds.variables to avoid slow _construct_dataarray calls
variables = ds.variables
for var_name in ds.data_vars:
var = variables[var_name]
if 'attrs' in var.attrs:
var.attrs = json.loads(var.attrs['attrs'])

# Restore coordinate attrs
for coord_name, coord_var in ds.coords.items():
if hasattr(coord_var, 'attrs') and 'attrs' in coord_var.attrs:
ds[coord_name].attrs = json.loads(coord_var.attrs['attrs'])
for coord_name in ds.coords:
var = variables[coord_name]
if 'attrs' in var.attrs:
var.attrs = json.loads(var.attrs['attrs'])

# Unstack variables if they were stacked during saving
# Detection: check if any dataset dimension starts with '__stacked__'
@@ -1577,7 +1598,10 @@ def _fast_get_dataarray(ds: xr.Dataset, name: str, coord_cache: dict[str, xr.Dat
Constructed DataArray
"""
variable = ds.variables[name]
coords = {k: coord_cache[k] for k in variable.dims if k in coord_cache}
var_dims = set(variable.dims)
# Include coordinates whose dims are a subset of the variable's dims
# This preserves both dimension coordinates and auxiliary coordinates
coords = {k: v for k, v in coord_cache.items() if set(v.dims).issubset(var_dims)}
return xr.DataArray(variable, coords=coords, name=name)

@staticmethod
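The widened filter keeps auxiliary coordinates: a cached coordinate is attached whenever all of its dimensions appear on the variable, not only when its name is itself a dimension. A toy sketch with hypothetical coordinate names:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'flow': (('time',), np.random.rand(6))},
    coords={
        'time': np.arange(6),                     # dimension coordinate
        'cluster': ('time', [0, 0, 0, 1, 1, 1]),  # auxiliary coordinate on 'time'
    },
)

coord_cache = {k: ds.coords[k] for k in ds.coords}
variable = ds.variables['flow']
var_dims = set(variable.dims)

# A name-based filter would drop 'cluster'; the dims-subset filter keeps it.
coords = {k: v for k, v in coord_cache.items() if set(v.dims).issubset(var_dims)}
da = xr.DataArray(variable, coords=coords, name='flow')
print(sorted(da.coords))  # ['cluster', 'time']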
@@ -1865,9 +1889,10 @@ def _add_clustering_to_dataset(
clustering_ref, clustering_arrays = clustering._create_reference_structure(
include_original_data=include_original_data
)
# Add clustering arrays with prefix
for name, arr in clustering_arrays.items():
ds[f'{cls.CLUSTERING_PREFIX}{name}'] = arr
# Add clustering arrays with prefix using batch assignment
# (individual ds[name] = arr assignments are slow)
prefixed_arrays = {f'{cls.CLUSTERING_PREFIX}{name}': arr for name, arr in clustering_arrays.items()}
ds = ds.assign(prefixed_arrays)
ds.attrs['clustering'] = json.dumps(clustering_ref)

return ds
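Batch assignment avoids updating the Dataset separately for every array and does a single update for the whole mapping; note that assign() returns a new Dataset, hence the reassignment of ds. A tiny sketch with placeholder names and values:

import numpy as np
import xarray as xr

ds = xr.Dataset(coords={'time': np.arange(4)})
clustering_arrays = {
    'weights': xr.DataArray(np.ones(4), dims='time'),
    'labels': xr.DataArray(np.zeros(4), dims='time'),
}
PREFIX = 'clustering|'  # stand-in for cls.CLUSTERING_PREFIX

prefixed = {f'{PREFIX}{name}': arr for name, arr in clustering_arrays.items()}
ds = ds.assign(prefixed)  # one update instead of one per array
print(list(ds.data_vars))  # ['clustering|weights', 'clustering|labels']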
12 changes: 11 additions & 1 deletion flixopt/structure.py
@@ -1116,7 +1116,17 @@ def from_dataset(cls, ds: xr.Dataset) -> Interface:
reference_structure.pop('__class__', None)

# Create arrays dictionary from dataset variables
arrays_dict = {name: array for name, array in ds.data_vars.items()}
# Use ds.variables with coord_cache for faster DataArray construction
variables = ds.variables
coord_cache = {k: ds.coords[k] for k in ds.coords}
arrays_dict = {
name: xr.DataArray(
variables[name],
coords={k: coord_cache[k] for k in variables[name].dims if k in coord_cache},
name=name,
)
for name in ds.data_vars
}

# Resolve all references using the centralized method
resolved_params = cls._resolve_reference_structure(reference_structure, arrays_dict)