import os
import glob
from typing import Any
import h5py
import numpy as np
from ._base import BaseListMovingWindowIterator
from ._download import download_equation
from ._dataitems import DataItem, GridDataItem, CloudDataItem
from .transforms import BaseTransform, DefaultTransform
from warnings import warn
class DynabenchIterator(BaseListMovingWindowIterator):
    """
    Iterator for the Dynabench dataset. This iterator will iterate over each simulation in the
    dataset by moving a window over the simulation data.

    The window size is defined by the lookback and rollout parameters, which define the number of
    timesteps to be used as input and output, respectively.

    The data files are located with the glob pattern
    ``<base_path>/<equation>/<structure>/<resolution>/*<split>*.h5`` and are stored, sorted by
    path, in ``self.file_list`` before being handed to the base class.

    Parameters
    ----------
    split : str
        The split of the dataset to use. Can be "train", "val" or "test".
    equation : str
        The equation to use. Can be "advection", "burgers", "gasdynamics", "kuramotosivashinsky",
        "reactiondiffusion" or "wave".
    structure : str
        The structure of the dataset. Can be "cloud" or "grid".
    resolution : str
        The resolution of the dataset. Can be *low*, *medium*, *high* or *full*.
        Low resolution corresponds to 225 points in total (arranged in a 15x15 grid for the grid structure).
        Medium resolution corresponds to 484 points in total (arranged in a 22x22 grid for the grid structure).
        High resolution corresponds to 900 points in total (arranged in a 30x30 grid for the grid structure).
        Full resolution uses the full simulation grid of shape (64x64) that has been used to
        numerically solve the simulations.
    base_path : str
        Location where the data is stored. Defaults to "data".
    lookback : int
        Number of timesteps to use for the input data. Defaults to 1.
    squeeze_lookback_dim : bool
        Whether to squeeze the lookback dimension. Defaults to False. If lookback > 1 has no effect.
    rollout : int
        Number of timesteps to use for the target data. Defaults to 1.
    transforms : BaseTransform
        Transform object forwarded unchanged to the base class. The default ``DefaultTransform()``
        instance is created once, when the class is defined, and is therefore shared by every
        iterator that does not pass its own transform.
    dtype : np.dtype
        Data type forwarded to the base class. Defaults to np.float32.
    download : bool
        Whether to download the data. Defaults to False.
    *args, **kwargs
        Accepted but not used by this constructor; they are not forwarded to the base class.

    Warns
    -----
    UserWarning
        If no file matches the glob pattern, i.e. the resulting dataset would be empty.
    """

    def __init__(
        self,
        split: str = "train",
        equation: str = "wave",
        structure: str = "cloud",
        resolution: str = "low",
        base_path: str = "data",
        lookback: int = 1,
        squeeze_lookback_dim: bool = False,
        rollout: int = 1,
        transforms: BaseTransform = DefaultTransform(),
        dtype: np.dtype = np.float32,
        download: bool = False,
        *args,
        **kwargs,
    ) -> None:
        # Fetch the data first so that the glob below can see the downloaded files.
        if download:
            download_equation(equation, structure, resolution, base_path)

        # Keep the selection parameters around for later inspection.
        self.split = split
        self.equation = equation
        self.structure = structure
        self.resolution = resolution
        self.base_path = base_path
        self.download = download

        # glob returns its matches in arbitrary (filesystem-dependent) order; sorting makes the
        # file order, and with it the sample order, reproducible across runs and machines.
        pattern = os.path.join(base_path, equation, structure, resolution, f"*{split}*.h5")
        self.file_list = sorted(glob.glob(pattern))

        # An empty match almost always means a wrong path/split or data that was never
        # downloaded; say so instead of silently building an empty dataset.
        if not self.file_list:
            warn(
                f"No data files found matching '{pattern}'. "
                "Check the dataset parameters or pass download=True.",
                stacklevel=2,
            )

        super().__init__(
            data_paths=self.file_list,
            lookback=lookback,
            rollout=rollout,
            squeeze_lookback_dim=squeeze_lookback_dim,
            is_batched=True,
            transforms=transforms,
            dtype=dtype,
        )
class DynabenchSimulationIterator:
    """
    Iterator for the Dynabench dataset. This iterator will iterate all the simulations in the
    dataset, returning the full simulation as a single sample.

    The data files are located with the glob pattern
    ``<base_path>/<equation>/<structure>/<resolution>/*<split>*.h5`` and are stored, sorted by
    path, in ``self.file_list``.

    Parameters
    ----------
    split : str
        The split of the dataset to use. Can be "train", "val" or "test".
    equation : str
        The equation to use. Can be "advection", "burgers", "gasdynamics", "kuramotosivashinsky",
        "reactiondiffusion" or "wave".
    structure : str
        The structure of the dataset. Can be "cloud" or "grid".
    resolution : str
        The resolution of the dataset. Can be *low*, *medium*, *high* or *full*.
        Low resolution corresponds to 225 points in total (arranged in a 15x15 grid for the grid structure).
        Medium resolution corresponds to 484 points in total (arranged in a 22x22 grid for the grid structure).
        High resolution corresponds to 900 points in total (arranged in a 30x30 grid for the grid structure).
        Full resolution uses the full simulation grid of shape (64x64) that has been used to
        numerically solve the simulations.
    transforms : BaseTransform
        Transform object forwarded unchanged to the parent initializer. The default
        ``DefaultTransform()`` instance is created once, when the class is defined, and is
        therefore shared by every iterator that does not pass its own transform.
    base_path : str
        Location where the data is stored. Defaults to "data".
    download : bool
        Whether to download the data. Defaults to False.
    dtype : np.dtype
        Data type of the input data. Defaults to np.float32.
    *args, **kwargs
        Accepted but not used by this constructor; they are not forwarded.

    Warns
    -----
    UserWarning
        If no file matches the glob pattern, i.e. the resulting dataset would be empty.
    """

    def __init__(
        self,
        split: str = "train",
        equation: str = "wave",
        structure: str = "cloud",
        resolution: str = "low",
        transforms: BaseTransform = DefaultTransform(),
        base_path: str = "data",
        download: bool = False,
        dtype: np.dtype = np.float32,
        *args,
        **kwargs,
    ) -> None:
        # Fetch the data first so that the glob below can see the downloaded files.
        if download:
            download_equation(equation, structure, resolution, base_path)

        # Keep the selection parameters around for later inspection.
        self.split = split
        self.equation = equation
        self.structure = structure
        self.resolution = resolution
        self.base_path = base_path
        self.dtype = dtype
        self.download = download

        # glob returns its matches in arbitrary (filesystem-dependent) order; sorting makes the
        # file order, and with it the simulation order, reproducible across runs and machines.
        pattern = os.path.join(base_path, equation, structure, resolution, f"*{split}*.h5")
        self.file_list = sorted(glob.glob(pattern))

        # An empty match almost always means a wrong path/split or data that was never
        # downloaded; say so instead of silently building an empty dataset.
        if not self.file_list:
            warn(
                f"No data files found matching '{pattern}'. "
                "Check the dataset parameters or pass download=True.",
                stacklevel=2,
            )

        # NOTE(review): this class declares no base class, so super().__init__ resolves to
        # object.__init__, which rejects these keyword arguments with a TypeError. A simulation
        # iterator base class (presumably from ._base) looks intended -- confirm and add it to
        # the class statement.
        super().__init__(
            data_paths=self.file_list,
            is_batched=True,
            transforms=transforms,
            dtype=dtype,
        )