Skip to content

config

Batch

Bases: BatchSpec, TaskArrayMixin

Class to represent a single batch of the ensembler configuration.

Parameters:

Name Type Description Default
pre_batch object

TaskArray configuration setup.

None
pre_run object

TaskArray configuration setup.

None
post_run object

TaskArray configuration setup.

None
post_batch object

TaskArray configuration setup.

None
**kwargs dict

Keyword arguments for BatchSpec instance.

{}
Source code in model_ensembler/config.py
class Batch(BatchSpec, TaskArrayMixin):
    """Class to represent a single batch of the ensembler configuration.

    The four task hooks are stored as raw definitions on private attributes
    and exposed through same-named properties, which wrap them in ``Task``
    objects via ``task_array``.

    Args:
        *args (tuple): Positional arguments; not forwarded by ``__init__``.
        pre_batch (object): TaskArray configuration setup.
        pre_run (object): TaskArray configuration setup.
        post_run (object): TaskArray configuration setup.
        post_batch (object): TaskArray configuration setup.
        **kwargs (dict): Keyword arguments for BatchSpec instance; not
            forwarded by ``__init__`` (presumably consumed when the
            BatchSpec base is constructed -- TODO confirm).
    """
    def __init__(self,
                 *args,
                 pre_batch=None,
                 pre_run=None,
                 post_run=None,
                 post_batch=None,
                 **kwargs):
        super().__init__()
        # Keep the raw definitions; the properties below turn them into
        # Task objects each time they are accessed.
        self._pre_batch, self._pre_run = pre_batch, pre_run
        self._post_run, self._post_batch = post_run, post_batch

    @property
    def pre_batch(self):
        """Pre batch tasks, built from the stored ``_pre_batch`` definitions.

        Returns:
            (generator): Pre batch tasks; a new generator on every access.
        """
        return self.task_array("_pre_batch")

    @property
    def pre_run(self):
        """Pre run tasks, built from the stored ``_pre_run`` definitions.

        Returns:
            (generator): Pre run tasks; a new generator on every access.
        """
        return self.task_array("_pre_run")

    @property
    def post_run(self):
        """Post run tasks, built from the stored ``_post_run`` definitions.

        Returns:
            (generator): Post run tasks; a new generator on every access.
        """
        return self.task_array("_post_run")

    @property
    def post_batch(self):
        """Post batch tasks, built from the stored ``_post_batch`` definitions.

        Returns:
            (generator): Post batch tasks; a new generator on every access.
        """
        return self.task_array("_post_batch")

post_batch property

Property decorator managing post batch tasks.

Returns:

Type Description
generator

Post batch tasks.

post_run property

Property decorator managing post run tasks.

Returns:

Type Description
generator

Post run tasks.

pre_batch property

Property decorator managing pre batch tasks.

Returns:

Type Description
generator

Pre batch tasks.

pre_run property

Property decorator managing pre run tasks.

Returns:

Type Description
generator

Pre run tasks.

EnsembleConfig

Bases: YAMLConfig, TaskArrayMixin

Class to represent the entirety of the ensembler configuration.

Parameters:

Name Type Description Default
*args tuple

See YAMLConfig.

()
**kwargs dict

arbitrary keyword arguments.

{}
Source code in model_ensembler/config.py
class EnsembleConfig(YAMLConfig, TaskArrayMixin):
    """Class to represent the entirety of the ensembler configuration.

    After the parent initialiser has populated ``self._data``, the
    ``vars``, ``pre_process``, ``post_process`` and ``batches`` entries of
    its ``ensemble`` section are cached on private attributes.

    Args:
        *args (tuple): See ``YAMLConfig``.
        **kwargs (dict): arbitrary keyword arguments.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        ensemble = self._data['ensemble']
        # All four keys are read unconditionally, so each must be present.
        self._vars = ensemble['vars']
        self._pre_process = ensemble['pre_process']
        self._post_process = ensemble['post_process']
        self._batches = ensemble['batches']

    @property
    def pre_process(self):
        """Preprocessing tasks built from the ``pre_process`` entry.

        Returns:
            (generator): Preprocessing Tasks; a new generator on every
                access.
        """
        return self.task_array("_pre_process")

    @property
    def post_process(self):
        """Postprocessing tasks built from the ``post_process`` entry.

        Returns:
            (generator): Postprocessing Tasks; a new generator on every
                access.
        """
        return self.task_array("_post_process")

    @property
    def batches(self):
        """Batches contained in the ensemble configuration.

        Each raw batch definition is expanded as keyword arguments to
        ``Batch``; a new list is built on every access.

        Returns:
            (list): Batches contained in the ensemble configuration.
        """
        return [Batch(**definition) for definition in self._batches]

    @property
    def vars(self):
        """The ``vars`` entry of the ensemble configuration, as stored.

        Returns:
            (dict): vars from ensemble configuration.
        """
        return self._vars

batches property

Property decorator managing batches contained in ensemble config.

Returns:

Type Description
list

Batches contained in the ensemble configuration.

post_process property

Property decorator managing postprocessing attributes.

Returns:

Type Description
generator

Postprocessing Tasks.

pre_process property

Property decorator managing preprocessing attributes.

Returns:

Type Description
generator

Preprocessing Tasks.

vars property

Property decorator managing vars from the ensemble configuration.

Returns:

Type Description
dict

vars from ensemble configuration.

Task

Bases: TaskSpec

Task definition class derived from the TaskSpec namedtuple

Source code in model_ensembler/config.py
class Task(TaskSpec):
    """Task definition class derived from the TaskSpec namedtuple.

    Adds no fields or behaviour of its own.
    """

TaskArrayMixin

Mixin that generates Task objects from the task-definition attributes of a configuration object (used by both Batch and EnsembleConfig).

Source code in model_ensembler/config.py
class TaskArrayMixin:
    """Mixin that builds Task objects from raw definitions on the host."""

    def task_array(self, attr):
        """Lazily build Tasks from a named attribute of this instance.

        Args:
            attr (str): Name of the attribute holding the raw task
                        definitions (an iterable of mappings, each expanded
                        as keyword arguments to ``Task``).

        Yields:
            (object): Task object, one per raw definition and in order.
                      Nothing is yielded when the attribute is falsy
                      (e.g. ``None`` or an empty list).
        """
        raw_tasks = getattr(self, attr)
        if not raw_tasks:
            # Nothing configured: finish the generator without yielding.
            return
        for definition in raw_tasks:
            yield Task(**definition)

task_array(attr)

Yields Tasks from a configuration instance.

Parameters:

Name Type Description Default
attr str

Name of the member property in the configuration that defines the list of Task objects.

required

Yields:

Type Description
object

Task object

Source code in model_ensembler/config.py
def task_array(self, attr):
    """Lazily build Tasks from a named attribute of this instance.

    Args:
        attr (str): Name of the attribute holding the raw task
                    definitions (an iterable of mappings, each expanded
                    as keyword arguments to ``Task``).

    Yields:
        (object): Task object, one per raw definition and in order.
                  Nothing is yielded when the attribute is falsy
                  (e.g. ``None`` or an empty list).
    """
    raw_tasks = getattr(self, attr)
    if not raw_tasks:
        # Nothing configured: finish the generator without yielding.
        return
    for definition in raw_tasks:
        yield Task(**definition)

YAMLConfig

Configuration processor for model-ensemble YAML-based configurations.

Parameters:

Name Type Description Default
configuration str

Name of the YAML configuration to load.

required
Source code in model_ensembler/config.py
class YAMLConfig:
    """Configuration processor for model-ensemble YAML-based configurations.

    On construction the YAML file is loaded and validated against the
    ``model-ensemble.json`` schema. The parsed schema and configuration are
    kept on the instance as ``_schema_data`` and ``_data``.

    Args:
        configuration (str): Name of the YAML configuration to load.
    """

    def __init__(self, configuration):
        # ``path`` is a module-level name defined outside this class.
        self._schema = os.path.join(path, "model-ensemble.json")
        self._configuration_file = configuration

        self._schema_data, self._data = \
            self.__class__.validate(self._schema, self._configuration_file)

    @staticmethod
    def validate(json_schema, yaml_file):
        """Validate a YAML configuration against a JSON schema.

        Entries of an optional ``batch_config`` section are copied into
        every batch that does not already define them, before the schema
        is applied; the returned YAML data includes those merged values.

        Args:
            json_schema (str): Name of schema to validate against.
            yaml_file (str): Name of the configuration to validate.

        Returns:
            (tuple): contains JSON schema, YAML data.

        Raises:
            RuntimeError: If "name" or "basedir" is specified in
                        "batch_config:" instead of per batch in "batches:".
            jsonschema.ValidationError: If the configuration does not
                        conform to the schema (logged, then re-raised).
        """
        logging.debug("Assessing {} against {}".format(
            yaml_file, json_schema
        ))

        with open(yaml_file, "r") as fh:
            # NOTE(review): ``Loader`` is imported elsewhere in the module;
            #  confirm it is a safe loader if configurations may be untrusted.
            yaml_data = load(fh, Loader=Loader)

        # FIXME: this is a cheat for extreme batch numbers by allowing common
        #  parameters
        ensemble = yaml_data["ensemble"]
        if "batch_config" in ensemble:
            batch_config = ensemble["batch_config"]

            # Checked once up front: the condition does not depend on the
            # individual batch, and it must hold even with no batches.
            if any(k in ("name", "basedir") for k in batch_config):
                raise RuntimeError("'name' and 'basedir' should be defined "
                                   "for each batch, rather than all batches. "
                                   "Please move these from 'batch_config:' "
                                   "to 'batches:'.")

            for batch in ensemble["batches"]:
                for k, v in batch_config.items():
                    # Per-batch values win; shared values are assigned by
                    # reference, not copied.
                    batch.setdefault(k, v)

        with open(json_schema, "r") as fh:
            json_data = json.load(fh)

        try:
            jsonschema.validate(instance=yaml_data, schema=json_data)
        except jsonschema.ValidationError:
            logging.error("There's an error with configuration file: {}".
                          format(yaml_file))
            raise
        logging.info("Validated configuration file {} successfully".
                     format(yaml_file))
        return json_data, yaml_data

validate(json_schema, yaml_file) staticmethod

Validate a YAML configuration against a JSON schema.

Parameters:

Name Type Description Default
json_schema str

Name of schema to validate against.

required
yaml_file str

Name of the configuration to validate.

required

Returns:

Type Description
tuple

contains JSON schema, YAML data.

Raises:

Type Description
RuntimeError

If "name" or "basedir" is specified in "batch_config:" instead of in each entry of "batches:".

Source code in model_ensembler/config.py
@staticmethod
def validate(json_schema, yaml_file):
    """Validate a YAML configuration against a JSON schema.

    Entries of an optional ``batch_config`` section are copied into every
    batch that does not already define them, before the schema is applied;
    the returned YAML data includes those merged values.

    Args:
        json_schema (str): Name of schema to validate against.
        yaml_file (str): Name of the configuration to validate.

    Returns:
        (tuple): contains JSON schema, YAML data.

    Raises:
        RuntimeError: If "name" or "basedir" is specified in
                    "batch_config:" instead of per batch in "batches:".
        jsonschema.ValidationError: If the configuration does not conform
                    to the schema (logged, then re-raised).
    """
    logging.debug("Assessing {} against {}".format(
        yaml_file, json_schema
    ))

    with open(yaml_file, "r") as fh:
        # NOTE(review): ``Loader`` is imported elsewhere in the module;
        #  confirm it is a safe loader if configurations may be untrusted.
        yaml_data = load(fh, Loader=Loader)

    # FIXME: this is a cheat for extreme batch numbers by allowing common
    #  parameters
    ensemble = yaml_data["ensemble"]
    if "batch_config" in ensemble:
        batch_config = ensemble["batch_config"]

        # Checked once up front: the condition does not depend on the
        # individual batch, and it must hold even with no batches.
        if any(k in ("name", "basedir") for k in batch_config):
            raise RuntimeError("'name' and 'basedir' should be defined "
                               "for each batch, rather than all batches. "
                               "Please move these from 'batch_config:' "
                               "to 'batches:'.")

        for batch in ensemble["batches"]:
            for k, v in batch_config.items():
                # Per-batch values win; shared values are assigned by
                # reference, not copied.
                batch.setdefault(k, v)

    with open(json_schema, "r") as fh:
        json_data = json.load(fh)

    try:
        jsonschema.validate(instance=yaml_data, schema=json_data)
    except jsonschema.ValidationError:
        logging.error("There's an error with configuration file: {}".
                      format(yaml_file))
        raise
    logging.info("Validated configuration file {} successfully".
                 format(yaml_file))
    return json_data, yaml_data