Module hipe4ml.model_handler

Module containing the class used for wrapping the models from different ML libraries to build a new model with common methods


class ModelHandler (input_model=None, training_columns=None, model_params=None, task_type='classification')
Expand source code
class ModelHandler:
    """
    Class used for wrapping the models from different ML libraries to
    build a new model with common methods. Currently LightGBM, XGBoost
    (through their sklearn interface) and sklearn models are supported.

    Parameters
    ----------
    input_model: XGBoost, LightGBM or sklearn model

    training_columns: list
        Contains the name of the features used for the training.
        Example: ['dEdx', 'pT', 'ct']

    model_params: dict
        Model hyper-parameter values. For
        example (XGBoost): max_depth, learning_rate,
        n_estimators, gamma, min_child_weight, ...

    task_type: str
        Task type of the model: 'classification' or 'regression'
    """

    def __init__(self, input_model=None, training_columns=None, model_params=None, task_type='classification'):
        self.model = input_model
        self.training_columns = training_columns
        self.model_params = model_params
        # Always defined, so that an empty handler can still be queried
        self.model_string = None
        self._n_classes = None
        self.optuna_cross_val_score = None
        self._task_type = task_type
        if self._task_type not in ('classification', 'regression'):
            raise ValueError(
                "Task type must be either 'classification' or 'regression'")

        if self.model is not None:
            # Name of the top-level package the model comes from, e.g. 'xgboost'
            self.model_string = inspect.getmodule(
                self.model).__name__.partition('.')[0]

            if self.model_string not in ("xgboost", "lightgbm", "sklearn"):
                raise ValueError(
                    "Model must be either 'xgboost', 'lightgbm' or 'sklearn'")

            if self.model_params is None:
                self.model_params = self.model.get_params()

    def set_model_params(self, model_params):
        """
        Set the model (hyper-)parameters

        Parameters
        ----------
        model_params: dict
            Model hyper-parameter values. For
            example (XGBoost): max_depth, learning_rate,
            n_estimators, gamma, min_child_weight, ...
        """
        self.model_params = model_params

    def get_model_params(self):
        """
        Get the model (hyper-)parameters

        Returns
        -------
        out: dict
            Model hyper-parameter values. For
            example (XGBoost): max_depth, learning_rate,
            n_estimators, gamma, min_child_weight, ...
            If no model is wrapped, the parameters stored in the
            handler are returned instead.
        """
        if self.model is None:
            return self.model_params
        return self.model.get_params()

    def set_training_columns(self, training_columns):
        """
        Set the features used for the training process

        Parameters
        ----------
        training_columns: list
            Contains the name of the features used for the training.
            Example: ['dEdx', 'pT', 'ct']
        """
        self.training_columns = training_columns

    def get_training_columns(self):
        """
        Get the features used for the training process

        Returns
        -------
        out: list
            Names of the features used for the training.
            Example: ['dEdx', 'pT', 'ct']
        """
        return self.training_columns

    def get_original_model(self):
        """
        Get the original unwrapped model

        Returns
        -------
        out: XGBoost, LGBM or sklearn model
        """
        return self.model

    def get_model_module(self):
        """
        Get the string containing the name
        of the model module

        Returns
        -------
        out: str
            Name of the model module, or None if it was never set
        """
        # getattr: the attribute may be missing on handlers created without a model
        return getattr(self, 'model_string', None)

    def get_n_classes(self):
        """
        Get the number of classes

        Returns
        -------
        out: int
            Number of classes
        """
        return self._n_classes

    def get_task_type(self):
        """
        Get the task type of the model

        Returns
        -------
        out: str
            Task type of the model: 'classification' or 'regression'
        """
        return self._task_type

    def fit(self, x_train, y_train, **kwargs):
        """
        Fit Model

        Parameters
        ----------
        x_train: array-like, sparse matrix
            Training data

        y_train: array-like, sparse matrix
            Target data

        **kwargs:
            Extra kwargs passed on to the model fit method
        """
        if self._task_type == 'classification':
            self._n_classes = len(np.unique(y_train))
        if self.training_columns is None:
            self.training_columns = list(x_train.columns)

        self.model.fit(x_train[self.training_columns], y_train, **kwargs)

    def predict(self, x_test, output_margin=True, **kwargs):
        """
        Return model prediction for the array x_test

        Parameters
        ----------
        x_test: hipe4ml tree_handler, array-like, sparse matrix
            The input sample.

        output_margin: bool
            Whether to output the raw untransformed margin value. If False model
            probabilities are returned. Not used when task_type is 'regression'.

        **kwargs:
            Extra kwargs passed on to the following model prediction function:
            if (task_type == 'classification')
            - predict() (XGBoost and LGBM) or decision_function() (sklearn) if output_margin==True
            - predict_proba() if output_margin==False
            if (task_type == 'regression')
            - predict()

        Returns
        -------
        out: numpy array
            Model predictions

        Raises
        ------
        ValueError
            If output_margin is True and the sklearn model has no decision_function()
        """
        if isinstance(x_test, hipe4ml.tree_handler.TreeHandler):
            x_test = x_test.get_data_frame()

        x_test = x_test[self.training_columns]

        # regression
        if self._task_type == 'regression':
            return self.model.predict(x_test, **kwargs)

        # classification
        if output_margin:
            if self.model_string == 'xgboost':
                return self.model.predict(x_test, output_margin=True, **kwargs)
            if self.model_string == 'lightgbm':
                return self.model.predict(x_test, raw_score=True, **kwargs)
            if self.model_string == 'sklearn':
                if not hasattr(self.model, 'decision_function'):
                    raise ValueError(
                        "This Model does not support a decision_function(): use output_margin=False")
                return self.model.decision_function(x_test, **kwargs).ravel()

        pred = self.model.predict_proba(x_test, **kwargs)
        # in case of binary classification return only the scores of
        # the signal class
        if pred.shape[1] <= 2:
            pred = pred[:, 1]
        return pred

    def train_test_model(self, data, return_prediction=False, output_margin=False, average='macro',
                         multi_class_opt='raise', **kwargs):
        """
        Perform the training and the testing of the model. The model performance is estimated
        using the ROC AUC metric for classification and the MSE for regression.

        Parameters
        ----------
        data: list
            Contains respectively: training
            set dataframe, training label array,
            test set dataframe, test label array

        return_prediction: bool
            If True Model predictions on the test set are returned

        output_margin: bool
            Whether to output the raw untransformed margin value. If False model
            probabilities are returned. Not used when task_type is 'regression'.

        average: string
            Option for the average of ROC AUC scores used only in case of multi-classification.
            You can choose between 'macro' and 'weighted'. For more information see
            https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html

        multi_class_opt: string
            Option to compute ROC AUC scores used only in case of multi-classification.
            The one-vs-one 'ovo' and one-vs-rest 'ovr' approaches are available

        **kwargs: dict
            Extra kwargs passed on to the model fit method

        Returns
        -------
        out: numpy array or None
            If return_prediction==True, Model predictions on the test set are returned
        """
        print(f"Training {self.model_string} model for {self._task_type}")
        if self._task_type == 'classification':
            # the number of classes is meaningful only for classification
            self._n_classes = len(np.unique(data[1]))
            print('Number of detected classes:', self._n_classes)

        # final training with the optimized hyperparams
        print('Training the model: ...')
        self.fit(data[0], data[1], **kwargs)
        print('Training the model: Done!')
        print('Testing the model: ...')
        y_pred = self.predict(data[2], output_margin=output_margin)
        if self._task_type == 'classification':
            roc_score = roc_auc_score(
                data[3], y_pred, average=average, multi_class=multi_class_opt)
            print(f'ROC_AUC_score: {roc_score:.6f}')
        else:
            mse_score = mean_squared_error(data[3], y_pred)
            print(f'Mean squared error: {mse_score:.6f}')
        print('Testing the model: Done!')
        if return_prediction:
            return y_pred
        return None

    def optimize_params_optuna(self, data, hyperparams_ranges, cross_val_scoring, nfold=5, direction='maximize',
                               optuna_sampler=None, resume_study=None, save_study=None, **kwargs):
        """
        Perform hyperparameter optimization of ModelHandler using the Optuna module.
        The model hyperparameters are automatically set as the ones that provided the
        best result during the optimization.

        Parameters
        ----------
        data: list
            Contains respectively: training
            set dataframe, training label array,
            test set dataframe, test label array

        hyperparams_ranges: dict
            Hyperparameter ranges (in tuples or list). If a parameter is not
            in a tuple or a list it will be considered constant.
            Important: the type of the params must be preserved
            when passing the ranges.
            For example:
            dict={
                'max_depth': (10, 100),
                'learning_rate': (0.01, 0.03),
                'n_jobs': 8
            }

        cross_val_scoring: string, callable or None
            Score metrics used for the cross-validation.
            A string (see sklearn model evaluation documentation:
            https://scikit-learn.org/stable/modules/model_evaluation.html)
            or a scorer callable object / function with signature scorer(estimator, X, y)
            which should return only a single value.
            In binary classification 'roc_auc' is suggested.
            In multi-classification one between 'roc_auc_ovr', 'roc_auc_ovo',
            'roc_auc_ovr_weighted' and 'roc_auc_ovo_weighted' is suggested.
            For more information see
            https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html

        nfold: int
            Number of folds to calculate the cross validation error

        direction: str
            The direction of optimization. Either 'maximize' or 'minimize'.
            (e.g. for the metric 'roc_auc' the direction is 'maximize')

        optuna_sampler: optuna.samplers.BaseSampler
            Sampler to be used for the optuna (maxi-)minimisation.
            If None, default TPESampler is used. For more information see:
            https://optuna.readthedocs.io/en/stable/reference/samplers/index.html

        resume_study: str
            A string indicating the filename of the study to be resumed.
            If None, the study is not resumed.

        save_study: str
            A string indicating the filename of the study. If None,
            the study is not saved into a file.

        **kwargs: dict
            Optuna study parameters

        Returns
        -------
        study: optuna.study.Study
            The optuna object which stores the whole study. See Optuna's documentation for more details:
            https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html
        """
        if self._task_type == 'classification':
            self._n_classes = len(np.unique(data[1]))
        if self.training_columns is None:
            self.training_columns = list(data[0].columns)

        x_train, y_train, _, _ = data

        # Entries that are not a (low, high) tuple/list are kept fixed in every trial
        constant_params = {key: value for key, value in hyperparams_ranges.items()
                           if not isinstance(value, (tuple, list))}

        def _suggest_params(trial):
            # Draw one value per ranged hyperparameter, preserving int/float type
            params = dict(constant_params)
            for key, value in hyperparams_ranges.items():
                if key in constant_params:
                    continue
                if isinstance(value[0], int):
                    params[key] = trial.suggest_int(key, value[0], value[1])
                elif isinstance(value[0], float):
                    params[key] = trial.suggest_float(key, value[0], value[1])
            return params

        def _objective(trial):
            # Mean cross-validation score of a fresh model copy with the trial parameters
            params = _suggest_params(trial)
            model_copy = deepcopy(self.model)
            model_copy.set_params(**{**self.model_params, **params})
            return np.mean(cross_val_score(model_copy, x_train[self.training_columns], y_train,
                                           cv=nfold, scoring=cross_val_scoring, n_jobs=1))

        if resume_study:
            # NOTE: unpickling can execute arbitrary code, resume only trusted study files
            with open(resume_study, 'rb') as resume_study_file:
                study = pickle.load(resume_study_file)
        else:
            study = optuna.create_study(
                direction=direction, sampler=optuna_sampler)

        study.optimize(_objective, **kwargs)

        if save_study:
            with open(save_study, 'wb') as study_file:
                pickle.dump(study, study_file)

        print(f"Number of finished trials: {len(study.trials)}")
        print("Best trial:")
        best_trial = study.best_trial

        print(f"Value: {best_trial.value}")
        print("Params: ")
        for key, value in best_trial.params.items():
            print(f"    {key}: {value}")

        self.set_model_params(
            {**self.model_params, **constant_params, **best_trial.params})
        self.optuna_cross_val_score = best_trial.value

        return study

    def dump_original_model(self, filename, xgb_format=False):
        """
        Save the trained model into a pickle
        file. Only for xgboost models it is also given
        the possibility to save them into a .model file

        Parameters
        ----------
        filename: str
            Name of the file in which the model is saved

        xgb_format : bool
            If True saves the xgboost model into a .model file
        """
        if not xgb_format:
            with open(filename, "wb") as output_file:
                pickle.dump(self.model, output_file)
        elif self.model_string == 'xgboost':
            self.model.save_model(filename)
        else:
            print("File not saved: only xgboost models support the .model extension")

    def dump_model_handler(self, filename):
        """
        Save the model handler into a pickle file

        Parameters
        ----------
        filename: str
            Name of the file in which the model is saved
        """
        with open(filename, "wb") as output_file:
            pickle.dump(self, output_file)

    def load_model_handler(self, filename):
        """
        Load a model handler saved into a pickle file

        Parameters
        ----------
        filename: str
            Name of the file in which the model is saved
        """
        # NOTE: unpickling can execute arbitrary code, load only trusted files
        with open(filename, "rb") as input_file:
            loaded_model = pickle.load(input_file)

        self.model = loaded_model.get_original_model()
        self.training_columns = loaded_model.get_training_columns()
        self.model_params = loaded_model.get_model_params()
        self.model_string = loaded_model.get_model_module()
        self._n_classes = loaded_model.get_n_classes()
        self._task_type = loaded_model.get_task_type()
        # getattr: the attribute may be missing on the loaded object
        self.optuna_cross_val_score = getattr(
            loaded_model, 'optuna_cross_val_score', None)

Class used for wrapping the models from different ML libraries to build a new model with common methods. Currently LightGBM, XGBoost (through their sklearn interface) and sklearn models are supported.


input_model : XGBoost, LightGBM or sklearn model
training_columns : list
Contains the name of the features used for the training. Example: ['dEdx', 'pT', 'ct']
model_params : dict
Model hyper-parameter values. For example (XGBoost): max_depth, learning_rate, n_estimators, gamma, min_child_weight, …
task_type : str
Task type of the model: 'classification' or 'regression'


def dump_model_handler(self, filename)
Expand source code
def dump_model_handler(self, filename):
    """
    Save the model handler into a pickle file

    Parameters
    ----------
    filename: str
        Name of the file in which the model is saved
    """
    with open(filename, "wb") as output_file:
        pickle.dump(self, output_file)

Save the model handler into a pickle file


filename : str
Name of the file in which the model is saved
def dump_original_model(self, filename, xgb_format=False)
Expand source code
def dump_original_model(self, filename, xgb_format=False):
    """
    Save the trained model into a pickle
    file. Only for xgboost models it is also given
    the possibility to save them into a .model file

    Parameters
    ----------
    filename: str
        Name of the file in which the model is saved

    xgb_format : bool
        If True saves the xgboost model into a .model file
    """
    if not xgb_format:
        with open(filename, "wb") as output_file:
            pickle.dump(self.model, output_file)
    elif self.model_string == 'xgboost':
        self.model.save_model(filename)
    else:
        print("File not saved: only xgboost models support the .model extension")

Save the trained model into a pickle file. Only for xgboost models it is also given the possibility to save them into a .model file


filename : str
Name of the file in which the model is saved
xgb_format : bool
If True saves the xgboost model into a .model file
def fit(self, x_train, y_train, **kwargs)
Expand source code
def fit(self, x_train, y_train, **kwargs):
    """
    Fit Model

    Parameters
    ----------
    x_train: array-like, sparse matrix
        Training data

    y_train: array-like, sparse matrix
        Target data

    **kwargs:
        Extra kwargs passed on to the model fit method
    """
    if self._task_type == 'classification':
        self._n_classes = len(np.unique(y_train))
    if self.training_columns is None:
        self.training_columns = list(x_train.columns)

    self.model.fit(x_train[self.training_columns], y_train, **kwargs)

Fit Model


x_train : array-like, sparse matrix
Training data
y_train : array-like, sparse matrix
Target data

**kwargs: Extra kwargs passed on to method

def get_model_module(self)
Expand source code
def get_model_module(self):
    """
    Get the string containing the name
    of the model module

    Returns
    -------
    out: str
        Name of the model module, or None if it was never set
    """
    # getattr: the attribute may be missing on handlers created without a model
    return getattr(self, 'model_string', None)

Get the string containing the name of the model module


out : str
Name of the model module
def get_model_params(self)
Expand source code
def get_model_params(self):
    """
    Get the model (hyper-)parameters

    Returns
    -------
    out: dict
        Model hyper-parameter values. For
        example (XGBoost): max_depth, learning_rate,
        n_estimators, gamma, min_child_weight, ...
        If no model is wrapped, the parameters stored in the
        handler are returned instead.
    """
    if self.model is None:
        return self.model_params
    return self.model.get_params()

Get the model (hyper-)parameters


out : dict
Model hyper-parameter values. For example (XGBoost): max_depth, learning_rate, n_estimators, gamma, min_child_weight, …
def get_n_classes(self)
Expand source code
def get_n_classes(self):
    """
    Get the number of classes

    Returns
    -------
    out: int
        Number of classes
    """
    return self._n_classes

Get the number of classes


out : int
Number of classes
def get_original_model(self)
Expand source code
def get_original_model(self):
    """
    Get the original unwrapped model

    Returns
    -------
    out: XGBoost, LGBM or sklearn model
    """
    return self.model

Get the original unwrapped model


out : XGBoost, LGBM or sklearn model
def get_task_type(self)
Expand source code
def get_task_type(self):
    """
    Get the task type of the model

    Returns
    -------
    out: str
        Task type of the model: 'classification' or 'regression'
    """
    return self._task_type

Get the task type of the model


out : str
Task type of the model: 'classification' or 'regression'
def get_training_columns(self)
Expand source code
def get_training_columns(self):
    """
    Get the features used for the training process

    Returns
    -------
    out: list
        Names of the features used for the training.
        Example: ['dEdx', 'pT', 'ct']
    """
    return self.training_columns

Get the features used for the training process


out : list
Names of the features used for the training. Example: ['dEdx', 'pT', 'ct']
def load_model_handler(self, filename)
Expand source code
def load_model_handler(self, filename):
    """
    Load a model handler saved into a pickle file

    Parameters
    ----------
    filename: str
        Name of the file in which the model is saved
    """
    # NOTE: unpickling can execute arbitrary code, load only trusted files
    with open(filename, "rb") as input_file:
        loaded_model = pickle.load(input_file)

    self.model = loaded_model.get_original_model()
    self.training_columns = loaded_model.get_training_columns()
    self.model_params = loaded_model.get_model_params()
    self.model_string = loaded_model.get_model_module()
    self._n_classes = loaded_model.get_n_classes()
    self._task_type = loaded_model.get_task_type()
    # getattr: the attribute may be missing on the loaded object
    self.optuna_cross_val_score = getattr(
        loaded_model, 'optuna_cross_val_score', None)

Load a model handler saved into a pickle file


filename : str
Name of the file in which the model is saved
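def optimize_params_optuna(self, data, hyperparams_ranges, cross_val_scoring, nfold=5, direction='maximize', optuna_sampler=None, resume_study=None, save_study=None, **kwargs)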
Expand source code
def optimize_params_optuna(self, data, hyperparams_ranges, cross_val_scoring, nfold=5, direction='maximize',
                           optuna_sampler=None, resume_study=None, save_study=None, **kwargs):
    """
    Perform hyperparameter optimization of ModelHandler using the Optuna module.
    The model hyperparameters are automatically set as the ones that provided the
    best result during the optimization.

    Parameters
    ----------
    data: list
        Contains respectively: training
        set dataframe, training label array,
        test set dataframe, test label array

    hyperparams_ranges: dict
        Hyperparameter ranges (in tuples or list). If a parameter is not
        in a tuple or a list it will be considered constant.
        Important: the type of the params must be preserved
        when passing the ranges.
        For example:
        dict={
            'max_depth': (10, 100),
            'learning_rate': (0.01, 0.03),
            'n_jobs': 8
        }

    cross_val_scoring: string, callable or None
        Score metrics used for the cross-validation.
        A string (see sklearn model evaluation documentation:
        https://scikit-learn.org/stable/modules/model_evaluation.html)
        or a scorer callable object / function with signature scorer(estimator, X, y)
        which should return only a single value.
        In binary classification 'roc_auc' is suggested.
        In multi-classification one between 'roc_auc_ovr', 'roc_auc_ovo',
        'roc_auc_ovr_weighted' and 'roc_auc_ovo_weighted' is suggested.
        For more information see
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html

    nfold: int
        Number of folds to calculate the cross validation error

    direction: str
        The direction of optimization. Either 'maximize' or 'minimize'.
        (e.g. for the metric 'roc_auc' the direction is 'maximize')

    optuna_sampler: optuna.samplers.BaseSampler
        Sampler to be used for the optuna (maxi-)minimisation.
        If None, default TPESampler is used. For more information see:
        https://optuna.readthedocs.io/en/stable/reference/samplers/index.html

    resume_study: str
        A string indicating the filename of the study to be resumed.
        If None, the study is not resumed.

    save_study: str
        A string indicating the filename of the study. If None,
        the study is not saved into a file.

    **kwargs: dict
        Optuna study parameters

    Returns
    -------
    study: optuna.study.Study
        The optuna object which stores the whole study. See Optuna's documentation for more details:
        https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html
    """
    if self._task_type == 'classification':
        self._n_classes = len(np.unique(data[1]))
    if self.training_columns is None:
        self.training_columns = list(data[0].columns)

    x_train, y_train, _, _ = data

    # Entries that are not a (low, high) tuple/list are kept fixed in every trial
    constant_params = {key: value for key, value in hyperparams_ranges.items()
                       if not isinstance(value, (tuple, list))}

    def _suggest_params(trial):
        # Draw one value per ranged hyperparameter, preserving int/float type
        params = dict(constant_params)
        for key, value in hyperparams_ranges.items():
            if key in constant_params:
                continue
            if isinstance(value[0], int):
                params[key] = trial.suggest_int(key, value[0], value[1])
            elif isinstance(value[0], float):
                params[key] = trial.suggest_float(key, value[0], value[1])
        return params

    def _objective(trial):
        # Mean cross-validation score of a fresh model copy with the trial parameters
        params = _suggest_params(trial)
        model_copy = deepcopy(self.model)
        model_copy.set_params(**{**self.model_params, **params})
        return np.mean(cross_val_score(model_copy, x_train[self.training_columns], y_train,
                                       cv=nfold, scoring=cross_val_scoring, n_jobs=1))

    if resume_study:
        # NOTE: unpickling can execute arbitrary code, resume only trusted study files
        with open(resume_study, 'rb') as resume_study_file:
            study = pickle.load(resume_study_file)
    else:
        study = optuna.create_study(
            direction=direction, sampler=optuna_sampler)

    study.optimize(_objective, **kwargs)

    if save_study:
        with open(save_study, 'wb') as study_file:
            pickle.dump(study, study_file)

    print(f"Number of finished trials: {len(study.trials)}")
    print("Best trial:")
    best_trial = study.best_trial

    print(f"Value: {best_trial.value}")
    print("Params: ")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")

    self.set_model_params(
        {**self.model_params, **constant_params, **best_trial.params})
    self.optuna_cross_val_score = best_trial.value

    return study

Perform hyperparameter optimization of ModelHandler using the Optuna module. The model hyperparameters are automatically set as the ones that provided the best result during the optimization.


data : list
Contains respectively: training set dataframe, training label array, test set dataframe, test label array
hyperparams_ranges : dict
Hyperparameter ranges (in tuples or list). If a parameter is not in a tuple or a list it will be considered constant. Important: the type of the params must be preserved when passing the ranges. For example: dict={'max_depth': (10, 100), 'learning_rate': (0.01, 0.03), 'n_jobs': 8}
cross_val_scoring : string, callable or None
Score metrics used for the cross-validation. A string (see the sklearn model evaluation documentation: https://scikit-learn.org/stable/modules/model_evaluation.html) or a scorer callable object / function with signature scorer(estimator, X, y) which should return only a single value. In binary classification 'roc_auc' is suggested. In multi-classification one between 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted' and 'roc_auc_ovo_weighted' is suggested. For more information see https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
direction : str
The direction of optimization. Either 'maximize' or 'minimize'. (e.g. for the metric 'roc_auc' the direction is 'maximize')
optuna_sampler : optuna.samplers.BaseSampler
Sampler to be used for the optuna (maxi-)minimisation. If None, default TPESampler is used. For more information see: https://optuna.readthedocs.io/en/stable/reference/samplers/index.html
nfold : int
Number of folds to calculate the cross validation error
resume_study : str
A string indicating the filename of the study to be resumed. If None, the study is not resumed.
save_study : str
A string indicating the filename of the study. If None, the study is not saved into a file.
**kwargs : dict
Optuna study parameters


study :
The Optuna object which stores the whole study. See Optuna's documentation for more details: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html
def predict(self, x_test, output_margin=True, **kwargs)
Expand source code
def predict(self, x_test, output_margin=True, **kwargs):
    """
    Return model prediction for the array x_test

    Parameters
    ----------
    x_test: hipe4ml tree_handler, array-like, sparse matrix
        The input sample.

    output_margin: bool
        Whether to output the raw untransformed margin value. If False model
        probabilities are returned. Not used when task_type is 'regression'.

    **kwargs:
        Extra kwargs passed on to the following model prediction function:
        if (task_type == 'classification')
        - predict() (XGBoost and LGBM) or decision_function() (sklearn) if output_margin==True
        - predict_proba() if output_margin==False
        if (task_type == 'regression')
        - predict()

    Returns
    -------
    out: numpy array
        Model predictions

    Raises
    ------
    ValueError
        If output_margin is True and the sklearn model has no decision_function()
    """
    if isinstance(x_test, hipe4ml.tree_handler.TreeHandler):
        x_test = x_test.get_data_frame()

    x_test = x_test[self.training_columns]

    # regression
    if self._task_type == 'regression':
        return self.model.predict(x_test, **kwargs)

    # classification
    if output_margin:
        if self.model_string == 'xgboost':
            return self.model.predict(x_test, output_margin=True, **kwargs)
        if self.model_string == 'lightgbm':
            return self.model.predict(x_test, raw_score=True, **kwargs)
        if self.model_string == 'sklearn':
            if not hasattr(self.model, 'decision_function'):
                raise ValueError(
                    "This Model does not support a decision_function(): use output_margin=False")
            return self.model.decision_function(x_test, **kwargs).ravel()

    pred = self.model.predict_proba(x_test, **kwargs)
    # in case of binary classification return only the scores of
    # the signal class
    if pred.shape[1] <= 2:
        pred = pred[:, 1]
    return pred

Return model prediction for the array x_test

x_test : hipe4ml tree_handler, array-like, sparse matrix
The input sample.
output_margin : bool
Whether to output the raw untransformed margin value. If False model probabilities are returned. Not used when task_type is 'regression'.

**kwargs: Extra kwargs passed on to the following model prediction function. If task_type == 'classification': predict() (XGBoost and LGBM) or decision_function() (sklearn) if output_margin==True, predict_proba() if output_margin==False. If task_type == 'regression': predict().


out : numpy array
Model predictions
def set_model_params(self, model_params)
Expand source code
def set_model_params(self, model_params):
    """
    Set the model (hyper-)parameters

    Parameters
    ----------
    model_params: dict
        Model hyper-parameter values. For
        example (XGBoost): max_depth, learning_rate,
        n_estimators, gamma, min_child_weight, ...
    """
    self.model_params = model_params

Set the model (hyper-)parameters


model_params : dict
Model hyper-parameter values. For example (XGBoost): max_depth, learning_rate, n_estimators, gamma, min_child_weight, …
def set_training_columns(self, training_columns)
Expand source code
def set_training_columns(self, training_columns):
    """
    Set the features used for the training process

    Parameters
    ----------
    training_columns: list
        Contains the name of the features used for the training.
        Example: ['dEdx', 'pT', 'ct']
    """
    self.training_columns = training_columns

Set the features used for the training process


training_columns : list
Contains the name of the features used for the training. Example: ['dEdx', 'pT', 'ct']
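def train_test_model(self, data, return_prediction=False, output_margin=False, average='macro', multi_class_opt='raise', **kwargs)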
Expand source code
def train_test_model(self, data, return_prediction=False, output_margin=False, average='macro',
                     multi_class_opt='raise', **kwargs):
    """
    Perform the training and the testing of the model. The model performance is estimated
    using the ROC AUC metric for classification and the MSE for regression.

    Parameters
    ----------
    data: list
        Contains respectively: training
        set dataframe, training label array,
        test set dataframe, test label array

    return_prediction: bool
        If True Model predictions on the test set are returned

    output_margin: bool
        Whether to output the raw untransformed margin value. If False model
        probabilities are returned. Not used when task_type is 'regression'.

    average: string
        Option for the average of ROC AUC scores used only in case of multi-classification.
        You can choose between 'macro' and 'weighted'. For more information see
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html

    multi_class_opt: string
        Option to compute ROC AUC scores used only in case of multi-classification.
        The one-vs-one 'ovo' and one-vs-rest 'ovr' approaches are available

    **kwargs: dict
        Extra kwargs passed on to the model fit method

    Returns
    -------
    out: numpy array or None
        If return_prediction==True, Model predictions on the test set are returned
    """
    print(f"Training {self.model_string} model for {self._task_type}")
    if self._task_type == 'classification':
        # the number of classes is meaningful only for classification
        self._n_classes = len(np.unique(data[1]))
        print('Number of detected classes:', self._n_classes)

    # final training with the optimized hyperparams
    print('Training the model: ...')
    self.fit(data[0], data[1], **kwargs)
    print('Training the model: Done!')
    print('Testing the model: ...')
    y_pred = self.predict(data[2], output_margin=output_margin)
    if self._task_type == 'classification':
        roc_score = roc_auc_score(
            data[3], y_pred, average=average, multi_class=multi_class_opt)
        print(f'ROC_AUC_score: {roc_score:.6f}')
    else:
        mse_score = mean_squared_error(data[3], y_pred)
        print(f'Mean squared error: {mse_score:.6f}')
    print('Testing the model: Done!')
    if return_prediction:
        return y_pred
    return None

Perform the training and the testing of the model. The model performance is estimated using the ROC AUC metric for classification and the MSE for regression.


data : list
Contains respectively: training set dataframe, training label array, test set dataframe, test label array
return_prediction : bool
If True Model predictions on the test set are returned
output_margin : bool
Whether to output the raw untransformed margin value. If False model probabilities are returned. Not used when task_type is 'regression'.
average : string
Option for the average of ROC AUC scores used only in case of multi-classification. You can choose between 'macro' and 'weighted'. For more information see https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
multi_class_opt : string
Option to compute ROC AUC scores used only in case of multi-classification. The one-vs-one 'ovo' and one-vs-rest 'ovr' approaches are available
**kwargs : dict
Extra kwargs passed on to the model fit method


out : numpy array or None
If return_prediction==True, Model predictions on the test set are returned