Now you'll compare the models we've covered using a single dataset — the **breast cancer dataset**. The target variable is the `'diagnosis'` column, where `1` represents malignant and `0` represents benign cases.

You will apply `GridSearchCV` to each model to find the best parameters. In this task, you'll use **recall** as the scoring metric because **minimizing false negatives** is crucial. To have `GridSearchCV` select the best parameters based on recall, set `scoring='recall'`.

import unittest
import importlib


def _dynamic_test(test_case, condition, success_message, failure_message):
    """Report a pass or failure on ``test_case`` with a human-readable message.

    The chosen message is written to ``test_case._testMethodName`` before the
    outcome is reported (presumably so the test runner displays the message
    in place of the method name -- confirm against the runner in use).

    Args:
        test_case: The running test case; must provide ``assertTrue`` and
            ``fail``.
        condition: Truthy when the check passed, falsy otherwise.
        success_message: Message used when ``condition`` is truthy.
        failure_message: Message used when ``condition`` is falsy; passed to
            ``test_case.fail``.
    """
    passed = bool(condition)
    message = success_message if passed else failure_message

    # Rename the test first so the message is in place when the outcome is
    # reported.
    test_case._testMethodName = message

    if passed:
        test_case.assertTrue(True, message)
    else:
        test_case.fail(message)


class TestUserCode(unittest.TestCase):
    """Checks for the learner's ``user_code`` module.

    Two families of checks are performed:

    * ``*_params`` variables must be ``dict`` objects equal to the expected
      hyperparameter grids.
    * ``*_grid`` variables must be ``GridSearchCV`` instances wrapping the
      expected estimator type, with the expected ``param_grid`` and with
      ``scoring='recall'`` (the task statement requires recall scoring).

    Each check reports its outcome through ``_dynamic_test``.
    """

    def _check_params(self, variable, expected_value):
        """Verify that ``user_code.<variable>`` is a dict equal to ``expected_value``.

        Args:
            variable: Name of the module-level variable to look up.
            expected_value: The exact ``dict`` the variable must equal.
        """
        # Imported lazily so an import error in the learner's code is
        # reported by the individual test rather than at collection time.
        import user_code

        actual_value = getattr(user_code, variable, None)
        if actual_value is None:
            condition = False
            failure_message = f"The `{variable}` variable is not declared."
        elif isinstance(actual_value, dict):
            condition = actual_value == expected_value
            failure_message = f"Expected `{variable}` to contain `{expected_value}`, but got `{actual_value}`."
        else:
            condition = False
            failure_message = f"`{variable}` is not a `dict`."

        _dynamic_test(
            self,
            condition,
            f"`{variable}` contains the correct values.",
            failure_message
        )

    def _check_grid(self, variable, estimator_cls, param_grid, estimator_label, params_variable):
        """Verify that ``user_code.<variable>`` is a correctly configured ``GridSearchCV``.

        Args:
            variable: Name of the module-level variable to look up.
            estimator_cls: Class the wrapped estimator must be an instance of.
            param_grid: The exact ``param_grid`` the search must use.
            estimator_label: Short estimator name shown in the success message.
            params_variable: Name of the parameter-grid variable shown in the
                success message.
        """
        import user_code
        from sklearn.model_selection import GridSearchCV

        actual_value = getattr(user_code, variable, None)
        if actual_value is None:
            condition = False
            failure_message = f"The `{variable}` variable is not declared."
        elif isinstance(actual_value, GridSearchCV):
            # The task requires recall-based model selection, so the scoring
            # argument is validated alongside the estimator and the grid.
            condition = (
                isinstance(actual_value.estimator, estimator_cls)
                and actual_value.param_grid == param_grid
                and actual_value.scoring == 'recall'
            )
            failure_message = (
                f"Expected `{variable}` to be a `GridSearchCV` with "
                f"`estimator={estimator_cls.__name__}()`, `param_grid={param_grid}`, "
                f"`scoring='recall'`, but got `estimator={actual_value.estimator}`, "
                f"`param_grid={actual_value.param_grid}`, "
                f"`scoring={actual_value.scoring!r}`."
            )
        else:
            condition = False
            failure_message = f"`{variable}` is not a `GridSearchCV`."

        _dynamic_test(
            self,
            condition,
            f"`{variable}` is a `GridSearchCV` with `estimator={estimator_label}`, "
            f"`param_grid={params_variable}` and `scoring='recall'`.",
            failure_message
        )

    def test_knn_params_is_correct(self):
        """``knn_params`` holds the expected k-NN grid."""
        self._check_params('knn_params', {'n_neighbors': [3, 5, 7, 12]})

    def test_lr_params_is_correct(self):
        """``lr_params`` holds the expected logistic-regression grid."""
        self._check_params('lr_params', {'C': [0.1, 1, 10]})

    def test_dt_params_is_correct(self):
        """``dt_params`` holds the expected decision-tree grid."""
        self._check_params(
            'dt_params',
            {'max_depth': [2, 4, 6, 10], 'min_samples_leaf': [1, 2, 4, 7]},
        )

    def test_rf_params_is_correct(self):
        """``rf_params`` holds the expected random-forest grid."""
        self._check_params(
            'rf_params',
            {'max_depth': [2, 4, 6], 'n_estimators': [20, 50, 100]},
        )

    def test_knn_grid_is_correct(self):
        """``knn_grid`` is a recall-scored search over ``KNeighborsClassifier``."""
        from sklearn.neighbors import KNeighborsClassifier

        self._check_grid(
            'knn_grid',
            KNeighborsClassifier,
            {'n_neighbors': [3, 5, 7, 12]},
            'knn',
            'knn_params',
        )

    def test_lr_grid_is_correct(self):
        """``lr_grid`` is a recall-scored search over ``LogisticRegression``."""
        from sklearn.linear_model import LogisticRegression

        self._check_grid(
            'lr_grid',
            LogisticRegression,
            {'C': [0.1, 1, 10]},
            'lr',
            'lr_params',
        )

    def test_dt_grid_is_correct(self):
        """``dt_grid`` is a recall-scored search over ``DecisionTreeClassifier``."""
        from sklearn.tree import DecisionTreeClassifier

        self._check_grid(
            'dt_grid',
            DecisionTreeClassifier,
            {'max_depth': [2, 4, 6, 10], 'min_samples_leaf': [1, 2, 4, 7]},
            'dt',
            'dt_params',
        )

    def test_rf_grid_is_correct(self):
        """``rf_grid`` is a recall-scored search over ``RandomForestClassifier``."""
        from sklearn.ensemble import RandomForestClassifier

        self._check_grid(
            'rf_grid',
            RandomForestClassifier,
            {'max_depth': [2, 4, 6], 'n_estimators': [20, 50, 100]},
            'rf',
            'rf_params',
        )


# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

test_main.py

In machine learning, classification is a predictive modeling task that assigns a class label to input data. Sounds difficult? Don't worry — we'll work through it together! Welcome to ML!

Challenge: Comparing Models

Solution

Awesome!

Challenge: Comparing Models

Solution

Awesome!