import unittest
import re
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.exceptions import NotFittedError

def _dynamic_test(tc, cond, ok, bad):
    if cond:
        tc._testMethodName = ok
        tc.assertTrue(True, ok)
    else:
        tc._testMethodName = bad
        tc.fail(bad)

class TestUserCode(unittest.TestCase):
    """Structural tests: verify steps exist and objects are properly built.
    No numeric equality checks.

    Every test imports ``user_code`` and reports its outcome through
    ``_dynamic_test``. Variables are read with ``getattr(..., None)`` so that
    a missing variable produces the test's own failure message rather than an
    ``AttributeError``.
    """

    def test_required_variables_declared(self):
        """Check that ``user_code`` defines every required module-level name."""
        import user_code
        names = [
            "df", "numeric_features", "categorical_features",
            "X", "y", "X_train", "X_test", "y_train", "y_test",
            "preprocess", "pipe", "y_pred", "test_accuracy"
        ]
        cond = all(hasattr(user_code, n) for n in names)
        _dynamic_test(
            self, cond,
            "All required variables are declared.",
            f"Expected variables {names} to be declared."
        )

    def test_split_types(self):
        """Check that ``X_train`` and ``X_test`` are pandas DataFrames."""
        import user_code
        x_train = getattr(user_code, "X_train", None)
        x_test = getattr(user_code, "X_test", None)
        cond = (
            isinstance(x_train, pd.DataFrame) and
            isinstance(x_test, pd.DataFrame)
        )
        _dynamic_test(
            self, cond,
            "Train/test splits exist with DataFrame features.",
            "Expected X_train and X_test to be pandas DataFrames."
        )

    def test_preprocess_is_columntransformer(self):
        """Check that ``preprocess`` is a ``ColumnTransformer`` instance."""
        import user_code
        cond = isinstance(getattr(user_code, "preprocess", None), ColumnTransformer)
        _dynamic_test(
            self, cond,
            "`preprocess` is a ColumnTransformer.",
            "Expected `preprocess` to be sklearn.compose.ColumnTransformer."
        )

    def test_pipeline_is_built(self):
        """Check that ``pipe`` is a ``Pipeline`` with a step named ``preprocess``."""
        import user_code
        pipe = getattr(user_code, "pipe", None)
        # `named_steps` is only consulted once `pipe` is known to be a Pipeline.
        cond = isinstance(pipe, Pipeline) and "preprocess" in pipe.named_steps
        _dynamic_test(
            self, cond,
            "`pipe` is a Pipeline with a `preprocess` step.",
            "Expected a sklearn Pipeline with a `preprocess` step."
        )

    def test_columntransformer_contains_scaler_and_ohe(self):
        """Check that ``preprocess`` holds a StandardScaler and a OneHotEncoder.

        Each may appear directly as a transformer or as a step of a Pipeline
        nested inside the ColumnTransformer.
        """
        import user_code
        preprocess = getattr(user_code, "preprocess", None)
        # `or ()` covers both a missing `transformers` attribute and a None value.
        transformers = getattr(preprocess, "transformers", None) or ()
        has_scaler = False
        has_ohe = False
        for _name, est, _cols in transformers:
            # A nested Pipeline contributes each of its steps; any other
            # estimator is inspected directly.
            if isinstance(est, Pipeline):
                candidates = [step[1] for step in est.steps]
            else:
                candidates = [est]
            if any(isinstance(c, StandardScaler) for c in candidates):
                has_scaler = True
            if any(isinstance(c, OneHotEncoder) for c in candidates):
                has_ohe = True
        cond = has_scaler and has_ohe
        _dynamic_test(
            self, cond,
            "The ColumnTransformer includes scaling for numeric and one-hot for categoricals.",
            "Expected StandardScaler and OneHotEncoder to be present in `preprocess`."
        )

    def test_pipeline_is_fitted_and_predicts(self):
        """Check that ``pipe.predict(X_test)`` yields one prediction per row."""
        import user_code
        try:
            preds = user_code.pipe.predict(user_code.X_test)
            cond = preds is not None and len(preds) == len(user_code.X_test)
        except Exception:
            # Deliberately broad: NotFittedError, a missing variable, or any
            # other error raised while predicting all count as a failed check.
            cond = False
        _dynamic_test(
            self, cond,
            "The pipeline is fitted and can predict on the test set.",
            "Expected `pipe.fit(X_train, y_train)` before predicting on X_test."
        )

    def test_accuracy_is_computed_and_printed(self):
        """Check that ``test_accuracy`` exists in ``user_code``.

        Only the presence of the variable is verified; its value and whether
        it was printed are not inspected.
        """
        import user_code
        cond = hasattr(user_code, "test_accuracy")
        _dynamic_test(
            self, cond,
            "`test_accuracy` is computed.",
            "Expected `test_accuracy = accuracy_score(y_test, y_pred)` to be computed."
        )

    def test_no_whole_dataset_fit_hint(self):
        """Soft guard against leakage: ensure train_test_split is used in source.

        Reads ``user_code.py`` from the current working directory and looks
        for the literal text ``train_test_split``.
        """
        # Python source defaults to UTF-8; decoding explicitly avoids
        # locale-dependent UnicodeDecodeError. Undecodable bytes are replaced
        # because only an ASCII substring is searched for.
        with open("user_code.py", "r", encoding="utf-8", errors="replace") as f:
            src = f.read()
        cond = "train_test_split" in src
        _dynamic_test(
            self, cond,
            "Train/test split is present in the code (hint against leakage).",
            "Expected a train/test split before fitting."
        )

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_main.py

Focus exclusively on feature scaling and normalization using Python's scikit-learn. Learn why scaling is necessary, how to apply StandardScaler, MinMaxScaler, and MaxAbsScaler, and gain hands-on practice with real data preprocessing challenges. Ideal for preparing data for distance-based algorithms and robust unsupervised learning workflows.

Challenge: Build a Preprocessing Pipeline

Solution