import unittest
import re

def _dynamic_test(test_case, condition, success, failure):
    """Relabel ``test_case`` with a readable message, then pass or fail it.

    The test's ``_testMethodName`` is overwritten with ``success`` or
    ``failure`` before the outcome is reported -- presumably so the
    reporting harness shows the message rather than the method name
    (confirm against the runner that consumes these results).

    Args:
        test_case: Object exposing ``assertTrue`` and ``fail`` (in this file,
            a ``unittest.TestCase`` instance).
        condition: Any value; only its truthiness is used.
        success: Label/message applied when ``condition`` is truthy.
        failure: Label/message applied when ``condition`` is falsy.
    """
    passed = bool(condition)

    # The label is swapped before the assertion so it is already in place
    # when ``fail`` raises.
    test_case._testMethodName = success if passed else failure

    if passed:
        test_case.assertTrue(True, success)
    else:
        test_case.fail(failure)


class TestUserCode(unittest.TestCase):
    """Grades the learner's ``user_code`` module for the review-cleaning task.

    Each test imports ``user_code`` lazily (inside the test method) and
    reports its outcome through ``_dynamic_test``, which replaces the test's
    name with a human-readable success or failure message.
    """

    # Number of entries ``cleaned_reviews`` is expected to contain.
    EXPECTED_REVIEW_COUNT = 5

    @staticmethod
    def _read_user_source():
        """Return the text of ``user_code.py`` from the working directory.

        The file is decoded as UTF-8 explicitly: Python source defaults to
        UTF-8, and relying on the platform's locale encoding can fail when
        the learner's file contains non-ASCII characters such as emoji.
        """
        with open("user_code.py", "r", encoding="utf-8") as f:
            return f.read()

    def test_cleaned_reviews_declared(self):
        """``user_code`` must define a ``cleaned_reviews`` attribute."""
        import user_code
        _dynamic_test(
            self,
            hasattr(user_code, "cleaned_reviews"),
            "The `cleaned_reviews` list is declared.",
            "Expected `cleaned_reviews` to be declared."
        )

    def test_cleaned_reviews_is_list(self):
        """``cleaned_reviews`` must be a ``list``."""
        import user_code
        # getattr with a default keeps a missing variable a regular test
        # failure (with the friendly message) instead of an AttributeError.
        _dynamic_test(
            self,
            isinstance(getattr(user_code, "cleaned_reviews", None), list),
            "The `cleaned_reviews` variable is a list.",
            "Expected `cleaned_reviews` to be a list."
        )

    def test_re_sub_used(self):
        """The learner's source text must contain ``re.sub``."""
        src = self._read_user_source()
        _dynamic_test(
            self,
            re.search(r"re\.sub", src) is not None,
            "The `re.sub()` function is used.",
            "Expected `re.sub()` to be used."
        )

    def test_stemming_used(self):
        """The learner's source text must contain ``stemmer.stem``."""
        src = self._read_user_source()
        _dynamic_test(
            self,
            re.search(r"stemmer\.stem", src) is not None,
            "The `stemmer.stem()` method is used for stemming.",
            "Expected `stemmer.stem()` to be used."
        )

    def test_cleaning_output_structure(self):
        """Check structural properties of ``cleaned_reviews``."""
        import user_code

        # We don't assert EXACT tokens (natural language cleaning varies).
        # We only verify:
        # - the list holds EXPECTED_REVIEW_COUNT reviews
        # - every review equals its own lowercase form
        # - no character (other than newline) appears three times in a row
        # NOTE(review): emoji and @mention removal are NOT asserted here,
        # although an earlier version of this comment listed them.

        try:
            reviews = user_code.cleaned_reviews
            cond_count = len(reviews) == self.EXPECTED_REVIEW_COUNT
            cond_lower = all(r == r.lower() for r in reviews)
            cond_repeat = all(not re.search(r"(.)\1\1", r) for r in reviews)
            condition = cond_count and cond_lower and cond_repeat

            failure_message = (
                f"Cleaned reviews do not meet structural requirements: {reviews}"
            )
        except Exception:
            # Deliberately broad: a missing variable, a non-sequence, or
            # non-string items should all surface as one friendly failure
            # rather than a raw traceback for the learner.
            condition = False
            failure_message = "The `cleaned_reviews` list is incorrect or missing."

        _dynamic_test(
            self,
            condition,
            "The cleaned reviews follow the structural cleaning rules.",
            failure_message
        )


# Script entry point: when this file is executed directly (not imported),
# hand control to unittest's command-line runner for the tests in this module.
if __name__ == "__main__":
    unittest.main()


test_main.py

Master sophisticated data cleaning strategies in Python, including fuzzy matching, deduplication, record linkage, advanced text normalization, anomaly detection, and repairing corrupted datasets. This course provides hands-on, practical approaches using Python and popular libraries to ensure your data is accurate, consistent, and ready for analysis.

Explore techniques for identifying and comparing similar but non-identical data entries using Python.

Learn to identify and remove duplicate records from datasets using advanced Python techniques.

Connect related records across multiple datasets using Python-based record linkage methods.

Master advanced techniques for cleaning and normalizing text data in Python.

Challenge: Clean Messy Reviews

Løsning