import unittest
import re

# Helper for dynamic test messages
def _dynamic_test(test_case, condition, success, failure):
    """Record a check's outcome under a human-readable message.

    The selected message is written to ``test_case._testMethodName`` before
    the outcome is recorded, so anything that reads that attribute afterwards
    sees the message instead of the original method name.

    Args:
        test_case: The running test object; must provide ``assertTrue`` and
            ``fail`` methods (callers in this file pass a
            ``unittest.TestCase`` instance).
        condition: Any value; only its truthiness is used.
        success: Message used when ``condition`` is truthy.
        failure: Message used when ``condition`` is falsy.

    Raises:
        Propagates whatever ``test_case.fail`` raises on the failure path.
    """
    passed = bool(condition)
    # Rename first: the outcome call below may raise and must not skip this.
    test_case._testMethodName = success if passed else failure
    if passed:
        test_case.assertTrue(True, success)
        return
    test_case.fail(failure)


class TestUserCode(unittest.TestCase):
    """Checks run against the learner-provided ``user_code`` module.

    Every check reports its result through ``_dynamic_test``, which swaps the
    test's method name for a readable success/failure message.

    NOTE: the test methods are documented with ``#`` comments rather than
    docstrings on purpose. ``unittest`` uses the first line of a test
    method's docstring as its short description, so adding docstrings would
    change what a runner displays for these tests.
    """

    def test_matched_products_declared(self):
        # `user_code` is imported inside the test so an import error is
        # attributed to this test rather than to loading this file.
        import user_code
        _dynamic_test(
            self,
            hasattr(user_code, "matched_products"),
            "The `matched_products` dictionary is declared.",
            "Expected `matched_products` to be declared."
        )

    def test_matched_products_is_dict(self):
        import user_code
        # getattr with a default: if the variable is missing, report the
        # readable failure message instead of crashing with AttributeError.
        _dynamic_test(
            self,
            isinstance(getattr(user_code, "matched_products", None), dict),
            "The `matched_products` variable is a dictionary.",
            "Expected `matched_products` to be a dictionary."
        )

    def test_sequence_matcher_used(self):
        # Must check only that SequenceMatcher is used at least once.
        # This is a textual check on the source: the name merely has to
        # appear somewhere in the file. The path is relative to the current
        # working directory.
        # Explicit UTF-8 (Python's default source encoding) so non-ASCII
        # characters in the learner's file do not depend on the locale.
        with open("user_code.py", "r", encoding="utf-8") as file:
            src = file.read()

        _dynamic_test(
            self,
            "SequenceMatcher" in src,
            "The `SequenceMatcher` class is used for similarity scoring.",
            "Expected `SequenceMatcher` to be used."
        )

    def test_correct_matching(self):
        import user_code

        # Exact mapping the learner's `matched_products` must equal.
        expected_pairs = {
            "Iphone14": "Apple iPhone 14",
            "Galaxy S-22": "Samsung Galaxy S22",
            "Sony 1000 XM5": "Sony WH-1000XM5",
            "Dell Inspiron15": "Dell Inspiron 15"
        }

        # The attribute lookup, the comparison and the repr in the f-string
        # all touch learner-provided objects; any error raised by them is
        # deliberately reported as a failed check with a generic message.
        try:
            condition = (user_code.matched_products == expected_pairs)
            failure_message = (
                f"Expected `matched_products` to be `{expected_pairs}`, "
                f"but got `{user_code.matched_products}`."
            )
        except Exception:
            condition = False
            failure_message = "The `matched_products` dictionary is missing or incorrect."

        _dynamic_test(
            self,
            condition,
            "The `matched_products` dictionary contains correct matches.",
            failure_message
        )


# Run the tests through unittest's command-line entry point, but only when
# this file is executed directly (not when it is imported).
if __name__ == "__main__":
    unittest.main()


test_main.py

Master sophisticated data cleaning strategies in Python, including fuzzy matching, deduplication, record linkage, advanced text normalization, anomaly detection, and repairing corrupted datasets. This course provides hands-on, practical approaches using Python and popular libraries to ensure your data is accurate, consistent, and ready for analysis.

Explore techniques for identifying and comparing similar but non-identical data entries using Python.

Learn to identify and remove duplicate records from datasets using advanced Python techniques.

Connect related records across multiple datasets using Python-based record linkage methods.

Master advanced techniques for cleaning and normalizing text data in Python.

Challenge: Fuzzy Match Product Names

Lösung