import unittest
import re

# Helper for dynamic naming
def _dynamic_test(test_case, condition, success, failure):
    if condition:
        test_case._testMethodName = success
        test_case.assertTrue(True, success)
    else:
        test_case._testMethodName = failure
        test_case.fail(failure)


class TestUserCode(unittest.TestCase):
    """Checks for the learner's record-linkage solution in ``user_code``.

    Every test imports ``user_code`` locally (or reads its source file) and
    reports its outcome through ``_dynamic_test``, which renames the test to
    the success or failure message.
    """

    def test_linked_records_declared(self):
        """Check that ``user_code`` defines a ``linked_records`` name."""
        import user_code
        _dynamic_test(
            self,
            hasattr(user_code, "linked_records"),
            "The `linked_records` list is declared.",
            "Expected `linked_records` to be declared."
        )

    def test_linked_records_is_list(self):
        """Check that ``linked_records`` is a ``list`` instance."""
        import user_code
        # Use getattr with a default so a missing variable yields the
        # friendly failure message below instead of an AttributeError.
        linked = getattr(user_code, "linked_records", None)
        _dynamic_test(
            self,
            isinstance(linked, list),
            "The `linked_records` variable is a list.",
            "Expected `linked_records` to be a list."
        )

    def test_sequence_matcher_used(self):
        """Check that the solution's source text mentions ``SequenceMatcher``."""
        # Python source files are UTF-8 by default; be explicit so decoding
        # does not depend on the platform's locale encoding.
        with open("user_code.py", "r", encoding="utf-8") as file:
            src = file.read()

        # A plain substring test is enough for a literal name and gives a
        # real bool rather than a Match object.
        _dynamic_test(
            self,
            "SequenceMatcher" in src,
            "The `SequenceMatcher` class is used for similarity scoring.",
            "Expected `SequenceMatcher` to be used."
        )

    def test_correct_linking(self):
        """Check that ``linked_records`` holds an accepted set of index pairs.

        Only the ``index_df1`` and ``index_df2`` keys of each record are
        compared; any other keys are ignored. The comparison is
        order-sensitive: the pairs must appear in the order listed below.
        """
        import user_code

        # Because SequenceMatcher may return ≈0.78 for Bob/Robert,
        # accept either 2 or 3 correct matches.
        valid_expected = [
            [
                {"index_df1": 0, "index_df2": 0},
                {"index_df1": 1, "index_df2": 1},
                {"index_df1": 2, "index_df2": 2}
            ],
            [
                {"index_df1": 0, "index_df2": 0},
                {"index_df1": 2, "index_df2": 2}
            ]
        ]

        # The broad except is deliberate: any problem while reading or
        # comparing the learner's data (missing attribute, wrong record
        # type, missing key, ...) is reported as a structural failure
        # rather than as a test error.
        try:
            actual_pairs = [
                {"index_df1": r["index_df1"], "index_df2": r["index_df2"]}
                for r in user_code.linked_records
            ]

            condition = actual_pairs in valid_expected
            failure_message = (
                "Expected linked record pairs to be one of "
                f"{valid_expected}, but got {actual_pairs}."
            )

        except Exception:
            condition = False
            failure_message = "The `linked_records` structure is incorrect."

        _dynamic_test(
            self,
            condition,
            "The linked records contain valid employee index pairs.",
            failure_message
        )


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_code.py

Master sophisticated data cleaning strategies in Python, including fuzzy matching, deduplication, record linkage, advanced text normalization, anomaly detection, and repairing corrupted datasets. This course provides hands-on, practical approaches using Python and popular libraries to ensure your data is accurate, consistent, and ready for analysis.

Explore techniques for identifying and comparing similar but non-identical data entries using Python.

Learn to identify and remove duplicate records from datasets using advanced Python techniques.

Connect related records across multiple datasets using Python-based record linkage methods.

Master advanced techniques for cleaning and normalizing text data in Python.

Challenge: Link Employee Records

Solution