import unittest
import user_code as uc


def _dynamic_test(test_case, condition, success_message, failure_message):
    """Record the outcome of one check on *test_case*.

    The test case's ``_testMethodName`` attribute is overwritten with the
    message that matches the outcome (presumably so that the runner's
    report shows the message rather than the method name -- confirm
    against the grading harness).

    Args:
        test_case: Object exposing ``assertTrue`` and ``fail`` (the callers
            in this file pass a ``unittest.TestCase`` instance).
        condition: Value whose truthiness decides pass or fail.
        success_message: Label stored on the test case when the check passes.
        failure_message: Label stored on the test case, and passed to
            ``test_case.fail``, when the check does not pass.
    """
    passed = bool(condition)
    test_case._testMethodName = success_message if passed else failure_message

    if not passed:
        test_case.fail(failure_message)
        return

    # Trivial assertion kept from the original success path.
    test_case.assertTrue(True)


class TestCleaningChallenge(unittest.TestCase):
    """Checks the values the learner's ``user_code`` module exposes.

    Each test reads one attribute of ``uc`` and reports the outcome through
    ``_dynamic_test``, which replaces the test's displayed name with a
    human-readable message.

    The test methods are documented with ``#`` comments rather than
    docstrings on purpose: unittest includes a test method's docstring in
    verbose reports, which would alter the output produced by this file.
    """

    @staticmethod
    def _is_plain_int(value):
        """Return True when *value* is an ``int`` but not a ``bool``.

        ``bool`` is a subclass of ``int``, so a bare
        ``isinstance(value, int)`` would accept ``True``/``False`` as
        valid counts.
        """
        return isinstance(value, int) and not isinstance(value, bool)

    def test_null_delay_count(self):
        # `null_delay_count` must be a real integer that is >= 0.
        count = uc.null_delay_count
        _dynamic_test(self,
            self._is_plain_int(count) and count >= 0,
            "The `null_delay_count` is a non-negative integer.",
            "Expected `null_delay_count` to be a non-negative integer.")

    def test_cleaned_count(self):
        # `cleaned_count` must be a real integer that is > 0.
        count = uc.cleaned_count
        _dynamic_test(self,
            self._is_plain_int(count) and count > 0,
            "The `cleaned_count` is a positive integer.",
            "Expected `cleaned_count` to be a positive integer.")

    def test_timeofday_column_exists(self):
        # `cleaned_df.columns` must list a column named "TimeOfDay".
        _dynamic_test(self,
            "TimeOfDay" in uc.cleaned_df.columns,
            "The `cleaned_df` contains the `TimeOfDay` column.",
            "Expected `cleaned_df` to contain the `TimeOfDay` column.")

    def test_no_nulls_in_delay(self):
        # Imported here rather than at module level, as in the original,
        # so that pyspark is only needed when this test actually runs.
        from pyspark.sql.functions import col

        # Count the rows of `cleaned_df` whose "Delay" value is null.
        null_count = uc.cleaned_df.filter(col("Delay").isNull()).count()
        _dynamic_test(self,
            null_count == 0,
            "The `cleaned_df` has no null values in `Delay`.",
            f"Expected no nulls in `Delay`, but found {null_count}.")


# Run the tests defined in this module when the file is executed directly
# as a script (nothing happens on import).
if __name__ == "__main__":
    unittest.main()

test_main.py

Master the essentials of data processing using PySpark, focusing on reading and writing data, cleaning and transforming datasets, leveraging Spark SQL, and optimizing performance for large-scale analytics.

Covers the core concepts and practical skills for data processing with PySpark, from data ingestion to advanced SQL analytics and performance optimization.

Challenge: Cleaning a Real-World Dataset

解答