In bioinformatics, it is common to work with large batches of DNA sequences. Often, you need to generate the reverse complement of each sequence before further analysis—such as primer design or motif searching. Manually processing each sequence would be tedious and error-prone, especially when handling hundreds or thousands of entries. Automating this process allows you to efficiently prepare your data for downstream genomics applications.


import unittest
import user_code
import ast
import re   
import importlib
import csv
import unittest
import importlib

class TestTask(unittest.TestCase):
    """Checks ``user_code.reverse_complement_batch`` against fixed batches.

    The fixtures below expect one output string per input string, at the
    same index. Each output is the reverse complement of the input's
    A/C/G/T bases in upper case; lower-case bases are accepted, and every
    other character (``N``, digits, punctuation, other letters) is dropped.
    An input with no valid bases maps to an empty string.
    """

    def _assert_batch(self, batch, expected, success_message):
        """Run the function under test on ``batch`` and report the outcome.

        Args:
            batch: List of input strings passed to the function under test.
            expected: The exact list the function must return.
            success_message: Text reported when the result matches.
        """
        import user_code
        # Re-execute the module so each test calls a freshly loaded version.
        importlib.reload(user_code)
        result = user_code.reverse_complement_batch(batch)
        _dynamic_test(
            self,
            # The type check rejects tuples/generators that merely compare
            # or iterate like the expected list.
            isinstance(result, list) and result == expected,
            success_message,
            f"Expected {expected}, got {result}",
        )

    def test_basic_batch(self):
        """Mixed batch: upper case, lower case, an ``N`` and an invalid entry."""
        batch = ["ATCG", "ggta", "ACGTN", "xyz", "AAGCCTT"]
        self._assert_batch(
            batch,
            ["CGAT", "TACC", "ACGT", "", "AAGGCTT"],
            f"Correct reverse complements for {batch}",
        )

    def test_empty_and_invalid(self):
        """Empty and ``N``-only inputs yield empty strings."""
        batch = ["", "NNNN", "AGCTAGC"]
        self._assert_batch(
            batch,
            ["", "", "GCTAGCT"],
            f"Handles empty and invalid-only sequences for {batch}",
        )

    def test_case_insensitivity(self):
        """Lower-case and mixed-case bases produce upper-case output."""
        batch = ["atcg", "GtCa", "gGtT"]
        self._assert_batch(
            batch,
            ["CGAT", "TGAC", "AACC"],
            f"Processes both uppercase and lowercase bases for {batch}",
        )

    def test_only_invalid_characters(self):
        """Inputs without any valid base yield empty strings."""
        self._assert_batch(
            ["xyz", "123", "@!#"],
            ["", "", ""],
            "Returns empty strings for only-invalid character sequences",
        )

    def test_mixed_valid_invalid(self):
        """Invalid characters interleaved with bases are skipped."""
        self._assert_batch(
            ["A1T2C3G4", "NAGTCN", "g@t#c"],
            ["CGAT", "GACT", "GAC"],
            "Ignores invalid characters in mixed-content sequences",
        )

    def test_order_preserved(self):
        """Output entries appear in the same order as their inputs."""
        self._assert_batch(
            ["ATCG", "GATTACA", "CCGG"],
            ["CGAT", "TGTAATC", "CCGG"],
            "Maintains output order corresponding to input list",
        )

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Report a check on ``test_case`` under a descriptive name.

    The test's ``_testMethodName`` is overwritten with the message that
    matches the outcome before the outcome is signalled, so the message is
    what the test is labelled with afterwards.

    Args:
        test_case: Object exposing ``_testMethodName``, ``assertTrue`` and
            ``fail`` (a ``unittest.TestCase`` in this file).
        condition: Truthy when the check passed.
        success_message: Label used when ``condition`` is truthy.
        failure_message: Label and failure text used otherwise.
    """
    passed = bool(condition)
    outcome_message = success_message if passed else failure_message

    # Rename first: ``fail`` raises, so nothing after it would run.
    test_case._testMethodName = outcome_message

    if not passed:
        test_case.fail(outcome_message)
    else:
        test_case.assertTrue(True, outcome_message)

def normalize_text(text):
    """Return ``text`` lower-cased with whitespace and punctuation spacing unified.

    Steps, in order:
      1. Lower-case the text.
      2. Collapse every run of two or more whitespace characters into one space.
      3. Rewrite each ``,``, ``:`` or ``?`` so it has no whitespace before it
         and exactly one space after it.
      4. Strip leading and trailing whitespace.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    text = text.lower()
    # Single backslashes: in a raw string ``\\s`` would match a literal
    # backslash followed by "s" rather than a whitespace character.
    text = re.sub(r"\s{2,}", " ", text)
    # ``\1`` re-inserts the captured punctuation mark.
    text = re.sub(r"\s*([,:?])\s*", r"\1 ", text)
    return text.strip()

def change_var(code: str, var_name: str, value: str) -> str:
    """Rewrite every top-level assignment to ``var_name`` as ``var_name = value``.

    Only statements directly in the module body are considered, and only
    plain assignments (``ast.Assign``) with a bare-name target equal to
    ``var_name``. Each matching statement, including all of its
    continuation lines, is replaced by a single line. Everything else,
    including blank lines, comments and decorators after the assignment,
    is left in place.

    Limitations: the first physical line of the statement is replaced
    wholesale, so other targets of a chained assignment (``a = b = 1``) or
    other statements sharing that line (``a = 1; b = 2``) are dropped.
    When a replacement happens the text is re-joined from ``splitlines()``,
    so a trailing newline is not preserved.

    Args:
        code: Python source text; must parse.
        var_name: Name of the variable whose assignments are rewritten.
        value: Source text placed on the right-hand side, inserted verbatim.

    Returns:
        The modified source, or ``code`` unchanged when no top-level
        assignment to ``var_name`` exists.

    Raises:
        SyntaxError: If ``code`` cannot be parsed.
    """
    tree = ast.parse(code)
    matches = [
        node
        for node in tree.body
        if isinstance(node, ast.Assign)
        and any(
            isinstance(target, ast.Name) and target.id == var_name
            for target in node.targets
        )
    ]
    if not matches:
        return code

    lines = code.splitlines()
    # Work bottom-up so collapsing a multi-line statement never shifts the
    # line numbers of matches that are still to be processed.
    for node in reversed(matches):
        first = node.lineno - 1
        original = lines[first]
        indent = ' ' * (len(original) - len(original.lstrip()))
        # ``end_lineno`` marks the statement's own last line, so only the
        # assignment is replaced; inferring the end from the next node's
        # ``lineno`` would also remove comments, blank lines and decorators.
        lines[first:node.end_lineno] = [f"{indent}{var_name} = {value}"]

    return '\n'.join(lines)

# Run the tests through unittest's command-line entry point when this file is
# executed as a script; importing the module does not trigger a run.
if __name__ == "__main__":
    unittest.main()


test_main.py

Learn how Python is used in biology for analyzing DNA sequences, processing biological data, and visualizing research results. Includes hands-on examples with bioinformatics libraries.

Explore how Python can be used to analyze DNA and other biological sequences, including searching for motifs, calculating GC content, and basic sequence manipulations.

Delve into protein sequences, amino acid composition, and basic protein analysis using Python.

Learn how to visualize biological data using Python, including plotting sequence statistics and creating informative charts for research.

Challenge: Reverse Complement Batch Processing

Рішення