%%file calc_gc.py
def calc_gc(sequence):
    sequence = sequence.upper()                    # make all chars uppercase
    n = sequence.count('T') + sequence.count('A')  # count only A, T,
    m = sequence.count('G') + sequence.count('C')  # C, and G -- nothing else (no Ns, Rs, Ws, etc.)
    if n + m == 0:
        return 0.                                  # avoid divide-by-zero
    return float(m) / float(n + m)

def test_1():
    result = round(calc_gc('ATGGCAT'), 2)
    print 'hello, this is a test; the value of result is', result
    assert result == 0.43
    
def test_2(): # test handling N
    result = round(calc_gc('NATGC'), 2)
    assert result == 0.5, result
    
def test_3(): # test handling lowercase
    result = round(calc_gc('natgc'), 2)
    assert result == 0.5, result


!nosetests calc_gc.py

!nosetests -v calc_gc.py

%%file gc-of-seqs.py
import sys
import screed
import calc_gc

filename = sys.argv[1]    # take the sequence filename in from the command line
total_gc = []
for record in screed.open(filename):
    gc = calc_gc.calc_gc(record.sequence)
    total_gc.append(gc)
    
print sum(total_gc) / float(len(total_gc))

# run the script and look at the output -- then write that output into the following file.
!python gc-of-seqs.py 25k.fq.gz

%%file test_gc_script.py
import subprocess

correct_output = "0.607911191366\n"   # this is taken from the previous exec'd cell

# the following function checks to see if running this script at the command line
# returns the right result.  make sure you're running this from *within* the python/ subdirectory
# of the 2012-11-scripps/ repository.
def test_run():
    p = subprocess.Popen('python gc-of-seqs.py 25k.fq.gz', shell=True, stdout=subprocess.PIPE)
    (stdout, stderr) = p.communicate()
    assert stdout == correct_output
    

!nosetests test_gc_script.py