# composition.py - Script to calculate the percentage base composition of a file of DNA
# sequence data.  Assumes that lines in the file which aren't DNA sequences begin with
# a ">".

# trim newline
# check percentage


# Input file and the bases that are tallied individually.  The order of BASES
# is also the order in which the percentages are reported.
SEQUENCE_FILE = "Pbar_1.0_scaffolds.fa"
BASES = ("A", "C", "T", "G")


def count_bases(lines):
    """Tally the bases found in an iterable of sequence-file lines.

    Lines starting with ">" are treated as headers and skipped.  Trailing
    line terminators are not counted.  Matching is case-sensitive: only
    upper-case A, C, T and G are tallied as bases; every other character
    (including lower-case letters) is counted as "other".

    Args:
        lines: iterable of strings, e.g. an open text file.

    Returns:
        A ``(counts, other)`` tuple where ``counts`` maps each base in
        BASES to its number of occurrences and ``other`` is the number of
        remaining characters on the sequence lines.
    """
    counts = dict.fromkeys(BASES, 0)
    other = 0
    for line in lines:
        if line.startswith(">"):
            continue
        # Drop the line terminator so it is not reported as an "other"
        # character.
        sequence = line.rstrip("\r\n")
        recognised = 0
        for base in BASES:
            # str.count scans the line in C, far faster than a per-character
            # Python loop on large sequence files.
            hits = sequence.count(base)
            counts[base] += hits
            recognised += hits
        other += len(sequence) - recognised
    return counts, other


def percentage(count, total):
    """Return ``count`` as a percentage of ``total`` (0.0 when total is 0)."""
    if total == 0:
        # No bases were found at all; avoid a ZeroDivisionError.
        return 0.0
    return (float(count) / total) * 100


def main(path=SEQUENCE_FILE):
    """Print the base composition of the sequence file at ``path``."""
    # The context manager guarantees the file is closed, even on error.
    with open(path, "r") as handle:
        counts, other = count_bases(handle)

    # The total deliberately covers only A/C/T/G; "other" characters are
    # reported separately and excluded from the percentages.
    total_of_ACTG = sum(counts.values())

    # Single-argument print(...) produces the same output on Python 2 and 3.
    for base in BASES:
        print("%s percentage: %s" % (base, percentage(counts[base], total_of_ACTG)))
    print("Other characters: %s" % other)
    print("There are  %s characters in the file." % total_of_ACTG)


if __name__ == "__main__":
    main()

