DEBUG = False
import string
from sets import Set
K = 4 # Number of nucleotides to choose from

# DNA - Array of DNA sequences
# l - length of the l-mer
def GreedyMotifSearch(DNA, l):
	t = len(DNA) 		# Number of DNA sequences
	n = len(DNA[0]) 	# Length of a single sequence (all should be the same)
	s = [0] * t 		# List of starting positions for the ith DNA sequence
	BestMotif = [0] * t # [0, 0, ... , 0]
	for s[0] in range(0, n - l + 1):
		for s[1] in range(0, n - 1 + 1):
			if Score(s, 2, DNA, l) > Score(BestMotif, 2, DNA, l):
				BestMotif[0] = s[0]
				BestMotif[1] = s[1]
	s[0] = BestMotif[0]
	s[1] = BestMotif[1]
	for i in range(2, t):
		for s[i] in range(0, n - l + 1):
			if Score(s, i+1, DNA, l) > Score(BestMotif, i+1, DNA, l):
				BestMotif[i] = s[i]
		s[i] = BestMotif[i]
	return BestMotif
	
def Score(s, i, DNA, l):
	profile = []
	s = s[0:i]
	for j in range(0,i):
		profile += [DNA[j][s[j]:s[j]+l]]
	profile = Transpose(profile)
	score = 0
	for row in profile:
		score += Count(row)
	return score
	
def Count(row):
	if row == []: return 0
	uniqueSet = Set()
	for i in range(len(row)):
		if isinstance(row[i],str): 	# Convert all characters to lowercase
			row[i] = row[i].lower() 
		uniqueSet.add(row[i]) 		# Get a set of unique elements
	counts = [row.count(item) for item in uniqueSet]
	counts.sort(lambda x, y:  -1 * cmp(x,y)) # Descending sort
	return counts[0] # Highest count
	
def Transpose(matrix):
	if not matrix: return []
	return map(lambda *row: list(row), *matrix)
		
def Nucleotides(s):
	nucleotide = ['a','c','g','t']
	string = ''
	for c in s:
		if (c == 0): break
		string += nucleotide[c-1]
	return string

def PrintConsensus(s, DNA, l):
	for i in range(0,len(s)):
		print DNA[i][s[i]:s[i]+l]
	
DNA = [ "CGGGGCTATGCAACTGGGTCGTCACATTCCCCTTTCGATA",
		"TTTGAGGGTGCCCAATAAATGCAACTCCAAAGCGGACAAA",
		"GGATGCAACTGATGCCGTTTGACGACCTAAATCAACGGCC",
		"AAGGATGCAACTCCAGGAGCGCCTTTGCTGGTTCTACCTG",
		"AATTTTCTAAAAAGATTATAATGTCGGTCCATGCAACTTC",
		"CTGCTGTACAACTGAGATCATGCTGCATGCAACTTTCAAC",
		"TACATGATCTTTTGATGCAACTTGGATGAGGGAATGATGC" ]
s =  GreedyMotifSearch(DNA, 8)
PrintConsensus(s, DNA, 8)

DNA = ["CGGGGCTATcCAgCTGGGTCGTCACATTCCCCTTTCGATA" 
		"TTTGAGGGTGCCCAATAAggGCAACTCCAAAGCGGACAAA",
		"GGATGgAtCTGATGCCGTTTGACGACCTAAATCAACGGCC",
		"AAGGAaGCAACcCCAGGAGCGCCTTTGCTGGTTCTACCTG",
		"AATTTTCTAAAAAGATTATAATGTCGGTCCtTGgAACTTC",
		"CTGCTGTACAACTGAGATCATGCTGCATGCcAtTTTCAAC",
		"TACATGATCTTTTGATGgcACTTGGATGAGGGAATGATGC"]
s =  GreedyMotifSearch(DNA, 8)
PrintConsensus(s, DNA, 8)

if DEBUG:
	print "Test Transpose Function"
	Matrix1 = [[1,0,0],[0,1,0],[0,0,1]]
	Matrix2 = [[1,2,3],[4,5,6]]
	Matrix3 = [[1,4],[2,5],[3,6]]
	print Transpose(Matrix1) == Matrix1
	print Transpose(Matrix2) == Matrix3
	print Transpose(Matrix3) == Matrix2
	print "Testing Nucleotides Function"
	print Nucleotides([0,0,0,0]) == ""
	print Nucleotides([1,2,3,4]) == "acgt"
	print Nucleotides([4,3,2,1]) == "tgca"
	print "Testing Count Function"
	List1 = ['a','b','b','c','c','c']
	List2 = ['a','b','c','d']
	List3 = ['a','A','a','B','b','c',]
	List4 = [1,2,3,4]
	List5 = []
	print Count(List1) == 3
	print Count(List2) == 1
	print Count(List3) == 3
	print Count(List4) == 1
	print Count(List5) == 0
	print "Testing Score Function"
	DNA1 = [[1,2,3,4]] * 2
	s = [0,0]
	l,t = 4, 2
	print Score(s,2,DNA1,l) == l * t