import matplotlib.pyplot as plt
from collections import deque
import heapq
import time


## Limit the number of reads to load, -1 for unlimited
MAXREADS = -1

## Limit the number of reads to plot
MAX_READS_LAYOUT = 500

## Path to reads file
READFILE = "/Users/mschatz/build/planesweep/readid.start.stop.txt"
print "Loading reads from " + READFILE
f = open(READFILE)

## This will have a list of tuples containing (readid, start, end, rc)
## rc is 0 if the read was originally oriented forward, 1 for reverse

reads = []
totallen = 0
readidx = 0

for line in f:
  line = line.rstrip()
  fields = line.split()
  readid  = fields[0]
  start   = int(fields[1])
  end     = int(fields[2])
  rc      = 0

  if (start > end):
    start = int(fields[2])
    end   = int(fields[1])
    rc = 1

  readinfo = (readid, start, end, rc)
  reads.append(readinfo)

  if (end > totallen):
    totallen = end

  readidx += 1
  if (readidx == MAXREADS):
    break

print "Loaded layout information for %d reads" % (len(reads))

for i in xrange(3):
  print "  %d: %s [%d - %d] %d" % (i, reads[i][0], reads[i][1], reads[i][2], reads[i][3])

print "  ..."

for i in xrange(len(reads)-3, len(reads)):
  print "  %d: %s [%d - %d] %d" % (i, reads[i][0], reads[i][1], reads[i][2], reads[i][3])

print "\n\n"


## Note to keep it tractable, we only plot the first MAX_READ_LAYOUT reads

plt.figure(figsize=(15,4), dpi=100)

print "Plotting layout"

## draw the layout of reads
for i in xrange(min(MAX_READS_LAYOUT, len(reads))):
    r = reads[i]
    readid = r[0]
    start  = r[1]
    end    = r[2]
    rc     = r[3]
    color  = "blue"

    if (rc == 1): 
      color = "red"

    plt.plot ([start,end], [-2*i, -2*i], lw=4, color=color)

plt.draw()
plt.show()


print "Brute force computing coverage over %d bp" % (totallen)

starttime = time.time()
brutecov = [0] * totallen

for r in reads:
 # print " -- [%d, %d]" % (r[1], r[2])

  for i in xrange(r[1], r[2]):
    brutecov[i] += 1

brutetime = (time.time() - starttime) * 1000.0

print "  Brute force complete in %0.02f ms" % (brutetime)
print brutecov[0:10]

maxcov = 0
lowcov = 0
LOW_COV_THRESHOLD = 10

for c in brutecov:
  if c > maxcov:
    maxcov = c
  if c <= LOW_COV_THRESHOLD:
    lowcov += 1

print "max cov: %d, there are %d low coverage bases (<= %d depth)" % (maxcov, lowcov, LOW_COV_THRESHOLD)

print "\n\n"


  plt.figure()
  plt.hist(brutecov)
  plt.show()


print "Delta encoding coverage plot"

starttime = time.time()

deltacov = []
curcov = -1
for i in xrange(0, len(brutecov)):
  if brutecov[i] != curcov:
    curcov = brutecov[i]
    delta = (i, curcov)
    deltacov.append(delta)

## Finish up with the last position
deltacov.append((totallen, 0))

print "Delta encoding required only %d steps, saving %0.02f%% of the space in %0.02f ms" % (len(deltacov), (100.0*float(totallen-len(deltacov))/totallen), (time.time()-starttime) * 1000.0)

for i in xrange(3):
  print "  %d: [%d,%d]" % (i, deltacov[i][0], deltacov[i][1])

print "  ..."

for i in xrange(len(deltacov)-3, len(deltacov)):
  print "  %d: [%d,%d]" % (i, deltacov[i][0], deltacov[i][1])

print "\n\n"


  plt.figure(figsize=(15,4), dpi=100)

  print "Plotting coverage profile"
  
  ## expand the coverage profile by this amount so that it is easier to see
  YSCALE = 5

  ## draw the layout of reads
  for i in xrange(min(MAX_READS_LAYOUT, len(reads))):
    r = reads[i]
    readid = r[0]
    start  = r[1]
    end    = r[2]
    rc     = r[3]
    color  = "blue"

    if (rc == 1): 
      color = "red"

    plt.plot ([start,end], [-2*i, -2*i], lw=4, color=color)

  ## draw the base of the coverage plot
  plt.plot([0, totallen], [0,0], color="black")

  ## draw the coverage plot
  for i in xrange(len(deltacov)-1):
    x1 = deltacov[i][0]
    x2 = deltacov[i+1][0]
    y1  = YSCALE*deltacov[i][1]
    y2  = YSCALE*deltacov[i+1][1]

    ## draw the horizonal line
    plt.plot([x1, x2], [y1, y1], color="black")
    
    ## and now the right vertical to the new coverage level
    plt.plot([x2, x2], [y1, y2], color="black")

  plt.draw()
  plt.show()


print "Beginning list-based plane sweep over %d reads" % (len(reads))

starttime = time.time()

## record the delta encoded depth using a plane sweep
deltacovplane = []

## use a list to record the end positions of the elements currently in plane
planelist = []

## BEGIN SWEEP 
for r in reads:
  startpos = r[1]
  endpos   = r[2]

  ## clear out any positions from the plane that we have already moved past
  while (len(planelist) > 0):

    if (planelist[0] <= startpos):
      ## the coverage steps down, extract it from the front of the list 
      oldend = planelist.pop(0) 
      deltacovplane.append((oldend, len(planelist)))
    else:
      break

  ## Now insert the current endpos into the correct position into the list
  insertpos = -1
  for i in xrange(len(planelist)):
    if (endpos < planelist[i]):
        insertpos = i
        break

  if (insertpos > 0):
    planelist.insert(insertpos, endpos)
  else:
    planelist.append(endpos)


  ## Finally record that the coverage has increased
  deltacovplane.append((startpos, len(planelist)))


## Flush any remaining end positions
while (len(planelist) > 0):
  oldend = planelist.pop(0) 
  deltacovplane.append((oldend, len(planelist)))


## Report statistics
planelisttime = (time.time() - starttime) * 1000.0
print "Plane sweep found %d steps, saving %0.02f%% of the space in %0.2f ms (%0.02f speedup)!" % (len(deltacovplane), (100.0*float(totallen-len(deltacovplane))/totallen), planelisttime, brutetime/planelisttime)

for i in xrange(3):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "  ..."

for i in xrange(len(deltacovplane)-3, len(deltacovplane)):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "\n\n"


print "Checking for duplicates:"

lastpos = -1
for i in xrange(len(deltacovplane)):
  if deltacovplane[i][0] == lastpos:
    print "  Found duplicate: %d: [%d,%d] == %d: [%d,%d]" % (i-1, deltacovplane[i-1][0], deltacovplane[i-1][1], i, deltacovplane[i][0], deltacovplane[i][1])
  lastpos = deltacovplane[i][0]

print "\n\n"


print "Beginning correct list-based plane sweep over %d reads" % (len(reads))

starttime = time.time()

## record the delta encoded depth using a plane sweep
deltacovplane = []

## use a list to record the end positions of the elements currently in plane
planelist = []

## BEGIN SWEEP (note change to index based so can peek ahead)
for rr in xrange(len(reads)):
  r = reads[rr]
  startpos = r[1]
  endpos   = r[2]

  ## clear out any positions from the plane that we have already moved past
  while (len(planelist) > 0):

    if (planelist[0] <= startpos):
      ## the coverage steps down, extract it from the front of the list 
      oldend = planelist.pop(0) 

      nextend = -1
      if (len(planelist) > 0):
        nextend = planelist[0]

      ## only record this transition if it is not the same as a start pos
      ## and only if not the same as the next end point
      if ((oldend != startpos) and (oldend != nextend)):
        deltacovplane.append((oldend, len(planelist)))
    else:
      break

  ## Now insert the current endpos into the correct position into the list
  insertpos = -1
  for i in xrange(len(planelist)):
    if (endpos < planelist[i]):
        insertpos = i
        break

  if (insertpos > 0):
    planelist.insert(insertpos, endpos)
  else:
    planelist.append(endpos)


  ## Finally record that the coverage has increased
  ## But make sure the current read does not start at the same position as the next
  if ((rr == len(reads)-1) or (startpos != reads[rr+1][1])):
    deltacovplane.append((startpos, len(planelist)))

  ## if it is at the same place, it will get reported in the next cycle

## Flush any remaining end positions
while (len(planelist) > 0):
  oldend = planelist.pop(0) 
  deltacovplane.append((oldend, len(planelist)))


## Report statistics
planelisttime = (time.time() - starttime) * 1000.0
print "Plane sweep found %d steps, saving %0.02f%% of the space in %0.2f ms (%0.02f speedup)!" % (len(deltacovplane), (100.0*float(totallen-len(deltacovplane))/totallen), planelisttime, brutetime/planelisttime)

for i in xrange(3):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "  ..."

for i in xrange(len(deltacovplane)-3, len(deltacovplane)):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "\n\n"

print "Checking for duplicates in corrected version:"

lastpos = -1
for i in xrange(len(deltacovplane)):
  if deltacovplane[i][0] == lastpos:
    print "  Found duplicate: %d: [%d,%d] == %d: [%d,%d]" % (i-1, deltacovplane[i-1][0], deltacovplane[i-1][1], i, deltacovplane[i][0], deltacovplane[i][1])
  lastpos = deltacovplane[i][0]

print "\n\n"


print "Beginning heap-based plane sweep over %d reads" % (len(reads))

starttime = time.time()

## record the delta encoded depth using a plane sweep
deltacovplane = []

## use a list to record the end positions of the elements currently in plane
planeheap = []

## BEGIN SWEEP (note change to index based so can peek ahead)
for rr in xrange(len(reads)):
  r = reads[rr]
  startpos = r[1]
  endpos   = r[2]

  ## clear out any positions from the plane that we have already moved past
  while (len(planeheap) > 0):

    if (planeheap[0] <= startpos):
      ## the coverage steps down, extract it from the front of the list 
      ## oldend = planelist.pop(0) 
      oldend = heapq.heappop(planeheap)

      nextend = -1
      if (len(planeheap) > 0):
        nextend = planeheap[0]

      ## only record this transition if it is not the same as a start pos
      ## and only if not the same as the next end point
      if ((oldend != startpos) and (oldend != nextend)):
        deltacovplane.append((oldend, len(planeheap)))
    else:
      break

  ## Now insert the current endpos into the correct position into the list

  ##insertpos = -1
  ##for i in xrange(len(planelist)):
  ##  if (endpos < planelist[i]):
  ##      insertpos = i
  ##      break

  ##if (insertpos > 0):
  ##  planelist.insert(insertpos, endpos)
  ##else:
  ##  planelist.append(endpos)

  heapq.heappush(planeheap, endpos)


  ## Finally record that the coverage has increased
  ## But make sure the current read does not start at the same position as the next
  if ((rr == len(reads)-1) or (startpos != reads[rr+1][1])):
    deltacovplane.append((startpos, len(planeheap)))

  ## if it is at the same place, it will get reported in the next cycle


## Flush any remaining end positions
while (len(planeheap) > 0):
  ##oldend = planelist.pop(0) 
  oldend = heapq.heappop(planeheap)
  deltacovplane.append((oldend, len(planeheap)))


## Report statistics
planeheaptime = (time.time()-starttime) * 1000.0
print "Heap-Plane sweep found %d steps, saving %0.02f%% of the space in %0.02f ms (%0.02f speedup)!" % (len(deltacovplane), (100.0*float(totallen-len(deltacovplane))/totallen), planeheaptime, brutetime/planeheaptime)

for i in xrange(3):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "  ..."

for i in xrange(len(deltacovplane)-3, len(deltacovplane)):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "\n\n"

print "Checking for duplicates in heap-ified version:"

lastpos = -1
for i in xrange(len(deltacovplane)):
  if deltacovplane[i][0] == lastpos:
    print "  Found duplicate: %d: [%d,%d] == %d: [%d,%d]" % (i-1, deltacovplane[i-1][0], deltacovplane[i-1][1], i, deltacovplane[i][0], deltacovplane[i][1])
  lastpos = deltacovplane[i][0]

print "\n\n"


print "Beginning heap-based plane sweep over %d reads to search for max reads" % (len(reads))

starttime = time.time()

## record the delta encoded depth using a plane sweep
deltacovplane = []

## use a list to record the end positions of the elements currently in plane
planeheap = []

## use a list to record the ids at max coverage
maxcov = -1
maxcovpos = -1
maxcovreads = []

## BEGIN SWEEP (note change to index based so can peek ahead)
for rr in xrange(len(reads)):
  r = reads[rr]
  readid   = r[0]
  startpos = r[1]
  endpos   = r[2]

  ## clear out any positions from the plane that we have already moved past
  while (len(planeheap) > 0):

    if (planeheap[0][0] <= startpos):
      ## the coverage steps down, extract it from the front of the list 
      oldend = heapq.heappop(planeheap)[0]

      nextend = -1
      if (len(planeheap) > 0):
        nextend = planeheap[0][0]

      ## only record this transition if it is not the same as a start pos
      ## and only if not the same as the next end point
      if ((oldend != startpos) and (oldend != nextend)):
        deltacovplane.append((oldend, len(planeheap)))
    else:
      break

  ## Now insert the current endpos into the correct position into the list
  heapq.heappush(planeheap, (endpos, readid))


  ## Finally record that the coverage has increased
  ## But make sure the current read does not start at the same position as the next
  if ((rr == len(reads)-1) or (startpos != reads[rr+1][1])):
    cov = len(planeheap)
    if (cov > maxcov):
      maxcov = cov
      maxcovpos = startpos
      maxcovreads = []
      for rr in planeheap:
        maxcovreads.append(rr[1])

    deltacovplane.append((startpos, len(planeheap)))


## Flush any remaining end positions
while (len(planeheap) > 0):
  oldend = heapq.heappop(planeheap)[0]
  deltacovplane.append((oldend, len(planeheap)))


## Report statistics
planeheaptime = (time.time()-starttime) * 1000.0
print "Heap-Plane sweep found %d steps, saving %0.02f%% of the space in %0.02f ms (%0.02f speedup)!" % (len(deltacovplane), (100.0*float(totallen-len(deltacovplane))/totallen), planeheaptime, brutetime/planeheaptime)

print "The %d reads at the position of maximum depth (%d) are:" % (maxcov, maxcovpos)
print maxcovreads

print "\n\n"

for i in xrange(3):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "  ..."

for i in xrange(len(deltacovplane)-3, len(deltacovplane)):
  print "  %d: [%d,%d]" % (i, deltacovplane[i][0], deltacovplane[i][1])

print "\n\n"

print "Checking for duplicates in heap-ified version:"

lastpos = -1
for i in xrange(len(deltacovplane)):
  if deltacovplane[i][0] == lastpos:
    print "  Found duplicate: %d: [%d,%d] == %d: [%d,%d]" % (i-1, deltacovplane[i-1][0], deltacovplane[i-1][1], i, deltacovplane[i][0], deltacovplane[i][1])
  lastpos = deltacovplane[i][0]

print "\n\n"


  f = plt.figure(figsize=(15,4), dpi=100)
    
  print "Plotting max depth\n\n"

  lastend = -1
  lastidx = -1
    
  ## draw the layout of reads
  for i in xrange(min(MAX_READS_LAYOUT, len(reads))):
    r = reads[i]
    readid = r[0]
    start  = r[1]
    end    = r[2]
    rc     = r[3]
    color  = "blue"

    if (rc == 1): 
      color = "red"

    plt.plot ([start,end], [-2*i, -2*i], lw=4, color=color)
    lastend = end
    lastidx = i

  ## draw the base of the coverage plot
  plt.plot([0, lastend], [0,0], color="black")

  ## draw the coverage profile
  for i in xrange(len(deltacov)-1):
    x1 = deltacov[i][0]
    x2 = deltacov[i+1][0]
    y1  = YSCALE * deltacov[i][1]
    y2  = YSCALE * deltacov[i+1][1]

    ## draw the horizonal line
    plt.plot([x1, x2], [y1, y1], color="black")
    
    ## and now the right vertical to the new coverage level
    plt.plot([x2, x2], [y1, y2], color="black")
    if (x2 >= lastend):
        break

  ## draw a vertical bar with the max coverage
  plt.plot([maxcovpos, maxcovpos], [YSCALE*maxcov, -(2*lastidx+20)], color="green", lw=2)
  plt.draw()
  plt.show()