Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.util.topsort
"""
Topological sort.
From Tim Peters, see:
http://mail.python.org/pipermail/python-list/1999-July/006660.html
topsort takes a list of pairs, where each pair (x, y) is taken to
mean that x <= y wrt some abstract partial ordering. The return
value is a list, representing a total ordering that respects all
the input constraints.
E.g.,
topsort( [(1,2), (3,3)] )
Valid topological sorts would be any of (but nothing other than)
[3, 1, 2]
[1, 3, 2]
[1, 2, 3]
... however this variant ensures that 'key' order (first element of
tuple) is preserved so the following will be result returned:
[1, 3, 2]
because those are the permutations of the input elements that
respect the "1 precedes 2" and "3 precedes 3" input constraints.
Note that a constraint of the form (x, x) is really just a trick
to make sure x appears *somewhere* in the output list.
If there's a cycle in the constraints, say
topsort( [(1,2), (2,1)] )
then CycleError is raised, and the exception object supports
many methods to help analyze and break the cycles. This requires
a good deal more code than topsort itself!
"""
from galaxy.util.odict import odict as OrderedDict
[docs]class CycleError(Exception):
[docs] def __init__(self, sofar, numpreds, succs):
Exception.__init__(self, "cycle in constraints",
sofar, numpreds, succs)
self.preds = None
# return as much of the total ordering as topsort was able to
# find before it hit a cycle
# return remaining elt -> count of predecessors map
# return remaining elt -> list of successors map
# return remaining elements (== those that don't appear in
# get_partial())
# Return a list of pairs representing the full state of what's
# remaining (if you pass this list back to topsort, it will raise
# CycleError again, and if you invoke get_pairlist on *that*
# exception object, the result will be isomorphic to *this*
# invocation of get_pairlist).
# The idea is that you can use pick_a_cycle to find a cycle,
# through some means or another pick an (x,y) pair in the cycle
# you no longer want to respect, then remove that pair from the
# output of get_pairlist and try topsort again.
[docs] def get_pairlist(self):
succs = self.get_succs()
answer = []
for x in self.get_elements():
if x in succs:
for y in succs[x]:
answer.append((x, y))
else:
# make sure x appears in topsort's output!
answer.append((x, x))
return answer
# return remaining elt -> list of predecessors map
[docs] def get_preds(self):
if self.preds is not None:
return self.preds
self.preds = preds = OrderedDict()
remaining_elts = self.get_elements()
for x in remaining_elts:
preds[x] = []
succs = self.get_succs()
for x in remaining_elts:
if x in succs:
for y in succs[x]:
preds[y].append(x)
if __debug__:
for x in remaining_elts:
assert len(preds[x]) > 0
return preds
# return a cycle [x, ..., x] at random
[docs] def pick_a_cycle(self):
remaining_elts = self.get_elements()
# We know that everything in remaining_elts has a predecessor,
# but don't know that everything in it has a successor. So
# crawling forward over succs may hit a dead end. Instead we
# crawl backward over the preds until we hit a duplicate, then
# reverse the path.
preds = self.get_preds()
from random import choice
x = choice(remaining_elts)
answer = []
index = OrderedDict()
in_answer = index.has_key
while not in_answer(x):
index[x] = len(answer) # index of x in answer
answer.append(x)
x = choice(preds[x])
answer.append(x)
answer = answer[index[x]:]
answer.reverse()
return answer
def _numpreds_and_successors_from_pairlist(pairlist):
numpreds = OrderedDict() # elt -> # of predecessors
successors = OrderedDict() # elt -> list of successors
for first, second in pairlist:
# make sure every elt is a key in numpreds
if first not in numpreds:
numpreds[first] = 0
if second not in numpreds:
numpreds[second] = 0
# if they're the same, there's no real dependence
if first == second:
continue
# since first < second, second gains a pred ...
numpreds[second] = numpreds[second] + 1
# ... and first gains a succ
if first in successors:
successors[first].append(second)
else:
successors[first] = [second]
return numpreds, successors
[docs]def topsort(pairlist):
numpreds, successors = _numpreds_and_successors_from_pairlist(pairlist)
# suck up everything without a predecessor
answer = [x for x in numpreds.keys() if numpreds[x] == 0]
# for everything in answer, knock down the pred count on
# its successors; note that answer grows *in* the loop
for x in answer:
assert numpreds[x] == 0
del numpreds[x]
if x in successors:
for y in successors[x]:
numpreds[y] = numpreds[y] - 1
if numpreds[y] == 0:
answer.append(y)
# following "del" isn't needed; just makes
# CycleError details easier to grasp
del successors[x]
if numpreds:
# everything in numpreds has at least one predecessor ->
# there's a cycle
if __debug__:
for x in numpreds.keys():
assert numpreds[x] > 0
raise CycleError(answer, numpreds, successors)
return answer
[docs]def topsort_levels(pairlist):
numpreds, successors = _numpreds_and_successors_from_pairlist(pairlist)
answer = []
while 1:
# Suck up everything without a predecessor.
levparents = [x for x in numpreds.keys() if numpreds[x] == 0]
if not levparents:
break
answer.append(levparents)
for levparent in levparents:
del numpreds[levparent]
if levparent in successors:
for levparentsucc in successors[levparent]:
numpreds[levparentsucc] -= 1
del successors[levparent]
if numpreds:
# Everything in num_parents has at least one child ->
# there's a cycle.
raise CycleError(answer, numpreds, successors)
return answer