| Previous CloneSet | Next CloneSet | Back to Main Report |
| Clone Mass | Clones in CloneSet | Parameter Count | Clone Similarity | Syntax Category [Sequence Length] |
|---|---|---|---|---|
| 180 | 2 | 2 | 0.999 | file_input_element_list[4] |
| Clone Abstraction | Parameter Bindings |
| Clone Instance (Click to see clone) | Line Count | Source Line | Source File |
|---|---|---|---|
| 1 | 180 | 451 | Bio/MEME/Parser.py |
| 2 | 178 | 535 | Bio/Motif/Parsers/MEME.py |
| ||||
class MASTParser(AbstractParser):
    """Parser for MAST text output (HTML output cannot be parsed yet).

    A MASTParser is given a file handle for a MAST text output file and
    hands back the consumer's ``data`` object -- a MASTRecord holding the
    hits between motifs and sequences. The scanner buffers lines in an
    unusual way in order to pick apart match diagrams; very complex
    diagrams often produce an error message and leave the p-values of the
    affected line unparsed.

    The consumer and scanner are created once in the constructor, so the
    same consumer instance is used by every call to parse().

    Methods:
    parse (handle): parses the data from the file handle passed to it.

    Example:

        f = open("mast_file.txt")
        parser = MASTParser()
        mast_record = parser.parse(f)
        for motif in mast_record.motifs:
            for instance in motif.instances:
                print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    """

    def __init__(self):
        """Create the consumer/scanner pair used by parse()."""
        self._consumer = _MASTConsumer()
        self._scanner = _MASTScanner()

    def parse(self, handle):
        """Feed *handle* through the scanner and return the consumer's data."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
class _MASTScanner:
"""
Scanner for MAST text output.

Reads a MAST text report from a handle and forwards selected lines to
callback methods on a consumer object (noevent, _version, _database,
_add_motif, _set_current_seq, _add_line_to_buffer, ...).

NOTE(review): the indentation of this listing has been stripped, so the
nesting of the loops and branches below is not visible here -- confirm it
against the original module before changing any logic. The helpers
read_and_call, read_and_call_until, read_and_call_while,
attempt_read_and_call and safe_peekline are defined outside this listing;
presumably they read lines from the handle and pass them to the given
callback according to the blank/start/contains condition -- confirm.
"""
# Entry point: make sure the handle is a File.UndoHandle (wrapping it if
# necessary), then run the three scanning phases in order.
def feed (self,handle,consumer):
if isinstance(handle,File.UndoHandle):
uhandle = handle
else:
uhandle = File.UndoHandle(handle)
self._scan_header(uhandle,consumer)
self._scan_matches(uhandle,consumer)
self._scan_annotated_matches(uhandle,consumer)
# Header phase: version line, database line and the motif table.
def _scan_header (self,uhandle,consumer):
# A ValueError from the first helper call is replaced by a ValueError
# whose message says the 'MAST version' line is missing.
try :
read_and_call_until(uhandle,consumer.noevent,contains = "MAST version")
except ValueError:
raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
# The 'MAST version' line itself is handed to consumer._version.
read_and_call(uhandle,consumer._version,contains = "MAST version")
read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS")
read_and_call(uhandle,consumer.noevent,start = "DATABASE")
read_and_call(uhandle,consumer.noevent,start = "****")
# A line containing 'DATABASE' is handed to consumer._database.
read_and_call(uhandle,consumer._database,contains = "DATABASE")
read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH")
read_and_call(uhandle,consumer.noevent,contains = "MOTIF")
read_and_call(uhandle,consumer.noevent,contains = "----")
# Motif table rows go to consumer._add_motif (presumably one call per
# line up to the first blank line -- confirm against the helper).
read_and_call_until(uhandle,consumer._add_motif,blank = 1)
read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:")
# Section II phase: every line here is routed to consumer.noevent; the only
# call that used a real handler is commented out below.
def _scan_matches (self,uhandle,consumer):
read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME")
read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME")
read_and_call(uhandle,consumer.noevent,start = "---")
# read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1)
read_and_call_until(uhandle,consumer.noevent,blank = 1)
read_and_call(uhandle,consumer.noevent,blank = 1)
# Section III phase: per-sequence annotated matches and their diagrams.
def _scan_annotated_matches (self,uhandle,consumer):
read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:")
read_and_call(uhandle,consumer.noevent,start = "SECTION III:")
read_and_call_until(uhandle,consumer.noevent,start = "****")
read_and_call(uhandle,consumer.noevent,start = "****")
read_and_call_until(uhandle,consumer.noevent,start = "*****")
read_and_call(uhandle,consumer.noevent)
read_and_call_while(uhandle,consumer.noevent,blank = 1)
# readMatches is the outer loop flag; it is set to 0 below when a line of
# asterisks is seen.
readMatches = 1
while readMatches==1:
# When a sequence is already current and the consumer's buffer is
# non-empty, the buffer is parsed; it is blanked before the next sequence.
if consumer._current_seq:
if consumer._buffer_size!=0:
consumer._parse_buffer(None)
consumer._blank_buffer(None)
# The next line is handed to consumer._set_current_seq.
read_and_call(uhandle,consumer._set_current_seq)
read_and_call_until(uhandle,consumer.noevent,start = " DIAGRAM")
# Diagram lines are buffered, turned into a diagram, and the buffer reset.
read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1)
consumer._add_diagram_from_buffer(None)
consumer._blank_buffer(None)
read_and_call(uhandle,consumer.noevent,blank = 1)
while 1:
# Peek at the next line; a line starting with '****' triggers a final
# buffer parse and clears readMatches.
line = safe_peekline(uhandle)
if line.startswith("****"):
consumer._parse_buffer(None)
readMatches = 0
break
# Otherwise buffer lines up to a blank line, then collapse the buffer.
read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1)
read_and_call(uhandle,consumer.noevent,blank = 1)
consumer._collapse_buffer(None)
# A further blank line leaves this loop with readMatches unchanged; a
# '*****' line parses and blanks the buffer and clears readMatches.
if attempt_read_and_call(uhandle,consumer.noevent,blank = 1):
break
elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"):
consumer._parse_buffer(None)
consumer._blank_buffer(None)
readMatches = 0
break
class MASTRecord:
    """The class for holding the results from a MAST run.

    A MASTRecord holds data about matches between motifs and sequences.
    The motifs held by the MASTRecord are objects of the class MEMEMotif.

    Attributes set by the constructor:
    sequences, matches, motifs: lists, initially empty.
    diagrams: dict mapping a sequence to its diagram, initially empty.
    version, database: strings, initially "".
    alphabet: initially None.

    Methods:
    get_motif_matches_for_sequence(sequence_name): returns all of the
    motif matches within a given sequence, ordered by their start. The
    matches are objects of the class MEME.Motif.Instance
    get_motif_matches (motif): returns all of the matches for a motif
    in the sequences searched; the motif is looked up by ``motif.name``.
    The matches returned are of class MEME.Motif.Instance
    get_motif_by_name (motif_name): returns a MEMEMotif with the given
    name, or None if there is none.
    """

    def __init__(self):
        self.sequences = []
        self.version = ""
        self.matches = []
        self.database = ""
        self.diagrams = {}
        self.alphabet = None
        self.motifs = []

    def _version(self, version):
        """Store the MAST version."""
        self.version = version

    def _alphabet(self, alphabet):
        """Store *alphabet* if it is one of the three accepted IUPAC alphabets.

        Returns None after storing the alphabet, or -1 (leaving the record
        untouched) when the alphabet is not accepted.
        """
        if alphabet == IUPAC.protein or alphabet == IUPAC.ambiguous_dna or alphabet == IUPAC.unambiguous_dna:
            self.alphabet = alphabet
        else:
            return -1

    def _database(self, database):
        """Store the database description."""
        self.database = database

    def get_motif_matches_for_sequence(self, seq):
        """Return every motif instance whose sequence_name equals *seq*.

        The instances of all motifs are collected and returned as a new
        list sorted by ascending ``start``. An empty list is returned when
        nothing matches.
        """
        insts = [
            inst
            for motif in self.motifs
            for inst in motif.instances
            if inst.sequence_name == seq
        ]
        # key= replaces the old cmp-style comparator: it yields the same
        # (stable) ordering but also works where cmp() no longer exists.
        insts.sort(key=lambda inst: inst.start)
        return insts

    def get_motif_matches(self, motif):
        """Return the instances of the stored motif named ``motif.name``.

        Raises AttributeError if no stored motif has that name, because the
        lookup then yields None.
        """
        m = self.get_motif_by_name(motif.name)
        return m.instances

    def _add_diagram_for_sequence(self, diagram, seq):
        """Record *diagram* under the key *seq*."""
        self.diagrams[seq] = diagram

    def _add_match(self, match):
        """Append *match* to the list of matches."""
        self.matches.append(match)

    def _add_sequence(self, sequence):
        """Append *sequence* to the list of sequences."""
        self.sequences.append(sequence)

    def _add_motif(self, motif):
        """Append *motif* to the list of motifs."""
        self.motifs.append(motif)

    def get_motif_by_name(self, name):
        """Return the first stored motif whose name equals *name*, else None."""
        for m in self.motifs:
            if m.name == name:
                return m
        return None
|
| ||||
class MASTParser(AbstractParser):
    """Parser for MAST text output (HTML output cannot be parsed yet).

    A MASTParser is given a file handle for a MAST text output file and
    hands back the consumer's ``data`` object -- a MASTRecord holding the
    hits between motifs and sequences. The scanner buffers lines in an
    unusual way in order to pick apart match diagrams; very complex
    diagrams often produce an error message and leave the p-values of the
    affected line unparsed.

    The consumer and scanner are created once in the constructor, so the
    same consumer instance is used by every call to parse().

    Methods:
    parse (handle): parses the data from the file handle passed to it.

    Example:

    >>> f = open("mast_file.txt")
    >>> parser = MASTParser()
    >>> mast_record = parser.parse(f)
    >>> for motif in mast_record.motifs:
    ...     for instance in motif.instances:
    ...         print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    """

    def __init__(self):
        """Create the consumer/scanner pair used by parse()."""
        self._consumer = _MASTConsumer()
        self._scanner = _MASTScanner()

    def parse(self, handle):
        """Feed *handle* through the scanner and return the consumer's data."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
class _MASTScanner:
"""
Scanner for MAST text output.

Reads a MAST text report from a handle and forwards selected lines to
callback methods on a consumer object (noevent, _version, _database,
_add_motif, _set_current_seq, _add_line_to_buffer, ...).

NOTE(review): the indentation of this listing has been stripped, so the
nesting of the loops and branches below is not visible here -- confirm it
against the original module before changing any logic. The helpers
read_and_call, read_and_call_until, read_and_call_while,
attempt_read_and_call and safe_peekline are defined outside this listing;
presumably they read lines from the handle and pass them to the given
callback according to the blank/start/contains condition -- confirm.
"""
# Entry point: make sure the handle is a File.UndoHandle (wrapping it if
# necessary), then run the three scanning phases in order.
def feed (self,handle,consumer):
if isinstance(handle,File.UndoHandle):
uhandle = handle
else:
uhandle = File.UndoHandle(handle)
self._scan_header(uhandle,consumer)
self._scan_matches(uhandle,consumer)
self._scan_annotated_matches(uhandle,consumer)
# Header phase: version line, database line and the motif table.
def _scan_header (self,uhandle,consumer):
# A ValueError from the first helper call is replaced by a ValueError
# whose message says the 'MAST version' line is missing.
try :
read_and_call_until(uhandle,consumer.noevent,contains = "MAST version")
except ValueError:
raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
# The 'MAST version' line itself is handed to consumer._version.
read_and_call(uhandle,consumer._version,contains = "MAST version")
read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS")
read_and_call(uhandle,consumer.noevent,start = "DATABASE")
read_and_call(uhandle,consumer.noevent,start = "****")
# A line containing 'DATABASE' is handed to consumer._database.
read_and_call(uhandle,consumer._database,contains = "DATABASE")
read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH")
read_and_call(uhandle,consumer.noevent,contains = "MOTIF")
read_and_call(uhandle,consumer.noevent,contains = "----")
# Motif table rows go to consumer._add_motif (presumably one call per
# line up to the first blank line -- confirm against the helper).
read_and_call_until(uhandle,consumer._add_motif,blank = 1)
read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:")
# Section II phase: every line here is routed to consumer.noevent; the only
# call that used a real handler is commented out below.
def _scan_matches (self,uhandle,consumer):
read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME")
read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME")
read_and_call(uhandle,consumer.noevent,start = "---")
# read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1)
read_and_call_until(uhandle,consumer.noevent,blank = 1)
read_and_call(uhandle,consumer.noevent,blank = 1)
# Section III phase: per-sequence annotated matches and their diagrams.
def _scan_annotated_matches (self,uhandle,consumer):
read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:")
read_and_call(uhandle,consumer.noevent,start = "SECTION III:")
read_and_call_until(uhandle,consumer.noevent,start = "****")
read_and_call(uhandle,consumer.noevent,start = "****")
read_and_call_until(uhandle,consumer.noevent,start = "*****")
read_and_call(uhandle,consumer.noevent)
read_and_call_while(uhandle,consumer.noevent,blank = 1)
# readMatches is the outer loop flag; it is set to 0 below when a line of
# asterisks is seen.
readMatches = 1
while readMatches==1:
# When a sequence is already current and the consumer's buffer is
# non-empty, the buffer is parsed; it is blanked before the next sequence.
if consumer._current_seq:
if consumer._buffer_size!=0:
consumer._parse_buffer(None)
consumer._blank_buffer(None)
# The next line is handed to consumer._set_current_seq.
read_and_call(uhandle,consumer._set_current_seq)
read_and_call_until(uhandle,consumer.noevent,start = " DIAGRAM")
# Diagram lines are buffered, turned into a diagram, and the buffer reset.
read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1)
consumer._add_diagram_from_buffer(None)
consumer._blank_buffer(None)
read_and_call(uhandle,consumer.noevent,blank = 1)
while 1:
# Peek at the next line; a line starting with '****' triggers a final
# buffer parse and clears readMatches.
line = safe_peekline(uhandle)
if line.startswith("****"):
consumer._parse_buffer(None)
readMatches = 0
break
# Otherwise buffer lines up to a blank line, then collapse the buffer.
read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1)
read_and_call(uhandle,consumer.noevent,blank = 1)
consumer._collapse_buffer(None)
# A further blank line leaves this loop with readMatches unchanged; a
# '*****' line parses and blanks the buffer and clears readMatches.
if attempt_read_and_call(uhandle,consumer.noevent,blank = 1):
break
elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"):
consumer._parse_buffer(None)
consumer._blank_buffer(None)
readMatches = 0
break
class MASTRecord:
    """The class for holding the results from a MAST run.

    A MASTRecord holds data about matches between motifs and sequences.
    The motifs held by the MASTRecord are objects of the class MEMEMotif.

    Attributes set by the constructor:
    sequences, matches, motifs: lists, initially empty.
    diagrams: dict mapping a sequence to its diagram, initially empty.
    version, database: strings, initially "".
    alphabet: initially None.

    Methods:
    get_motif_matches_for_sequence(sequence_name): returns all of the
    motif matches within a given sequence, ordered by their start. The
    matches are objects of the class MEMEInstance
    get_motif_matches (motif): returns all of the matches for a motif
    in the sequences searched; the motif is looked up by ``motif.name``.
    The matches returned are of class MEMEInstance
    get_motif_by_name (motif_name): returns a MEMEMotif with the given
    name, or None if there is none.
    """

    def __init__(self):
        self.sequences = []
        self.version = ""
        self.matches = []
        self.database = ""
        self.diagrams = {}
        self.alphabet = None
        self.motifs = []

    def _version(self, version):
        """Store the MAST version."""
        self.version = version

    def _alphabet(self, alphabet):
        """Store *alphabet* if it is one of the three accepted IUPAC alphabets.

        Returns None after storing the alphabet, or -1 (leaving the record
        untouched) when the alphabet is not accepted.
        """
        if alphabet == IUPAC.protein or alphabet == IUPAC.ambiguous_dna or alphabet == IUPAC.unambiguous_dna:
            self.alphabet = alphabet
        else:
            return -1

    def _database(self, database):
        """Store the database description."""
        self.database = database

    def get_motif_matches_for_sequence(self, seq):
        """Return every motif instance whose sequence_name equals *seq*.

        The instances of all motifs are collected and returned as a new
        list sorted by ascending ``start``. An empty list is returned when
        nothing matches.
        """
        insts = [
            inst
            for motif in self.motifs
            for inst in motif.instances
            if inst.sequence_name == seq
        ]
        # key= replaces the old cmp-style comparator: it yields the same
        # (stable) ordering but also works where cmp() no longer exists.
        insts.sort(key=lambda inst: inst.start)
        return insts

    def get_motif_matches(self, motif):
        """Return the instances of the stored motif named ``motif.name``.

        Raises AttributeError if no stored motif has that name, because the
        lookup then yields None.
        """
        m = self.get_motif_by_name(motif.name)
        return m.instances

    def _add_diagram_for_sequence(self, diagram, seq):
        """Record *diagram* under the key *seq*."""
        self.diagrams[seq] = diagram

    def _add_match(self, match):
        """Append *match* to the list of matches."""
        self.matches.append(match)

    def _add_sequence(self, sequence):
        """Append *sequence* to the list of sequences."""
        self.sequences.append(sequence)

    def _add_motif(self, motif):
        """Append *motif* to the list of motifs."""
        self.motifs.append(motif)

    def get_motif_by_name(self, name):
        """Return the first stored motif whose name equals *name*, else None."""
        for m in self.motifs:
            if m.name == name:
                return m
        return None
|
| |||
class MASTParser(AbstractParser):
[[#variable2d9a26c0]]
# The consumer and scanner are created once here, so the same consumer
# instance is used by every call to parse().
def __init__(self):
self._consumer = _MASTConsumer( )
self._scanner = _MASTScanner( )
# Feed the handle through the scanner and return the consumer's data.
def parse(self,handle):
self._scanner.feed(handle,self._consumer)
return self._consumer.data
class _MASTScanner:
"""
Scanner for MAST text output.

Reads a MAST text report from a handle and forwards selected lines to
callback methods on a consumer object (noevent, _version, _database,
_add_motif, _set_current_seq, _add_line_to_buffer, ...).

NOTE(review): the indentation of this listing has been stripped, so the
nesting of the loops and branches below is not visible here -- confirm it
against the original module before changing any logic. The helpers
read_and_call, read_and_call_until, read_and_call_while,
attempt_read_and_call and safe_peekline are defined outside this listing;
presumably they read lines from the handle and pass them to the given
callback according to the blank/start/contains condition -- confirm.
"""
# Entry point: make sure the handle is a File.UndoHandle (wrapping it if
# necessary), then run the three scanning phases in order.
def feed(self,handle,consumer):
if isinstance(handle,File.UndoHandle):
uhandle = handle
else:
uhandle = File.UndoHandle(handle)
self._scan_header(uhandle,consumer)
self._scan_matches(uhandle,consumer)
self._scan_annotated_matches(uhandle,consumer)
# Header phase: version line, database line and the motif table.
def _scan_header(self,uhandle,consumer):
# A ValueError from the first helper call is replaced by a ValueError
# whose message says the 'MAST version' line is missing.
try :
read_and_call_until(uhandle,consumer.noevent,contains = "MAST version")
except ValueError:
raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
# The 'MAST version' line itself is handed to consumer._version.
read_and_call(uhandle,consumer._version,contains = "MAST version")
read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS")
read_and_call(uhandle,consumer.noevent,start = "DATABASE")
read_and_call(uhandle,consumer.noevent,start = "****")
# A line containing 'DATABASE' is handed to consumer._database.
read_and_call(uhandle,consumer._database,contains = "DATABASE")
read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH")
read_and_call(uhandle,consumer.noevent,contains = "MOTIF")
read_and_call(uhandle,consumer.noevent,contains = "----")
# Motif table rows go to consumer._add_motif (presumably one call per
# line up to the first blank line -- confirm against the helper).
read_and_call_until(uhandle,consumer._add_motif,blank = 1)
read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:")
# Section II phase: every line here is routed to consumer.noevent; the only
# call that used a real handler is commented out below.
def _scan_matches(self,uhandle,consumer):
read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME")
read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME")
read_and_call(uhandle,consumer.noevent,start = "---")
# read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1)
read_and_call_until(uhandle,consumer.noevent,blank = 1)
read_and_call(uhandle,consumer.noevent,blank = 1)
# Section III phase: per-sequence annotated matches and their diagrams.
def _scan_annotated_matches(self,uhandle,consumer):
read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:")
read_and_call(uhandle,consumer.noevent,start = "SECTION III:")
read_and_call_until(uhandle,consumer.noevent,start = "****")
read_and_call(uhandle,consumer.noevent,start = "****")
read_and_call_until(uhandle,consumer.noevent,start = "*****")
read_and_call(uhandle,consumer.noevent)
read_and_call_while(uhandle,consumer.noevent,blank = 1)
# readMatches is the outer loop flag; it is set to 0 below when a line of
# asterisks is seen.
readMatches = 1
while readMatches==1:
# When a sequence is already current and the consumer's buffer is
# non-empty, the buffer is parsed; it is blanked before the next sequence.
if consumer._current_seq:
if consumer._buffer_size!=0:
consumer._parse_buffer(None)
consumer._blank_buffer(None)
# The next line is handed to consumer._set_current_seq.
read_and_call(uhandle,consumer._set_current_seq)
read_and_call_until(uhandle,consumer.noevent,start = " DIAGRAM")
# Diagram lines are buffered, turned into a diagram, and the buffer reset.
read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1)
consumer._add_diagram_from_buffer(None)
consumer._blank_buffer(None)
read_and_call(uhandle,consumer.noevent,blank = 1)
while 1:
# Peek at the next line; a line starting with '****' triggers a final
# buffer parse and clears readMatches.
line = safe_peekline(uhandle)
if line.startswith("****"):
consumer._parse_buffer(None)
readMatches = 0
break
# Otherwise buffer lines up to a blank line, then collapse the buffer.
read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1)
read_and_call(uhandle,consumer.noevent,blank = 1)
consumer._collapse_buffer(None)
# A further blank line leaves this loop with readMatches unchanged; a
# '*****' line parses and blanks the buffer and clears readMatches.
if attempt_read_and_call(uhandle,consumer.noevent,blank = 1):
break
elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"):
consumer._parse_buffer(None)
consumer._blank_buffer(None)
readMatches = 0
break
class MASTRecord:
[[#variable2d9a26a0]]
# Start with empty collections, empty version/database strings and no
# alphabet.
def __init__(self):
self.sequences = [ ]
self.version = ""
self.matches = [ ]
self.database = ""
self.diagrams = { }
self.alphabet = None
self.motifs = [ ]
# Store the MAST version.
def _version(self,version):
self.version = version
# Store the alphabet only if it equals one of the three listed IUPAC
# alphabets; otherwise return -1 and leave the record unchanged.
def _alphabet(self,alphabet):
if alphabet==IUPAC.protein or alphabet==IUPAC.ambiguous_dna or alphabet==IUPAC.unambiguous_dna:
self.alphabet = alphabet
else:
return -1
# Store the database description.
def _database(self,database):
self.database = database
# Collect the instances of every motif whose sequence_name equals seq and
# return them ordered by start.
def get_motif_matches_for_sequence(self,seq):
insts = [ ]
for m in self.motifs:
for i in m.instances:
if i.sequence_name==seq:
insts.append(i)
# NOTE(review): a positional comparator built on cmp() only works under
# Python 2; sort(key=...) would be needed for Python 3.
insts.sort( lambda x,y:cmp(x.start,y.start))
return insts
# Look the motif up by motif.name and return its instances. If no motif has
# that name the lookup yields None and the attribute access fails.
def get_motif_matches(self,motif):
m = self.get_motif_by_name(motif.name)
return m.instances
# Record the diagram under the key seq.
def _add_diagram_for_sequence(self,diagram,seq):
self.diagrams[seq] = diagram
# Append to the list of matches.
def _add_match(self,match):
self.matches.append(match)
# Append to the list of sequences.
def _add_sequence(self,sequence):
self.sequences.append(sequence)
# Append to the list of motifs.
def _add_motif(self,motif):
self.motifs.append(motif)
# Return the first stored motif whose name equals name; falls through
# (returning None) when there is none.
def get_motif_by_name(self,name):
for m in self.motifs:
if m.name==name:
return m
|
| CloneAbstraction |
| Parameter Index | Clone Instance | Parameter Name | Value |
|---|---|---|---|
| 1 | 1 | [[#2d9a26c0]] | '''
Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord
A MASTParser takes a file handle for a MAST text output file and
returns a MASTRecord, containing the hits between motifs and
sequences. The parser does some unusual line buffering to parse out
match diagrams. Really complex diagrams often lead to an error message
and p-values not being parsed for a given line.
Methods:
parse (handle): parses the data from the file handle passed to it.
Example:
f = open("mast_file.txt")
parser = MASTParser()
mast_record = parser.parse(f)
for motif in mast_record.motifs:
for instance in motif.instances:
print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
''' |
| 1 | 2 | [[#2d9a26c0]] | '''
Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord
A MASTParser takes a file handle for a MAST text output file and
returns a MASTRecord, containing the hits between motifs and
sequences. The parser does some unusual line buffering to parse out
match diagrams. Really complex diagrams often lead to an error message
and p-values not being parsed for a given line.
Methods:
parse (handle): parses the data from the file handle passed to it.
Example:
>>>f = open("mast_file.txt")
>>>parser = MASTParser()
>>>mast_record = parser.parse(f)
>>>for motif in mast_record.motifs:
>>> for instance in motif.instances:
>>> print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
''' |
| 2 | 1 | [[#2d9a26a0]] | """The class for holding the results from a MAST run.
A MASTRecord holds data about matches between motifs and sequences.
The motifs held by the MASTRecord are objects of the class MEMEMotif.
Methods:
get_motif_matches_for_sequence(sequence_name): returns all of the
motif matches within a given sequence. The matches are objects of
the class MEME.Motif.Instance
get_motif_matches (motif_name): returns all of the matches for a motif
in the sequences searched. The matches returned are of class
MEME.Motif.Instance
get_motif_by_name (motif_name): returns a MEMEMotif with the given
name.
""" |
| 2 | 2 | [[#2d9a26a0]] | """The class for holding the results from a MAST run.
A MASTRecord holds data about matches between motifs and sequences.
The motifs held by the MASTRecord are objects of the class MEMEMotif.
Methods:
get_motif_matches_for_sequence(sequence_name): returns all of the
motif matches within a given sequence. The matches are objects of
the class MEMEInstance
get_motif_matches (motif_name): returns all of the matches for a motif
in the sequences searched. The matches returned are of class
MEMEInstance
get_motif_by_name (motif_name): returns a MEMEMotif with the given
name.
""" |