From 0017181392b45ad0b924298aad88adfbe55d71f9 Mon Sep 17 00:00:00 2001
From: sweng66 <sweng@stanford.edu>
Date: Thu, 8 Jul 2021 09:49:22 -0700
Subject: [PATCH] adding Go perl lib

---
 .../GO/AnnotationProvider/AnnotationParser.pm | 2097 ++++++++++++
 www/lib/GO/AnnotationProvider/MetaData.pm     | 1539 +++++++++
 www/lib/GO/OntologyProvider/OboParser.pm      | 1042 ++++++
 www/lib/GO/README                             |    1 +
 www/lib/GO/TermFinder.pm                      | 2134 +++++++++++++
 www/lib/GO/TermFinderReport/Html.pm           |  419 +++
 www/lib/GO/TermFinderReport/Text.pm           |  283 ++
 www/lib/GO/Utils/General.pm                   |  112 +
 www/lib/GO/View.pm                            | 2809 +++++++++++++++++
 9 files changed, 10436 insertions(+)
 create mode 100644 www/lib/GO/AnnotationProvider/AnnotationParser.pm
 create mode 100644 www/lib/GO/AnnotationProvider/MetaData.pm
 create mode 100644 www/lib/GO/OntologyProvider/OboParser.pm
 create mode 100644 www/lib/GO/README
 create mode 100644 www/lib/GO/TermFinder.pm
 create mode 100644 www/lib/GO/TermFinderReport/Html.pm
 create mode 100644 www/lib/GO/TermFinderReport/Text.pm
 create mode 100644 www/lib/GO/Utils/General.pm
 create mode 100644 www/lib/GO/View.pm

diff --git a/www/lib/GO/AnnotationProvider/AnnotationParser.pm b/www/lib/GO/AnnotationProvider/AnnotationParser.pm
new file mode 100644
index 0000000..cf53f0f
--- /dev/null
+++ b/www/lib/GO/AnnotationProvider/AnnotationParser.pm
@@ -0,0 +1,2097 @@
+package GO::AnnotationProvider::AnnotationParser;
+
+# File       : AnnotationParser.pm
+# Authors    : Elizabeth Boyle; Gavin Sherlock
+# Date Begun : Summer 2001
+# Rewritten  : September 25th 2002
+
+# $Id: AnnotationParser.pm,v 1.13 2015/02/09 18:36:55 mark Exp $
+
+# Copyright (c) 2003 Gavin Sherlock; Stanford University
+
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+=pod
+
+=head1 NAME
+
+GO::AnnotationProvider::AnnotationParser - parses a gene annotation file
+
+=head1 SYNOPSIS
+
+GO::AnnotationProvider::AnnotationParser - reads a Gene Ontology gene
+associations file, and provides methods by which to retrieve the GO
+annotations for the an annotated entity.  Note, it is case
+insensitive, with some caveats - see documentation below.
+
+    my $annotationParser = GO::AnnotationProvider::AnnotationParser->new(annotationFile => "data/gene_association.sgd");
+
+    my $geneName = "AAT2";
+
+    print "GO associations for gene: ", join (" ", $annotationParser->goIdsByName(name   => $geneName,
+										  aspect => 'P')), "\n";
+
+    print "Database ID for gene: ", $annotationParser->databaseIdByName($geneName), "\n";
+
+    print "Database name: ", $annotationParser->databaseName(), "\n";
+
+    print "Standard name for gene: ", $annotationParser->standardNameByName($geneName), "\n";
+
+    my $i;
+
+    my @geneNames = $annotationParser->allStandardNames();
+
+    foreach $i (0..10) {
+
+        print "$geneNames[$i]\n";
+
+    }
+
+=head1 DESCRIPTION
+
+GO::AnnotationProvider::AnnotationParser is a concrete subclass of
+GO::AnnotationProvider, and creates a data structure mapping gene
+names to GO annotations by parsing a file of annotations provided by
+the Gene Ontology Consortium.
+
+This package provides object methods for retrieving GO annotations
+that have been parsed from a 'gene associations' file, provided by
+the gene ontology consortium.  The format for the file is:
+
+Lines beginning with a '!' character are comment lines.
+
+    Column  Cardinality   Contents          
+    ------  -----------   -------------------------------------------------------------
+        0       1         Database abbreviation for the source of annotation (e.g. SGD)
+        1       1         Database identifier of the annotated entity
+        2       1         Standard name of the annotated entity
+        3       0,1       NOT (if a gene is specifically NOT annotated to the term)
+        4       1         GOID of the annotation     
+        5       1,n       Reference(s) for the annotation 
+        6       1         Evidence code for the annotation
+        7       0,n       With or From (a bit mysterious)
+        8       1         Aspect of the Annotation (C, F, P)
+        9       0,1       Name of the product being annotated
+       10       0,n       Alias(es) of the annotated product
+       11       1         type of annotated entity (one of gene, transcript, protein)
+       12       1,2       taxonomic id of the organism encoding and/or using the product
+       13       1         Date of annotation YYYYMMDD
+       14       1         Assigned_by : The database which made the annotation
+
+Columns are separated by tabs.  For those entries with a cardinality
+greater than 1, multiple entries are pipe , |, delimited.
+
+Further details can be found at:
+
+http://www.geneontology.org/doc/GO.annotation.html#file
+
+The following assumptions about the file are made (and should be true):
+
+    1.  All aliases appear for all entries of a given annotated product
+    2.  The database identifiers are unique, in that two different
+        entities cannot have the same database id.
+
+=head1 TODO
+
+Also see the TODO list in the parent, GO::AnnotationProvider.
+
+ 1.  Add in methods that will allow retrieval of evidence codes with
+     the annotations for a particular entity.
+
+ 2.  Add in methods that return all the annotated entities for a
+     particular GOID.
+
+ 3.  Add in the ability to request only annotations either including
+     or excluding particular evidence codes.  Such evidence codes
+     could be provided as an anonymous array as the value of a named
+     argument.
+
+ 4.  Same as number 3, except allow the retrieval of annotated
+     entities for a particular GOID, based on inclusion or exclusion
+     of certain evidence codes.
+
+ These first four items will require a reworking of how data are
+ stored on the backend, and thus the parsing code itself, though it
+ should not affect any of the already existing API.
+
+ 5.  Instead of 'use'ing Storable, 'require' it instead, only at the
+     point of use, which will mean that AnnotationParser can be
+     happily used in the absence of Storable, just without those
+     functions that need it.
+
+ 6.  Extend the ValidateFile class method to check that an entity
+     should never be annotated to the same node twice, with the same
+     evidence, with the same reference.
+
+ 7.  An additional checker, that uses an AnnotationProvider in
+     conjunction with an OntologyProvider, would be useful, that
+     checks that some of the annotations themselves are valid, ie
+     that no entities are annotated to the 'unknown' node in a
+     particular aspect, and also to another node within that same
+     aspect.  Can annotations be redundant? ie, if an entity is
+     annotated to a node, and an ancestor of the node, is that
+     annotation redundant?  Does it depend on the evidence codes and
+     references.  Or are such annotations reinforcing?  These things
+     are useful to consider when formulating the confidence which can
+     be attributed to an annotation.
+
+=cut
+
+use strict;
+use warnings;
+use diagnostics;
+
+use Storable qw (nstore);
+use IO::File;
+
+use vars qw (@ISA $PACKAGE $VERSION);
+
+use GO::AnnotationProvider;
+@ISA = qw (GO::AnnotationProvider);
+
+$PACKAGE = "GO::AnnotationProvider::AnnotationParser";
+$VERSION = "0.15";
+
+# CLASS Attributes
+#
+# These should be considered as constants, and are initialized here
+
+my $DEBUG = 0;
+
+# constants for instance attribute name
+
+my $kDatabaseName           = $PACKAGE.'::__databaseName';           # stores the name of the annotating database
+my $kFileName               = $PACKAGE.'::__fileName';               # stores the name of the file used to instantiate the object
+my $kNameToIdMapInsensitive = $PACKAGE.'::__nameToIdMapInsensitive'; # stores a case insensitive map of all unambiguous names for a gene to the database id
+my $kNameToIdMapSensitive   = $PACKAGE.'::__nameToIdMapSensitive';   # stores a case sensitive map of all names where a particular casing is unambiguous for a gene to the database id
+my $kAmbiguousNames         = $PACKAGE.'::__ambiguousNames';         # stores the database id's for all ambiguous names
+my $kIdToStandardName       = $PACKAGE.'::__idToStandardName';       # stores a map of database id's to standard names of all entities
+my $kStandardNameToId       = $PACKAGE.'::__StandardNameToId';       # stores a map of standard names to their database id's
+my $kUcIdToId               = $PACKAGE.'::__ucIdToId';               # stores a map of uppercased databaseIds to the databaseId
+my $kUcStdNameToStdName     = $PACKAGE.'::__ucStdNameToStdName';     # stores a map of uppercased standard names to the standard name
+my $kNameToCount            = $PACKAGE.'::__nameToCount';            # stores a case sensitive map of the number of times a name has been seen
+my $kGoids                  = $PACKAGE.'::__goids';                  # stores all the goid annotations
+my $kNumAnnotatedGenes      = $PACKAGE.'::__numAnnotatedGenes';      # stores number of genes with annotations, per aspect
+
+my $kAmbiguousNamesSensitive = $PACKAGE.'::__ambiguousNamesSensitive'; # names (case sensitive) that are ambiguous
+
+my $kTotalNumAnnotatedGenes = $PACKAGE.'::__totalNumAnnotatedGenes'; # total number of annotated genes
+
+# added to keep track of number of each evidence code - mark
+my $kEvidenceCodeCount = $PACKAGE.'::__evidenceCodeCount';
+my $kCVSVersion = $PACKAGE."::__cvsVersion";  # mark
+my $kValidationDate = $PACKAGE."::__validationDate";  # mark
+
+# constants to describe what is in which column in the annotation file
+
+my $kDatabaseNameColumn = 0;
+my $kDatabaseIdColumn   = 1;
+my $kStandardNameColumn = 2;
+my $kNotColumn          = 3;
+my $kGoidColumn         = 4;
+my $kReferenceColumn    = 5;
+my $kEvidenceColumn     = 6;
+my $kWithColumn         = 7;
+my $kAspectColumn       = 8;
+my $kNameColumn         = 9;
+my $kAliasesColumn      = 10;
+my $kEntityTypeColumn   = 11;
+my $kTaxonomicIDColumn  = 12;
+my $kDateColumn         = 13;
+my $kAssignedByColumn   = 14;
+
+# the following hash of anonymous arrays indicates for each column
+# what the maximum and minimum number of entries per column can be.
+# If no maximum is indicated, then the maximum is equal to the
+# minimum, and exactly that number of entries must exist.
+
+my %kColumnsToCardinality = ($kDatabaseNameColumn => [1     ],
+			     $kDatabaseIdColumn   => [1     ],
+			     $kStandardNameColumn => [1     ],
+			     $kNotColumn          => [0,   1],
+			     $kGoidColumn         => [1     ],
+			     $kReferenceColumn    => [1, "n"],
+			     $kEvidenceColumn     => [1     ],
+			     $kWithColumn         => [0, "n"],
+			     $kAspectColumn       => [1     ],
+			     $kNameColumn         => [0,   1],
+			     $kAliasesColumn      => [0, "n"],
+			     $kEntityTypeColumn   => [1     ],
+			     $kTaxonomicIDColumn  => [1,   2],
+			     $kDateColumn         => [1     ],
+			     $kAssignedByColumn   => [1     ]);
+
+my $kNumColumnsInFile = scalar keys %kColumnsToCardinality;
+
+=pod
+
+=head1 Class Methods
+
+=cut
+
+############################################################################
+sub Usage{
+############################################################################
+=pod
+
+=head2 Usage
+
+This class method simply prints out a usage statement, along with an
+error message, if one was passed in.
+
+Usage :
+
+    GO::AnnotationProvider::AnnotationParser->Usage();
+
+=cut
+
+    my ($class, $message) = @_;
+
+    defined $message && print $message."\n\n";
+
+    print 'The constructor expects one of two arguments, either a
+\'annotationFile\' argument, or and \'objectFile\' argument.  When
+instantiated with an annotationFile argument, it expects it to
+correspond to an annotation file created by one of the GO consortium
+members, according to their file format.  When instantiated with an
+objectFile argument, it expects to open a previously created
+annotationParser object that has been serialized to disk (see the
+serializeToDisk method).
+
+Usage:
+
+    my $annotationParser = '.$PACKAGE.'->new(annotationFile => $file);
+
+    my $annotationParser = '.$PACKAGE.'->new(objectFile => $file);
+';
+
+}
+
+############################################################################
+sub ValidateFile{
+############################################################################
+=pod
+
+=head2 ValidateFile
+
+This class method reads an annotation file, and returns a reference to
+an array of errors that are present within the file.  The errors are
+simply strings, each beginning with "Line $lineNo : " where $lineNo is
+the number of the line in the file where the error was found.
+
+Usage:
+
+    my $errorsRef = GO::AnnotationProvider::AnnotationParser->ValidateFile(annotationFile => $file);
+
+=cut
+
+    my ($class, %args) = @_;
+    
+    my $file = $args{'annotationFile'} || $class->_handleMissingArgument(argument => 'annotationFile');
+    
+    my $annotationsFh = IO::File->new($file, q{<} )|| die "$PACKAGE cannot open $file : $!";
+
+    my (@errors, @line);
+
+    my ($databaseId, $standardName, $aliases);
+    my (%idToName, %idToAliases);
+
+    my $lineNo = 0;
+    
+    while (<$annotationsFh>){
+	
+	++$lineNo;
+	
+	next if $_ =~ m/^!/; # skip comment lines
+	
+	chomp;
+
+	next unless $_; # skip an empty line, if there is one
+	
+	@line = split("\t", $_, -1);
+	
+	if (scalar @line != $kNumColumnsInFile){ # doesn't have the correct number of columns
+	    
+	    push (@errors, "Line $lineNo has ". scalar @line. "columns, instead of $kNumColumnsInFile.");
+	    
+	}
+	
+	$class->__CheckCardinalityOfColumns(\@errors, \@line, $lineNo);
+	
+	# now want to deal with sanity checks...
+	
+	($databaseId, $standardName, $aliases) = @line[$kDatabaseIdColumn, $kStandardNameColumn, $kAliasesColumn];
+	
+	next if ($databaseId eq ""); # will have given incorrect cardinality, but nothing more we can do with it
+
+	if (!exists $idToName{$databaseId}){
+
+	    $idToName{$databaseId} = $standardName;
+
+	}elsif ($idToName{$databaseId} ne $standardName){
+
+	    push (@errors, "Line $lineNo : $databaseId has more than one standard name : $idToName{$databaseId} and $standardName.");
+
+	}
+
+	if (!exists $idToAliases{$databaseId}){
+
+	    $idToAliases{$databaseId} = $aliases;
+
+	}elsif($idToAliases{$databaseId} ne $aliases){
+
+	    push (@errors, "Line $lineNo : $databaseId has more than one collections of aliases : $idToAliases{$databaseId} and $aliases.");
+
+	}   
+
+    }
+    
+    $annotationsFh->close || die "$PACKAGE cannot close $file : $!";
+    
+    return \@errors;
+
+}
+
+############################################################################
+sub __CheckCardinalityOfColumns{
+############################################################################
+# This method checks the cardinality of each column on a line
+#
+# Usage:
+#
+#    $class->__CheckCardinalityOfColumns(\@errors, \@line, $lineNo);
+    
+    my ($class, $errorsRef, $lineRef, $lineNo) = @_;
+    
+    my ($cardinality, $min, $max);
+    
+    foreach my $column (sort {$a<=>$b} keys %kColumnsToCardinality){
+	
+	($min, $max) = @{$kColumnsToCardinality{$column}}[0,1];
+	
+	$cardinality = $class->__GetCardinality($lineRef->[$column], $errorsRef, $lineNo);
+	
+	if (!defined $max){ # must have a defined number of entries
+
+	    if ($cardinality != $min){
+		
+		push (@{$errorsRef}, "Line $lineNo : column $column has a cardinality of $cardinality, instead of $min.");
+		
+	    }
+	    
+	}else{ # there's a range of allowed number of entries
+	    
+	    if ($cardinality < $min){ # check if less than minimum
+		
+		push (@{$errorsRef}, "Line $lineNo : column $column has a cardinality of $cardinality, which is less than the required $min.");
+		
+	    }elsif ($kColumnsToCardinality{$column}->[1] ne 'n' &&
+		    $cardinality > $max){ # check if more than maximum
+		
+		push (@{$errorsRef}, "Line $lineNo : column $column has a cardinality of $cardinality, which is more than the allowed $max.");
+		
+	    }
+	    
+	}
+	
+    }
+
+}
+
+############################################################################
+sub __GetCardinality{
+############################################################################
+# This private method returns an integer that indicates the
+# cardinality of a text string, where multiple entries are assumed to
+# be seperated by the pipe character (|).  In addition, it checks
+# whether there are null or whitespace only entries.
+#
+# Usage:
+#
+#    my $cardinality = $class->__GetCardinality($string);
+
+    my ($class, $string, $errorsRef, $lineNo) = @_;
+
+    my $cardinality;
+
+    if (!defined $string || $string eq ""){
+
+	$cardinality = 0;
+
+    }else{
+
+	my @entries = split(/\|/, $string, -1);
+
+	foreach my $entry (@entries){
+
+	    if (!defined $entry){
+
+		push (@{$errorsRef}, "Line $lineNo : There is an undefined value in the string $string.");
+
+	    }elsif ($entry =~ /^\s+$/){
+
+		push (@{$errorsRef}, "Line $lineNo : There is a white-space only value in the string $string.");
+
+	    }
+
+	}
+
+	$cardinality = scalar @entries;
+
+    }
+
+    return $cardinality;
+
+}
+
+############################################################################
+#
+# Constructor, and initialization methods.
+#
+# All initialization methods are private, except, of course, for the
+# new() method.
+#
+############################################################################
+
+############################################################################
+sub new{
+############################################################################
+=pod
+
+=head1 Constructor
+
+=head2 new
+
+This is the constructor for an AnnotationParser object.
+
+The constructor expects one of two arguments, either a
+'annotationFile' argument, or and 'objectFile' argument.  When
+instantiated with an annotationFile argument, it expects it to
+correspond to an annotation file created by one of the GO consortium
+members, according to their file format.  When instantiated with an
+objectFile argument, it expects to open a previously created
+annotationParser object that has been serialized to disk (see the
+serializeToDisk method).
+
+Usage:
+
+    my $annotationParser = GO::AnnotationProvider::AnnotationParser->new(annotationFile => $file);
+
+    my $annotationParser = GO::AnnotationProvider::AnnotationParser->new(objectFile => $file);
+
+=cut
+
+
+    my ($class, %args) = @_;
+
+    my $self;
+
+    if (exists($args{'annotationFile'})){
+
+	$self = {};
+
+	bless $self, $class;
+
+#	$self->__init($args{'annotationFile'});
+#	if (!$self->__init($args{'annotationFile'}, $args{'evidenceCodes'})) { return undef; }  # mark
+	if (!$self->__init(%args)) { return undef; }  # mark
+
+    }elsif (exists($args{'objectFile'})){
+
+	$self = Storable::retrieve($args{'objectFile'}) || die "Could not instantiate $PACKAGE object from objectFile : $!";
+
+	$self->__setFile($args{'objectFile'});
+
+    }else{
+
+	$class->Usage("An annotationFile or objectFile argument must be provided.");
+	die;
+
+    }
+
+    # now, we have to make some alteration to some hashes to support
+    # our API for case insensitivity.  The API says that if a name is
+    # supplied that would otherwise be ambiguous, but has a unique
+    # casing, then we will accept it as that unique cased version.
+    # Thus, we need to make sure that our $kNameToIdMapSensitive hash
+    # only tracks those names that were unique in a particular case
+
+    foreach my $name (keys %{$self->{$kNameToCount}}){
+
+	# go through the has that has a count of each name
+
+	if ($self->{$kNameToCount}{$name} > 1 || exists $self->{$kNameToIdMapInsensitive}{uc($name)}){
+
+	    # if it was seen more than once, or is known to be unique
+	    # in a case insensitive fashion, then delete it.  This
+	    # will leave just those that are unique in a case
+	    # sensitive fashion
+
+	    delete $self->{$kNameToIdMapSensitive}{$name};
+
+	}
+
+    }
+
+    return ($self);
+
+}
+
+############################################################################
+sub __init{
+############################################################################
+# This private method initializes the object by reading in the data
+# from the annotation file.
+#
+# Usage :
+#
+#    $self->__init($file);
+#
+
+    my ($self, %args) = @_;
+
+    my $file = $args{annotationFile};
+    my $evidenceCodes = $args{evidenceCodes};
+    my $evidenceCodesNot = $args{evidenceCodesNot};
+    my $aspectIn = $args{aspect};
+
+    $self->__setFile($file);
+
+    my $annotationsFh = IO::File->new($file, q{<} )|| die "$PACKAGE cannot open $file : $!";
+
+    # now read through annotations file
+
+    my (@line, $databaseId, $goid, $aspect, $standardName, $aliases);
+    
+    while (<$annotationsFh>){
+
+	my $in = $_;
+
+	if ($in =~ m/^!(.*)\s+\$$/) {
+	    my @info = split(/\s*\:\s*/, $1);
+	    if ($#info > 0) {
+		if (($info[0] eq "CVS Version") && ($info[1] eq "Revision") && ($info[2] =~ /^\s*([\d\.]+)(\s|$)/)) {
+		    $self->{$kCVSVersion} = $1;
+		} elsif (($info[0] eq "GOC Validation Date") && ($info[1] =~ /^\s*([\d\/]+)(\s|$)/)) {
+		    $self->{$kValidationDate} = $1;
+		}
+	    }
+	}
+	next if $in =~ m/^!/; # skip commented lines
+
+	chomp($in);
+
+	next unless $in; # skip an empty line, if there is one
+
+	@line = split("\t", $in, -1);
+
+	next if uc($line[$kNotColumn]) eq 'NOT'; # skip annotations NOT to a GOID
+
+	($databaseId, $goid, $aspect) = @line[$kDatabaseIdColumn, $kGoidColumn, $kAspectColumn];
+	($standardName, $aliases)     = @line[$kStandardNameColumn, $kAliasesColumn];
+
+	if ($databaseId eq ""){
+
+	    print "On line $. there is a missing databaseId, so it will be ignored.\n";
+	    next;
+
+	}
+
+	# EXPERIMENTAL
+	if (($aspectIn) && ($aspect ne $aspectIn)) {
+	    next;
+	}
+
+	# if filtering on evidence code, ignore those that don't match the list - mark
+	# we can use one of two modes, include only given codes or exclude all given codes
+	my $evidenceCode = $line[$kEvidenceColumn];
+	$self->{$kEvidenceCodeCount}->{FOUND}->{$evidenceCode}++;
+	if ($evidenceCodes) {
+	    unless ($evidenceCodes->{$evidenceCode}) {
+		$self->{$kEvidenceCodeCount}->{SKIPPED}->{$evidenceCode}++;
+		next;
+	    }
+	} elsif ($evidenceCodesNot) {
+	    if ($evidenceCodesNot->{$evidenceCode}) {
+		$self->{$kEvidenceCodeCount}->{SKIPPED}->{$evidenceCode}++;
+		next;
+	    }
+	}
+	$self->{$kEvidenceCodeCount}->{USING}->{$evidenceCode}++;
+	$self->{$kEvidenceCodeCount}->{TOTAL_USING}++;
+
+	# record the source of the annotation
+	
+	$self->{$kDatabaseName} = $line[$kDatabaseNameColumn] if (!exists($self->{$kDatabaseName}));
+
+	# now map the standard name and all aliases to the database id
+	    
+#	$self->__mapNamesToDatabaseId($databaseId, $standardName, $aliases);
+	if (!$self->__mapNamesToDatabaseId($databaseId, $standardName, $aliases)) { return 0; };  # mark
+
+	# and store the GOID
+	
+	$self->__storeGOID($databaseId, $goid, $aspect);
+
+    }
+
+    $annotationsFh->close || die "AnnotationParser can't close $file: $!";
+
+    # now count up how many annotated things we have
+
+    foreach my $databaseId (keys %{$self->{$kGoids}}){
+
+	$self->{$kTotalNumAnnotatedGenes}++;
+
+	foreach my $aspect (keys %{$self->{$kGoids}{$databaseId}}){
+
+	    $self->{$kNumAnnotatedGenes}{$aspect}++;
+
+	}
+
+    }
+
+    return 1;   # mark
+}
+
+
+sub version {
+    my $self = shift;
+    return $self->{$kCVSVersion};
+}
+
+
+sub validationDate {
+    my $self = shift;
+    return $self->{$kValidationDate};
+}
+
+
+sub evidenceCodeCounts {
+    my ($self, $code) = @_;
+
+    my ($found, $skipped, $using) = ($self->{$kEvidenceCodeCount}->{FOUND}->{$code}, $self->{$kEvidenceCodeCount}->{SKIPPED}->{$code}, $self->{$kEvidenceCodeCount}->{USING}->{$code});
+    $found = 0 if (!$found);
+    $skipped = 0 if (!$skipped);
+    $using = 0 if (!$using);
+    return ($found, $skipped, $using);
+}
+
+
+sub evidenceCodesUsed {
+    my $self = shift;
+    return $self->{$kEvidenceCodeCount}->{USING};
+}
+sub evidenceCodesFound {
+    my $self = shift;
+    return $self->{$kEvidenceCodeCount}->{FOUND};
+}
+sub evidenceCodesSkipped {
+    my $self = shift;
+    return $self->{$kEvidenceCodeCount}->{SKIPPED};
+}
+
+sub totalEvidenceCodesUsed {
+    my $self = shift;
+    return $self->{$kEvidenceCodeCount}->{TOTAL_USING};
+}
+
+
+############################################################################
+sub __setFile{
+############################################################################
+# This method sets the name of the file used for construction.
+# 
+# Usage:
+#
+#    $self->__setFile($file);
+#
+
+    my ($self, $file) = @_;
+
+    $self->{$kFileName} = $file;
+
+}
+
+############################################################################
+sub __mapNamesToDatabaseId{
+############################################################################
+# This private method maps all names and aliases to the databaseId of
+# an entity.  It also maps the databaseId to itself, to facilitate a
+# single way of mapping any identifier to the database id.
+#
+# This mapping is done so that it can be queried in a case insensitive
+# fashion, and thus allow clients to be able to retrieve annotations
+# without necessarily knowing the correct casing of any particular
+# identifier.
+#
+# We have to keep the following considerations in mind:
+#
+# 1. Any identifier may be non-unique with respect to casing, that is,
+#    it is possible that there is ABC1 and abc1
+#
+# 2. We want to be able to returns names and identifiers in their correct
+#    casing, irrespective of the casing that is provided in the query
+#
+# 3. In the situation when a name that is ambiguous when considered case
+#    insensitively is provided, we should check to see whether that casing
+#    corresponds to a know correct casing, and assume that that is the one 
+#    that they meant.
+#
+# Usage :
+#
+#    $self->__mapNamesToDatabaseId($databaseId, $standardName, $aliases);
+#
+# where $aliases is a pipe-delimited list of aliases
+
+    my ($self, $databaseId, $standardName, $aliases) = @_;
+
+    if (exists $self->{$kIdToStandardName}{$databaseId}){ # we've already seen this databaseId
+
+	if ($self->{$kIdToStandardName}{$databaseId} ne $standardName){
+
+	    # there is a problem in the file - there should only be
+	    # one standard name for a given database id, so we'll die
+	    # here
+
+#	    die "databaseId $databaseId maps to more than one standard name : $self->{$kIdToStandardName}{$databaseId} ; $standardName\n";
+	    # this does sometimes happen, and we would like to handle it - mark
+	    print STDERR "WARNING: databaseId $databaseId maps to more than one standard name : $self->{$kIdToStandardName}{$databaseId} ; $standardName\n";
+	    return 0;
+
+	}else{
+
+	    # we can simply return, as we've already processed
+	    # information for this databaseId
+
+	    return 1;
+
+	}
+
+    }
+
+    # we haven't see this databaseId before, so process the data
+
+    my @aliases = split(/\|/, $aliases);
+
+    my %seen; # sometimes an alias will be the same as the standard name
+
+    foreach my $name ($databaseId, $standardName, @aliases){
+
+        # here, we simply store, in case sensitive fashion, a mapping
+        # of the name to databaseId.  Later, this map will be
+        # modified, so it only contains those names where the case
+        # sensitive version is unique.  We need this map to fulfill
+        # the API requirements that if databaseIdByName() is called
+        # with a name that is ambiguous, but the casing is unique,
+        # then it will correctly determine the casing match
+
+	$self->{$kNameToIdMapSensitive}{$name} = $databaseId;
+
+	my $ucName = uc($name); # cache uppercased version for efficiency
+
+	# occasionally, a standard name is also listed in the aliases,
+	# so we will skip the name if we've already seen it.
+
+	# note that for now, we are doing this case sensitively - it
+	# is possible that a gene is referred to by the same name
+	# twice but with different casing - however, if those are the
+	# only times that those particular versions are seen, then
+	# they will still be treated unambiguously.
+
+	next if exists ($seen{$name});
+
+	# let's keep a count of every time a name with the same casing
+	# is seen, across all genes
+
+	$self->{$kNameToCount}{$name}++;
+
+	# now we have to deal with the name, depending on whether we
+	# newly determine it is ambiguous, whether we already know
+	# that name is ambiguous, or whether (so far) the name appears
+	# to be unique
+
+	# for something to be newly ambiguous, the case insensitive
+	# version of its name must have been seen associated with some
+	# other database id already.
+
+	# if the case insensitive version of the name has already been
+	# seen with the same database id, it is still not ambiguous
+
+	if (exists $self->{$kNameToIdMapInsensitive}{$ucName} && $self->{$kNameToIdMapInsensitive}{$ucName} ne $databaseId){
+
+	    # so record what it maps to
+
+	    # current databaseId
+
+	    push (@{$self->{$kAmbiguousNames}{$ucName}}, $databaseId);
+	    
+	    # and previously seen databaseId
+
+	    push (@{$self->{$kAmbiguousNames}{$ucName}}, $self->{$kNameToIdMapInsensitive}{$ucName});
+
+	    # and now delete the previously seen databaseId from the unambiguous mapping
+
+	    delete $self->{$kNameToIdMapInsensitive}{$ucName};
+
+	}elsif (exists $self->{$kAmbiguousNames}{$ucName}){ # we already know it's ambiguous
+
+	    # so add in this new databaseId
+
+	    push (@{$self->{$kAmbiguousNames}{$ucName}}, $databaseId);
+
+	}else{ # otherwise simply map it unambiguously for now, as we haven't see the name before
+
+	    $self->{$kNameToIdMapInsensitive}{$ucName} = $databaseId;
+
+	}
+
+	$seen{$name} = undef; # remember that we've seen the name for this row
+
+    }
+
+    # now we need to record some useful mappings
+
+    # map databaseId and standardName to each other - these should
+    # always be unique when treated case sensitively
+
+    $self->{$kIdToStandardName}{$databaseId}   = $standardName; # record the standard name for the database id
+    $self->{$kStandardNameToId}{$standardName} = $databaseId;   # also make the reverse look up
+
+    # Now map upper cased versions of the databaseId and name to their original form
+    # These are not guaranteed to be unique, so we use arrays instead
+
+    push (@{$self->{$kUcIdToId}{uc($databaseId)}},             $databaseId);
+    push (@{$self->{$kUcStdNameToStdName}{uc($standardName)}}, $standardName);
+
+    return 1;   # mark
+}
+
+############################################################################
+sub __storeGOID{
+############################################################################
+# This private method stores a GOID for a given databaseId, on a per
+# aspect basis, in a hash.
+#
+# Usage:
+#
+#    $self->__storeGOID($databaseId, $goid, $aspect);
+#
+
+    my ($self, $databaseId, $goid, $aspect) = @_;
+
+    $self->{$kGoids}{$databaseId}{$aspect}{$goid} = undef;
+
+}
+
+=pod
+
+=head1 Public instance methods
+
+=head1 Some methods dealing with ambiguous names
+
+Because there are many names by which an annotated entity may be
+referred to, that are non-unique, there exist a set of methods for
+determining whether a name is ambiguous, and to what database
+identifiers such ambiguous names may refer.
+
+Note, that the AnnotationParser is now case insensitive, but with some
+caveats.  For instance, you can use 'cdc6' to retrieve data for CDC6.
+However, This if gene has been referred to as abc1, and another
+referred to as ABC1, then these are treated as different, and
+unambiguous.  However, the text 'Abc1' would be considered ambiguous,
+because it could refer to either.  On the other hand, if a single gene
+is referred to as XYZ1 and xyz1, and no other genes have that name (in
+any casing), then Xyz1 would still be considered unambiguous.
+
+=cut
+
+##############################################################################
+sub nameIsAmbiguous{
+##############################################################################
+
+=pod
+
+=head2 nameIsAmbiguous
+
+This public method returns a boolean to indicate whether a name is
+ambiguous, i.e. whether the name might map to more than one entity (and
+therefore more than one databaseId).  
+
+NB: API change:
+
+nameIsAmbiguous is now case insensitive - that is, if there is a name
+that is used twice using different casing, that will be treated as
+ambiguous.  Previous versions would have not treated these as
+ambiguous.  In the case that a name is provided in a certain casing,
+which was encountered only once, then it will be treated as
+unambiguous.  This is the price of wanting a case insensitive
+annotation parser...
+
+Usage:
+
+    if ($annotationParser->nameIsAmbiguous($name)){
+
+        do something useful....or not....
+
+    }
+
+=cut
+
+    my ($self, $name) = @_;
+
+    die "You must supply a name to nameIsAmbiguous" if !defined ($name);
+
+    # a name might appear in the hash of ambiguous names - however,
+    # it is possible that the provided name matches the case of one of
+    # the provided versions exactly, and thus may not be ambiguous
+
+    # of course, it is also possible that there were actually more than
+    # one copy of that alias, with exactly the same casing, which would 
+    # be ambiguous
+
+    # thus, we need to find out whether the provided name matches the case
+    # of a something exactly, which refers to only one entity
+
+    # a name being ambiguous boils down to whether it has been seen
+    # more than once in that exact case, or in the case that it has
+    # not been seen at all in that exact case, whether it is ambiguous
+    # in upper case form.
+
+    my $isAmbiguous;
+
+    if (!exists $self->{$kNameToCount}{$name}){
+
+	# we haven't seen this casing at all, so see if it's ambiguous
+	# in the uppercased version
+
+	$isAmbiguous = exists $self->{$kAmbiguousNames}{uc($name)};
+	
+    }elsif ($self->{$kNameToCount}{$name} > 1){
+
+	# we've seen this exact casing more than once, so it has to be
+	# ambiguous
+
+	$isAmbiguous = 1;
+
+#	# but if it is unambiguous as a common name, we will use that
+#	if ($annotation->nameIsStandardName($gene)) {
+#	    if ($annotation->databaseIdByStandardName($gene)) {
+#		$isAmbiguous = 0;
+#	    }
+#	}
+
+    }else{
+
+	# it must only have ever been seen once in this exact casing, 
+	# so it's unambiguous
+
+	$isAmbiguous = 0;
+
+    }
+
+#     if ($isAmbiguous && $self->databaseIdByStandardName($name)) {
+#         # actually ambiguity is resolved since it is unambiguous as a standard name
+# 	$isAmbiguous = 0;
+#     }
+
+    return $isAmbiguous;
+
+}
+
+############################################################################
+sub databaseIdsForAmbiguousName{
+############################################################################
+=pod
+
+=head2 databaseIdsForAmbiguousName
+
+This public method returns an array of database identifiers for an
+ambiguous name.  If the name is not ambiguous, an empty list will be
+returned.
+
+NB: API change:
+
+databaseIdsForAmbiguousName is now case insensitive - that is, if
+there is a name that is used twice using different casing, that will
+be treated as ambiguous.  Previous versions would have not treated
+these as ambiguous.  However, if the name provided is of the exact
+casing as a name that appeared only once with that exact casing, then
+it is treated as unambiguous. This is the price of wanting a case
+insensitive annotation parser...
+
+Usage:
+
+    my @databaseIds = $annotationParser->databaseIdsForAmbiguousName($name);
+
+=cut
+
+    my ($self, $name) = @_;
+
+    die "You must supply a name to databaseIdsForAmbiguousName" if !defined ($name);
+    
+    if ($self->nameIsAmbiguous($name)){
+	
+	return @{$self->{$kAmbiguousNames}{uc($name)}};
+
+    }else{
+
+	return ();
+
+    }
+
+}
+
+############################################################################
+sub ambiguousNames{
+############################################################################
+=pod
+
+=head2 ambiguousNames
+
+This method returns an array of names, which from the annotation file
+have been deemed to be ambiguous.
+
+Note - even though we have made the annotation parser case
+insensitive, if something appeared in the annotations file as BLAH1
+and blah1, we would not deem either of these to be ambiguous.
+However, if it appeared as blah1 twice, referring to two different
+genes, then blah1 would be ambiguous.
+
+Usage:
+
+    my @ambiguousNames = $annotationParser->ambiguousNames();
+
+=cut
+
+    my $self = shift;
+
+    # we can simply generate a list of case-sensitive names that have
+    # appeared more than once - we'll cache them so they don't have to
+    # be recalculated in the event that they're asked for again
+
+    if (!exists ($self->{$kAmbiguousNamesSensitive})){
+
+	my @names;
+
+	foreach my $name (keys %{$self->{$kNameToCount}}){
+
+	    push(@names, $name) if ($self->{$kNameToCount}{$name} > 1);
+
+	}
+
+	$self->{$kAmbiguousNamesSensitive} = \@names;
+
+    }
+
+    return @{$self->{$kAmbiguousNamesSensitive}};
+
+}
+
+=pod
+
+=head1 Methods for retrieving GO annotations for entities
+
+=cut
+
+############################################################################
+sub goIdsByDatabaseId{
+############################################################################
+=pod
+
+=head2 goIdsByDatabaseId
+
+This public method returns a reference to an array of GOIDs that are
+associated with the supplied databaseId for a specific aspect.  If no
+annotations are associated with that databaseId in that aspect, then a
+reference to an empty array will be returned.  If the databaseId is
+not recognized, then undef will be returned. In the case that a
+databaseId is ambiguous (for instance the same databaseId exists but
+with different casings) then if the supplied database id matches the
+exact case of one of those supplied, then that is the one it will be
+treated as.  In the case where the databaseId matches none of the
+possibilities by case, then a fatal error will occur, because the
+provided databaseId was ambiguous.
+
+Usage:
+
+    my $goidsRef = $annotationParser->goIdsByDatabaseId(databaseId => $databaseId,
+							aspect     => <P|F|C>);
+
+=cut
+
+    my ($self, %args) = @_;
+
+    my $aspect     = $args{'aspect'}     || $self->_handleMissingArgument(argument => 'aspect');
+    my $databaseId = $args{'databaseId'} || $self->_handleMissingArgument(argument => 'databaseId');
+
+    my $mappedId; # will store the id as listed in the annotations file
+
+    if (exists $self->{$kUcIdToId}{uc($databaseId)}){ # we recognize it
+
+	if (scalar (@{$self->{$kUcIdToId}{uc($databaseId)}}) == 1){
+
+	    # it's unambiguous
+
+	    $mappedId = $self->{$kUcIdToId}{uc($databaseId)}[0];
+
+	}else{
+
+	    # it may be ambiguous, but we'll check to see if the provided one
+	    # is of exactly the correct case
+
+	    foreach my $id (@{$self->{$kUcIdToId}{uc($databaseId)}}){
+		
+		if ($databaseId eq $id){ # we have a match
+
+		    $mappedId = $id;
+		    last;
+
+		}
+
+	    }
+
+	    if (!defined $mappedId){
+
+		# we got no perfect match, so it's ambiguous, and we die
+
+		die "ERROR: $databaseId is ambiguous as a databaseId, and could be used to refer to one of:\n\n".
+		    join("\n", @{$self->{$kUcIdToId}{uc($databaseId)}});
+
+	    }
+
+	}
+
+    }else{ # we don't recognize it
+	
+	return ; # note return here
+
+    }
+
+    # if we get here, then we have a recognized, and unambiguous database id 
+
+    return  $self->_goIdsByMappedDatabaseId(databaseId => $mappedId,
+					    aspect     => $aspect);
+
+}
+
+############################################################################
+sub _goIdsByMappedDatabaseId{
+############################################################################
+# This protected method returns a reference to an array of GOIDs that
+# are associated with the supplied databaseId for a specific aspect.
+# If no annotations are associated with that databaseId in that
+# aspect, then a reference to an empty array will be returned.  If the
+# databaseId is not recognized, then undef will be returned.  The
+# supplied databaseId must NOT be ambiguous, i.e. it must be a real
+# databaseId known to exist.  If it is possibly ambiguous, use the
+# goIdsByDatabaseId method instead.
+#
+# Usage:
+#
+#    my $goidsRef = $annotationParser->_goIdsByMappedDatabaseId(databaseId => $databaseId,
+#							        aspect     => <P|F|C>);
+
+
+    my ($self, %args) = @_;
+
+    my $aspect     = $args{'aspect'}     || $self->_handleMissingArgument(argument => 'aspect');
+    my $mappedId   = $args{'databaseId'} || $self->_handleMissingArgument(argument => 'databaseId');
+
+    if (exists $self->{$kGoids}{$mappedId}{$aspect}){ # it has annotations
+
+	return [keys %{$self->{$kGoids}{$mappedId}{$aspect}}];
+
+    }else{ # it has no annotations
+	    
+	return []; # reference to empty array
+
+    }
+
+}
+
+############################################################################
+sub goIdsByStandardName{
+############################################################################
+=pod
+
+=head2 goIdsByStandardName
+
+This public method returns a reference to an array of GOIDs that are
+associated with the supplied standardName for a specific aspect.  If
+no annotations are associated with the entity with that standard name
+in that aspect, then a reference to an empty list will be returned.
+If the supplied name is not used as a standard name, then undef will
+be returned.  In the case that the supplied standardName is ambiguous
+(for instance the same standardName exists but with different casings)
+then if the supplied standardName matches the exact case of one of
+those supplied, then that is the one it will be treated as.  In the
+case where the standardName matches none of the possibilities by case,
+then a fatal error will occur, because the provided standardName was
+ambiguous.
+
+Usage:
+
+    my $goidsRef = $annotationParser->goIdsByStandardName(standardName =>$standardName,
+                                                          aspect       =><P|F|C>);
+
+=cut
+
+    my ($self, %args) = @_;
+
+    my $aspect       = $args{'aspect'}       || $self->_handleMissingArgument(argument => 'aspect');
+    my $standardName = $args{'standardName'} || $self->_handleMissingArgument(argument => 'standardName');
+
+    # now we have to determine if the standardName is ambiguous or not
+
+    # first, return if there is no standard name for the supplied string
+
+    return undef if !exists $self->{$kUcStdNameToStdName}{uc($standardName)};
+
+    # now see if we have 1 or more mappings
+
+    my $mappedName;
+
+    if (scalar @{$self->{$kUcStdNameToStdName}{uc($standardName)}} == 1){
+
+	# we have a single mapping
+
+	$mappedName = $self->{$kUcStdNameToStdName}{uc($standardName)}[0];
+
+    }else{
+
+	# there's more than one, so see if the case matched exactly
+
+	foreach my $name (@{$self->{$kUcStdNameToStdName}{uc($standardName)}}){
+
+	    if ($name eq $standardName){
+
+		$mappedName = $name;
+		last;
+
+	    }
+
+	}
+
+	if (!defined $mappedName){
+
+	    # we got no perfect match, so it's ambiguous, and we die
+
+	    die "ERROR: $standardName is ambiguous as a standardName, and could be used to refer to one of:\n\n".
+		    join("\n", @{$self->{$kUcStdNameToStdName}{uc($standardName)}});
+
+	}
+
+    }
+
+    # now we're here, we know we have a mapped standard name, which
+    # must thus map to a databaseId
+
+    my $databaseId = $self->_databaseIdByMappedStandardName($mappedName);
+
+    return $self->_goIdsByMappedDatabaseId(databaseId => $databaseId,
+					   aspect     => $aspect);
+
+}
+
+############################################################################
+sub goIdsByName{
+############################################################################
+=pod
+
+=head2 goIdsByName
+
+This public method returns a reference to an array of GO IDs that are
+associated with the supplied name for a specific aspect.  If there are
+no GO associations for the entity corresponding to the supplied name
+in the provided aspect, then a reference to an empty list will be
+returned.  If the supplied name does not correspond to any entity,
+then undef will be returned.  Because the name can be any of the
+databaseId, the standard name, or any of the aliases, it is possible
+that the name might be ambiguous.  Clients of this object should first
+test whether the name they are using is ambiguous, using the
+nameIsAmbiguous() method, and handle it accordingly.  If an ambiguous
+name is supplied, then it will die.
+
+NB: API change:
+
+goIdsByName is now case insensitive - that is, if there is a name that
+is used twice using different casing, that will be treated as
+ambiguous.  Previous versions would have not treated these as
+ambiguous.  This is the price of wanting a case insensitive annotation
+parser.  In the event that a name is provided that is ambiguous
+because of case, if it matches exactly the case of one of the possible
+matches, it will be treated unambiguously.
+
+Usage:
+
+    my $goidsRef = $annotationParser->goIdsByName(name   => $name,
+						  aspect => <P|F|C>);
+
+=cut
+
+    my ($self, %args) = @_;
+
+    my $aspect = $args{'aspect'} || $self->_handleMissingArgument(argument => 'aspect');
+    my $name   = $args{'name'}   || $self->_handleMissingArgument(argument => 'name');
+
+    die "You have supplied an ambiguous name to goIdsByName" if ($self->nameIsAmbiguous($name));
+
+    # if we get here, the name is not ambiguous, so it's safe to call
+    # databaseIdByName
+
+    my $databaseId = $self->databaseIdByName($name);
+
+    return undef if !defined $databaseId; # there is no such name
+
+    # we should have a databaseId in the correct casing now
+
+    return $self->_goIdsByMappedDatabaseId(databaseId => $databaseId,
+					   aspect     => $aspect);
+
+}
+
+=pod
+
+=head1 Methods for mapping different types of name to each other
+
+=cut
+
+############################################################################
+sub standardNameByDatabaseId{
+############################################################################
+=pod
+
+=head2 standardNameByDatabaseId
+
+This method returns the standard name for a database id.
+
+NB: API change
+
+standardNameByDatabaseId is now case insensitive - that is, if there
+is a databaseId that is used twice (or more) using different casing,
+it will be treated as ambiguous.  Previous versions would have not
+treated these as ambiguous.  This is the price of wanting a case
+insensitive annotation parser.  In the event that a name is provided
+that is ambiguous because of case, if it matches exactly the case of
+one of the possible matches, it will be treated unambiguously.
+
+Usage:
+
+    my $standardName = $annotationParser->standardNameByDatabaseId($databaseId);
+
+=cut
+
+    my ($self, $databaseId) = @_;
+
+    die "You must supply a databaseId to standardNameByDatabaseId" if !defined ($databaseId);
+
+    # first return if there is no databaseId for the supplied string
+
+    return undef if (!exists $self->{$kUcIdToId}{uc($databaseId)});
+
+    # now, check whether it's ambiguous as a databaseId
+
+    my $mappedId;
+
+    if (scalar(@{$self->{$kUcIdToId}{uc($databaseId)}}) == 1){
+
+	# we have a single mapping
+
+	$mappedId = $self->{$kUcIdToId}{uc($databaseId)}[0];
+
+    }else{
+
+	# there's more than one, so see if the provided case matches
+	# exactly one of them
+
+	foreach my $id (@{$self->{$kUcIdToId}{uc($databaseId)}}){
+
+	    if ($databaseId eq $id){
+
+		$mappedId = $id;
+		last;
+
+	    }
+
+	}
+
+	if (!defined $mappedId){
+
+	    # we got no perfect match, so it's ambiguous, and we die
+
+	    die "ERROR: $databaseId is ambiguous as a databaseId, and could be used to refer to one of:\n\n".
+		join("\n", @{$self->{$kUcIdToId}{uc($databaseId)}});
+
+	}
+
+    }
+
+
+    return ($self->{$kIdToStandardName}{$mappedId});
+
+}
+
+############################################################################
+sub databaseIdByStandardName{
+############################################################################
+=pod
+
+=head2 databaseIdByStandardName
+
+This method returns the database id for a standard name.
+
+NB: API change
+
+databaseIdByStandardName is now case insensitive - that is, if there
+is a standard name that is used twice (or more) using different
+casing, it will be treated as ambiguous.  Previous versions would have
+not treated these as ambiguous.  This is the price of wanting a case
+insensitive annotation parser.  In the event that a name is provided
+that is ambiguous because of case, if it matches exactly the case of
+one of the possible matches, it will be treated unambiguously.
+
+Usage:
+
+    my $databaseId = $annotationParser->databaseIdByStandardName($standardName);
+
+=cut
+
+    my ($self, $standardName) = @_;
+
+    die "You must supply a standardName to databaseIdByStandardName" if !defined ($standardName);
+
+    # first return if there is no standard name for the supplied string
+
+    return undef if (!exists $self->{$kUcStdNameToStdName}{uc($standardName)});
+
+    # now see if it's ambiguous or not
+
+    my $mappedStandardName;
+
+    if (scalar(@{$self->{$kUcStdNameToStdName}{uc($standardName)}}) == 1){
+
+	# it's not ambiguous
+
+	$mappedStandardName = $self->{$kUcStdNameToStdName}{uc($standardName)}[0];
+
+    }else{
+
+	# there's more than one, so see if the supplied name matches
+	# the case of one of them exactly
+
+	foreach my $name (@{$self->{$kUcStdNameToStdName}{uc($standardName)}}){
+
+	    if ($standardName eq $name){
+
+                # added a check to detect more than one instance
+                # of this standard name in this exact case -- mark
+
+		if ($mappedStandardName) {
+		    # more than one
+		    $mappedStandardName = undef;
+		    last;
+		}
+
+		$mappedStandardName = $name;
+#		last;
+
+	    }
+
+	}
+
+	if (!defined $mappedStandardName){
+
+#	    die "$standardName is ambiguous as a standard name, and could be used to refer to one of:\n\n".
+#		join("\n", @{$self->{$kUcStdNameToStdName}{uc($standardName)}});
+#	    print STDERR "$standardName is ambiguous as a standard name, and does not exactly match the case of any name. It could be one of:\n",
+#	    join("\n", @{$self->{$kUcStdNameToStdName}{uc($standardName)}}), "\n";
+	    print STDERR "$standardName is ambiguous as a standard name\n";
+	    return undef;
+
+	}
+
+    }
+
+#    return ($self->{$kStandardNameToId}{$standardName});
+#! I think this is correct - otherwise a different casing for a non-ambiguous name will return undef - mark
+#print "XX standardName $standardName, mapped $mappedStandardName, ID from mapped ".$self->{$kStandardNameToId}{$mappedStandardName}."\n" if (!$self->{$kStandardNameToId}{$standardName});
+    return ($self->{$kStandardNameToId}{$mappedStandardName});
+
+}
+
+############################################################################
+sub _databaseIdByMappedStandardName{
+############################################################################
+# This protected method returns the database id for a standard name that is
+# guaranteed to be non-ambiguous, and in the correct casing
+#
+# Usage:
+#
+#    my $databaseId = $annotationParser->_databaseIdByMappedStandardName($standardName);
+#
+
+    my ($self, $standardName) = @_;
+
+    die "You must supply a standardName to _databaseIdByMappedStandardName" if !defined ($standardName);
+
+    return ($self->{$kStandardNameToId}{$standardName});
+
+}
+
+############################################################################
+sub databaseIdByName{
+############################################################################
+=pod
+
+=head2 databaseIdByName
+
+This method returns the database id for any identifier for a gene
+(e.g. by databaseId itself, by standard name, or by alias).  If the
+used name is ambiguous, then the program will die.  Thus clients
+should call the nameIsAmbiguous() method, prior to using this method.
+If the name does not map to any databaseId, then undef will be
+returned.
+
+NB: API change
+
+databaseIdByName is now case insensitive - that is, if there is a name
+that is used twice using different casing, that will be treated as
+ambiguous.  Previous versions would have not treated these as
+ambiguous.  This is the price of wanting a case insensitive annotation
+parser.  In the event that a name is provided that is ambiguous
+because of case, if it matches exactly the case of one of the possible
+matches, it will be treated unambiguously.
+
+Usage:
+
+    my $databaseId = $annotationParser->databaseIdByName($name);
+
+=cut
+
+    my ($self, $name) = @_;
+
+    die "You must supply a name to databaseIdByName" if !defined ($name);
+
+#    die "You have supplied an ambiguous name to databaseIdByName" if ($self->nameIsAmbiguous($name));
+    return undef if ($self->nameIsAmbiguous($name));
+
+    # give them the case insensitive unique map, or if there is none,
+    # then the case sensitive version
+
+    my $databaseId = $self->{$kNameToIdMapInsensitive}{uc($name)} || $self->{$kNameToIdMapSensitive}{$name};
+
+    return $databaseId;
+
+}
+
+############################################################################
+sub standardNameByName{
+############################################################################
+=pod
+
+=head2 standardNameByName
+
+This public method returns the standard name for the the gene
+specified by the given name.  Because a name may be ambiguous, the
+nameIsAmbiguous() method should be called first.  If an ambiguous name
+is supplied, then it will die with an appropriate error message.  If
+the name does not map to a standard name, then undef will be returned.
+
+NB: API change
+
+standardNameByName is now case insensitive - that is, if there is a
+name that is used twice using different casing, that will be treated
+as ambiguous.  Previous versions would have not treated these as
+ambiguous.  This is the price of wanting a case insensitive annotation
+parser.
+
+Usage:
+
+    my $standardName = $annotationParser->standardNameByName($name);
+
+=cut
+
+    my ($self, $name) = @_;
+
+    die "You must supply a name to standardNameByName" if !defined ($name);
+
+    die "You have supplied an ambiguous name to standardNameByName" if ($self->nameIsAmbiguous($name));
+
+    my $databaseId = $self->databaseIdByName($name);
+
+    if (defined $databaseId){
+
+	return $self->{$kIdToStandardName}{$databaseId};
+
+    }else{
+
+	return undef;
+	
+    }
+
+}
+
+=pod
+
+=head1 Other methods relating to names
+
+=cut
+
+############################################################################
+sub nameIsStandardName{
+############################################################################
+=pod
+
+=head2 nameIsStandardName
+
+This method returns a boolean to indicate whether the supplied name is
+used as a standard name.
+
+NB : API change.
+
+This is now case insensitive.  If you provide abC1, and ABc1 is a
+standard name, then it will return true.
+
+Usage :
+
+    if ($annotationParser->nameIsStandardName($name)){
+
+	# do something
+
+    }
+
+=cut
+
+    my ($self, $name) = @_;
+
+    die "You must supply a name to nameIsStandardName" if !defined($name);
+
+    return exists ($self->{$kUcStdNameToStdName}{uc($name)});
+
+}
+
+############################################################################
+sub nameIsDatabaseId{
+############################################################################
+=pod
+
+=head2 nameIsDatabaseId
+
+This method returns a boolean to indicate whether the supplied name is
+used as a database id.
+
+NB : API change.
+
+This is now case insensitive.  If you provide abC1, and ABc1 is a
+database id, then it will return true.
+
+Usage :
+
+    if ($annotationParser->nameIsDatabaseId($name)){
+
+	# do something
+
+    }
+
+=cut
+
+
+    my ($self, $databaseId) = @_;
+
+    die "You must supply a potential databaseId to nameIsDatabaseId" if !defined($databaseId);
+
+    return exists ($self->{$kUcIdToId}{uc($databaseId)});
+
+}
+
+############################################################################
+sub nameIsAnnotated{
+############################################################################
+=pod
+
+=head2 nameIsAnnotated
+
+This method returns a boolean to indicate whether the supplied name has any 
+annotations, either when considered as a databaseId, a standardName, or
+an alias.  If an aspect is also supplied, then it indicates whether that
+name has any annotations in that aspect only.
+
+NB: API change.
+
+This is now case insensitive.  If you provide abC1, and ABc1 has
+annotation, then it will return true.
+
+Usage :
+
+    if ($annotationParser->nameIsAnnotated(name => $name)){
+
+	# blah
+
+    }
+
+or:
+
+    if ($annotationParser->nameIsAnnotated(name   => $name,
+					   aspect => $aspect)){
+
+	# blah
+
+    }
+
+
+=cut
+
+    my ($self, %args) = @_;
+
+    my $name = $args{'name'} || die "You must supply a name to nameIsAnnotated";
+    
+    my $aspect = $args{'aspect'};
+
+    my $isAnnotated = 0;
+
+    my $ucName = uc($name);
+
+    if (!defined ($aspect)){ # if there's no aspect
+
+	$isAnnotated = (exists ($self->{$kNameToIdMapInsensitive}{$ucName}) || exists ($self->{$kAmbiguousNames}{$ucName}));
+	
+    }else{
+
+	if ($self->nameIsDatabaseId($name) && @{$self->goIdsByDatabaseId(databaseId => $name,
+									 aspect     => $aspect)}){
+
+	    $isAnnotated = 1;
+
+	}elsif ($self->nameIsStandardName($name) && @{$self->goIdsByStandardName(standardName => $name,
+										 aspect       => $aspect)}){
+
+	    $isAnnotated = 1;
+
+	}elsif (!$self->nameIsAmbiguous($name)){
+
+	    my $goidsRef = $self->goIdsByName(name   => $name,
+					      aspect => $aspect);
+
+	    if (defined $goidsRef && @{$goidsRef}){
+
+		$isAnnotated = 1;
+
+	    }
+
+	}else { # MUST be an ambiguous name, that's not used as a standard name
+	
+	    foreach my $databaseId ($self->databaseIdsForAmbiguousName($name)){
+
+		if (@{$self->goIdsByDatabaseId(databaseId => $name,
+					       aspect     => $aspect)}){
+
+		    $isAnnotated = 1;
+		    last; # as soon as we know, we can finish
+
+		}
+
+	    }
+
+	}
+
+    }
+
+    return $isAnnotated;
+
+}
+
+=pod
+
+=head1 Other public methods
+
+=cut
+
+############################################################################
+sub databaseName{
+############################################################################
+=pod
+
+=head2 databaseName
+
+This method returns the name of the annotating authority from the file
+that was supplied to the constructor.
+
+Usage :
+
+    my $databaseName = $annotationParser->databaseName();
+
+=cut
+
+    my $self = shift;
+
+    return $self->{$kDatabaseName};
+
+}
+
+############################################################################
+sub numAnnotatedGenes{
+############################################################################
+=pod
+
+=head2 numAnnotatedGenes
+
+This method returns the number of entities in the annotation file that
+have annotations in the supplied aspect.  If no aspect is provided,
+then it will return the number of genes with an annotation in at least
+one aspect of GO.
+
+Usage:
+
+    my $numAnnotatedGenes = $annotationParser->numAnnotatedGenes();
+
+    my $numAnnotatedGenes = $annotationParser->numAnnotatedGenes($aspect);
+
+=cut
+
+    my ($self, $aspect) = @_;
+
+    if (defined ($aspect)){
+
+	return $self->{$kNumAnnotatedGenes}{$aspect};
+
+    }else{
+
+	return $self->{$kTotalNumAnnotatedGenes};
+
+    }
+
+}
+
+############################################################################
+sub allDatabaseIds{
+############################################################################
+=pod
+
+=head2 allDatabaseIds
+
+This public method returns an array of all the database identifiers
+
+Usage:
+
+    my @databaseIds = $annotationParser->allDatabaseIds();
+
+=cut
+
+    my $self = shift;
+
+    return keys (%{$self->{$kIdToStandardName}});
+
+}
+
+############################################################################
+sub allStandardNames{
+############################################################################
+=pod
+
+=head2 allStandardNames
+
+This public method returns an array of all standard names.
+
+Usage:
+
+    my @standardNames = $annotationParser->allStandardNames();
+
+=cut
+
+    my $self = shift;
+
+    return keys(%{$self->{$kStandardNameToId}});
+
+}
+
+=pod
+
+=head1 Methods to do with files
+
+=cut
+
+############################################################################
+sub file{
+############################################################################
+=pod
+
+=head2 file
+
+This method returns the name of the file that was used to instantiate
+the object.
+
+Usage:
+
+    my $file = $annotationParser->file;
+
+=cut
+
+    return $_[0]->{$kFileName};
+
+}
+
+############################################################################
+sub serializeToDisk{
+############################################################################
+=pod
+
+=head2 serializeToDisk
+
+This public method saves the current state of the Annotation Parser
+Object to a file, using the Storable package.  The data are saved in
+network order for portability, just in case.  The name of the object
+file is returned.  By default, the name of the original file will be
+used to make the name of the object file (including the full path from
+where the file came), or the client can instead supply their own
+filename.
+
+Usage:
+
+    my $fileName = $annotationParser->serializeToDisk;
+
+    my $fileName = $annotationParser->serializeToDisk(filename => $filename);
+
+=cut
+
+    my ($self, %args) = @_;
+
+    my $fileName;
+
+    if (exists ($args{'filename'})){ # they supply their own filename
+
+	$fileName = $args{'filename'};
+
+    }else{ # we build a name from the file used to instantiate ourselves
+
+	$fileName = $self->file;
+	
+	if ($fileName !~ /\.obj$/){ # if we weren't instantiated from an object
+	    
+	    $fileName .= ".obj"; # add a .obj suffix to the name
+	    
+	}
+
+    }
+
+    nstore ($self, $fileName) || die "$PACKAGE could not serialize itself to $fileName : $!";
+
+    return ($fileName);
+
+}
+
+1; # to keep perl happy
+
+############################################################################
+#               MORE P O D   D O C U M E N T A T I O N                     #
+############################################################################
+
+=pod
+
+=head1 Modifications
+
+ # This script was modified by mark to filter by evidence code. Updates
+ # to the module were merged.
+
+CVS info is listed here:
+
+ # $Author: mark $
+ # $Date: 2015/02/09 18:36:55 $
+ # $Log: AnnotationParser.pm,v $
+ # Revision 1.13  2015/02/09 18:36:55  mark
+ # databaseIdByName now returns undef for ambiguous names instead of dying
+ #
+ # Revision 1.12  2014/10/15 20:08:18  mark
+ # correct TAIR gene link
+ #
+ # Revision 1.11  2012/11/21 21:31:52  mark
+ # Fixed bug where ambiguity was not detected when standard name of exact same case mapped to more than one database ID
+ #
+ # Revision 1.10  2011/06/03 18:01:40  mark
+ # added validation date parsing and methods
+ #
+ # Revision 1.9  2011/04/20 20:23:18  mark
+ # don't die if ambiguous standard name, return undef instead
+ #
+ # Revision 1.8  2010/03/09 21:29:13  mark
+ # added error detection and reporting to progress
+ #
+ # Revision 1.7  2010/03/08 03:01:59  mark
+ # fix background bug
+ #
+ # Revision 1.6  2009/08/12 05:07:50  mark
+ # more evidence code counting
+ #
+ # Revision 1.5  2009/07/29 05:19:20  mark
+ # fixed bug with uninitialized value when printing evidence code counts, added version method and parsing
+ #
+ # Revision 1.4  2009/07/14 21:08:51  mark
+ # print evidence codes, return evidence code counts
+ #
+ # Revision 1.3  2009/05/18 22:10:17  mark
+ # more evidence code support, added REPLACE_DBID support
+ #
+ # Revision 1.2  2009/05/05 17:53:37  mark
+ # use obo files, remove duplicate IDs
+ #
+ # Revision 1.35  2008/05/13 23:06:16  sherlock
+ # updated to fix bug with querying with a name that was unambiguous when
+ # taking its casing into account.
+ #
+ # Revision 1.34  2007/03/18 03:09:05  sherlock
+ # couple of PerlCritic suggested improvements, and an extra check to
+ # make sure that the cardinality between standard names and database ids
+ # is 1:1
+ #
+ # Revision 1.33  2006/07/28 00:02:14  sherlock
+ # fixed a couple of typos
+ #
+ # Revision 1.32  2004/07/28 17:12:10  sherlock
+ # bumped version
+ #
+ # Revision 1.31  2004/07/28 17:03:49  sherlock
+ # fixed bugs when calling goidsByDatabaseId instead of goIdsByDatabaseId
+ # on lines 1592 and 1617 - thanks to lfriedl@cs.umass.edu for spotting this.
+ #
+ # Revision 1.30  2003/11/26 18:44:28  sherlock
+ # finished making all the changes that were required to make it case
+ # insensitive, and modified POD accordingly.  It appears to all work as
+ # expected...
+ #
+ # Revision 1.29  2003/11/22 00:05:05  sherlock
+ # made a very large number of changes to make much of it
+ # case-insensitive, such that using CDC6 or cdc6 amounts to the same
+ # query, as long as both versions of that name don't exist in the
+ # annotations file.  Still needs a little work to allow names that are
+ # potentially ambiguous to be not ambiguous, if their casing matches
+ # exactly one form of the name that has been seen.  Have started to
+ # update test suite to check all the case insensitive stuff, but is not
+ # yet finished.
+ #
+ #
+
+=head1 AUTHORS
+
+Elizabeth Boyle, ell@mit.edu
+
+Gavin Sherlock,  sherlock@genome.stanford.edu
+
+=cut
diff --git a/www/lib/GO/AnnotationProvider/MetaData.pm b/www/lib/GO/AnnotationProvider/MetaData.pm
new file mode 100644
index 0000000..844e3d1
--- /dev/null
+++ b/www/lib/GO/AnnotationProvider/MetaData.pm
@@ -0,0 +1,1539 @@
+package GO::AnnotationProvider::MetaData;
+
+##############################################################################
+# FILE: MetaData.pm
+# DATE CREATED: August 2004
+# AUTHOR: Linda McMahan <lmcmahan@genomics.princeton.edu>
+#
+##############################################################################
+
+##############################################################################
+# POD (PLAIN OLD DOCUMENTATION)
+##############################################################################
+
+=pod
+
+=head1 NAME
+
+GO::AnnotationProvider::MetaData - Provide attributes and methods to deal with gene annotation files available at GO ftp site.
+
+=head1 SYNOPSIS
+
+=over 4
+
+  use GO::AnnotationProvider::AnnotationParser;
+  use go::AnnotationProvider::MetaData;
+
+
+   my $metaData = GO::AnnotationProvider::MetaData->new();
+
+   # Get this organism's total annotated gene number from its gene association file
+   my $annotionFilePath = '/usr/local/go/lib/GO/gene_association.sgd';
+   my $annotationParser = GO::AnnotationProvider::AnnotationParser->new(annotationFile=>$annotionFilePath);
+   my @databaseIds = $annotationParser->allDatabaseIds();
+   my $annotateGeneNum = scalar(@databaseIds);
+
+   my $annotionFile = 'gene_association.sgd';
+
+   # Call these mutator methods to set values of these attributes for this organism
+   $metaData->setANNOTATION_FILE($file, $annotationFile);
+   $metaData->setANNOTATE_GENE_NUM($annotateGeneNum, $annotationFile);
+   $metaData->setESTIMATE_GENE_NUM($annotationFile);
+
+   # Call these accessor methods to return values of these attributes for this organism
+   my ($organismGeneUrl) = $metaData->getGENE_URL($annotationFile);
+   my ($organismCommonName) = $metaData->getCOMMON_NAME($annotationFile);
+   my ($ogranismScientificName) = $metaData->getORGANISM($annotationFile);
+   my ($organismDatabaseName) = $metaData->getORGANISM_DB_NAME($annotationFile);
+   my ($organismDatabaseUrl) = $metaData->getORGANISM_DB_URL($annotationFile);
+
+   print "Organism Gene Url: ", $organismGeneUrl, "\n",
+         "Organism Common Name: ", $organismCommonName, "\n",
+         "Organism Scientific Name: ", $ogranismScientificName, "\n",
+         "Organism Database Name: ", $organismDatabaseName, "\n",
+         "Organism Database URL: ", $organismDatabaseUrl, "\n";
+
+   # Call this accessor method to return a reference of all attributes for this organism
+   my ($organismInfoHashRef) = $metaData->getOrganismInfo($annotationFile);
+   # @organismInfoArray contains references to the info about all the organisms
+   my @organismsInfoArray; 
+   push(@organismsInfoArray, $organismInfoHashRef);
+
+   # Call this method to create an html table of gene association file for these files
+   my $filepath = "/Genomics/curtal/www/go/html/GOTermMapper/MetaData.html";
+   $metaData->createGeneAsscTable(\@organismsInfoArray, $filepath);
+
+   # Call this method to get the values and labels of gene annotation files.  These returned values and 
+   # labels can then be used to create cgi script's pull-down menu of gene annotation files 
+   my ($geneAssValuesArrayRef, $geneAssLabelsHashRef) = $metaData->geneAnnotationFilesMenu;
+
+   # Call this method to get the estimate total gene number for this organism
+   my ($estimateGeneNum) = $metaData->organismEstimateTotalGeneNum($annotionFile);
+
+   # Call this method to get the gene url for this organism   
+   my $opt_u = 'http://genome-www4.stanford.edu/cgi-bin/SGD/locus.pl?locus=';
+   my ($geneUrl) = $metaData->organismGeneUrl($annotationFile, $opt_u);
+
+   # Call this method to check if the gene annotation file exists for this organism
+   my ($organismFileExist) = $metaData->organismFileExist($annotationFile);
+
+=back
+
+=cut
+
+##############################################################################
+
+
+use strict;
+use warnings;
+use vars '$AUTOLOAD'; #keep 'use strict' happy
+use Carp;
+
+use CGI qw/:all :html3/;
+
+use vars qw (@ISA @EXPORT_OK %dir);  #keep 'use strict' happy 
+use Exporter;
+@ISA = ('Exporter');
+@EXPORT_OK = qw(createGeneAsscTable geneAnnotationFilesMenu geneAnnotationSlimFilesMenu organismEstimateTotalGeneNum organismGeneUrl organismFileExist);
+
+
+my $DEBUG = 0; #debugging variable; set to 1 for printed feedback (i.e. to print debug statements)
+
+
+###############################################################################
+## ATTRIBUTES OF ORGANISMS
+###############################################################################
+## we annotate gene product to the organism rather than to the
+## annotation file, on the premise that there can be multiple
+## annotations files per organism (sources, custom, etc,)
+##
+
+my %organismEstimateTotalGeneNum = (
+#                                    'Arabidopsis thaliana' => 52816,
+                                    'Bacillus anthracis' => 5507,
+				    'Bos taurus' => 37225,
+                                    'Caenorhabditis elegans' => 22246,
+#                                    'Candida albicans' => 6840,
+                                    'Coxiella burnetii' => 2095,
+                                    'Danio rerio' => 22409,
+                                    'Dictyostelium discoideum' => 12098,
+                                    'Drosophila melanogaster'  => 16085,
+				    'Escherichia coli' => 7187,
+				    'Gallus gallus' => 30837,
+                                    'Geobacter sulfurreducens PCA' => 3533,
+#                                    'Homo sapiens' => 23531,
+#                                    'Mus musculus' => 33884,
+                                    'Oryza sativa' => 41521,
+				    'Plasmodium falciparum' => 5400,
+                                    'Pseudomonas syringae' => 5763,
+#                                    'Rattus norvegicus' => 23751,
+                                    'Saccharomyces cerevisiae' => 7166,
+                                    'Shewanella oneidensis' => 4843,
+                                    'Trypanosoma brucei' => 708,
+                                    'Vibrio cholerae' => 3885,  
+				    );
+
+
+###############################################################################
+## ATTRIBUTES OF GO GENE ANNOTATION FILES
+###############################################################################
+## associations files have the following STATIC attributes (defined):
+#
+#
+#    GENE_URL - a template URL to link to for more inforamtion on a gene
+#    COMMON_NAME (Note, this should probably be an attribute of the organism; oh well...)
+#    ORGANISM - Genus and species (must match keys of organismEstimateTotalGeneNum, above.)
+#    ORGANISM_DB_NAME  - annotations authority or file source
+#    ORGANISM_DB_URL - URL of authority
+#    DEFAULT_GENE_URL - a sample link
+#    README_URL - README file from authority located at GO site, http://www.geneontology.org/gene-associations/readme/
+#    IDENTIFIER_2 - type of identifier at column 2 of association file (e.g. 'systematic name', 'gene name', 'gene symbol')
+#    IDENTIFIER_2_EXAMPLE - example identifier at column 2 of association file
+#    IDENTIFIER_3 - type of identifier at column 3 of association file
+#    IDENTIFIER_3_EXAMPLE  - example identifier at column 3 of association file
+#    IDENTIFIER_11 - type of identifier at column 11 of association file
+#    IDENTIFIER_11_EXAMPLE - example identifier at column 11 of association file
+#    SAMPLE_GENE_LIST - sample list of identifiers, usable by the tools
+#    CONVERSION_TOOL  - {a reference to an hash of label => url}, suggested to assist in identifier conversion
+#    ANNOTATION_FILE => the full name of the annotation file
+#
+## the following attributes are only set is the gene assocaition files
+## are usable by the web applications, GOTermFinder, GOTermMapper
+#    MENU_LABEL       => the menu label the the GOTermFinder will display
+#    SLIM_MENU_LABEL  => the menu label the the GOTermMapper will display
+#
+## the following attribute is set AFTER the file as been parsed
+#    ANNOTATE_GENE_NUM - the number of annotated gene products in the association file
+#
+## and the following attribute may be set if it is defined in % organismEstimateTotalGeneNum, above
+#    ESTIMATE_GENE_NUM - the estimated number of gene products in the organism, both annotated and unannotated
+#
+
+my $uniprot = {'UniProt Batch Retrieval' => 'http://www.uniprot.org/batch/',
+	       'CNIO Clone/Gene ID converter' => 'http://idconverter.bioinfo.cnio.es/'};
+
+my %_annotationFiles = (
+			GeneDB_Lmajor => {
+			    GENE_URL => undef,
+			    COMMON_NAME => 'Skin parasite',
+			    ORGANISM => 'Leishmania major',
+			    ORGANISM_DB_NAME => 'L. major GeneDB',
+			    ORGANISM_DB_URL => 'http://www.genedb.org/Homepage/Lmajor',
+			    DEFAULT_GENE_URL => '',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/GeneDB_Lmajor.README',
+			    IDENTIFIER_2 => 'Systematic_ID',
+			    IDENTIFIER_3 => 'Systematic_ID',
+			    IDENTIFIER_11 => '',
+			    IDENTIFIER_2_EXAMPLE => 'LM5.39',
+			    IDENTIFIER_3_EXAMPLE => 'L6071.09',
+			    IDENTIFIER_11_EXAMPLE => '',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/geneDB_Lmajor.txt',
+			    ANNOTATION_FILE => 'gene_association.GeneDB_Lmajor',
+			    MENU_LABEL => 'GeneDB_Lmajor',
+			    SLIM_MENU_LABEL => 'GeneDB_Lmajor (Generic GO slim)'
+			    },
+			GeneDB_Pfalciparum => {
+			    GENE_URL => 'http://www.genedb.org/genedb/Search?organism=malaria&name=',
+			    COMMON_NAME => 'Malaria parasite',
+			    ORGANISM => 'Plasmodium falciparum',
+			    ORGANISM_DB_NAME => 'P. falciparum GeneDB',
+			    ORGANISM_DB_URL => 'http://www.genedb.org/Homepage/Pfalciparum',
+			    DEFAULT_GENE_URL => 'http://www.genedb.org/genedb/Search?organism=malaria&name'.'=PFF1145c',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/GeneDB_Pfalciparum.README',
+			    IDENTIFIER_2 => 'Systematic Name',
+			    IDENTIFIER_3 => 'Systematic Name',
+			    IDENTIFIER_11 => '',
+			    IDENTIFIER_2_EXAMPLE => 'PFF1145c',
+			    IDENTIFIER_3_EXAMPLE => 'MAL6P1.191',
+			    IDENTIFIER_11_EXAMPLE => '',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/geneDB_pfalciparum.txt',
+			    ANNOTATION_FILE => 'gene_association.GeneDB_Pfalciparum',
+			    MENU_LABEL  => 'GeneDB_Pfalciparum',
+			    SLIM_MENU_LABEL  => 'GeneDB_Pfalciparum (Generic GO slim)'
+			    },
+			# GeneDB_Spombe => {
+			#     GENE_URL => 'http://www.genedb.org/genedb/Search?organism=pombe&name=',
+			#     COMMON_NAME => 'Yeast',
+			#     ORGANISM => 'Schizosaccharomyces pombe',
+			#     ORGANISM_DB_NAME => 'S. pombe GeneDB',
+			#     ORGANISM_DB_URL => 'http://www.genedb.org/genedb/pombe/',
+			#     DEFAULT_GENE_URL => 'http://www.genedb.org/genedb/Search?organism'.'=pombe&name'.'=clp1',
+			#     README_URL => 'http://www.geneontology.org/gene-associations/readme/GeneDB_Spombe.README',
+			#     IDENTIFIER_2 => 'Systematic Name',
+			#     IDENTIFIER_3 => 'Gene Name',
+			#     IDENTIFIER_11 => 'Gene Synonym',
+			#     IDENTIFIER_2_EXAMPLE => 'SPAC1782.09c',
+			#     IDENTIFIER_3_EXAMPLE => 'clp1',
+			#     IDENTIFIER_11_EXAMPLE => 'flp1',
+			#     SAMPLE_GENE_LIST => '/sampleGeneFiles/geneDB_spombe.txt',
+			#     ANNOTATION_FILE => 'gene_association.GeneDB_Spombe',
+			#     MENU_LABEL  => 'GeneDB_Spombe',
+			#     SLIM_MENU_LABEL  => 'GeneDB_Spombe (S. pombe GO slim) (Process only)'
+			#     },
+			# GeneDB_Spombe_generic => {
+			#     ANNOTATION_FILE => 'gene_association.GeneDB_Spombe_generic',
+			#     SLIM_MENU_LABEL  => 'GeneDB_Spombe (Generic GO slim)'
+			# },
+			pombase => {
+#			    GENE_URL => 'http://www.genedb.org/genedb/Search?organism=pombe&name=',
+			    GENE_URL => 'http://www.pombase.org/search/ensembl/',
+			    COMMON_NAME => 'Yeast',
+			    ORGANISM => 'Schizosaccharomyces pombe',
+#			    ORGANISM_DB_NAME => 'S. pombe GeneDB',
+			    ORGANISM_DB_NAME => 'PomBase',
+#			    ORGANISM_DB_URL => 'http://www.genedb.org/genedb/pombe/',
+			    ORGANISM_DB_URL => 'http://www.pombase.org/',
+			    DEFAULT_GENE_URL => 'http://www.pombase.org/spombe/result/clp1',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/pombase.README',
+			    IDENTIFIER_2 => 'Systematic Name',
+			    IDENTIFIER_3 => 'Gene Name',
+			    IDENTIFIER_11 => 'Gene Synonym',
+			    IDENTIFIER_2_EXAMPLE => 'SPAC1782.09c',
+			    IDENTIFIER_3_EXAMPLE => 'clp1',
+			    IDENTIFIER_11_EXAMPLE => 'flp1',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/pombase.txt',
+			    ANNOTATION_FILE => 'gene_association.pombase',
+			    MENU_LABEL  => 'PomBase',
+			    SLIM_MENU_LABEL  => 'PomBase (S. pombe GO slim) (Process only)'
+			    },
+			GeneDB_Spombe_generic => {
+			    ANNOTATION_FILE => 'gene_association.pombase_generic',
+			    SLIM_MENU_LABEL  => 'PomBase (Generic GO slim)'
+			},
+			GeneDB_Tbrucei => {
+			    GENE_URL => 'http://www.genedb.org/genedb/Search?organism=tryp&name=',
+			    COMMON_NAME => 'Trypanosome',
+			    ORGANISM => 'Tryanosoma brucei',
+			    ORGANISM_DB_NAME => 'T. brucei GeneDB',
+			    ORGANISM_DB_URL => 'http://www.genedb.org/Homepage/Tbruceibrucei927',
+			    DEFAULT_GENE_URL => 'http://www.genedb.org/genedb/Search?organism'.'=tryp&name'.'=Tb927.2.380',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/GeneDB_Tbrucei.README',
+			    IDENTIFIER_2 => 'Systematic Name',
+			    IDENTIFIER_3 => 'Gene Name',
+			    IDENTIFIER_11 => 'Gene Synonym',
+			    IDENTIFIER_2_EXAMPLE => 'Tb927.2.380',
+			    IDENTIFIER_3_EXAMPLE => 'RHS2',
+			    IDENTIFIER_11_EXAMPLE => '3B10.145',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/geneDB_Tbrucei.txt',
+			    ANNOTATION_FILE => 'gene_association.GeneDB_Tbrucei',
+			    MENU_LABEL  => 'GeneDB_Tbrucei',
+			    SLIM_MENU_LABEL  => 'GeneDB_Tbrucei (Generic GO slim)'
+			    },
+			# GeneDB_tsetse => {
+			#     GENE_URL => 'http://www.genedb.org/genedb/Search?organism=glossina&name=',
+			#     COMMON_NAME => 'Tsetse fly',
+			#     ORGANISM => 'Glossina morsitans',
+			#     ORGANISM_DB_NAME => 'G. morsitans GeneDB',
+			#     ORGANISM_DB_URL => 'http://www.genedb.org/genedb/glossina/index.jsp',
+			#     DEFAULT_GENE_URL => 'http://www.genedb.org/genedb/Search?organism='.'glossina&name'.'=Gmm-1621',
+			#     README_URL => 'http://www.geneontology.org/gene-associations/readme/GeneDB_Tsetse.README',
+			#     IDENTIFIER_2 => 'Systematic Name',
+			#     IDENTIFIER_3 => '',
+			#     IDENTIFIER_11 => '',
+			#     IDENTIFIER_2_EXAMPLE => 'Gmm-1621',
+			#     IDENTIFIER_3_EXAMPLE => '',
+			#     IDENTIFIER_11_EXAMPLE => '',
+			#     SAMPLE_GENE_LIST => '/sampleGeneFiles/geneDB_tsetse.txt',
+			#     ANNOTATION_FILE => 'gene_association.GeneDB_tsetse',
+			#     MENU_LABEL  => 'GeneDB_tsetse',
+			#     SLIM_MENU_LABEL  => 'GeneDB_tsetse (Generic GO slim)'
+			#     },
+			cgd => {
+			    GENE_URL => 'http://www.candidagenome.org/cgi-bin/locus.pl?locus=',
+			    COMMON_NAME => 'Candida',
+			    ORGANISM => 'Candida albicans',
+			    ORGANISM_DB_NAME => 'CGD',
+			    ORGANISM_DB_URL => 'http://www.candidagenome.org/',
+			    DEFAULT_GENE_URL => 'http://www.candidagenome.org/cgi-bin/locus.pl?locus='.'=act1',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/cgd.README',
+			    IDENTIFIER_2 => 'CGD_ID',
+			    IDENTIFIER_3 => 'Standard Name',
+			    IDENTIFIER_11 => 'Systematic name',
+			    IDENTIFIER_2_EXAMPLE => 'CAL0005516',
+			    IDENTIFIER_3_EXAMPLE => 'AAF1',
+			    IDENTIFIER_11_EXAMPLE => 'orf19.7436',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/cgd.txt',
+			    ANNOTATION_FILE => 'gene_association.cgd',
+			    MENU_LABEL  => 'CGD (Candida - C. albicans)',
+			    SLIM_MENU_LABEL  => 'CGD - Candida (Generic GO slim)'
+			    },
+			dictyBase => {
+			    GENE_URL => 'http://dictybase.org/db/cgi-bin/dictyBase/locus.pl?locus=',
+			    COMMON_NAME => 'Slime mold',
+			    ORGANISM => 'Dictyostelium discoideum',
+			    ORGANISM_DB_NAME => 'DictyBase',
+			    ORGANISM_DB_URL => 'http://dictybase.org/',
+			    DEFAULT_GENE_URL => 'http://dictybase.org/db/cgi-bin/dictyBase/locus.pl?locus'.'=yakA',
+			    README_URL => '',
+			    IDENTIFIER_2 => 'DictyBase_ID',
+			    IDENTIFIER_3 => 'Gene Name',
+			    IDENTIFIER_11 => 'Alias',
+			    IDENTIFIER_2_EXAMPLE => 'DDB_G0267450',
+			    IDENTIFIER_3_EXAMPLE => 'yakA',
+			    IDENTIFIER_11_EXAMPLE => 'dagB',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/dictyBase.txt',
+			    ANNOTATION_FILE => 'gene_association.dictyBase',
+			    MENU_LABEL  => 'DictyBase (Slime mold - D. discoideum)',
+			    SLIM_MENU_LABEL  => 'DictyBase (Generic GO slim)'
+			    },
+#			ddb => {
+#			    GENE_URL => 'http://dictybase.org/db/cgi-bin/dictyBase/locus.pl?locus=',
+#			    COMMON_NAME => 'Slime mold',
+#			    ORGANISM => 'Dictyostelium discoideum',
+#			    ORGANISM_DB_NAME => 'DictyBase',
+#			    ORGANISM_DB_URL => 'http://dictybase.org/',
+#			    DEFAULT_GENE_URL => 'http://dictybase.org/db/cgi-bin/dictyBase/locus.pl?locus'.'=yakA',
+#			    README_URL => '',
+#			    IDENTIFIER_2 => 'DictyBase_ID',
+#			    IDENTIFIER_3 => 'Gene Name',
+#			    IDENTIFIER_11 => 'Alias',
+#			    IDENTIFIER_2_EXAMPLE => 'DDB0191191',
+#			    IDENTIFIER_3_EXAMPLE => 'yakA',
+#			    IDENTIFIER_11_EXAMPLE => 'dagB',
+#			    SAMPLE_GENE_LIST => '/sampleGeneFiles/ddb.txt',
+#			    ANNOTATION_FILE => 'gene_association.ddb',
+#			    MENU_LABEL  => 'DictyBase (Slime mold - D. discoideum)',
+#			    SLIM_MENU_LABEL  => 'DictyBase (Generic GO slim)'
+#			    },
+			fb => {
+			    GENE_URL => 'http://flybase.bio.indiana.edu/.bin/fbidq.html?',
+			    COMMON_NAME => 'Fruit fly',
+			    ORGANISM => 'Drosophila melanogaster',
+			    ORGANISM_DB_NAME => 'FlyBase',
+			    ORGANISM_DB_URL => 'http://flybase.bio.indiana.edu/',
+			    DEFAULT_GENE_URL => 'http://flybase.bio.indiana.edu/.bin/fbidq.html?FBgn0013749',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/fb.README',
+			    IDENTIFIER_2 => 'FlyBase_ID',
+			    IDENTIFIER_3 => 'Gene Symbol',
+			    IDENTIFIER_11 => 'Gene Synonym',
+			    IDENTIFIER_2_EXAMPLE => 'FBgn0013749',
+			    IDENTIFIER_3_EXAMPLE => 'Arf102F',
+			    IDENTIFIER_11_EXAMPLE => 'ARF2',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/fb.txt',
+			    CONVERSION_TOOL => '',     
+			    ANNOTATION_FILE => 'gene_association.fb',  
+			    MENU_LABEL  => 'FlyBase (Fruit fly - D. melanogaster)',
+			    SLIM_MENU_LABEL  => 'FlyBase (Generic GO slim)'
+			    },
+			goa_arabidopsis => {
+			    ORGANISM => 'Arabidopsis thaliana',
+			    #  subsumed by TAIR file
+			},
+			goa_chicken => {
+			    ORGANISM => 'Gallus gallus',
+			    COMMON_NAME => 'Chicken',
+			    ORGANISM_DB_NAME => 'GOA @EBI',
+			    ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			    IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			    IDENTIFIER_3 => 'UniProt_ID (or Ensembl_ID)',
+			    IDENTIFIER_11 => 'International Protein Index',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_chicken.txt',
+			    CONVERSION_TOOL => $uniprot,
+			    ANNOTATION_FILE => 'gene_association.goa_chicken',
+			    MENU_LABEL  => 'goa_chicken'
+			    },
+			goa_cow => {
+			    ORGANISM => 'Bos taurus',
+			    COMMON_NAME => 'Cow',
+			    ORGANISM_DB_NAME => 'GOA @EBI',
+			    ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			    IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			    IDENTIFIER_3 => 'UniProt_ID (or Ensembl_ID)',
+			    IDENTIFIER_11 => 'International Protein Index',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_cow.txt',
+			    CONVERSION_TOOL => $uniprot,
+			    ANNOTATION_FILE => 'gene_association.goa_cow',
+			    MENU_LABEL  => 'goa_cow'
+			    },
+			goa_Ecoli => {
+			    ORGANISM => 'Escherichia coli',
+			    COMMON_NAME => 'Bacterium coli',
+			    ORGANISM_DB_NAME => 'GOA @EBI',
+			    ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			    IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			    IDENTIFIER_3 => 'UniProt_ID (or Ensembl_ID)',
+			    IDENTIFIER_11 => 'International Protein Index',
+			    CONVERSION_TOOL => $uniprot,
+			    ANNOTATION_FILE => 'gene_association.goa_Ecoli',
+			    MENU_LABEL  => 'goa_ecoli'
+			    },
+			goa_human => {
+			    GENE_URL => 'http://www.ensembl.org/Homo_sapiens/geneview?gene=',
+			    COMMON_NAME => 'Human',
+			    ORGANISM => 'Homo sapiens',
+			    ORGANISM_DB_NAME => 'GOA @EBI',
+			    ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			    DEFAULT_GENE_URL => 'http://www.ensembl.org/Homo_sapiens/geneview?gene='.'ENSG00000039600&db'.'=core',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			    IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			    IDENTIFIER_3 => 'UniProt_ID (or Ensembl_ID)',
+			    IDENTIFIER_11 => 'International Protein Index',
+			    IDENTIFIER_2_EXAMPLE => 'O94993',
+			    IDENTIFIER_3_EXAMPLE => 'SX30_HUMAN',
+			    IDENTIFIER_11_EXAMPLE => 'IPI00016680',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_human.txt',
+			    CONVERSION_TOOL => $uniprot,
+			    ANNOTATION_FILE => 'gene_association.goa_human',
+			    MENU_LABEL  => 'goa_human',
+			    SLIM_MENU_LABEL  => 'goa_human (GOA GO slim)'
+			    },
+			goa_human_generic => {ANNOTATION_FILE => 'gene_association.goa_human_generic',
+					      SLIM_MENU_LABEL  => 'goa_human (Generic GO slim)'
+					      },
+ 			goa_human_hgnc => {
+			    # augmented goa_human
+#			    GENE_URL => 'http://www.ensembl.org/Homo_sapiens/geneview?gene=',
+			    GENE_URL => 'http://www.genenames.org/data/hgnc_data.php?hgnc_id=',
+			    COMMON_NAME => 'Human',
+			    ORGANISM => 'Homo sapiens',
+			    ORGANISM_DB_NAME => 'GOA @EBI + XREFs',
+			    ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			    DEFAULT_GENE_URL => 'http://www.ensembl.org/Homo_sapiens/geneview?gene='.'ENSG00000039600&db'.'=core',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			    IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			    IDENTIFIER_3 => 'UniProt_ID (or Ensembl_ID)',
+			    IDENTIFIER_11 => 'International Protein Index with additional crossreferenced gene symbols',
+			    IDENTIFIER_2_EXAMPLE => 'O94993',
+			    IDENTIFIER_3_EXAMPLE => 'SX30_HUMAN',
+			    IDENTIFIER_11_EXAMPLE => 'IPI00016680|SOX30',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_human_hgnc.txt',
+			    CONVERSION_TOOL => $uniprot,
+			    ANNOTATION_FILE => 'gene_association.goa_human_hgnc',
+			    MENU_LABEL  => 'goa_human_hgnc',
+			    SLIM_MENU_LABEL  => 'goa_human_hgnc (GOA GO slim)'
+			    },
+			goa_human_hgnc_generic => {ANNOTATION_FILE => 'gene_association.goa_human_hgnc_generic',
+						   SLIM_MENU_LABEL  => 'goa_human_hgnc (Generic GO slim)'
+					      },
+ 			goa_human_ensembl => {
+			    # augmented goa_human
+			    GENE_URL => 'http://www.ensembl.org/Homo_sapiens/geneview?gene=',
+			    COMMON_NAME => 'Human',
+			    ORGANISM => 'Homo sapiens',
+			    ORGANISM_DB_NAME => 'GOA @EBI + Ensembl',
+			    ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			    DEFAULT_GENE_URL => 'http://www.ensembl.org/Homo_sapiens/geneview?gene='.'ENSG00000039600&db'.'=core',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			    IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			    IDENTIFIER_3 => 'UniProt_ID (or Ensembl_ID)',
+			    IDENTIFIER_11 => 'International Protein Index with additional crossreferenced gene symbols',
+			    IDENTIFIER_2_EXAMPLE => 'O94993',
+			    IDENTIFIER_3_EXAMPLE => 'SX30_HUMAN',
+			    IDENTIFIER_11_EXAMPLE => 'IPI00016680|SOX30',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_human_ensembl.txt',
+			    CONVERSION_TOOL => $uniprot,
+			    ANNOTATION_FILE => 'gene_association.goa_human_ensembl',
+			    MENU_LABEL  => 'goa_human_ensembl',
+			    SLIM_MENU_LABEL  => 'goa_human_ensembl (GOA GO slim)'
+			    },
+			goa_human_ensembl_generic => {ANNOTATION_FILE => 'gene_association.goa_human_ensembl_generic',
+						      SLIM_MENU_LABEL  => 'goa_human_ensembl (Generic GO slim)'
+                            },
+			#     # subsumed by RGD?
+			#     GENE_URL => 'http://www.ensembl.org/Mus_musculus/geneview?gene=',
+			#     COMMON_NAME => 'Mouse',
+			#     ORGANISM => 'Mus musculus',
+			#     ORGANISM_DB_NAME => 'GOA @EBI',
+			#     ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			#     DEFAULT_GENE_URL => 'http://www.ensembl.org/Mus_musculus/geneview?gene='.'ENSMUSG00000055141&db'.'=core',
+			#     README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			#     IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			#     IDENTIFIER_3 => 'UniProt_Id (or Ensembl_ID)',
+			#     IDENTIFIER_11 => 'International Protein Index',
+			#     IDENTIFIER_2_EXAMPLE => 'P07628',
+			#     IDENTIFIER_3_EXAMPLE => 'KLK8_MOUSE',
+			#     IDENTIFIER_11_EXAMPLE => 'IPI00130801',
+			#     SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_mouse.txt',
+			#     CONVERSION_TOOL => $uniprot,
+			#     ANNOTATION_FILE => 'gene_association.goa_mouse',
+			#     },
+#			goa_mouse_generic => {ANNOTATION_FILE => 'gene_association.goa_mouse_generic',
+#					      SLIM_MENU_LABEL  => 'goa_mouse (Generic GO slim)'
+#					      },
+# 			goa_pdb => {
+# # UNUSED???
+# 			    GENE_URL => 'http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId=',
+# 			    COMMON_NAME => '',
+# 			    ORGANISM => 'Protein Data Bank',
+# 			    ORGANISM_DB_NAME => 'PDB',
+# 			    ORGANISM_DB_URL => 'http://www.rcsb.org/pdb/index.html',
+# 			    DEFAULT_GENE_URL => 'http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId'.'=104M',
+# 			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa_pdb.README',
+# 			    IDENTIFIER_2 => 'PDB_ID',
+# 			    IDENTIFIER_3 => '',
+# 			    IDENTIFIER_11 => '',
+# 			    IDENTIFIER_2_EXAMPLE => '104M',
+# 			    IDENTIFIER_3_EXAMPLE => '',
+# 			    IDENTIFIER_11_EXAMPLE => '',
+# 			    SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_pdb.txt',
+# 			    ANNOTATION_FILE => 'gene_association.goa_pdb',
+# 			    MENU_LABEL  => 'goa_pdb',
+# 			    SLIM_MENU_LABEL  => 'goa_pdb (GOA GO slim)'
+# 			    },
+# 			goa_pdb_generic => {ANNOTATION_FILE => 'gene_association.goa_pdb_generic',
+# 					    SLIM_MENU_LABEL  => 'goa_pdb (Generic GO slim)'
+# 					    },
+			goa_rat => {
+			    # subsumed by RGD?
+			    GENE_URL => 'http://www.ensembl.org/Rattus_norvegicus/geneview?gene=',
+			    COMMON_NAME => 'Rat',
+			    ORGANISM => 'Rattus norvegicus',
+			    ORGANISM_DB_NAME => 'GOA @EBI',
+			    ORGANISM_DB_URL => 'http://www.ebi.ac.uk/GOA/',
+			    DEFAULT_GENE_URL => 'http://www.ensembl.org/Rattus_norvegicus/geneview?gene'.'=ENSRNOG00000017002&db'.'=core',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/goa.README',
+			    IDENTIFIER_2 => 'UniProt_Accession (or Ensembl_ID)',
+			    IDENTIFIER_3 => 'UniProt_ID (or Ensembl_ID)',
+			    IDENTIFIER_11 => 'International Protein Index',
+			    IDENTIFIER_2_EXAMPLE => 'P18090',
+			    IDENTIFIER_3_EXAMPLE => 'B1AR_RAT',
+			    IDENTIFIER_11_EXAMPLE => 'IPI00188184',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/goa_rat.txt',
+			    CONVERSION_TOOL => $uniprot,
+			    ANNOTATION_FILE => 'gene_association.goa_rat',
+			    },
+#			goa_rat_generic => {ANNOTATION_FILE => 'gene_association.goa_rat_generic',
+#					    SLIM_MENU_LABEL  => 'goa_rat (Generic GO slim)'
+#					    },
+			goa_uniprot => {
+			    GENE_URL => undef,
+			    ORGANISM => 'undef',
+			    ANNOTATION_FILE => 'gene_association.goa_uniprot', 
+# UNUSED???
+#			    MENU_LABEL  => 'goa_uniprot',
+#			    SLIM_MENU_LABEL  => 'goa_unipro (Generic GO slim)'
+			    },
+			goa_zebrafish => {
+			    ORGANISM => 'Danio rerio',
+			    #  subsumed by ZFIN file
+			    },
+			gramene_oryza => {
+			    GENE_URL => 'http://www.gramene.org/perl/protein_search?acc=',
+			    COMMON_NAME => 'Rice',
+			    ORGANISM => 'Oryza sativa',
+			    ORGANISM_DB_NAME => 'Gramene',
+			    ORGANISM_DB_URL => 'http://www.gramene.org/',
+			    DEFAULT_GENE_URL => 'http://www.gramene.org/perl/protein_search?acc'.'=Q94HJ1',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/gramene_oryza.README',
+			    IDENTIFIER_2 => 'Swiss-Prot/TrEMBL_ID',
+			    IDENTIFIER_3 => 'Gene Name/Symbol',
+			    IDENTIFIER_11 => '',
+			    IDENTIFIER_2_EXAMPLE => 'Q94HJ1',
+			    IDENTIFIER_3_EXAMPLE => 'OJ991113_30.2',
+			    IDENTIFIER_11_EXAMPLE => '',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/gramene_oryza.txt',
+			    ANNOTATION_FILE => 'gene_association.gramene_oryza',
+			    MENU_LABEL  => 'Gramene (Rice - O. sativa)',
+			    SLIM_MENU_LABEL  => 'Gramene (Generic GO slim)'
+			    },
+			mgi => {
+			    GENE_URL => 'http://www.informatics.jax.org/searches/accession_report.cgi?id='.'<REPLACE_DBID>', 
+			    COMMON_NAME => 'Mouse',
+			    ORGANISM => 'Mus musculus',
+			    ORGANISM_DB_NAME => 'MGI',
+			    ORGANISM_DB_URL => 'http://www.informatics.jax.org/',
+			    DEFAULT_GENE_URL => 'http://www.informatics.jax.org/searches/accession_report.cgi?id'.'=MGI:1889008',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/mgi.README',
+			    IDENTIFIER_2 => 'MGI_ID',
+			    IDENTIFIER_3 => 'Gene Symbol',
+			    IDENTIFIER_11 => 'Gene_Symbol (old)',
+			    IDENTIFIER_2_EXAMPLE => 'MGI:1889008',
+			    IDENTIFIER_3_EXAMPLE => 'Atp2c1',
+			    IDENTIFIER_11_EXAMPLE => '1700121J11Rik',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/mgi.txt',
+			    ANNOTATION_FILE => 'gene_association.mgi',
+			    MENU_LABEL  => 'MGI (Mouse - M. musculus)',
+			    SLIM_MENU_LABEL  => 'MGI (Generic GO slim)'
+			    },
+			pseudocap => {
+			    GENE_URL => 'http://www.pseudomonas.com/AnnotationByPAU.asp?PA=',
+			    COMMON_NAME => 'Pseudomonas',
+			    ORGANISM => 'Pseudomonas aeruginosa PAO1',
+			    ORGANISM_DB_NAME => 'PseudoCAP',
+			    ORGANISM_DB_URL => 'http://v2.pseudomonas.com/',
+			    DEFAULT_GENE_URL => 'http://www.pseudomonas.com/AnnotationByPAU.asp?PA=PA4765',
+			    README_URL => '',
+			    IDENTIFIER_2 => 'PA#',
+			    IDENTIFIER_3 => 'Gene Name',
+			    IDENTIFIER_11 => 'Alt. Gene Name (opt.)',
+			    IDENTIFIER_2_EXAMPLE => 'PA4765',
+			    IDENTIFIER_3_EXAMPLE => 'omlA',
+			    IDENTIFIER_11_EXAMPLE => 'oprX',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/pseudocap.txt',
+			    ANNOTATION_FILE => 'gene_association.pseudocap',
+			    MENU_LABEL  => 'PseudoCAP (P. aeruginosa)',
+			    SLIM_MENU_LABEL  => 'PseudoCAP (Generic GO slim)'
+			    },
+			rgd => {
+			    GENE_URL => 'http://rgd.mcw.edu/tools/genes/genes_view.cgi?id=',
+			    COMMON_NAME => 'Rat',
+			    ORGANISM => 'Rattus norvegicus',
+			    ORGANISM_DB_NAME => 'RGD',
+			    ORGANISM_DB_URL => 'http://rgd.mcw.edu/',
+			    DEFAULT_GENE_URL => 'http://rgd.mcw.edu/tools/genes/genes_view.cgi?id'.'=RGD:3696',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/rgd.README',
+			    IDENTIFIER_2 => 'RGD_ID (or Ensembl Id, or UniProt accession)',
+			    IDENTIFIER_3 => 'Gene Symbol (or UniProt Entry Name)',
+			    IDENTIFIER_11 => 'if GOA-provided, an International Protein Index identifier',
+			    IDENTIFIER_2_EXAMPLE => 'RGD:3696',
+			    IDENTIFIER_3_EXAMPLE => 'Slc1a1',
+			    IDENTIFIER_11_EXAMPLE => 'EAAC1',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/rgd.txt',
+			    ANNOTATION_FILE => 'gene_association.rgd',
+			    MENU_LABEL  => 'RGD (Rat - R. novegicus)',
+			    SLIM_MENU_LABEL  => 'RGD (Generic GO slim)'
+			    },
+#			rgd_synonyms => {
+#			    # augmented rgd
+#			    GENE_URL => 'http://rgd.mcw.edu/tools/genes/genes_view.cgi?id=',
+#			    COMMON_NAME => 'Rat',
+#			    ORGANISM => 'Rattus norvegicus',
+#			    ORGANISM_DB_NAME => 'RGD + XREFS',
+#			    ORGANISM_DB_URL => 'http://rgd.mcw.edu/',
+#			    DEFAULT_GENE_URL => 'http://rgd.mcw.edu/tools/genes/genes_view.cgi?id'.'=RGD:3696',
+#			    README_URL => 'http://www.geneontology.org/gene-associations/readme/rgd.README',
+#			    IDENTIFIER_2 => 'RGD_ID (or Ensembl Id, or UniProt accession)',
+#			    IDENTIFIER_3 => 'Gene Symbol (or UniProt Entry Name)',
+#			    IDENTIFIER_11 => 'International Protein Index identifier, additional synonyms',
+#			    IDENTIFIER_2_EXAMPLE => 'RGD:3696',
+#			    IDENTIFIER_3_EXAMPLE => 'Slc1a1',
+#			    IDENTIFIER_11_EXAMPLE => 'EAAC1',
+#			    SAMPLE_GENE_LIST => '/sampleGeneFiles/rgd_synonyms.txt',
+#			    ANNOTATION_FILE => 'gene_association.rgd_synonyms',
+#			    MENU_LABEL  => 'RGD with synonyms (Rat - R. novegicus)',
+#			    SLIM_MENU_LABEL  => 'RGD with synonyms (Generic GO slim)'
+#			    },
+			sgd => {
+			    GENE_URL => 'https://www.yeastgenome.org/locus/',
+			    COMMON_NAME => 'Yeast',
+			    ORGANISM => 'Saccharomyces cerevisiae',
+			    ORGANISM_DB_NAME => 'SGD',
+			    ORGANISM_DB_URL => 'https://www.yeastgenome.org/',
+			    DEFAULT_GENE_URL => 'https://www.yeastgenome.org/locus/YPL250C',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/sgd.README',
+			    IDENTIFIER_2 => 'SGD_ID',
+			    IDENTIFIER_3 => 'Gene Name',
+			    IDENTIFIER_11 => 'Systematic ORF Name',
+			    IDENTIFIER_2_EXAMPLE => 'S0006171',
+			    IDENTIFIER_3_EXAMPLE => 'ICY2',
+			    IDENTIFIER_11_EXAMPLE => 'YPL250C',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/sgd.txt',
+			    ANNOTATION_FILE => 'gene_association.sgd',
+			    MENU_LABEL  => 'SGD (Yeast - S. cerevisiae)',
+			    SLIM_MENU_LABEL  => 'SGD (Yeast - S. cerevisiae GO slim)'
+			    },
+			sgd_generic => {
+			    ANNOTATION_FILE => 'gene_association.sgd_generic',
+			    SLIM_MENU_LABEL  => 'SGD (Generic GO slim)'
+			    },
+			tair  => {
+#			    GENE_URL => 'http://www.arabidopsis.org/servlets/TairObject?type=gene&name=',
+			    GENE_URL => 'http://www.arabidopsis.org/servlets/Search?type=general&search_action=detail&method=1&show_obsolete=F&sub_type=gene&SEARCH_EXACT=4&SEARCH_CONTAINS=1&name=',
+			    COMMON_NAME => 'Common wallcress',
+			    ORGANISM => 'Arabidopsis thaliana',
+			    ORGANISM_DB_NAME => 'TAIR',
+			    ORGANISM_DB_URL => 'http://www.arabidopsis.org/',
+#			    DEFAULT_GENE_URL => 'http://www.arabidopsis.org/servlets/TairObject?type'.'=gene&name'.'=AT3G52270.1',
+			    DEFAULT_GENE_URL => 'http://www.arabidopsis.org/servlets/Search?type=general&search_action=detail&method=1&show_obsolete=F&name=AT1G57980&sub_type=gene&SEARCH_EXACT=4&SEARCH_CONTAINS=1',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/tair.README',
+			    IDENTIFIER_2 => 'TAIR Accession',
+			    IDENTIFIER_3 => 'Gene Name',
+			    IDENTIFIER_11 => 'Gene Alias',
+			    IDENTIFIER_2_EXAMPLE => 'gene:2100553',
+			    IDENTIFIER_3_EXAMPLE => 'AT3G52270.1',
+			    IDENTIFIER_11_EXAMPLE => 'T25B15.40',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/tair.txt',
+			    ANNOTATION_FILE => 'gene_association.tair',
+			    MENU_LABEL  => 'TAIR (Common wallcress - A. thaliana)',
+			    SLIM_MENU_LABEL  => 'TAIR (Generic GO slim)'
+			    },
+			jcvi_Banthracis => {
+#			    GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus=',
+			    COMMON_NAME => '',
+			    ORGANISM => 'Bacillus anthracis',
+			    ORGANISM_DB_NAME => 'JCVI anthracis',
+#			    ORGANISM_DB_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenomePage3.spl?database'.'=gba',
+#			    DEFAULT_GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus='.'BA0001',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    IDENTIFIER_2 => 'JCVI Locus Name',
+			    IDENTIFIER_3 => '',
+			    IDENTIFIER_11 => 'Gene Symbol',
+			    IDENTIFIER_2_EXAMPLE => 'BA0001',
+			    IDENTIFIER_3_EXAMPLE => '',
+			    IDENTIFIER_11_EXAMPLE => 'dnaA',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_anthracis.txt',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Banthracis',
+			    MENU_LABEL  => 'JCVI_Banthracis (B. anthracis)',
+			    SLIM_MENU_LABEL  => 'JCVI_Banthracis (Generic GO slim)'
+			    },
+			jcvi_Cburnetii => {
+#			    GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus=',
+			    COMMON_NAME => '',
+			    ORGANISM => 'Coxiella burnetii',
+			    ORGANISM_DB_NAME => 'JCVI coxiella',
+#			    ORGANISM_DB_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenomePage3.spl?database'.'=gcb',
+#			    DEFAULT_GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus='.'CBU0001',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    IDENTIFIER_2 => 'JCVI Locus Name',
+			    IDENTIFIER_3 => '',
+			    IDENTIFIER_11 => 'Gene Symbol',
+			    IDENTIFIER_2_EXAMPLE => 'CBU0001',
+			    IDENTIFIER_3_EXAMPLE => '',
+			    IDENTIFIER_11_EXAMPLE => 'dnaA',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_coxiella.txt',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Cburnetii',
+			    MENU_LABEL  => 'JCVI_Cburnetii (C. burnetii)',
+			    SLIM_MENU_LABEL  => 'JCVI_Cburnetii (Generic GO slim)'
+			    },
+			jcvi_Cjejuni => {
+			    ORGANISM => 'Campylobacter jejuni',
+			    COMMON_NAME => '',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Cjejuni',
+			    MENU_LABEL  => 'JCVI_Cjejuni (C. jejuni)',
+			    SLIM_MENU_LABEL  => 'JCVI_Cjejuni (Generic GO slim)'
+			    },
+			jcvi_Dethenogenes => {
+			    ORGANISM => 'Dehalococcoides ethenogenes',
+			    COMMON_NAME => '',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Dethenogenes',
+			    MENU_LABEL  => 'JCVI_Dethenogenes (D. ethenogenes)',
+			    SLIM_MENU_LABEL  => 'JCVI_Dethenogenes (Generic GO slim)'
+			    },
+			jcvi_Gsulfurreducens => {
+#			    GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus=',
+			    COMMON_NAME => 'Geobacter',
+			    ORGANISM => 'Geobacter sulfurreducens PCA',
+			    ORGANISM_DB_NAME => 'JCVI Gsulfurreducens',
+#			    ORGANISM_DB_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenomePage3.spl?database'.'=ggs',
+#			    DEFAULT_GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus'.'=GSU0001',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    IDENTIFIER_2 => 'JCVI Locus Name',
+			    IDENTIFIER_3 => '',
+			    IDENTIFIER_11 => 'Gene Symbol',
+			    IDENTIFIER_2_EXAMPLE => 'GSU_0001',
+			    IDENTIFIER_3_EXAMPLE => '',
+			    IDENTIFIER_11_EXAMPLE => 'dnaN',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_gsulfurreducens.txt',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Gsulfurreducens',
+			    MENU_LABEL  => 'JCVI_Gsulfurreducens (G. sulfurreducens PCA)',
+			    SLIM_MENU_LABEL  => 'JCVI_Gsulfurreducens (Generic GO slim)'
+			    },                      
+			jcvi_Lmonocytogenes => {
+#			    GENE_URL => undef,
+			    ORGANISM => 'Listeria monocytogenes',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Lmonocytogenes', 
+			    MENU_LABEL  => 'JCVI_Lmonocytogenes (L. monocytogenes)',
+			    SLIM_MENU_LABEL  => 'JCVI_Lmonocytogenes (Generic GO slim)' 
+			    },
+			jcvi_Mcapsulatus => {
+			    ORGANISM => 'Methylococcus capsulatus',
+			    COMMON_NAME => '',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Mcapsulatus',
+			    MENU_LABEL  => 'JCVI_Mcapsulatus (M. capsulatus)',
+			    SLIM_MENU_LABEL  => 'JCVI_Mcapsulatus (Generic GO slim)' 
+			    },
+			jcvi_Psyringae => {
+#			    GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus=',
+			    COMMON_NAME => '',
+			    ORGANISM => 'Pseudomonas syringae',
+			    ORGANISM_DB_NAME => 'JCVI Psyringae',
+#			    ORGANISM_DB_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenomePage3.spl?database'.'=gps',
+#			    DEFAULT_GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus'.'=PSPTO5598',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    IDENTIFIER_2 => 'JCVI Locus Name',
+			    IDENTIFIER_3 => '',
+			    IDENTIFIER_11 => 'Gene Symbol',
+			    IDENTIFIER_2_EXAMPLE => 'PSPTO5598',
+			    IDENTIFIER_3_EXAMPLE => '',
+			    IDENTIFIER_11_EXAMPLE => 'atpC',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_psyringae.txt',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Psyringae',
+			    MENU_LABEL  => 'JCVI_Psyringae (P. syringae)',
+			    SLIM_MENU_LABEL  => 'JCVI_Psyringae (Generic GO slim)'
+			    },
+			jcvi_Soneidensis => {
+#			    GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus=',
+			    COMMON_NAME => '',
+			    ORGANISM => 'Shewanella oneidensis',
+			    ORGANISM_DB_NAME => 'JCVI shewanella',
+#			    ORGANISM_DB_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenomePage3.spl?database'.'=gsp',
+#			    DEFAULT_GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus'.'=SO0001',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    IDENTIFIER_2 => 'JCVI Locus Name',
+			    IDENTIFIER_3 => '',
+			    IDENTIFIER_11 => 'Gene Symbol',
+			    IDENTIFIER_2_EXAMPLE => 'SO0001',
+			    IDENTIFIER_3_EXAMPLE => '',
+			    IDENTIFIER_11_EXAMPLE => 'mioC',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_shewannella.txt',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Soneidensis',
+			    MENU_LABEL  => 'JCVI_Soneidensis (S. oneidensis)',
+			    SLIM_MENU_LABEL  => 'JCVI_Soneidensis (Generic GO slim)'
+			    },
+			jcvi_Spomeroyi => {
+			    ORGANISM => 'Silicibacter pomeroyi',
+			    COMMON_NAME => '',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Spomeroyi',
+			    MENU_LABEL  => 'JCVI_Spomeroyi (S. pomeroyi)',
+			    SLIM_MENU_LABEL  => 'JCVI_Spomeroyi (Generic GO slim)'
+			    },
+#			jcvi_Tbrucei_chr2 => {
+##			    GENE_URL => 'http://www.jcvi.org/tigr-scripts/euk_manatee/shared/ORF_infopage.cgi?db=tba1&;orf=',
+#			    COMMON_NAME => '',
+#			    ORGANISM => 'Trypanosoma brucei',
+#			    ORGANISM_DB_NAME => 'JCVI Tbrucei chr2',
+##			    ORGANISM_DB_URL => 'http://www.jcvi.org/tdb/e2k1/tba1/',
+##			    DEFAULT_GENE_URL => 'http://www.jcvi.org/tigr-scripts/euk_manatee/shared/ORF_infopage.cgi?db'.'=tba1&;orf'.'=Tb927.2.5850',
+#			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+#			    IDENTIFIER_2 => 'JCVI Locus Name',
+#			    IDENTIFIER_3 => 'JCVI Locus Name',
+#			    IDENTIFIER_11 => '',
+#			    IDENTIFIER_2_EXAMPLE => 'Tb927.2.5850',
+#			    IDENTIFIER_3_EXAMPLE => '1F7.295',
+#			    IDENTIFIER_11_EXAMPLE => '',
+#			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_tbrucei_chr2.txt',
+#			    ANNOTATION_FILE => 'gene_association.jcvi_Tbrucei_chr2',
+#			    MENU_LABEL  => 'JCVI_Tbrucei_chr2 (T. brucei)',
+#			    SLIM_MENU_LABEL  => 'JCVI_Tbrucei_chr2 (Generic GO slim)'
+#			    },
+			jcvi_Vcholerae => {
+#			    GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus=',
+			    COMMON_NAME => 'Cholera spirillum',
+			    ORGANISM => 'Vibrio cholerae',
+			    ORGANISM_DB_NAME => 'JCVI vibrio',
+#			    ORGANISM_DB_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenomePage3.spl?database'.'=gvc',
+#			    DEFAULT_GENE_URL => 'http://www.jcvi.org/tigr-scripts/CMR2/GenePage.spl?locus'.'=VC0112',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+			    IDENTIFIER_2 => 'JCVI Locus Name',
+			    IDENTIFIER_3 => '',
+			    IDENTIFIER_11 => 'Gene Symbol',
+			    IDENTIFIER_2_EXAMPLE => 'VC0112',
+			    IDENTIFIER_3_EXAMPLE => '',
+			    IDENTIFIER_11_EXAMPLE => 'cycA',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_vibrio.txt',
+			    ANNOTATION_FILE => 'gene_association.jcvi_Vcholerae',
+			    MENU_LABEL  => 'JCVI_Vcholerae (V. cholerae)',
+			    SLIM_MENU_LABEL  => 'JCVI_Vcholerae (Generic GO slim)'
+			    },
+#			jcvi_gene_index => {
+##			    GENE_URL => undef,
+#			    COMMON_NAME => 'Gene Index',
+#			    ORGANISM => '',
+#			    ORGANISM_DB_NAME => 'JCVI gene index',
+##			    ORGANISM_DB_URL => 'http://www.jcvi.org/tdb/tgi/index.shtml',
+##			    DEFAULT_GENE_URL => '',
+#			    README_URL => 'http://www.geneontology.org/gene-associations/readme/jcvi_prokaryotic.README',
+#			    IDENTIFIER_2 => 'JCVI_TGI_ID',
+#			    IDENTIFIER_3 => '',
+#			    IDENTIFIER_11 => '',
+#			    IDENTIFIER_2_EXAMPLE => 'Arabidopsis_TC171812',
+#			    IDENTIFIER_3_EXAMPLE => '',
+#			    IDENTIFIER_11_EXAMPLE => '',
+#			    SAMPLE_GENE_LIST => '/sampleGeneFiles/jcvi_gene_index.txt',
+#			    ANNOTATION_FILE => 'gene_association.jcvi_gene_index',
+#			    MENU_LABEL  => 'JCVI_gene_index',
+#			    SLIM_MENU_LABEL  => 'JCVI_gene_index (Generic GO slim)'
+# 			    },
+			wb => {
+			    GENE_URL => 'http://www.wormbase.org/db/gene/gene?name=',
+			    COMMON_NAME => 'Worm',
+			    ORGANISM => 'Caenorhabditis elegans',
+			    ORGANISM_DB_NAME => 'WormBase',
+			    ORGANISM_DB_URL => 'http://www.wormbase.org/',
+			    DEFAULT_GENE_URL => 'http://www.wormbase.org/db/gene/gene?name'.'=nhr-10',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/WormBase.README',
+			    IDENTIFIER_2 => 'Protein Name',
+			    IDENTIFIER_3 => 'Gene Name',
+			    IDENTIFIER_11 => 'Gene Symbol',
+			    IDENTIFIER_2_EXAMPLE => 'CE00815<A0>',
+			    IDENTIFIER_3_EXAMPLE => 'nhr-10',
+			    IDENTIFIER_11_EXAMPLE => 'B0280.8',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/wb.txt',
+			    ANNOTATION_FILE => 'gene_association.wb',
+			    MENU_LABEL  => 'WormBase (Worm - C. elegans)',
+			    SLIM_MENU_LABEL  => 'WormBase (Generic GO slim)'
+			    },
+			zfin => {
+			    GENE_URL => 'http://zfin.org/cgi-bin/webdriver?MIval=aa-markerview.apg&OID=',
+			    COMMON_NAME => 'Zebrafish',
+			    ORGANISM => 'Danio rerio',
+			    ORGANISM_DB_NAME => 'ZFIN',
+			    ORGANISM_DB_URL => 'http://zfin.org/cgi-bin/webdriver?MIval'.'=aa-ZDB_home.apg',
+			    DEFAULT_GENE_URL => 'http://zfin.org/cgi-bin/webdriver?MIval'.'=aa-markerview.apg&OID'.'=ZDB-GENE-000125-12',
+			    README_URL => 'http://www.geneontology.org/gene-associations/readme/zfin.README',
+			    IDENTIFIER_2 => 'ZFIN_ID',
+			    IDENTIFIER_3 => 'Gene Symbol',
+			    IDENTIFIER_11 => '',
+			    IDENTIFIER_2_EXAMPLE => 'ZDB-GENE-000125-12',
+			    IDENTIFIER_3_EXAMPLE => 'igfbp2',
+			    IDENTIFIER_11_EXAMPLE => '',
+			    SAMPLE_GENE_LIST => '/sampleGeneFiles/zfin.txt',
+			    ANNOTATION_FILE => 'gene_association.zfin',
+			    MENU_LABEL  => 'ZFIN (Zebrafish - D. rerio)',
+			    SLIM_MENU_LABEL  => 'ZFIN (Generic GO slim)'
+			    }
+			);
+
+
+##############################################################################
+# CLASS PRIVATE METHODS
+#
+##############################################################################
+sub _annotationFileSuffix {
+##############################################################################
+# This class private method return gene annotation file suffix for the supplied 
+# gene annotation file.
+
+  my ($self, $annotationFile) = @_;
+
+  if ($annotationFile) {
+    my @annotationFileSuffix = split (/\./, $annotationFile);
+    my $annotationFileSuffix = $annotationFileSuffix[$#annotationFileSuffix];
+
+    return ($annotationFileSuffix);
+  }
+}
+
+
+##############################################################################
+# CLASS PUBLIC METHODS
+#
+##############################################################################
+sub new {
+##############################################################################
+=pod
+
+=head1 Constructor
+
+=head2 new
+
+This is the constructor for an MetaData object.
+
+Usage:
+
+   my $metaData = GO::AnnotationProvider::MetaData->new();
+
+=cut
+
+   my ($class) = @_;
+   #---Create a new object
+   my $self = bless { }, $class;
+   
+   return $self;                                     
+}
+
+
+=pod
+
+=head1 Public Instance Methods
+
+=cut
+
+##############################################################################
+sub AUTOLOAD {
+##############################################################################
+=pod
+
+=head2 AUTOLOAD
+
+This public AUTOLOAD method is used to get attributes and set attributes for an 
+organism's gene annotation file for the supplied gene annotation file.
+
+Usage:
+
+   my $annotationFile = 'gene_association.sgd';
+  
+   #---Get this organism's annotation file extension (i.e. suffix)
+   my @annotationFileSuffix = split (/\./, $annotationFile);
+   my $annotationFileSuffix = $annotationFileSuffix[$#annotationFileSuffix];
+
+   #MUTATOR METHODS
+   #---Call this MetaData.pm's set method to set the value for ANNOTATION_FILE for this organism
+   $metaData->setANNOTATION_FILE($file, $annotationFile);
+   #---Call this MetaData.pm's set method to set the value for ANNOTATE_GENE_NUM for this organism
+   $metaData->setANNOTATE_GENE_NUM($annotateGeneNum, $annotationFile);
+   #---Call this MetaData.pm's set method to set the value for ESTIMATE_GENE_NUM for this organism
+   $metaData->setESTIMATE_GENE_NUM($annotationFile);
+
+   #ACCESSOR METHODS
+   #---Call this MetaData.pm's get method to get organism gene url
+   my ($organismGeneUrl) = $metaData->getGENE_URL($annotationFile);
+   #---Call this MetaData.pm's get method to get organism common name 
+   my ($organismCommonName) = $metaData->getCOMMON_NAME($annotationFile);
+   #---Call this MetaData.pm's get method to get organism scientific name
+   my ($ogranismScientificName) = $metaData->getORGANISM($annotationFile);
+   #---Call this MetaData.pm's get method to get organism database name
+   my ($organismDatabaseName) = $metaData->getORGANISM_DB_NAME($annotationFile);
+   #---Call this MetaData.pm's get method to get organism database url
+   my ($organismDatabaseUrl) = $metaData->getORGANISM_DB_URL($annotationFile);
+   #---Call this MetaData.pm's get method to get all the attributes about this organism
+   my ($organismGeneAsscFileRef) = $metaData->getOrganismInfo($annotationFile);
+
+=cut
+
+   my ($self, $annotationFile) = @_;
+
+   #---Get the annotation file's file extension (i.e. suffix)
+   my $annotationFileSuffix = $self->_annotationFileSuffix($annotationFile);
+   #$DEBUG && print "annotationFileSuffix: $annotationFileSuffix\n";
+  
+   #$DEBUG && print "AUTOLOAD is set to ", $AUTOLOAD, "\n";
+   my ($getORsetMethod, $getORsetAttribute) = ( $AUTOLOAD =~ /(get|set)(\w+)/);
+   #$DEBUG && print "\$getORsetMethod: ", $getORsetMethod, "  \$getORsetAttribute: ", $getORsetAttribute, "\n"; 
+
+ if ($getORsetMethod) {
+   #---AUTOLOAD accessor methods
+   if ($getORsetMethod eq 'get') {
+   
+      #AUTOLOAD accessor method to get all the class data attributes for this organism (e.g. getOrganismInfo)
+      if ($getORsetAttribute eq 'OrganismInfo') {
+         return ($_annotationFiles{ $annotationFileSuffix }); #This returns reference
+      }#if ($getORsetAttribute eq 'OrganismInfo')
+      
+      #AUTOLOAD accessor methods to get a specific class data attribute for this organism (e.g. getGENE_URL, getCOMMON_NAME, getORGANISM, getORGANISM_DB_NAME, getORGANISM_DB_URL)
+      else {
+         return ( $_annotationFiles{ $annotationFileSuffix }->{ $getORsetAttribute } ); #This returns value
+      }
+   }#end if ($getORsetMethod eq 'get')
+
+
+   #---AUTOLOAD mutator methods
+   elsif ($getORsetMethod eq 'set') {
+
+      #AUTOLOAD mutator method to set value for ESTIMATE_GENE_NUM attribute (e.g. setESTIMATE_GENE_NUM)
+      if ($getORsetAttribute eq 'ESTIMATE_GENE_NUM') {
+         if ($_annotationFiles{ $annotationFileSuffix }->{ORGANISM} ) {
+            $_annotationFiles{ $annotationFileSuffix }->{$getORsetAttribute} = $organismEstimateTotalGeneNum { $_annotationFiles{ $annotationFileSuffix }->{ORGANISM} };
+         }                               
+         else {
+            $_annotationFiles{ $annotationFileSuffix }->{$getORsetAttribute} = '';
+         }
+   
+         $DEBUG && print "$getORsetAttribute: ", $_annotationFiles{ $annotationFileSuffix }->{$getORsetAttribute}, "\n";
+      }#end if ($getORsetAttribute eq 'ESTIMATE_GENE_NUM')
+
+      #AUTOLOAD mutator methods to set values for some of this class's data attributes to newValue (e.g. setANNOTATION_FILE, setANNOTATE_GENE_NUM)
+      else {
+         my ($self, $newValue, $annotationFile) = @_;
+
+         #---Get the annotation file's file extension (i.e. suffix)
+         my $annotationFileSuffix = $self->_annotationFileSuffix($annotationFile);
+         $DEBUG && print "annotationFileSuffix: $annotationFileSuffix\n";
+
+         $_annotationFiles{ $annotationFileSuffix }->{$getORsetAttribute} = $newValue;   
+         $DEBUG && print "$getORsetAttribute: ", $_annotationFiles{ $annotationFileSuffix }->{$getORsetAttribute}, "\n";
+         
+      }#end else
+    
+   }#end elsif ($getORsetMethod eq 'set')
+
+   else { croak "No such method:  ", $AUTOLOAD, "\n"; }
+ }
+}
+
+##############################################################################
+sub createGeneAsscTable {
+##############################################################################
+=pod
+
+=head2 createGeneAsscTable
+
+This public method creates a html table of the gene annotation file(s) available at GO ftp site 
+for the supplied reference to an array of hash reference(s), whose key is the gene annotation 
+file suffix and whose value is the reference to all the attributes of the organims.  The html  
+table is written to the specified filepath (2nd argument).
+
+An example of a html table of the gene annotation files created by this public method can 
+be viewed at http://go.princeton.edu/GOTermFinder/GOTermFinder_help.shtml#asscFile
+
+Usage:
+
+   $metaData->createGeneAsscTable(\@organismsInfoArray, $filepath, $relativeUrl);
+   
+
+Usage Example:
+
+   use GO::AnnotationProvider::AnnotationParser;
+   use go::AnnotationProvider::MetaData;
+ 
+
+   my $metaData = GO::AnnotationProvider::MetaData->new();
+   my $Dir = '/usr/local/go/lib/GO/'; #Directory where the GO and gene annotation files downloaded from GO ftp site locate
+   my $relativeUrl = "/goFiles/"; # a URL relative to the DOCUMENT_ROOT, where can the associations file be found (or linked to).
+
+   opendir (DIR, $Dir) or die("Error opening: $!");
+   my @organismsInfoArray;
+   foreach my $file (readdir(DIR)) {
+      if ($file !~ m/ontology/) {
+         my $annotationFilePath = $Dir.$file; 
+    
+         # Get this organism's total annotated gene number
+         my $annotationParser = GO::AnnotationProvider::AnnotationParser->new(annotationFile=>$annotationFilePath);
+         my @databaseIds = $annotationParser->allDatabaseIds();
+         my $annotateGeneNum = scalar(@databaseIds);
+
+         #---Get this organism's annotation file extension (i.e. suffix)
+         my @annotationFileSuffix = split (/\./, $file);
+         my $annotationFileSuffix = $annotationFileSuffix[$#annotationFileSuffix];
+
+         # Call these mutator methods to set values of these attributes for this organism
+         $metaData->setANNOTATION_FILE($file, $annotationFileSuffix);
+         $metaData->setANNOTATE_GENE_NUM($annotateGeneNum, $annotationFileSuffix);
+         $metaData->setESTIMATE_GENE_NUM($annotationFileSuffix);
+
+         # Call this accessor method to return a hash reference of all attributes for this organism
+         my ($organismInfo) = $metaData->getOrganismInfo($annotationFileSuffix);
+         # @organismsInfoArray contains hash references of attributes for all the organisms
+         push(@organismsInfoArray, $organismInfo);  
+      }
+   } 
+
+   # Call this method to create an html table of all the gene association files 
+   my $filepath = "/Genomics/curtal/www/go/html/GOTermMapper/MetaData.html";
+   $metaData->createGeneAsscTable(\@organismsInfoArray, $filepath, $relativeUrl);
+
+   closedir(DIR);
+
+=cut
+
+   my ($self, $organismGeneAsscFilesArrayRef, $filepath, $relativeUrl) = @_;
+
+   #dereference referenced variables
+   my @organismGeneAsscFilesArray = @$organismGeneAsscFilesArrayRef;
+
+   my @rows;
+
+   foreach my $geneAsscFile (@organismGeneAsscFilesArray) {
+      
+      my $commonName = $geneAsscFile->{COMMON_NAME};
+      my $organism = $geneAsscFile->{ORGANISM};
+      my $orgText = $organism;
+      if($commonName){$orgText =  "$commonName - $orgText"; }
+
+      my $organismDBName = $geneAsscFile->{ORGANISM_DB_NAME};
+
+      my $organismDBUrl = $geneAsscFile->{ORGANISM_DB_URL};
+      my $organismDBLink = "";
+      if ($organismDBUrl) {
+	  $organismDBLink = a({-href=>$organismDBUrl}, b($organismDBName));
+      }
+
+      my $defaultGeneUrl = $geneAsscFile->{DEFAULT_GENE_URL};
+      my $defaultGeneUrlLink = "";
+      if ($defaultGeneUrl) {
+	  $defaultGeneUrlLink = a({-href=>$defaultGeneUrl}, "Default gene URL");
+      }
+
+      my $annotationFile = $geneAsscFile->{ANNOTATION_FILE};
+      my $annotationFileLink = "";
+      if ($annotationFile) {
+	  $annotationFileLink = a({-href=>$relativeUrl.$annotationFile}, $annotationFile);
+      }
+
+      my $readMeUrl = $geneAsscFile->{README_URL};
+      my $readMeUrlLink = ""; 
+      if ($readMeUrl){
+	  $readMeUrlLink = a({-href=>$readMeUrl}, "README");
+      }
+
+      my $annotateGeneNum = $geneAsscFile->{ANNOTATE_GENE_NUM};
+      my $estimateGeneNum = $geneAsscFile->{ESTIMATE_GENE_NUM} || '';
+      my $identifier_2 = $geneAsscFile->{IDENTIFIER_2} || '';
+      my $identifier_3 = $geneAsscFile->{IDENTIFIER_3} || '';
+      my $identifier_11 = $geneAsscFile->{IDENTIFIER_11} || '';
+      my $identifier_2_example = $geneAsscFile->{IDENTIFIER_2_EXAMPLE} || '';
+      my $identifier_3_example = $geneAsscFile->{IDENTIFIER_3_EXAMPLE} || '';
+      my $identifier_11_example = $geneAsscFile->{IDENTIFIER_11_EXAMPLE} || '';
+   
+      my $sample_list = $geneAsscFile->{SAMPLE_GENE_LIST};
+      my $sample_list_link = "";
+      if ($sample_list) {
+	  $sample_list_link = a({-href=>$sample_list}, "sample list");
+      }
+
+      my $conversionTool = $geneAsscFile->{CONVERSION_TOOL};
+      my $conversionToolLink = "";
+      if ($conversionTool ) {
+	  # redefine if any were specified
+	  foreach my $label (keys(%{$conversionTool})){
+	      $conversionToolLink .= li(a({-href=>$$conversionTool{$label}}, $label));
+	  }
+	  $conversionToolLink = ul($conversionToolLink);
+      }
+
+      if($annotateGeneNum > 0){
+	  my $row = 
+	       Tr({-bgcolor=>"#DEB887"},
+		  td({-align=>"left", -valign=>"top", -nowrap=>''}, '<font size=-1>', join(br(), $orgText, $organismDBLink, $defaultGeneUrlLink, $annotationFileLink, $readMeUrlLink), '</font>'),
+		  td({-align=>"left", -valign=>"top", -nowrap=>''}, '<font size=-1>', $annotateGeneNum , '</font>'),
+		  td({-align=>"left", -valign=>"top", -nowrap=>''}, '<font size=-1><b>',$estimateGeneNum, '</b></font>'),
+		  td({-align=>"left", -valign=>"top", -nowrap=>''}, '<font size=-1>', join(br(), $identifier_2, $identifier_3, $identifier_11), '</font>'),
+		  td({-align=>"left", -valign=>"top", -nowrap=>''}, '<font size=-1>', join(br(), $identifier_2_example, $identifier_3_example, $identifier_11_example, $sample_list_link), '</font>'),
+		  td({-align=>"left", -valign=>"top"}, '<font size=-2>', $conversionToolLink, '</font>')
+		  );
+
+#      print $row."\n";
+      push(@rows,$row);
+
+      }else{
+	  warn "$annotationFile has zero annotations (probably filtered or eliminated by GO consortium conventions)\n";
+      }
+
+   }#end foreach my $geneAsscFile (@organismGeneAsscFilesArray)
+
+   #---Generate the requested gene association  table file
+   open (HTML, ">$filepath") || die "count not open $filepath : $!";
+      
+   print HTML "<center>\n",
+              "<a name=asscFiles></a>",
+              "<table border=0 cellpadding=3>\n",
+              "<tr>\n",
+              "<th align=center nowrap colspan=\"6\"><font color=\"#993300\" size=\"+1\">Gene Association File Table<font></th>\n",
+              "</tr>\n",
+
+              "<tr bgcolor=#993300>\n",
+                   "<th align=center>\n",
+                        "<a href=\"http://www.geneontology.org/GO.current.annotations.shtml\">\n",
+                        "<font color=#FFFFFF>Organism, Gene Associations, and Authority</font><\a></th>\n",
+                   "<th align=center>\n",
+                        "<a href=\"http://www.geneontology.org/GO.current.annotations.shtml\">\n",
+                        "<font color=#FFFFFF>Total Annotated Gene Products</font></a></th>\n",
+                   "<th align=center><font color=#FFFFFF>Total Estimated Gene Products</font></th>\n",
+                   "<th align=center nowrap>\n",
+#                        "<a href=\"http://www.geneontology.org/GO.annotation.html#file\">\n",
+#                        "<font color=#FFFFFF>Identifiers</font></a></th>\n",
+                        "<font color=#FFFFFF>Identifiers</font></th>\n",
+                   "<th align=center nowrap><font color=#FFFFFF>Example IDs</font></th>\n",
+                   "<th align=center><font color=#FFFFFF>Identifier Conversion Tool(s)</font></th>\n",
+              "</tr>\n";
+   
+   print HTML "@rows";
+         
+   print HTML "\n</table>\n",
+                "</center>\n";
+
+   close(HTML);
+
+}
+
+##############################################################################
+sub geneAnnotationFilesMenu {
+##############################################################################
+=pod
+
+=head2 geneAnnotationFilesMenu
+
+This public method returns a reference to an array of gene annotation
+files and a reference to a hash, whose keys are the gene annotation
+files and whose value are gene annotation file labels.  These returned
+array and hash references can then be used to create GOTermFinder's
+pull-down menu of gene annotation files.
+
+Usage:
+   
+   my ($geneAssValuesArrayRef, $geneAssLabelsHashRef) = $metaData->geneAnnotationFilesMenu;
+
+=cut    
+
+    my ($self) = @_;
+
+    return( $self->_menuValuesAndLables( labelKey => 'MENU_LABEL' ));
+} 
+
+
+##############################################################################
+sub geneAnnotationSlimFilesMenu {
+##############################################################################
+=pod
+
+=head2 geneAnnotationSlimFilesMenu
+
+This public method returns a reference to an array of gene annotation
+files and a reference to a hash, whose keys are the gene annotation
+files and whose value are gene annotation file labels.  These returned
+array and hash references can then be used to create a GOTermMapper's
+pull-down menu of gene annotation files.
+
+Usage:
+   
+   my ($geneAssValuesArrayRef, $geneAssLabelsHashRef) = $metaData->geneAnnotationSlimFilesMenu;
+
+=cut    
+
+    my ($self) = @_;
+
+    return( $self->_menuValuesAndLables(labelKey => 'SLIM_MENU_LABEL') );
+}
+
+
+##############################################################################
+sub _menuValuesAndLables {
+##############################################################################
+# given a key (MENU_LABEL or SLIM_MENU_LABEL), this private method
+# returns a 2 references suitable to construct a CGI popup menu.
+# Returned, in order :
+# 1) reference to a array of menu values
+# 2) reference to a hash of value to label
+#
+
+    my $self = shift;
+    my %args = @_;
+
+    my $labelKey = $args{'labelKey'};
+    my $fileKey = 'ANNOTATION_FILE';
+
+    my $check_dir;
+    if ($args{'checkUsableDir'}) {
+	$check_dir = $args{'checkUsableDir'};
+    }
+
+    my (@values, %labels);
+
+    foreach my $annotationFileSuffix ( keys(%_annotationFiles) ) {
+	next if( $_annotationFiles{ $annotationFileSuffix }->{UNUSABLE} );
+	# if both a value and a label are defined, use them
+	if( $_annotationFiles{ $annotationFileSuffix }->{$fileKey} &&
+	    $_annotationFiles{ $annotationFileSuffix }->{$labelKey} ) {
+
+	    push(@values, $_annotationFiles{ $annotationFileSuffix }->{$fileKey});
+	    $labels{ $_annotationFiles{ $annotationFileSuffix }->{$fileKey} } = $_annotationFiles{ $annotationFileSuffix }->{$labelKey};
+
+	}
+
+    }
+
+    @values = sort(@values);
+
+    return(\@values, \%labels);
+}
+
+
+##############################################################################
+sub checkUnusable {
+##############################################################################
+    my $self = shift;
+    my %args = @_;
+
+    my $check_dir = $args{dir};
+    return undef unless ($check_dir);
+
+    my @unusables;
+
+    my $n = 0;
+    foreach my $annotationFileSuffix ( keys(%_annotationFiles) ) {
+	# check if the file is usable, as indicated by readability for now
+	if( $_annotationFiles{ $annotationFileSuffix }->{ANNOTATION_FILE} ){
+	    if (! -r $check_dir."/".$_annotationFiles{ $annotationFileSuffix }->{ANNOTATION_FILE}) {
+		my %info;
+		$info{ANNOTATION_FILE_SUFFIX} = $annotationFileSuffix;
+		foreach my $key ("ORGANISM", "ORGANISM_DB_NAME", "MENU_LABEL") {
+		    $info{$key} = $_annotationFiles{ $annotationFileSuffix }->{$key};
+		}
+		$_annotationFiles{ $annotationFileSuffix }->{UNUSABLE} = 1;
+		$unusables[$n++] = \%info;
+	    } else {
+		$_annotationFiles{ $annotationFileSuffix }->{UNUSABLE} = 0;
+	    }
+	}
+    }
+
+    return \@unusables;
+}
+
+
+##############################################################################
+sub organismEstimateTotalGeneNum {
+##############################################################################
+=pod
+
+=head2 organismEstimateTotalGeneNum
+
+This public method returns an organism's estimate total gene number for the  
+supplied gene annotation file.
+
+Usage:
+
+   my ($estimateGeneNum) = $metaData->organismEstimateTotalGeneNum($annotionFile);
+
+=cut
+
+   my ($self, $annotationFile) = @_;
+
+   #---Get the annotation file's file extension (i.e. suffix)
+   my $annotationFileSuffix = $self->_annotationFileSuffix($annotationFile);
+   my $organism = $_annotationFiles{ $annotationFileSuffix }->{ORGANISM};
+   $DEBUG && print "annotationFileSuffix: $annotationFileSuffix, organism $organism\n";
+
+   #---Get TOTAL_NUM_GENES for the annotation file, if TOTAL_NUM_GENES is defined
+   my $totalNumGenes;
+   if (($organism) && (defined($organismEstimateTotalGeneNum { $organism }))) {
+      $totalNumGenes = $organismEstimateTotalGeneNum { $organism };
+#   }
+#   else {
+#      $totalNumGenes = '';
+   }
+   
+   return $totalNumGenes;  
+}
+
+
+##############################################################################
+sub organismGeneUrl {
+##############################################################################
+=pod
+
+=head2 organismGeneUrl
+
+This public method returns an organism's gene url for the supplied gene url specified in the 
+$opt_u variable. If no gene url specified in the $opt_u variable, the method returns the 
+organism's default gene url for the supplied gene annotation file.
+
+Usage:
+
+   my $opt_u = 'http://genome-www4.stanford.edu/cgi-bin/SGD/locus.pl?locus=';
+   my ($geneUrl) = $metaData->organismGeneUrl($annotationFile, $opt_u);
+
+=cut
+
+   my ($self, $annotationFile, $opt_u) = @_;
+
+   #---Get the annotation file's file extension (i.e. suffix)
+   my $annotationFileSuffix = $self->_annotationFileSuffix($annotationFile); 
+   $DEBUG && print "annotationFileSuffix: $annotationFileSuffix\n";
+
+   #---Get GENE_URL for the annotation file.
+   #   If user specifies the geneUrl, then use the user specified geneUrl. Otherwise use the 
+   #   geneUrl defined in the %annotationFiles hash
+   my $geneUrl;
+   if ($opt_u) { 
+       $geneUrl = $opt_u;
+   } else {
+      if (defined($_annotationFiles{ $annotationFileSuffix }->{GENE_URL}) ) {
+	  $geneUrl = $_annotationFiles{ $annotationFileSuffix }->{GENE_URL};
+#	  $geneUrl .= "<REPLACE_THIS>" if ($geneUrl !~ /\<REPLACE_/);
+      } else {
+         $geneUrl = '';
+      }
+   }
+   if (($geneUrl) && ($geneUrl !~ /\<REPLACE_/)) {
+       $geneUrl .= "<REPLACE_THIS>";
+   }
+
+   return $geneUrl;
+}
+
+
+##############################################################################
+sub organismFileExist {
+##############################################################################
+=pod
+
+=head2 organismFileExist
+
+This public method returns a boolean to indicate whether the supplied gene annotation file exists.
+
+Usage:
+
+   my ($organismFileExist) = $metaData->organismFileExist($annotationFile);
+
+=cut
+   
+   my ($self, $annotationFile) = @_;
+                          
+   #---Get the annotation file's file extension (i.e. suffix)
+   my $annotationFileSuffix = $self->_annotationFileSuffix($annotationFile);   
+   $DEBUG && print "annotationFileSuffix: $annotationFileSuffix\n";
+
+   #---Check if the annotation file exists
+   if(exists($_annotationFiles{ $annotationFileSuffix }) ) {
+      return(1);
+   }
+   else {
+      return(0);
+   }
+}
+
+1; #make sure the file returns true or require will fail
+
+
+
+##############################################################################
+# Additional POD (Plain Old Documentation) from here on down
+##############################################################################
+
+__END__
+
+=pod
+
+=head1 AUTHOR
+
+Linda McMahan, lmcmahan@genomics.princeton.edu
+
+=cut
diff --git a/www/lib/GO/OntologyProvider/OboParser.pm b/www/lib/GO/OntologyProvider/OboParser.pm
new file mode 100644
index 0000000..8be69e0
--- /dev/null
+++ b/www/lib/GO/OntologyProvider/OboParser.pm
@@ -0,0 +1,1042 @@
+package GO::OntologyProvider::OboParser;
+
+# File       : OboParser.pm
+# Authors    : Elizabeth Boyle; Gavin Sherlock
+# Date Begun : Summer 2001
+# Rewritten  : September 29th 2002
+#
+# Updated to parse the gene ontology info from the obo file.
+# August 2006, Shuai Weng  
+#
+# $Id: OboParser.pm,v 1.6 2011/06/03 18:01:40 mark Exp $
+
+# License information (the MIT license)
+
+# Copyright (c) 2003 Gavin Sherlock; Stanford University
+
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+=pod
+
+=head1 NAME
+
+GO::OntologyProvider::OboParser - Provides API for retrieving data from Gene Ontology obo file.
+
+=head1 SYNOPSIS
+
+    use GO::OntologyProvider::OboParser;
+
+    my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => "gene_ontology.obo",
+                                                        aspect       => [P|F|C]);
+
+    print "The ancestors of GO:0006177 are:\n";
+
+    my $node = $ontology->nodeFromId("GO:0006177");
+
+    foreach my $ancestor ($node->ancestors){
+    
+        print $ancestor->goid, " ", $ancestor->term, "\n";
+    
+    }
+
+    $ontology->printOntology();
+
+
+=head1 DESCRIPTION
+
+GO::OntologyProvider::OboParser implements the interface defined by
+GO::OntologyProvider, and parses the gene ontology obo file (GO) in
+plain text (not XML) format.  These files can be obtained from the
+Gene Ontology Consortium web site, http://www.geneontology.org/.  From
+the information in the file, it creates a directed acyclic graph (DAG)
+structure in memory.  This means that GO terms are arranged into
+tree-like structures where each GO node can have multiple parent nodes
+and multiple child nodes.  The file MUST be named with a .obo suffix.
+
+This data structure can be used in conjunction with files in which
+certain genes are annotated to corresponding GO nodes.
+
+Each GO ID (e.g. "GO:1234567") has associated with it a GO node.  That
+GO node contains the name of the GO term, a list of the nodes directly
+above the node ("parent nodes"), and a list of the nodes directly
+below the current node ("child nodes").  The "ancestor nodes" of a
+certain node are all of the nodes that are in a path from the current
+node to the root of the ontology, with all repetitions removed.
+
+The example format is as follows:
+
+[Term]
+id: GO:0000006
+name: high affinity zinc uptake transporter activity
+namespace: molecular_function
+def: "Catalysis of the reaction: Zn2+(out) = Zn2+(in), probably powered by proton motive force." [TC:2.A.5.1.1]
+xref_analog: TC:2.A.5.1.1
+is_a: GO:0005385 ! zinc ion transporter activity
+
+
+[Term]
+id: GO:0000005
+name: ribosomal chaperone activity
+namespace: molecular_function
+def: "OBSOLETE. Assists in the correct assembly of ribosomes or ribosomal subunits in vivo, but is not a component of the assembled ribosome when performing its normal biological function." [GOC:jl, PMID:12150913]
+comment: This term was made obsolete because it refers to a class of gene products and a biological process rather than a molecular
+function. To update annotations, consider the molecular function term 'unfolded protein binding ; GO:0051082' and the biological process
+term 'ribosome biogenesis and assembly ; GO:0042254' and its children.
+is_obsolete: true
+
+=cut
+
+##################################################################
+##################################################################
+
+use strict;
+use warnings;
+use diagnostics;
+
+use base qw (GO::OntologyProvider);
+use GO::Node;
+use Storable qw (nstore);
+
+use IO::File;
+
+our $VERSION = 0.01;
+our $PACKAGE = "GO::OntologyProvider::OntologyOboParser";
+
+##################################################################
+#
+# CLASS ATTRIBUTES
+#
+##################################################################
+
+# All the following class attributes are constants, that should be
+# initialized here at compile time.
+
+my $DEBUG = 0;
+
+my $kFile           = $PACKAGE.'::__file';
+my $kAspect         = $PACKAGE.'::__aspect';
+my $kRootNode       = $PACKAGE.'::__rootNode';
+my $kNodes          = $PACKAGE.'::__nodes';
+my $kSecondaryIds   = $PACKAGE.'::__secondaryIds';
+my $kParent         = $PACKAGE.'::__parent';
+my $kCVSVersion     = $PACKAGE.'::__cvsVersion';  # mark
+my $kVersionDate    = $PACKAGE.'::__versionDate'; # mark
+
+my %kAspects = (
+		'P' => 'biological_process',
+		'F' => 'molecular_function',
+		'C' => 'cellular_component'
+		);
+
+##################################################################
+
+# The constructor, and associated initialization methods
+
+##################################################################
+sub new {
+##################################################################
+# This is the constructor for an OntologyOboParser object.
+#
+# The constructor expects one of two type of arguments, either an
+# 'ontologyFile' and 'ontology' argument , or an 'objectFile' argument.  
+# When instantiated with an ontologyFile argument, it expects the file
+# to be in obo format. When instantiated with an objectFile argument, 
+# it expects to open a previously created OboParser object that 
+# has been serialized to disk.
+#
+#
+# Usage :
+#
+#    my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile=>$file,
+#                                                        ontology=>[P|F|C]); 
+#
+#    my $ontology = GO::OntologyProvider::OboParser->new(objectFile=>$file);
+#
+
+    my ($class, %args) = @_;
+
+    my $self;
+
+    if (exists($args{'objectFile'})){
+
+	$self = Storable::retrieve($args{'objectFile'})
+
+    }elsif (exists($args{'ontologyFile'})){
+
+	$self = {};
+	
+	bless $self, $class;
+	
+	$self->__setFile($args{'ontologyFile'},
+			 $args{'aspect'});
+
+#	$self->__init($args{'termByGoidHash'}, $args{'nodeByTermHash'});    # hash from goids to terms, terms to nodes - mark
+	$self->__init(%args);    # mark
+
+    }
+   
+    return ($self);
+
+}
+
+############################################################################
+sub __setFile{
+############################################################################
+# This private method simply stores the name of the file used for
+# construction inside the object's hash
+
+    my ($self, $file, $aspect) = @_;
+
+    if (!-e $file){
+
+	die "$file does not exist";
+
+    }elsif (-d $file){
+
+	die "$file is a directory";
+
+    }elsif (!-r $file){
+
+	die "$file is not readable";
+
+    }elsif ($file !~ /\.obo/){
+
+	die "$file must have a .obo suffix";
+
+    }
+
+    if (!defined $aspect) {
+
+	die "You have to pass the GO aspect [".join("\|", sort keys %kAspects) ."] to the ", ref($self);
+
+    }
+    
+    if (!exists $kAspects{$aspect}) {
+
+	die "Unknown aspect name: $aspect. The allowable GO aspects are ". join(", ", sort keys %kAspects)."\n";
+
+    }
+
+    $self->{$kFile} = $file;
+
+    $self->{$kAspect} = $aspect;
+
+}
+
+############################################################################
+sub __file {
+############################################################################
+# This private method returns the name of the file used to construct the object
+
+    return $_[0]->{$kFile};
+
+}
+
+############################################################################
+sub __aspect {
+############################################################################
+# This private method returns the name of the ontology used to construct the object
+
+    return $_[0]->{$kAspect};
+
+}
+
+############################################################################
+sub __init { # okay
+############################################################################
+# This method initializes the ontologyOboParser object, by parsing an ontology
+# file, and storing the structures represented therein, in memory.
+
+    my $self = shift;
+    my %args = @_;
+    my $termByGoidHash = $args{termByGoidHash};   # if requested, maintain hash of goids to terms - mark
+    my $nodeByTermHash = $args{nodeByTermHash};
+
+    my $ontologyFh = IO::File->new($self->__file, q{<} )|| die "$PACKAGE can't open file ". $self->__file ." : $!";
+
+    my $aspect = $kAspects{$self->__aspect};
+
+    my @entryLine;
+
+    my $isValidEntry = 0;
+
+    my $namespace;
+    
+    my $headers = 1;
+
+    while (<$ontologyFh>){
+
+	chomp;
+
+	# first read header to get version and date -- mark
+	if ($headers) {
+	    if (/^\s*date:\s+(\d+)\:(\d+)\:(\d+)\s/o) {
+		my $date = "$2/$1/$3";
+		$self->{$kVersionDate} = $date;
+	    } elsif (/cvs.*\$\s*Revision\:\s*([\d\.]+)(\s|\$)/o) {
+		$self->{$kCVSVersion} = $1;
+	    } elsif (/^\s*$/) {
+		$headers = 0;
+	    }
+	    next;
+	}
+
+	# finish parsing the obo file of we reach the typedef line.
+
+	last if (/^\[Typedef\]/);       
+
+	if ($_ eq '[Term]') {
+
+	    # we reached a new term - so process the previous entry
+
+	    if ($isValidEntry) {
+	   
+		$self->__processNode(\@entryLine, %args);
+
+	    }
+
+	    # reset our variables
+
+	    @entryLine = ();
+
+	    $isValidEntry = 0;
+	    $namespace    = '';
+
+	}elsif ($_ eq "namespace: $aspect"){
+
+	    # term is in the requested namespace
+
+	    $namespace = $aspect;
+
+	    $isValidEntry = 1;
+	
+	}elsif ($_ eq 'is_obsolete: true'){
+
+	    # we don't want obsolete nodes - DO NOT COMMENT THIS OUT -
+	    # infinite recursion will result!
+
+	    # Note, the logic here relies on the is_obsolete line coming after the
+	    # namespace line.
+
+	    $isValidEntry = 0;
+		
+	}else {
+
+	    # build up the information for this node
+
+	    push(@entryLine, $_);
+
+	}
+
+    }
+
+    # process the final node
+
+    if ($namespace eq $aspect && $isValidEntry) {
+    
+	$self->__processNode(\@entryLine, %args);
+
+    }
+
+    $ontologyFh->close || die "Can't close ". $self->__file ." : $!";
+
+    # now populate ancestor paths for each node.
+
+    $self->__populatePaths;
+
+}
+
+############################################################################
+sub __processNode {
+############################################################################
+# This private method processes entry lines identified as a node.  
+# The general idea is that it needs three pieces of
+# information about the line to deal with it:
+#
+# 1. The name of the node.
+# 2. The GOIDs associated with the node.
+# 3. The parent node ids.
+#
+# It creates a node object for the current node and then indicates in that node 
+# the identity of its parent(s).
+
+    my ($self, $entryLineArrayRef, %args) = @_;
+    my $termByGoidHash = $args{termByGoidHash};
+    my $nodeByTermHash = $args{nodeByTermHash};
+
+    my ($nodeName, $goid, $secondaryGoidArrayRef, $parentGoidArrayRef)
+	= $self->__getNodeInfoFromLine($entryLineArrayRef, %args);
+
+    my $node = $self->__createNode($goid, $nodeName);
+
+    # to save memory when storing id -> term hash, we store only references
+    # note - this still duplicates terms since they are stored as node->term as well,
+    # but at least it does not duplicate terms for synonym goids
+    # - mark
+    my $termRef;
+
+    if (scalar (@{$parentGoidArrayRef}) == 0) { # no parent goid
+
+	# The GOA has obsoleted the 'Gene_Ontology' term, but
+	# currently we need it to make the graph work.  Thus, we'll
+	# recreate the root, using it.s original id and name.  This
+	# needs to be fixed in future.
+
+	my $rootGoid = 'GO:0003673';
+	my $rootTerm = 'Gene_Ontology';
+
+	my $rootNode = $self->__createNode($rootGoid, $rootTerm);
+
+	$self->{$kRootNode} = $rootNode;
+
+	@{$parentGoidArrayRef} = ($rootGoid);
+
+	# don't forget to save the root goid map to the root term - mark
+	if (defined($termByGoidHash)) {
+	    $termRef = $termByGoidHash->{$rootGoid} = \$rootTerm;
+#	    print "HASH $rootGoid\n";
+	}
+	if (defined($nodeByTermHash)) {
+	    $nodeByTermHash->{$rootTerm} = $self;
+	}
+
+    } else {
+
+	# save the hash of the primary id to the term - mark
+	if (defined($termByGoidHash)) {
+	    my $term = $node->term;
+	    $termRef = $termByGoidHash->{$node->goid} = \$term;
+#	    print "HASH ".$node->goid."\n";
+	}
+	if (defined($nodeByTermHash)) {
+	    $nodeByTermHash->{$node->term} = $self;
+	}
+
+    }
+
+    ## now hash any secondaries to the primary
+
+    foreach my $secondaryId (@{$secondaryGoidArrayRef}){
+
+	$self->{$kSecondaryIds}{$secondaryId} = $goid;
+
+	# don't forget to add these to the term hash - mark
+	# actually we only want primaries
+	# actually it doesn't seem to make a difference
+#	if (defined($termByGoidHash)) {
+##	    $termByGoidHash->{$goid} = $termRef;
+#	    print "NOHASH $goid\n";
+#	}
+
+    }        
+
+    $self->{$kParent}{$goid} = $parentGoidArrayRef;
+
+}
+
+############################################################################
+sub __getNodeInfoFromLine { # okay
+############################################################################
+
+# This private method takes an array reference to the lines for a
+# given GO term node entry and returns the term name, a reference
+# that points to an array of goids associated with that term name, and
+# a reference that points to an array of direct parent GOIDs.  The
+# primary goid will be the first goid returned in the list.
+#
+# Usage:
+#
+#    my ($termName, $goidArrayRef, $parentGoidArrayRef) 
+#	= $self->__getNodeInfoFromLine($entryLineArrayRef);
+
+    my ($self, $entryLineArrayRef, %args) = @_;
+
+    my ($nodeName, $goid, @secondaryGoid, @parentGoid);
+
+    foreach my $line (@{$entryLineArrayRef}) {
+
+	if ($line =~ /^id: *(GO:0*[0-9]+)$/) {
+
+	    $goid =  $1;
+
+	}elsif ($line =~ /^name: *(.+)$/) {
+
+	    $nodeName = $1;
+
+	}elsif ($line =~ /^alt_id: *(GO:0*[0-9]+)$/) {
+	    
+	    push(@secondaryGoid, $1);
+
+	}elsif ($line =~ /^(is_a:|relationship: part_of) *(GO:0*[0-9]+)/) {	    
+	
+            push(@parentGoid, $2);
+
+	}elsif (($args{linkRegulates}) && ($line =~ /^relationship: ((positively_|negatively_)?regulates) *(GO:0*[0-9]+)/)) {
+	    # optionally follow regulation links - mark
+	
+            push(@parentGoid, $3);
+
+	}
+    }
+
+    # check that we can actually get some goids.  Added this in to
+    # deal with when a broken file that appeared on the GO site, it
+    # caused me to get email saying my code was broken...
+
+    if (!$goid){
+
+	die "ERROR: There appears to be a problem with the ontology file.\n".
+	    "No GOIDs could be extracted from '$nodeName'.n\n";
+	    
+    }
+
+    # remove \'s from nodeName
+
+    $nodeName =~ s/\\//g;
+
+    return ($nodeName, $goid, \@secondaryGoid, \@parentGoid);
+
+
+}
+
+###############################################################################
+sub __createNode {
+###############################################################################
+    
+    my ($self, $goid, $nodeName) = @_;
+
+    my $node;
+
+    if ($self->__nodeIsAlreadyCreated($goid)){
+
+	$node = $self->nodeFromId($goid);
+
+    } else { # node has not already been created 
+
+	# create node
+
+	$node = GO::Node->new(goid => $goid,
+			      term => $nodeName);
+
+	# store it
+	
+	$self->{$kNodes}{$goid} = $node;
+
+    }
+
+    return $node;
+
+}
+
+###############################################################################
+sub __populatePaths {
+###############################################################################
+# in this method, we populate all the paths to the root for each node
+# in the ontology.  To do this, we have to call the recursive method,
+# __findAncestor(), which will build up each path from a node to the
+# root, and when it reaches the end of the path (the root itself),
+# will add that path via the Node method addPathToRoot.
+
+# POSSIBLE ALTERNATIVE APPROACH
+#
+# Profiling of the OboParser reveals that when building the ontology,
+# ~77% of the time is spent in the recursive __findAncestor().  Thus,
+# if a way could be found to decrease the number of recursive calls to
+# that method, it might significantly positively impact the runtime
+# performance.
+#
+# A possible alternative approach to the current method, might be to
+# simply populate paths for every leaf node (we would need to know who
+# they are), and as their paths are populated, also populate the paths
+# for their ancestors as well, as the paths from the ancestors are
+# subparts of the paths from leaves to the root.  However, care would
+# have to be taken to not add the same path twice, as there would be
+# issues with when a leaf has two or more paths to a particular node,
+# whose paths are then being added.  Note also, if you encounter a
+# node for whom you have already added paths, you don't need to add
+# them again, so this might significantly save the number of recursive
+# calls required.
+
+    my $self = shift;
+
+    # First, remove all links to missing nodes.
+    # If the parent of a node does not exist, it is probably because it is in a different namespace
+    # (or there is an error in the file). When we detect this, we must delete the link to that parent. -- mark
+
+    foreach my $childGoid (keys %{$self->{$kParent}}) {
+	for (my $p = 0; $p <= $#{$self->{$kParent}{$childGoid}}; $p++) {
+	    my $parentGoid = $self->{$kParent}{$childGoid}->[$p];
+	    my $parentNode = $self->{$kNodes}{$parentGoid};
+	    if (!$parentNode) {
+		print "REMOVING NON-EXISTENT PARENT $parentGoid FROM $childGoid\n";
+		splice(@{$self->{$kParent}{$childGoid}}, $p, 1);
+		$p--;
+	    }
+	}
+    }
+
+    # go through each GO node in the $kParent hash, the keys of which
+    # are the goids that are parents of a given node.
+
+    foreach my $childGoid ( keys %{$self->{$kParent}} ) {
+
+	# note, we directly access the kNodes hash here, rather than
+	# use nodeFromId().  This is for performance reasons only -
+	# accessing the kNodes hash directly in this method, and the
+	# __findAncestor method shaces about 40% of the runtime off of
+	# the time taken to populate all the paths.
+
+#	print $childGoid.":";
+
+	my $childNode =  $self->{$kNodes}{$childGoid};
+
+	# now go through each of this child's parents
+
+	foreach my $parentGoid (@{$self->{$kParent}{$childGoid}}) {
+
+	    ### Note, there has been a case in the obo file where
+	    ### there was an error, and a node was listed as having
+	    ### parent in a different aspect.  This results in a fatal
+	    ### run time error, as when the parser reads the file, it
+	    ### only keeps nodes of a given aspect, and is thus left
+	    ### with a dangling reference.  In this case, parentNode
+	    ### will be undef, and the call to addParentNodes ends up
+	    ### in a run time error.  We can add some logic here to
+	    ### give a better error message.
+
+	    #! This was lifted from GO-TermFinder-0.83. This will happen
+	    #! since they are now officially adding references between GO IDs
+	    #! in different aspects (at least in slims, probably others.
+	    #! We will need to find a way to deal with it. - mark
+
+	    my $parentNode = $self->{$kNodes}{$parentGoid};
+#
+#	    || do { 
+#
+#		print "There is an error in the obo file, where the relationship between ",
+#		$childNode->goid,
+#		" and one or more of its parents is not correctly defined.\n",
+#		"Please check the obo file.\n",
+#		"The program is unable to continue.\n\n";
+#		
+#		exit;
+#
+#	    };
+
+	    # not needed -- mark
+#	    #! instead, lets just ignore it
+#	    if (!$parentNode) {
+#		print "NO NODE $parentNode\n";
+#		next;
+#	    }
+
+	    ### create connections between child node and its parent
+
+	    $childNode->addParentNodes($parentNode);
+  
+	    $parentNode->addChildNodes($childNode);
+
+	    # begin to build the ancestor path, starting with this
+	    # parent
+
+	    my @path = ($parentNode);
+
+	    if (exists $self->{$kParent}{$parentGoid}){
+
+		# if this parent has parents, then we continue to
+		# build the path upwards to the root.  We pass in the
+		# child node, so that each path which reaches the root
+		# can be added during the recursive calls to find
+		# ancestor
+
+		$self->__findAncestors($childNode,
+				       $parentGoid,
+				       \@path); 
+
+	    }else{
+
+		# otherwise, the path only contains the root, and we add it.
+
+		$childNode->addPathToRoot(@path);
+
+	    }
+
+	}
+
+#	print "\n";
+
+    }
+
+}
+
+#######################################################################  
+sub __findAncestors { 
+#######################################################################  
+# Usage:
+#
+#    $self->__findAncestor($childNode,
+#                          $parentGoid, 
+#                          $pathArrayRef);
+#
+# This method looks through each goid in hash %{$self->{$kParent}} to
+# find all ancestors and push everything to @{$pathArrayRef}..And if
+# there is no ancestor found for the $parentGoid, it just add the path
+# to the child node.
+
+    my ($self, $childNode, $parentGoid, $pathArrayRef) = @_;
+
+    # go through each immediate parent of the passed in parent 
+
+#    print " ".$parentGoid;
+
+    # If the parent of the passed in parent does not exist, it is probably because it is in a different namespace
+    # (or there is an error in the file). When we detect this, we must delete the link to that parent. -- mark
+
+    foreach my $ancestorGoid (@{$self->{$kParent}{$parentGoid}}) {
+
+# for testing - no longer needed -- mark
+#	if ($ancestorGoid eq "GO:0050789") {
+#	    print "EXISTS $ancestorGoid\n";
+#	}
+
+# for testing - no longer needed -- mark
+#	if (!exists $self->{$kNodes}{$ancestorGoid}) {
+#	    print "NOT EXISTS $ancestorGoid\n";
+#	}
+
+	# add the ancestor node to our path to the root which is being
+	# built
+
+	push (@{$pathArrayRef}, $self->{$kNodes}{$ancestorGoid});
+
+	if (exists $self->{$kParent}{$ancestorGoid}){
+
+	    # if this ancestor has parents, continue building the
+	    # paths to the root recursively up the DAG
+
+	    $self->__findAncestors($childNode,
+				   $ancestorGoid, 
+				   $pathArrayRef);
+
+	}else {
+
+
+	    $childNode->addPathToRoot(reverse @{$pathArrayRef});
+
+	}
+
+	# because there are multiple paths to the root for most nodes,
+	# we have now remove the current ancestor from this time
+	# through the loop so that the path is reset to the original
+	# condition that it was in when passed in to this method
+
+	pop @{$pathArrayRef};
+
+    }
+
+}
+
+############################################################################
+sub __nodeIsAlreadyCreated { # okay
+############################################################################
+# This private method returns a boolean to indicate whether a node has
+# already been created for a given GO ID.
+
+
+    return (exists($_[0]->{$kNodes}{$_[1]}));
+
+}
+
+############################################################################
+sub printOntology{
+############################################################################
+# This prints out the ontology, with redundancies.
+
+    my $self = shift;
+
+    $self->__printNode($self->rootNode, 0);
+
+}
+
+############################################################################
+sub __printNode{
+############################################################################
+# This recursive function prints the name of the specified node and the 
+# names of all of its descendants.
+#
+
+    my ($self, $node, $indentationLevel) = @_;
+
+    print " " x $indentationLevel, $node->term, " ; ", $node->goid, "\n";
+
+    foreach my $childNode (sort {$a->term cmp $b->term} $node->childNodes) {
+
+	$self->__printNode($childNode, $indentationLevel+1);
+
+    }
+
+}
+
+############################################################################
+sub allNodes{
+############################################################################
+# This method returns an array of all the nodes that have been created.
+#
+# Usage:
+#
+#    my @nodes = $ontologyParser->allNodes;
+
+    return (values %{$_[0]->{$kNodes}});
+
+}   
+
+############################################################################
+sub rootNode{
+############################################################################
+# This returns the root node in the ontology.
+#
+# Usage:
+#
+#    my $rootNode = $ontologyParser->rootNode;
+
+    return ($_[0]->{$kRootNode});
+
+}
+
+############################################################################
+sub nodeFromId{
+############################################################################
+# This public method takes a GOID and returns the GO::Node that
+# it corresponds to.  It should also work with secondary id's
+#
+# Usage :
+#
+#    my $node = $ontologyParser->nodeFromId($goid);
+
+    my ($self, $goid) = @_;
+
+    if (exists ($self->{$kNodes}{$goid})){ # it's a primary
+
+	return ($self->{$kNodes}{$goid});
+	
+    }elsif (exists ($self->{$kSecondaryIds}{$goid})){ # it's a secondary
+
+	return $self->{$kNodes}{$self->{$kSecondaryIds}{$goid}};
+
+    }else{
+
+	return undef;
+
+    }
+
+}
+
+############################################################################
+sub numNodes{
+############################################################################
+# This public method returns the number of nodes that exist with the
+# ontology
+#
+# Usage :
+#
+#    my $numNodes = $ontologyParser->numNodes;
+
+    return scalar (keys %{$_[0]->{$kNodes}});
+
+}
+
+############################################################################
+sub serializeToDisk {
+############################################################################
+# Saves the current state of the Ontology Parser Object to a file,
+# using the Storable package.  Saves in network order for portability,
+# just in case.  Returns the name of the file.  If no filename is
+# provided, then the name of the file (and it's directory, if one was
+# provided) used for object construction, will be used, with .obj
+# appended.  If the object was instantiated from a file with a .obj
+# suffix, then the same filename would be used, if none were provided.
+#
+# This method currently causes a segfault on MacOSX (at least 10.1.5
+# -> 10.2.3), with perl 5.6, and Storable 1.0.14, when trying to store
+# the process ontology.  This failure occurs using either store, or
+# nstore, and is manifested by a segmentation fault.  It has not been
+# investigated whether this is a perl problem, or a Storable problem
+# (which has large amounts of C-code).  This does not cause a
+# segmentation on Solaris, using perl 5.6.1 and Storable 1.0.13.  This
+# doesn't make it clear whether it's a MacOSX problem or a perl
+# problem or not.  It should be noted that newer versions of both perl
+# and Storable exist, and the code should be tested with those as
+# well.
+#
+# Usage:
+#
+#    my $objectFile = $ontologyParser->serializeToDisk(filename=>$filename);
+
+    my ($self, %args) = @_;
+
+    my $fileName;
+
+    if (exists ($args{'filename'})){ # they supply their own filename
+
+	$fileName = $args{'filename'};
+
+    }else{ # we build a name from the file used to instantiate ourselves
+
+	$fileName = $self->__file;
+	
+	if ($fileName !~ /\.obj$/){ # if we weren't instantiated from an object
+	    
+	    $fileName .= ".obj"; # add a .obj suffix to the name
+	    
+	}
+
+    }
+
+    nstore ($self, $fileName) || die "$PACKAGE could not serialize itself to $fileName : $!";
+
+    return ($fileName);
+
+}
+
+
+sub version {
+    my $self = shift;
+    return $self->{$kCVSVersion};
+}
+
+
+sub versionDate {
+    my $self = shift;
+    return $self->{$kVersionDate};
+}
+
+
+1; # to keep perl happy
+
+
+# P O D   D O C U M E N T A T I O N #
+
+=pod
+
+=head1 Instance Constructor
+
+=head2 new
+
+This is the constructor for an OboParser object.  The constructor
+expects one of two arguments, either an 'ontologyFile' argument, or an
+'objectFile' argument.  When instantiated with an ontologyFile
+argument, it expects it to correspond to an obo file created by the GO
+consortium, according to their file format, and in addition, also
+requires an 'aspect' argument.  When instantiated with an objectFile
+argument, it expects to open a previously created ontologyParser
+object that has been serialized to disk (see serializeToDisk).
+
+Usage:
+
+    my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => $ontologyFile,
+                                                        aspect       => $aspect);
+
+    my $ontology = GO::OntologyProvider::OboParser->new(objectFile   => $objectFile);
+
+=head1 Instance Methods
+
+=head2 printOntology
+
+This prints out the ontology, with redundancies, to STDOUT.  It does
+not yet print out all of the ontology information (like relationship
+type etc).  This method will be likely be removed in a future version,
+so should not be relied upon.
+
+Usage:
+
+    $ontologyParser->printOntology;
+
+=head2 allNodes
+
+This method returns an array of all the GO:Nodes that have been
+created.
+
+Usage:
+
+    my @nodes = $ontologyParser->allNodes;
+
+=head2 rootNode
+
+This returns the root node in the ontology.
+
+    my $rootNode = $ontologyParser->rootNode;
+
+=head2 nodeFromId
+
+This public method takes a GOID and returns the GO::Node that
+it corresponds to.
+
+Usage :
+
+    my $node = $ontologyParser->nodeFromId($goid);
+
+If the GOID does not correspond to a GO node, then undef will be
+returned.  Note if you try to call any methods on an undef, you will
+get a fatal runtime error, so if you can't guarantee all GOIDs that
+you supply are good, you should check that the return value from this
+method is defined.
+
+=head2 numNodes
+
+This public method returns the number of nodes that exist with the
+ontology
+
+Usage :
+
+    my $numNodes = $ontologyParser->numNodes;
+
+=head2 serializeToDisk
+
+Saves the current state of the Ontology Parser Object to a file, using
+the Storable package.  Saves in network order for portability, just in
+case.  Returns the name of the file.  If no filename is provided, then
+the name of the file (and its directory, if one was provided) used for
+object construction, will be used, with .obj appended.  If the object
+was instantiated from a file with a .obj suffix, then the same
+filename would be used, if none were provided.
+
+This method currently causes a segfault on MacOSX (at least 10.1.5 ->
+10.2.3), with perl 5.6, and Storable 1.0.14, when trying to store the
+process ontology.  This failure occurs using either store, or nstore,
+and is manifested by a segmentation fault.  It has not been
+investigated whether this is a perl problem, or a Storable problem
+(which has large amounts of C-code).  This does not cause a
+segmentation on Solaris, using perl 5.6.1 and Storable 1.0.13.  This
+does not make it clear whether it is a MacOSX problem or a perl
+problem or not.  It should be noted that newer versions of both perl
+and Storable exist, and the code should be tested with those as well.
+
+Usage: 
+
+    my $objectFile = $ontologyParser->serializeToDisk(filename=>$filename);
+
+=head1 Authors
+
+    Gavin Sherlock; sherlock@genome.stanford.edu
+    Elizabeth Boyle; ell@mit.edu
+    Shuai Weng; shuai@genome.stanford.edu
+
+=cut
diff --git a/www/lib/GO/README b/www/lib/GO/README
new file mode 100644
index 0000000..7f57d04
--- /dev/null
+++ b/www/lib/GO/README
@@ -0,0 +1 @@
+Some of these files are from the GO::TermFinder perl module, modified to support features of these tools. They must override those provided by GO::TermFinder.
diff --git a/www/lib/GO/TermFinder.pm b/www/lib/GO/TermFinder.pm
new file mode 100644
index 0000000..8cc75c8
--- /dev/null
+++ b/www/lib/GO/TermFinder.pm
@@ -0,0 +1,2134 @@
+package GO::TermFinder;
+
+# File        : TermFinder.pm
+# Author      : Gavin Sherlock
+# Date Begun  : December 31st 2002
+
+# $Id: TermFinder.pm,v 1.6 2015/01/29 00:22:38 mark Exp $
+
+# License information (the MIT license)
+
+# Copyright (c) 2003-2006 Gavin Sherlock; Stanford University
+
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+=pod
+
+=head1 NAME
+
+GO::TermFinder - identify GO nodes that annotate a group of genes with a significant p-value
+
+=head1 DESCRIPTION
+
+This package is intended to provide a method whereby the P-values of a
+set of GO annotations can be determined for a set of genes, based on
+the number of genes that exist in the particular genome (or in a
+selected background distribution from the genome), and their
+annotation, and the frequency with which the GO nodes are annotated
+across the provided set of genes.  The P-value is simply calculated
+using the hypergeometric distribution as the probability of x or more
+out of n genes having a given annotation, given that G of N have that
+annotation in the genome in general.  We chose the hypergeometric
+distribution (sampling without replacement) since it is more accurate,
+though slower to calculate, than the binomial distribution (sampling
+with replacement).
+
+In addition, a corrected p-value can be calculated, to correct for
+multiple hypothesis testing.  The correction factor used is the total
+number of nodes to which the provided list of genes are annotated,
+excepting any nodes which have only a single annotation in the
+background, as a priori, we know that these cannot be significantly
+enriched.  The client has access to both the corrected and uncorrected
+values.  It is also possible to correct the p-value using 1000
+simulations, which control the Family Wise Error Rate - using this
+option suggests that the Bonferroni correction is in fact somewhat
+liberal, rather than conservative, as might be expected.  Finally, the
+False Discovery Rate can also be calculated.
+
+The general idea is that a list of genes may have been identified for
+some reason, e.g. they are co-regulated, and TermFinder can be used to
+find out if any nodes annotate the set of genes to a level which is
+extremely improbable if the genes had simply been picked at random.
+
+=head1 TODO
+
+1.  May want the client to decide the behavior for ambiguous names,
+    rather than having it hard coded (e.g. always ignore; use if
+    standard name (current implementation); use all databaseIds for
+    the ambiguous name; decide on a case by case basis (potentially
+    useful if running on command line)).
+
+2.  Create new GO::Hypothesis and GO::HypothesisSet objects, so that
+    it is easier to access the information generated about the p-value
+    etc. of any particular GO node that annotates a set of genes.
+
+3.  Instead of all the global variables, $k..., replace them with
+    constants, which may improve runtime, as the optimizer should
+    optimize the hash look ups to look like hard-coded strings at
+    runtime, rather than variable lookups.
+
+4.  Lots of other stuff....
+
+=cut
+
+use strict;
+use warnings;
+use diagnostics;
+
+use vars qw ($PACKAGE $VERSION $WARNINGS);
+
+use GO::Node;
+use GO::TermFinder::Native;
+
+$VERSION = '0.83';
+$PACKAGE = 'GO::TermFinder';
+
+$WARNINGS = 1; # toggle this to zero if you don't want warnings
+
+# class variables
+
+my @kRequiredArgs = qw (annotationProvider ontologyProvider aspect);
+
+my $kArgs                     = $PACKAGE.'::__args';
+my $kPopulationNamesHash      = $PACKAGE.'::__populationNamesHash';
+my $kBackgroundDatabaseIds    = $PACKAGE.'::__backgroundDatabaseIds';
+my $kTotalGoNodeCounts        = $PACKAGE.'::__totalGoNodeCounts';
+my $kGoCounts                 = $PACKAGE.'::__goCounts';
+my $kGOIDsForDatabaseIds      = $PACKAGE.'::__goidsForDatabaseIds';
+my $kDatabaseIds              = $PACKAGE.'::__databaseIds';
+my $kTotalNumAnnotatedGenes   = $PACKAGE.'::__totalNumAnnotatedGenes';
+my $kCorrectionMethod         = $PACKAGE.'::__correctionMethod';
+my $kShouldCalculateFDR       = $PACKAGE.'::__shouldCalculateFDR';
+my $kPvalues                  = $PACKAGE.'::__pValues';
+my $kDatabaseId2OrigName      = $PACKAGE.'::__databaseId2OrigName';
+my $kDistributions            = $PACKAGE.'::__distributions';
+my $kDiscardedGenes           = $PACKAGE.'::__discardedGenes';
+my $kDirectAnnotationToAspect = $PACKAGE.'::__directAnnotationToAspect';
+
+# the methods by which the p-value can be corrected
+
+my %kAllowedCorrectionMethods = ('bonferroni' => undef,
+				 'none'       => undef,
+				 'simulation' => undef);
+
+# set up a GO node that corresponds to anything passed in that has no
+# annotation
+
+my $kUnannotatedNode = GO::Node->new(goid => "GO:XXXXXXX",
+				     term => "unannotated");
+
+my $kFakeIdPrefix    = "NO_DETERMINED_DATABASE_ID_";
+
+
+#####################################################################
+sub new{
+#####################################################################
+=pod
+
+=head1 Instance Constructor
+
+=head2 new
+
+This is the constructor.  It expects to be passed named arguments for
+an annotationProvider, and an ontologyProvider.  In addition, it must
+be told the aspect of the ontology provider, so that it knows how to
+query the annotationProvider.
+
+There are also some additional, optional arguments:
+
+population:
+
+This argument allows a client to indicate the population that should
+used to calculate a background distribution of GO terms.  In the
+absence of population argument, then the background distribution will
+be drawn from all genes in the annotationProvider.  This should be
+provided as an array reference, and no ambiguous names should be
+provided (see AnnotationProvider for details of name ambiguity).  This
+option is particularly pertinent in a case where for example you
+assayed only 2000 genes in a two hybrid experiment, and found 20
+interesting ones.  To find significant terms, you need to do it in the
+context of the genes that you assayed, not in the context of all genes
+with annotation.
+
+Note, new in version 0.71, if you provided a population as the
+background distribution from which genes have been drawn, any genes
+provided to the findTerms method that are not in the background
+distribution will be discarded from the calculations.  The identity of
+these genes can be retrieved using the discardedGenes() method, after
+the findTerms() method has been called.
+
+totalNumGenes:
+
+This argument allows a client to indicate that the size of the
+background distribution is in fact larger that the number of genes
+that exist in the annotation provider, and the extra genes are merely
+assumed to be entirely unannotated.
+
+NB: This is an API change, as totalNumGenes was previously required.
+
+Thus - if using 'population', the total number of genes considered as
+the background will be the number of genes in the provided population.
+If not using 'population', then the number of genes that will be
+considered as the total population will be the number of genes in the
+annotationProvider.  However, if the totalNumGenes argument is
+provided, then that number will be used as the size of the population.
+If it is not larger than the total number of genes in the
+annotationParser, then the number of genes in the annotationParser
+will be used.  The totalNumGenes and the population arguments are
+mutually exclusive, and both should not be provided at the same time.
+
+Usage ($num is larger than the number of genes with annotations):
+
+   my $termFinder = GO::TermFinder->new(annotationProvider=> $annotationProvider,
+                                        ontologyProvider  => $ontologyProvider,
+                                        totalNumGenes     => $num,
+                                        aspect            => <P|C|F>);
+
+
+Usage (use all annotated genes as population):
+
+   my $termFinder = GO::TermFinder->new(annotationProvider=> $annotationProvider,
+                                        ontologyProvider  => $ontologyProvider,
+                                        aspect            => <P|C|F>);
+
+Usage (use a subset of genes as the background population):
+
+   my $termFinder = GO::TermFinder->new(annotationProvider=> $annotationProvider,
+                                        ontologyProvider  => $ontologyProvider,
+                                        population        => \@genes,
+                                        aspect            => <P|C|F>);
+
+=cut
+
+    my ($class, %args) = @_;
+
+    my $self = {};
+
+    bless $self, $class;
+
+    $self->__checkAndStoreArgs(%args);
+
+    $self->__init; # initialize counts for all GO nodes
+
+    return $self;
+
+}
+
+#####################################################################
+sub __checkAndStoreArgs{
+#####################################################################
+# This private method simply checks that all the required arguments
+# have been provided, and stores them within the object
+
+    my ($self, %args) = @_;
+
+    # first check that the required arguments were provided
+
+    foreach my $arg (@kRequiredArgs){
+
+	if (!exists ($args{$arg})){
+
+	    die "You did not provide a $arg argument.";
+
+	}elsif (!defined ($args{$arg})){
+
+	    die "Your $arg argument is not defined";
+
+	}
+
+	$self->{$kArgs}{$arg} = $args{$arg}; # store in object
+
+    }
+
+    # store the population, and also create a hash of the population
+    # names for quick look up
+
+    if (exists($args{'population'})){
+
+	$self->{$kArgs}{'population'} = $args{'population'};
+
+	my %population;
+
+	foreach my $name (@{$args{'population'}}){
+
+	    $population{$name} = undef;
+
+	}
+
+	$self->{$kPopulationNamesHash} = \%population;
+
+    }
+
+    if (exists($args{'totalNumGenes'})){
+
+	$self->{$kArgs}{'totalNumGenes'} = $args{'totalNumGenes'};
+
+    }
+
+    # now check that we didn't get a funky combination
+
+    if (exists($args{'population'}) && exists($args{'totalNumGenes'})){
+
+	die "ERROR: The population and totalNumGenes arguments are mutually exclusive, but you have provided both.";
+
+    }
+
+}
+
+#####################################################################
+sub __init{
+#####################################################################
+# This private method determines all counts to all GO nodes, as the
+# background frequency of annotations in the genome
+
+    my ($self) = @_;
+
+    # first we determine the databaseIds for the background
+    # distribution
+
+    my @databaseIds;
+
+    if ($self->__isUsingPopulation){
+
+	# we need to get databaseids for the provided population
+
+	my ($databaseIdsRef, $databaseId2OrigNameRef) = $self->__determineDatabaseIdsFromGenes($self->__population);
+
+	@databaseIds = @{$databaseIdsRef};	
+
+    }else{
+
+	# we simply use all databaseIds from the annotationProvider
+
+	@databaseIds = $self->__annotationProvider->allDatabaseIds();
+
+    }
+
+    my $populationSize = scalar(@databaseIds);
+
+    # check that they said there's at least as many genes in total
+    # as the annotation provider says that there is.    
+
+    if (! defined $self->totalNumGenes){
+
+	# in this case, no 'totalNumGenes' argument was provided
+
+	$self->{$kArgs}{totalNumGenes} = $populationSize;
+
+    }elsif ($populationSize > $self->totalNumGenes){
+
+	# in this case, they are using an annotation provider, and
+	# have provided a totalNumGenes that is less than the number
+	# of genes that the annotation provider knows about
+
+	if ($WARNINGS){
+
+	    print STDERR "The annotation provider indicates that there are more genes than the client indicated.\n";
+	    print STDERR "The annotation provider indicates there are $populationSize, while the client indicated only ", $self->totalNumGenes, ".\n";
+	    print STDERR "Thus, assuming the correct total number of genes is that indicated by the annotation provider.\n";
+
+	}
+
+	$self->{$kArgs}{totalNumGenes} = $populationSize;
+
+    }
+
+    # now determine the level of annotation for each GO node in the
+    # population of genes to be used as the background distribution
+    my $totalNodeCounts = $self->__buildHashRefOfAnnotations(\@databaseIds);
+
+    # adjust those counts if needs be
+
+    if ($populationSize < $self->totalNumGenes){
+
+    	# if there are extra, entirely unannotated genes (indicated by
+    	# the total number of genes provided being greater than the
+    	# number that existed in the annotation provider), we must
+    	# make sure that it's treated that they will at least be
+    	# annotated to the root (Gene Ontology), and its immediate
+    	# child (which is the name of the Ontology, eg
+    	# Biological_process, Molecular_function, and
+    	# Cellular_component), and the 'unannotated' node
+
+	# so simply add extra annotations
+
+	my $rootNodeId  = $self->__ontologyProvider->rootNode->goid;
+	
+	my $childNodeId = ($self->__ontologyProvider->rootNode->childNodes())[0]->goid;
+
+	$totalNodeCounts->{$rootNodeId} = $self->totalNumGenes;
+
+	$totalNodeCounts->{$childNodeId} += ($self->totalNumGenes - $populationSize);
+
+	$totalNodeCounts->{$kUnannotatedNode->goid} += ($self->totalNumGenes - $populationSize);
+
+    }
+
+    # and now store the information
+
+    $self->{$kTotalGoNodeCounts}      = $totalNodeCounts;
+
+    $self->{$kTotalNumAnnotatedGenes} = $populationSize;
+
+    # set the discarded genes to be a reference to an empty list
+    # (technically they shouldn't ask to retrieve the discarded genes
+    # before calling findTerms, but this will prevent such behavior
+    # from being fatal
+
+    $self->{$kDiscardedGenes} = []; 
+
+    # store a hash of the databaseIDs that are in the background set of genes
+
+    my %databaseIds;
+
+    foreach my $databaseId (@databaseIds){
+
+	$databaseIds{$databaseId} = undef;
+
+    }
+
+    $self->{$kBackgroundDatabaseIds} = \%databaseIds;
+
+    # create a Distributions object, which has C code for all the various 
+    # Math that we will do.
+
+    $self->{$kDistributions} = GO::TermFinder::Native::Distributions->new($self->totalNumGenes);
+
+}
+
+=pod
+
+=head1 Instance Methods
+
+=cut
+
+#####################################################################
+sub findTerms{
+#####################################################################
+=pod
+
+=head2 findTerms
+
+This method returns an array of hash references, one for each GO::Node
+that was tested as a hypothesis, that indicates which terms annotate
+the list of genes with what P-values.  The contents of the hashes in
+the returned array depend on some of the run time options.  They are:
+
+    key                   value
+    -------------------------------------------------------------------------
+
+Always Present:
+
+    NODE                  A GO::Node
+
+    PVALUE		  The P-value for having the observed number of
+                          annotations that the provided list of genes
+                          has to that node.
+
+    NUM_ANNOTATIONS       The number of genes within the provided list that
+                          are annotated to the node.
+
+    TOTAL_NUM_ANNOTATIONS The number of genes in the population (total
+                          or provided) that are annotated to the node.
+
+    ANNOTATED_GENES       A hash reference, whose keys are the
+                          databaseIds that are annotated to the node,
+                          and whose values are the original name
+                          supplied to the findTerms() method.
+
+Present if corrected p-values are calculated:
+
+    CORRECTED_PVALUE      The CORRECTED_PVALUE is the PVALUE, but corrected
+                          for multiple hypothesis testing, due to the
+                          fact that you are more likely to generate
+                          significant looking p-values if you test a
+                          lot of hypotheses.  See below for details of
+                          how this pvalue is calculated, and the
+                          options associated with it.
+
+Present if p-values were corrected by simulation:
+
+    NUM_OBSERVATIONS      The number of simulations in which a p-value as
+                          good as this one, or better, was observed.
+
+Present if the False Discovery Rate is calculated:
+
+    FDR_RATE              The False Discovery Rate - this is a fraction 
+                          of how many of the nodes with p-values as good or 
+			  better than the node with this FDR would be expected 
+			  to be false positives.
+
+    FDR_OBSERVATIONS      The average number of nodes during simulations 
+                          that had an uncorrected p-value as good or better
+			  than the p-value of this node.
+
+    EXPECTED_FALSE_POSITIVES The expected number of false positives if this node
+                             is chosen as the cut-off.
+
+The entries in the returned array are sorted by increasing p-value
+(i.e. least likely is first).  If there is a tie in the p-value, then
+the sort order is determined by GOID, using a cmp comparison.
+
+findTerm() expects to be passed, by reference, a list of gene names
+for which terms will be found.  If a passed in name is ambiguous (see
+AnnotationProvider), then the following will occur:
+
+    1) If the name can be used as a standard name, it will assume that
+       it is that.
+
+    2) Otherwise it will not use it.
+
+Currently a warning will be printed to STDOUT in the case of an
+ambiguous name being used.
+
+The passed in gene names are converted into a list of databaseIds.  If
+a gene does not map to a databaseId, then an undef is put in the list
+- however, if the same gene name, which does not map to a databaseId,
+is used twice then it will produce only one undef in the list.  If
+more than one gene name maps to the same databaseId (either because
+you used the same name twice, or you used an alias as well), then that
+databaseId is only put into the list once, and a warning is printed.
+
+If a gene name does not have any information returned from the
+AnnotationProvider, then it is assumed that the gene is entirely
+unannotated.  For these purposes, TermFinder annotates such genes to
+the root node (Gene_Ontology), its immediate child (which indicates
+the aspect of the ontology (such as biological_process), and a dummy
+go node, corresponding to unannotated.  This node will have a goid of
+'GO:XXXXXXX', and a term name of 'unannotated'.  No other information
+will be set up for this GO::Node, so you should not count on being
+able to retrieve it.  What it does mean is that you can determine if
+the predominant feature of a set of genes is that they have no
+annotation.
+
+If more genes are provided that have been indicated exist in the
+genome (as provided during object construction), then an error message
+will be printed out, and an empty list will be returned.
+
+In addition, it is possible that for a small list of genes, that no
+hypotheses will be tested - in this case, those genes will only have
+annotated nodes with a count of 1, other than the Gene_Ontology node
+itself, and the node corresponding to the aspect of the ontology.
+Neither of these are considered for p-value testing, as a priori they
+must have a p-value of 1.
+
+MULTIPLE HYPOTHESIS CORRECTION
+
+An optional argument, 'correction' may be used, which indicates what
+method of multiple hypothesis correction should be used.  Multiple
+hypothesis correction attempts to keep the overall chance of getting
+any false positives at the same level (e.g. 0.05).  Acceptable values
+are:
+
+bonferroni, none, simulation
+
+ : 'bonferroni' will correct the p-values by using as the correction
+    factor the total number of nodes to which the provided list of
+    genes are annotated, either directly or indirectly, excepting any
+    nodes that are annotated only once in the background distribution,
+    as, a priori, these cannot be overrepresented.
+
+ : 'none' will perform no multiple hypothesis correction
+
+ : 'simulation' will run 1000 simulations with random lists of genes
+   (the same size as the originally provided gene list), and determine
+   a corrected value by how many simulations produced a p-value better
+   than the p-value associated with one of the real hypotheses.
+   E.g. if a node from the real data has a p-value of 0.05, but a
+   p-value that good or better is generated in 500 out of 1000 trials,
+   the corrected pvalue will be 0.5.  In the case that a p-value
+   generated from a real list of genes is never seen in the
+   simulations, it will be given a corrected p-value of < 0.001, and
+   the NUM_OBSERVATIONS attribute of the hypothesis will be 0.  Using
+   this option takes 1000 time as long!
+
+The default for this argument, if not provided, is bonferroni.
+
+FALSE DISCOVERY RATE
+
+As a way of preempting the potential problems of using p-values
+corrected for multiple hypothesis testing, the False Discovery Rate
+can instead be calculated, and you can instead set your cutoff based
+on an acceptable false discovery rate, such as 0.01 (1%), or 0.05 (5%)
+etc.  Thus, the optional argument 'calculateFDR' can be used.  A
+non-zero value means that the False Discovery Rate will be calculated
+for each node, such that you can determine, if you chose your p-value
+cut-off at that node, what the FDR would be.  The FDR is calculated by
+running 50 simulations, and counting the average number of times a
+p-value as good or better that a p-value generated from the real data
+is seen.  This is used as the numerator.  The denominator is the
+number of p-values in the real data that are as good or better than
+it.
+
+Usage example - in this example, the default (Bonferroni) correction
+is used to calculate a corrected p-value, and in addition, the False
+Discovery Rate is also calculated:
+
+    my @pvalueStructures = $termFinder->findTerms(genes        => \@genes,
+                                                  calculateFDR => 1);
+
+    my $hypothesis = 1;						    
+
+    foreach my $pvalue (@pvalueStructures){
+
+    print "-- $hypothesis of ", scalar @pvalueStructures, "--\n",
+
+	"GOID\t", $pvalue->{NODE}->goid, "\n",
+
+	"TERM\t", $pvalue->{NODE}->term, "\n",
+
+	"P-VALUE\t", $pvalue->{PVALUE}, "\n",
+
+	"CORRECTED P-VALUE\t", $pvalue->{CORRECTED_PVALUE}, "\n",
+
+        "FALSE DISCOVERY RATE\t", $pvalue->{FDR_RATE}, "\n",
+	
+        "NUM_ANNOTATIONS\t", $pvalue->{NUM_ANNOTATIONS}, " (of ", $pvalue->{TOTAL_NUM_ANNOTATIONS}, ")\n",
+
+        "ANNOTATED_GENES\t", join(", ", values (%{$pvalue->{ANNOTATED_GENES}})), "\n\n";
+
+        $hypothesis++;
+
+    }
+
+If a background population had been provided when the object was
+constructed, you should check to see if any of your genes for which
+you are finding terms were discarded, due to not being found in the background 
+population, e.g.:
+
+    my @pvalueStructures = $termFinder->findTerms(genes        => \@genes,
+                                                  calculateFDR => 1);
+
+    my @discardedGenes = $termFinder->discardedGenes;
+
+    if (@discardedGenes){
+
+        print "The following genes were not considered in the pvalue
+calculations, as they were not found in the provided background
+population.\n\n", join("\n", @discardedGenes), "\n\n";
+
+    }
+
+=cut
+
+    my ($self, %args) = @_;
+
+    # let's check that they have provided the required information
+
+    $self->__checkAndStoreFindTermsArgs(%args);
+    
+    # now we determine all the count for direct and indirect
+    # annotations for the provided list of genes.
+
+    $self->{$kGoCounts} = $self->__buildHashRefOfAnnotations([$self->genesDatabaseIds]);
+
+    # now we have these counts, and because we determined the counts
+    # of the background distribution during object construction, we
+    # can determine the p-values for the annotations of our list of
+    # genes of interest.
+
+    $self->__calculatePValues;
+
+    # now we want to add in which genes were annotated to each node
+    # so that the client can determine them
+
+    $self->__addAnnotationsToPValues;
+
+    # now what we want to do is calculate pvalues that are corrected
+    # for multiple hypothesis testing, unless it is specifically
+    # requested not to.
+
+    $self->__correctPvalues unless ($self->__correctionMethod eq 'none');    
+
+    # now calculate the False Discovery Rate, if requested to
+
+    $self->__calculateFDR if ($self->__shouldCalculateFDR);
+
+    return $self->__pValues;
+
+}
+
+#####################################################################
+sub __checkAndStoreFindTermsArgs{
+#####################################################################
+# This private method checks the arguments that are passed into the
+# findTerms() method, and stores various variables internally.
+
+    my ($self, %args) = @_;
+
+    # check they gave us a list of genes
+
+    if (!exists ($args{'genes'})){
+
+	die "You must provide a genes argument";
+
+    }elsif (!defined ($args{'genes'})){
+
+	die "Your genes argument is undefined";
+
+    }
+
+    # see if they gave us an allowable method by which to correct for
+    # multiple hypotheses
+
+    $self->{$kCorrectionMethod} = $args{'correction'} || 'bonferroni';
+
+    if (!exists $kAllowedCorrectionMethods{$self->__correctionMethod}){
+
+	die $self->__correctionMethod." is not an allowed correction method.  Use one of :". 
+
+	    join(", ", keys %kAllowedCorrectionMethods);
+
+    }
+
+    # store whether to calculate the FDR
+
+    if (exists $args{'calculateFDR'} && $args{'calculateFDR'} != 0){
+
+	$self->{$kShouldCalculateFDR} = 1;
+
+    }else{
+
+	# default is not to calculate it
+	
+	$self->{$kShouldCalculateFDR} = 0;
+
+    }
+
+    # what we want to do now is build up an array of identifiers that
+    # are unambiguous - ie databaseIds
+    #
+    # This means that when retrieving GOID's, we can always retrieve
+    # them by databaseId, which is unambiguous.
+
+    my ($databaseIdsRef, $databaseId2OrigNameRef) = $self->__determineDatabaseIdsFromGenes($args{'genes'});
+    
+    # now we want to make sure that if they provided a population as
+    # the background, then all of the provided genes that are being
+    # tested for enriched GO terms are sampled from that population
+
+    my @discardedGenes;
+
+    if ($self->__isUsingPopulation){
+
+	my @missingIds;
+
+	# go through each databaseID, and see if it is in the databaseIDs
+	# associated with the GO counts for the background population.  If
+	# it's a fake ID, then see if the original name is in the names
+	# that were passed in.
+	
+	foreach my $databaseId (@{$databaseIdsRef}){
+
+	    # if it's a fake databaseId, we have to see if the orig
+	    # name was in the provided population, otherwise, if it's
+	    # a real databaseId, check that the databaseId is in the
+	    # background
+
+	    if ((
+
+		 $databaseId =~ /^$kFakeIdPrefix/o &&
+		 !$self->__origNameInPopulation($databaseId2OrigNameRef->{$databaseId}))
+
+		||
+
+		!$self->__databaseIdIsInBackground($databaseId)){
+
+		push(@missingIds, $databaseId);
+
+	    }
+
+	}
+
+	# Now see if we have any missing names
+
+	# If we have as many missing names as there were genes
+	# provided, then we'll die, as there is nothing that can be
+	# done, as no gene remain for any enrichment calculations
+
+	if (@missingIds == @{$databaseIdsRef}){
+
+	    die "ERROR: None of the genes provided for analysis are found in the background population. Please check your background list.\n";
+
+	}
+
+	# Otherwise, we will print a warning that genes were
+	# discarded, but we also provide an API for them to retrieve
+	# the names of genes that were discarded.
+
+	if (@missingIds){
+
+	    if ($WARNINGS){
+
+		print STDERR "\nThe following names in the provided list of genes do not have a\n",
+
+		"counterpart in the background population that you provided.\n",
+
+		"These genes will not be used in the analysis for enriched GO terms.\n\n";
+
+		foreach my $databaseId (@missingIds){
+
+		    print STDERR $databaseId2OrigNameRef->{$databaseId}, "\n";		    
+
+		}
+
+		print STDERR "\n";
+
+	    }
+
+	    # now we have to actually remove them from the list of
+	    # considered genes
+
+	    # create a dummy hash of the databaseIds, delete the
+	    # elements, and then assign the remaining keys back to the
+	    # $databaseIdsRef
+
+	    # we'll also remember it
+
+	    my %dummyDatabaseIdsHash = %{$databaseId2OrigNameRef};
+
+	    foreach my $databaseId (@missingIds){
+
+		push (@discardedGenes, $databaseId2OrigNameRef->{$databaseId});
+
+		delete $dummyDatabaseIdsHash{$databaseId};
+
+	    }
+
+	    $databaseIdsRef = [keys %dummyDatabaseIdsHash]
+
+	}
+
+    }
+
+    # now remember the genes that were discarded
+
+    $self->__setDiscardedGenes(\@discardedGenes);
+
+    # now store the databaseIDs for the genes that can be used to
+    # determine enriched GO terms in the self object
+
+    $self->{$kDatabaseIds} = $databaseIdsRef;
+
+    # also store the mapping of the databaseId to its original name
+
+    $self->{$kDatabaseId2OrigName} = $databaseId2OrigNameRef;
+
+    # note, we need to provide the client with a way of determining
+    # how many genes were used when calculating p-values for
+    # annotations
+
+    if (scalar ($self->genesDatabaseIds) > $self->totalNumGenes){
+
+	if ($WARNINGS){
+
+	    print "You have provided a list corresponding to ", scalar ($self->genesDatabaseIds), "genes, ",
+	    
+	    "yet you have indicated that there are only ", $self->totalNumGenes, " in the genome.\n";
+	    
+	    print "No probabilities can be calculated.\n";
+
+	}
+
+	return (); # simply return an empty list
+
+    }
+
+
+
+}
+
+#####################################################################
+sub discardedGenes {
+#####################################################################
+=pod
+
+=head2 discardedGenes
+
+This method returns an array of genes which were discarded from the
+pvalue calculations, because they could not be found in the background
+population.  It should only be called after findTerms.  It will either
+return an empty list, if no genes were discarded, or an array of genes
+that were discarded.
+
+Usage:
+
+    my @pvalueStructures = $termFinder->findTerms(genes        => \@genes,
+                                                  calculateFDR => 1);
+
+    my @discardedGenes = $termFinder->discardedGenes;
+
+    if (@discardedGenes){
+
+        print "The following genes were not considered in the pvalue
+calculations, as they were not found in the provided background
+population.\n\n", join("\n", @discardedGenes), "\n\n";
+
+    }
+
+=cut
+
+    return @{$_[0]->{$kDiscardedGenes}};
+
+}
+
+
+#
+# PRIVATE INSTANCE METHODS
+#
+
+#####################################################################
+sub __databaseIdIsInBackground{
+#####################################################################
+# This private method will return a Boolean to indicate whether the
+# supplied databaseId is in the set of databaseIds determined for the
+# background set of genes.  Note, it does not check if the databaseId
+# is a fake one, so the client should do that if it needs to
+
+    return exists $_[0]->{$kBackgroundDatabaseIds}{$_[1]};
+
+}
+
+#####################################################################
+sub __isUsingPopulation{
+#####################################################################
+# This private method returns a boolean to indicate whether the client
+# passed in a population of genes to use as the background distribution
+
+    return exists $_[0]->{$kArgs}{population};
+
+}
+
+#####################################################################
+sub __population{
+#####################################################################
+# This private method returns a reference to an array of identifiers
+# that were passed in to be used as a background population
+
+    return $_[0]->{$kArgs}{population};
+
+}
+
+#####################################################################
+sub __origNameInPopulation{
+#####################################################################
+# This private method returns a Boolean to indicate whether the
+# provided name is in the list of names that were provided as a
+# background population
+
+    return exists $_[0]->{$kPopulationNamesHash}{$_[1]};
+
+}
+
+#####################################################################
+sub __setDiscardedGenes{
+#####################################################################
+# This private method will store the passed in array reference, which
+# points to a list of genes that had to be discarded.
+
+    $_[0]->{$kDiscardedGenes} = $_[1];
+
+}
+
+#####################################################################
+sub __totalNumAnnotatedGenes{
+#####################################################################
+# This private method returns the number of genes that have any annotation,
+# as determined from the AnnotationProvider.  This is set during object
+# initialization.
+
+    return $_[0]->{$kTotalNumAnnotatedGenes};
+}
+
+
+#####################################################################
+sub __numAnnotatedNodesInBackground{
+#####################################################################
+# This private method returns the number of nodes in the ontology that
+# have any annotation in the background distribution.  This is stored
+# during object initialization as a hash of GOID to the number of
+# counts.
+
+    return scalar keys %{$_[0]->{$kTotalGoNodeCounts}};
+
+}
+
+#####################################################################
+sub __allGoIdsForBackground{
+#####################################################################
+# This private method returns as an array all the GOIDs of nodes in
+# the ontology that have any annotation in the background
+# distribution.  This is stored during object initialization as a hash
+# of GOID to the number of counts.
+
+    return keys %{$_[0]->{$kTotalGoNodeCounts}};
+
+}
+
+#####################################################################
+sub genesDatabaseIds{
+#####################################################################
+=pod 
+
+=head2 genesDatabaseIds
+
+This method returns an array of databaseIds corresponding to the genes
+that were used for the findTerms() method.  Thus it allows a client to
+find out how many actual entities their list of genes that were passed
+in mapped to, e.g. they may have passed in the same thing with two
+different names.  Using this method, immediately following use of the
+findTerms method, they will determine how many genes their list
+collapsed to.
+
+=cut
+
+    return @{$_[0]->{$kDatabaseIds}};
+
+}
+
+#####################################################################
+sub __origNameForDatabaseId{
+#####################################################################
+# This method returns the original name that was provided to the term
+# finder for the databaseId that it was translated to.
+
+    return $_[0]->{$kDatabaseId2OrigName}->{$_[1]};
+
+}
+
+#####################################################################
+sub __pValues{
+#####################################################################
+# This method returns an array of pValues structures
+
+    return @{$_[0]->{$kPvalues}};
+
+}
+
+#####################################################################
+sub __correctionMethod{
+#####################################################################
+# This method returns the name of the method by which the client has
+# chosen to have their p-values corrected - either none, bonferroni,
+# custom, or simulation.
+
+    return $_[0]->{$kCorrectionMethod};
+
+}
+
+#####################################################################
+sub __shouldCalculateFDR{
+#####################################################################
+# This method returns a boolean, to indicate whether the false discovery
+# rate should be calculated
+
+    return $_[0]->{$kShouldCalculateFDR};
+
+}
+
+#####################################################################
+sub __determineDatabaseIdsFromGenes{
+#####################################################################
+# This method determines a list of databaseIds for a list of genes
+# passed in by reference.  It then returns a reference to that list,
+# and a reference to a hash that maps the databaseIds to the
+# originally supplied name
+#
+# If more than one gene maps to the same databaseId, then the
+# databaseId is only put in the list once, and a warning is printed.
+#
+# If a gene does not map to a databaseId, then an undef is put in the
+# list - however, if the same gene name, which does not map to a
+# databaseId, is used twice then it will produce only one undef in the
+# list.
+#
+# In addition, it removes leading and trailing whitespace from supplied
+# gene names (assuming they should have none) and will skip any names that
+# are either empty, or whitespace only.
+
+    my ($self, $genesRef) = @_;
+
+    my (@databaseIdsList, $databaseId, %databaseIds, %genes, %duplicates, %warned);
+
+    foreach my $gene (@{$genesRef}){
+
+	# strip leading and trailing spaces
+
+	$gene =~ s/^\s+//;
+	$gene =~ s/\s+$//;
+
+	next if $gene eq ""; # skip empty names
+
+	# skip and warn if we've already seen the gene
+
+	if (exists ($genes{$gene})){
+
+	    if ($WARNINGS && !exists($warned{$gene})){
+
+		print "The gene name '$gene' was used more than once.\n";
+		print "It will only be considered once.\n\n";
+
+		$warned{$gene} = undef;
+
+	    }
+
+	    next; # just skip to the next supplied gene
+
+	}
+
+	# determine if the gene is ambiguous
+
+	if ($self->__annotationProvider->nameIsAmbiguous($gene)){
+
+ 	    print "$gene is an ambiguous name.\n" if $WARNINGS;
+
+	    if ($self->__annotationProvider->nameIsStandardName($gene)){
+
+		if ($WARNINGS){
+
+		    print "Since $gene is used as a standard name, it will be assumed to be one.\n\n";
+	
+		}
+
+		# returns undef if not found or ambiguous
+		$databaseId = $self->__annotationProvider->databaseIdByStandardName($gene);
+
+		if ($databaseId) {  #! mark
+		    push (@databaseIdsList, $databaseId);
+		} else {
+		    print "No database ID could be found for $gene.\n\n";
+		}
+		
+	    }else{
+
+		if ($WARNINGS){
+
+		    print "Since $gene is an ambiguous alias, it will not be used.\n\n";
+		
+		}
+
+		# Without a next here, we could end up popping some other
+		# database Id from @databaseIds, since this one was never pushed,
+		# and $databaseId contains a previous value.
+		# Do this or set $databaseId to undef at the end of the loop (or both) -- mark
+		next;
+
+	    }
+	    
+	}else{
+
+	    # note, if the gene has no annotation, then we will want
+	    # to create a fake databaseId, that we can easily
+	    # recognize, and will have to make sure that we deal with
+	    # this later when getting annotations.
+
+	    $databaseId = $self->__annotationProvider->databaseIdByName($gene);
+
+	    # if the total number of genes is equal to the number of
+	    # things with some annotation, then there should be no
+	    # genes that do not return a databaseId.  If this is the
+	    # case, we will warn them.
+
+	    if (!defined $databaseId){
+
+		# If we've already defined the total number of genes
+		# with annotation, and it's equal to the number of
+		# genes for the background distribution, and we're not
+		# using a population, we'll print a warning, as under
+		# these circumstances we shouldn't not get a
+		# databaseId.
+
+		if (defined ($self->__totalNumAnnotatedGenes) && 
+		    $self->__totalNumAnnotatedGenes == $self->totalNumGenes &&
+		    $WARNINGS &&
+		    !$self->__isUsingPopulation){
+
+		    print "\nThe name '$gene' did not correspond to an entry from the AnnotationProvider.\n";
+		    print "However, the client has indicated that all genes have annotation.\n";
+		    print "You should probably check that '$gene' is a real name.\n\n";
+
+		}
+
+		# Now we need to deal with the lack of databaseId
+		# We'll simply create a fake one, that we can easily
+		# recognize later, so we can deal with it accordingly
+
+		$databaseId = $kFakeIdPrefix.$gene;
+
+	    }
+
+	    push (@databaseIdsList, $databaseId);
+
+	}
+
+	# if we have a databaseId that we've already seen, we want to
+	# make sure we only consider it once.
+
+	if (defined ($databaseId) && exists($databaseIds{$databaseId})){
+
+	    pop (@databaseIdsList); # get rid of the extra
+
+	    # and let's remember what it was, as well as the previous
+	    # name associated with this databaseId, so we can give an
+	    # appropriate warning
+
+	    $duplicates{$databaseId}{$gene} = undef;
+	    $duplicates{$databaseId}{$databaseIds{$databaseId}} = undef;
+
+	}
+
+	# remember the databaseId and gene, in case we see them again
+
+	$databaseIds{$databaseId} = $gene if (defined ($databaseId));
+	$genes{$gene}             = undef;
+	$databaseId               = undef;   # -- mark
+
+    }
+
+
+    if (%duplicates && $WARNINGS){
+	
+	print "The following databaseIds were represented multiple times:\n\n";
+
+	foreach my $duplicate (sort keys %duplicates){
+
+	    print $duplicate, " represented by ", join(", ", (sort keys %{$duplicates{$duplicate}})), "\n";
+
+	}
+
+	print "\nEach of these databaseIds will only be considered once.\n";
+
+    }
+
+    # return databaseIds, and their mapping to the originally supplied
+    # name
+
+    return (\@databaseIdsList, \%databaseIds);
+
+}
+
+############################################################################
+sub __buildHashRefOfAnnotations{
+############################################################################
+# This private method takes a reference to an array of databaseIds and
+# calculates the level of annotations for all GO nodes that those
+# databaseIds have either direct or indirect annotation for.  It
+# returns a reference to a hash of GO node counts, with the goids
+# being the keys, and the number of annotations they have from the
+# list of databaseId's being the values.
+
+    my ($self, $databaseIdsRef) = @_;
+
+    my %goNodeCounts;
+
+    # keep track of how many are annotated to the aspect node
+    # (e.g. such as molecular function).  See comments for
+    # __allGOIDsForDatabaseId for more information
+
+    my $aspectNodeDirectAnnotations = 0;
+
+    my $aspectNodeGoid = ($self->__ontologyProvider->rootNode->childNodes())[0]->goid;
+
+    # If gene has no annotation, annotate it to the top node
+    # (Gene_Ontology), and its immediate child (the aspect itself) and
+    # the 'unannotated' node.
+
+    my @noAnnotationNodes = ($aspectNodeGoid, 
+			     $self->__ontologyProvider->rootNode->goid,
+			     $kUnannotatedNode->goid);
+
+    foreach my $databaseId (@{$databaseIdsRef}) {
+
+	# get goids count, if the databaseId is not a fake one
+
+	my $goidsRef;
+
+	if ($databaseId !~ /^$kFakeIdPrefix/o){
+
+	    $goidsRef = $self->__allGOIDsForDatabaseId($databaseId);
+
+	}
+
+	if (!defined $goidsRef || !(@{$goidsRef})) { 
+
+	    # If gene has no annotation, annotate it to the top node
+	    # (Gene_Ontology), and its immediate child (the aspect itself)
+	    # and the 'unannotated' node, which we cached earlier.
+
+	    $goidsRef = [@noAnnotationNodes];
+
+	    # now cache the goids for the unnannotated genes.  The
+	    # ones that were annotated, had their goids cached in the
+	    # __allGOIDsForDatabaseId.  It is an optimization to take
+	    # care of that there, but this here.
+
+	    $self->{$kGOIDsForDatabaseIds}->{$databaseId} = $goidsRef;
+	}
+
+	# increment count for all goids appearing in @goids;
+
+	foreach my $goid (@{$goidsRef}) {
+
+	    $goNodeCounts{$goid}++;
+
+	}
+
+	# keep count of how many are directly annotated to the aspect node
+
+	if (exists ($self->{$kDirectAnnotationToAspect}{$databaseId})){
+
+	    $aspectNodeDirectAnnotations++;
+
+	}
+
+    }
+
+    # now we'd like to replace the counts for the aspect annotations,
+    # so that they only refer to the direct annotations, rather than
+    # direct and indirect annotations
+
+    $goNodeCounts{$aspectNodeGoid} = $aspectNodeDirectAnnotations;
+
+    return \%goNodeCounts;
+
+}
+
+############################################################################
+sub __allGOIDsForDatabaseId{
+############################################################################
+# This method returns a reference to an array of all GOIDs to which a
+# databaseId is annotated, whether explicitly, or implicitly, by
+# virtue of the GO node being an ancestor of an explicitly annotated
+# one.  The returned array contains no duplicates.
+
+# Because the Gene Ontology no longer has the unknown terms, then
+# direct annotation to the aspect node (e.g. molecular function),
+# means what annotation to the unknown terms previously meant.  But,
+# as all nodes are descendents of the aspect node, then enrichment for
+# this node will never happen, unless we only look for enrichment of
+# direct annotations to this node.  Thus, in this method, we also
+# record which databaseIds are directly annotated to the aspect node, which
+# will be used elsewhere.
+
+    my ($self, $databaseId) = @_;
+    
+    # cache aspect's ID, so we don't have to repeatedly retrieve it
+
+    my $aspectId = ($self->__ontologyProvider->rootNode->childNodes())[0]->goid; # 
+
+    # generate list of GOIDs if not cached
+    
+    if (!exists($self->{$kGOIDsForDatabaseIds}->{$databaseId})) {
+	
+	my %goids; # so we keep the list unique	
+
+        # go through the direct annotations
+
+	foreach my $goid (@{$self->__annotationProvider->goIdsByDatabaseId(databaseId => $databaseId,
+									   aspect     => $self->aspect)}){
+
+	    # just in case an annotation is to a goid not present in the ontology
+	    my $node = $self->__ontologyProvider->nodeFromId($goid);
+
+	    if (!$node) {
+
+		if ($WARNINGS){
+
+		    print STDERR "\nWarning : $goid, used to annotate $databaseId with an aspect of ".$self->aspect.", does not appear in the provided ontology.\n";
+		    
+		}
+		    
+		# don't record annotations to this goid
+
+		next;
+
+	    }
+
+	    # in case this was a seconday id, switch to primary id -- mark
+	    $goid = $node->goid;
+
+	    # record the goid and its ancestors
+
+	    $goids{$goid} = undef;
+
+	    foreach my $ancestor ($self->__ontologyProvider->nodeFromId($goid)->ancestors){
+
+		$goids{$ancestor->goid} = undef;
+
+	    }
+
+	    # record in the self object if it's directly annotated to the aspectId
+
+	    if ($goid eq $aspectId){
+
+		$self->{$kDirectAnnotationToAspect}{$databaseId} = undef;
+
+	    }
+
+	}    
+	
+	# cache the value
+	
+	$self->{$kGOIDsForDatabaseIds}->{$databaseId} = [keys %goids];
+    }
+    
+    return ($self->{$kGOIDsForDatabaseIds}->{$databaseId});
+    
+}
+
+#####################################################################
+sub __calculatePValues{
+#####################################################################
+# This method actually determines the p-values of the various levels
+# of annotation for the particular GO nodes, and stores them within
+# the object.
+
+    my $self = shift;
+
+    my $numDatabaseIds = scalar $self->genesDatabaseIds;
+
+    my @pvalueArray;
+
+    # cache so we don't have to repeatedly look it up
+
+    my $rootGoid = $self->__ontologyProvider->rootNode->goid;
+
+    # each node we consider here must have at least one annotation in
+    # our list of provided genes.
+
+    foreach my $goid ($self->__allGoIdsForList) {
+
+	# skip the root node, as it has to have a probability of 1,
+	# but don't skip its immediate child though, as we now test
+	# for enriched direct annotations
+
+	next if ($goid eq $rootGoid);
+
+	# skip any that has only one (or zero - could happen for the
+	# aspect goid, as we replaced its counts) annotation in the
+	# background distribution, as by definition these cannot be
+	# overrepresented
+
+	next if ($self->__numAnnotationsToGoId($goid) <= 1);
+
+	# if we get here, we should calculate a p-value for this node
+
+	my $pv = $self->__processOneGOID($goid, $numDatabaseIds);
+	push (@pvalueArray, $pv) if ($pv);
+
+    }
+
+    # now sort the pvalueArray by their pValues.  If the values are the same,
+    # then sort by goid (text based comparison).
+
+    @pvalueArray = sort {$a->{PVALUE}     <=> $b->{PVALUE} ||
+			 $a->{NODE}->goid cmp $b->{NODE}->goid } @pvalueArray;
+
+    $self->{$kPvalues} = \@pvalueArray;
+
+}
+
+############################################################################
+sub __processOneGOID{
+############################################################################
+# This processes one GOID.  It determines the number of annotations to
+# the current GOID, and the P-value of that number of annotations.
+# The pvalue is calculated as the probability of observing x or more
+# positives in a sample on n, given that there are M positives in a
+# population of N.  This is calculated using the hypergeometric
+# distribution.
+#
+# It returns a hash reference encoding that information.
+
+    my ($self, $goid, $n) = @_;
+
+    my $M = $self->__totalNumAnnotationsToGoId($goid);
+#    return undef if (!$M);   # sometimes happens with "unannotated" node - mark
+    my $x = $self->__numAnnotationsToGoId($goid);
+    my $N = $self->totalNumGenes();
+
+    # If not using a background population, query identifiers that are not found in the annotation
+    # will appear in x but not M. We need to add them to M, otherwise M could be < x. With a
+    # background population, those query identifiers are removed before the analysis.
+    if ($goid eq $kUnannotatedNode->goid) {
+	$M += $x;
+    }
+
+    my $pvalue = $self->{$kDistributions}->pValueByHypergeometric($x, $n, $M, $N);
+
+#    my $node = $self->__ontologyProvider->nodeFromId($goid) || $kUnannotatedNode;
+    my $node;
+    $node = $self->__ontologyProvider->nodeFromId($goid);
+    if (!$node) {
+	$node = $kUnannotatedNode;
+    }
+
+    my $hashRef = {
+	
+	NODE                  => $node,
+	PVALUE		      => $pvalue,
+	NUM_ANNOTATIONS       => $x,
+	TOTAL_NUM_ANNOTATIONS => $M,
+	CORRECTED_PVALUE      => $pvalue,    # this will get changed later if a correction method is used --mark
+
+	};
+
+    return $hashRef;
+
+}
+
+############################################################################
+sub __numAnnotationsToGoId{
+############################################################################
+# This private method returns the number of annotations to a
+# particular GOID for the list of genes supplied to the findTerms
+# method.
+
+    my ($self, $goid) = @_;
+
+    return $self->{$kGoCounts}->{$goid};
+
+}
+
+############################################################################
+sub __totalNumAnnotationsToGoId{
+############################################################################
+# This returns the total number of genes that have been annotated to a
+# particular GOID based on all annotations.
+
+    my ($self, $goid) = @_;
+
+    return $self->{$kTotalGoNodeCounts}->{$goid};
+}
+
+############################################################################
+sub totalNumGenes{
+############################################################################
+=pod
+
+=head2 totalNumGenes
+
+This returns the total number of genes that are in the background set
+of genes from which the genes of interest were drawn.  Unannotated
+genes are included in this count.
+
+=cut
+
+    return $_[0]->{$kArgs}{totalNumGenes};
+
+}
+
+############################################################################
+sub __allGoIdsForList{
+############################################################################
+# This returns an array of GOIDs to which genes in the passed in gene
+# list were directly or indirectly annotated.
+
+    return keys %{$_[0]->{$kGoCounts}};
+
+}
+
+############################################################################
+sub __correctPvalues{
+############################################################################
+# This method corrects the pvalues for multiple hypothesis testing, by
+# dispatching to the appropriate method based on what method was
+# requested for hypothesis correction.
+
+    my $self = shift;
+
+    my $correctionMethod = "__correctPvaluesBy".$self->__correctionMethod;
+
+    $self->$correctionMethod;
+
+}
+
+#####################################################################
+sub __correctPvaluesBybonferroni{
+#####################################################################
+# This method corrects the p-values using a Bonferroni correction,
+# where the correction factor is the total number of nodes for which
+# we tested whether there was significant enrichment
+
+    my $self = shift;
+
+    # now correct the pvalues with the correction factor
+
+    my $correctionFactor = scalar(@{$self->{$kPvalues}});
+
+    # no correction needs to be done if there is 0 or 1 hypotheses
+    # that were tested
+
+    # print "Applying Bonferroni correction factor of ".$correctionFactor."\n";
+
+    if ($correctionFactor > 1){
+
+	# simply go through each hypothesis and calculate the corrected
+	# p-value by multiplying the uncorrected p-value by the number of
+	# nodes in the ontology
+
+	foreach my $hypothesis ($self->__pValues){
+
+	    $hypothesis->{CORRECTED_PVALUE} = $hypothesis->{PVALUE} * $correctionFactor;
+
+	    # make sure we have a ceiling of 1
+
+	    $hypothesis->{CORRECTED_PVALUE} = 1 if ($hypothesis->{CORRECTED_PVALUE} > 1);
+
+	}
+
+    }
+
+}
+
+############################################################################
+sub __correctPvaluesBysimulation{
+############################################################################
+# This method corrects the P-values based on a thousand random trials,
+# using the same number of genes for each trial as was used in the
+# client query.  A p-value will be corrected based on the number of
+# simulations in which that p-value was seen, e.g. if an uncorrected
+# p-value of 0.05 or better was observed in 100 of 1000 trials, the
+# corrected value will be 0.1 (100/1000).
+
+    my $self = shift;
+
+    # when we run any simulation, any of the variables that get
+    # modified during the findTerms method will be trampled on - thus
+    # we have to save them away, and then restore them afterwards
+
+    my $variables = $self->__saveVariables();
+
+    # we will need access to the real hypotheses - we'll reverse them
+    # for now, as it makes them easier when we use them later on
+
+    my @realHypotheses = reverse @{$self->{$kPvalues}};
+
+    # now let's get the population from which we will sample genes
+    # randomly
+
+    my @names = $self->__samplingPopulation;
+
+    my $populationSize = scalar @names;
+
+    # now get the number of genes in the original test set
+    # for which terms were found.
+    
+    my $numGenes = scalar $self->genesDatabaseIds;
+
+    # now we can finally run the simulations
+
+    my $numSimulations = 1000;
+
+    for (my $i = 1; $i <= $numSimulations; $i++) {
+
+	# run simulation
+
+	my @pvals = $self->__runOneSimulation(\@names, $numGenes, $populationSize);
+
+	# go onto a new simulation if no hypothese resulted (which is
+	# possible if the randomly selected genes did not have more
+	# than one annotation to any particular GO node)
+
+	next if !@pvals;
+
+	# now we look at the best pvalue for the random genes, and
+	# determine whether it is more significant that any of the
+	# p-values generated for the real genes.  We will keep a count
+	# of how many times we see a p-value that is better than one
+	# calculated with the real genes, on a per simulation basis
+
+	# if we go through the p-values for the real nodes in reverse
+	# order (we reversed them above), then we can quit out of the
+	# loop as soon as we have a p-value better than the best one
+	# generated from the random genes
+
+	foreach my $realHypothesis (@realHypotheses){
+
+	    # skip examining, if the real pvalue is better than the
+	    # best one for the random genes
+
+	    last if ($pvals[0]->{PVALUE} > $realHypothesis->{PVALUE});
+
+	    # if we get here, we know that this simulation has generated
+	    # a P_VALUE that is better than the P_VALUE for the currently
+	    # considered hypothesis.  We'll simply keep count for now
+
+	    $realHypothesis->{NUM_OBSERVATIONS}++;
+
+	}
+
+    }
+
+    # now we've run all the simulations, we should be able to simply divide
+    # the observed frequency by the number of simulations.
+
+    foreach my $realHypothesis (@realHypotheses){
+
+	if (exists $realHypothesis->{NUM_OBSERVATIONS}){
+
+	    $realHypothesis->{CORRECTED_PVALUE} = $realHypothesis->{NUM_OBSERVATIONS}/$numSimulations;
+
+	}else{
+
+	    # a pvalue better than this wasn't observed in any
+	    # simulation - just record the minimum
+
+	    $realHypothesis->{CORRECTED_PVALUE} = 1/$numSimulations;
+
+	    # and say that we never saw it
+
+	    $realHypothesis->{NUM_OBSERVATIONS} = 0;
+
+	}
+
+    }
+
+    @realHypotheses = reverse @realHypotheses;
+
+    # now restore the variables
+
+    $self->__restoreVariables($variables);
+
+    # finally replace the hypotheses with our local copy, which we've
+    # made some modifications to
+
+    $self->{$kPvalues} = \@realHypotheses;
+
+}
+
+############################################################################
+sub __saveVariables{
+############################################################################
+# This private method returns a hash containing various of the
+# instance variables that might get trampled on during a simulation
+
+    my ($self) = @_;
+
+    my %variables;
+
+    my @keys = ($kCorrectionMethod, $kShouldCalculateFDR, $kDatabaseIds, 
+		$kDatabaseId2OrigName, $kGoCounts, $kPvalues, $kDiscardedGenes);
+
+    foreach my $key (@keys){
+
+	$variables{$key} = $self->{$key};
+
+    }
+
+    return \%variables;
+
+}
+
+############################################################################
+sub __restoreVariables{
+############################################################################
+# This private method uses a passed in hash (by reference) to restore
+# variables within the instance
+
+    my ($self, $hashRef) = @_;
+
+    foreach my $key (%{$hashRef}){
+
+      $self->{$key} = $hashRef->{$key};
+
+    }
+
+}
+
+############################################################################
+sub __samplingPopulation{
+############################################################################
+# This private method returns an array of id's that should be used as
+# the sampling population for the simulation
+
+    my $self = shift;
+
+    # we will need to pick genes randomly from the background
+    # population.  Note that population may be larger than the
+    # databaseIds that are referenced in the annotations file - if so,
+    # we have to be able to randomly select unannotated genes too
+
+    # alternatively, the user may have specified a population of genes
+    # that define the background - in which case we should pick only
+    # from that population
+
+    my @names;
+
+    if ($self->__isUsingPopulation){
+
+	@names = @{$self->__population};
+
+    }else{
+
+	# we simply use all databaseIds from the annotationProvider
+
+	@names = $self->__annotationProvider->allDatabaseIds();
+
+    }
+
+    # note the population size
+
+    my $populationSize;
+
+    if (! defined $self->totalNumGenes){
+
+	$populationSize = scalar @names;
+
+    }else{
+
+	$populationSize = $self->totalNumGenes;
+
+    }
+
+    # now, if the population from which we should sample is bigger
+    # that the number of databaseIds which we have to sample from, we
+    # want to expand the the list of databaseIds with some fake ones,
+    # that correspond to unnannotated genes.
+
+    my $numDatabaseIds = scalar @names;
+
+    for (my $n = $numDatabaseIds; $n < $populationSize; $n++){
+	
+	push (@names, $kFakeIdPrefix.$n);
+	    
+    }
+
+    return @names;
+
+}
+
+############################################################################
+sub __runOneSimulation{
+############################################################################
+# This method runs a single simulation of GO::TermFinder, and returns the 
+# generated hypotheses.  It requires a reference to a list of genes that
+# should be used to sample from, the number of genes that should be chosen,
+# and the size of the background distribution
+
+    my ($self, $namesRef, $numGenes, $populationSize) = @_;
+
+    # first get a random list of genes
+
+    my $listRef = $self->__listOfRandomGenes($namesRef, $numGenes, $populationSize);	
+
+    # now we have a list of genes, we can findTerms for them
+	
+    # however, we have to make sure that for these guys, we attempt
+    # no p-value correction, otherwise we will infinitely recurse,
+    # and make sure that we don't ask to calculate the FDR
+    
+    my @pvals = $self->findTerms(genes        => $listRef,
+				 correction   => 'none',
+				 calculateFDR => 0);
+
+    # now return the hypotheses
+
+    return (@pvals);
+
+}
+
+############################################################################
+sub __listOfRandomGenes{
+############################################################################
+# This private method returns a reference to an array of randomly
+# chosen genes from a population that was passed in by reference
+
+    my ($self, $namesRef, $numGenes, $populationSize) = @_;
+
+    # create an array with as many indices as there are genes in the
+    # background set of genes from which those of interest were drawn
+
+    my @indices;
+
+    for (my $i = 0; $i < $populationSize; $i++){
+
+	$indices[$i] = $i;
+
+    }
+
+    # now sample those indices, removing sampled elements as we go.
+    # Use the randomly chosen index to get a random gene, and select
+    # as many random genes as were in the test set
+
+    my @list;
+
+    for (my $i = 0; $i < $numGenes; $i++) {
+
+	my $index = int(rand(scalar(@indices))); # random number between 0 and last array index.
+
+	my $selectedIndex = splice(@indices, $index, 1); # Remove the randomly selected element from the array.
+
+	push(@list, $namesRef->[$selectedIndex]);
+
+    }
+
+    return \@list;
+
+}
+
+############################################################################
+sub __calculateFDR{
+############################################################################
+# This method calculates the false discovery rate for each hypothesis,
+# such that you know if you draw your cut-off at a particular node,
+# what the false discovery rate is.  It does 50 simulations with
+# random genes, and calculates on average the percentage of nodes that
+# exceed a given value in the simulation, compared to the number that
+# exceed that p-value in the real data.
+
+    my $self = shift;
+
+    # when we run any simulation, any of the variables that get
+    # modified during the findTerms method will be trampled on - thus
+    # we have to save them away, and then restore them afterwards
+
+    my $variables = $self->__saveVariables();
+
+    # we will need access to the real hypotheses
+
+    my @realHypotheses = @{$self->{$kPvalues}};
+
+    # now let's get the population from which we will sample genes
+    # randomly
+
+    my @names = $self->__samplingPopulation;
+
+    my $populationSize = scalar @names;
+
+    # now get the number of genes in the original test set
+    # for which terms were found.
+    
+    my $numGenes = scalar $self->genesDatabaseIds;
+
+    # now we can finally run the simulations
+
+    my $numSimulations = 50;
+
+    for (my $i = 1; $i <= $numSimulations; $i++) {
+
+	# now run a simulation
+
+	my @pvals = $self->__runOneSimulation(\@names, $numGenes, $populationSize);
+
+	# go onto a new simulation if no hypotheses resulted (which is
+	# theoretically possible if the randomly selected genes did
+	# not have more than one annotation to any particular GO node)
+
+	next if !@pvals;
+
+	# now we look at the best pvalue for the random genes, and
+	# determine whether it is more significant that any of the
+	# p-values generated for the real genes.  We will keep a count
+	# of how many times we see a p-value that is better than one
+	# calculated with the real genes, on a per simulation basis
+
+	# if we go through the p-values for the real nodes in reverse
+	# order (we reversed them above), then we can quit out of the
+	# loop as soon as we have a p-value better than the best one
+	# generated from the random genes
+
+	foreach my $realHypothesis (@realHypotheses){
+
+	    # count the number of nodes that this simulation has
+	    # generated a P_VALUE that is better than the P_VALUE for
+	    # the currently considered hypothesis.
+
+	    foreach my $pval (@pvals){
+
+		# finish considering this real hypothesis as soon as
+		# we see a pvalue that is worse from the simulated
+		# data
+
+		last if ($pval->{PVALUE} > $realHypothesis->{PVALUE});
+
+		# if we get here, our simulated pvalue must exceed the
+		# pvalue associated with the real hypothesis
+
+		$realHypothesis->{FDR_OBSERVATIONS}++;
+		
+	    }
+
+	}
+
+    }
+
+    # now we've run all the simulations, and counted for each real
+    # hypothesis how many hypotheses from the simulations were better,
+    # we calculate on average how many were better per simulation,
+    # then divide by the number of hypotheses as good or better in our
+    # real data.  We threshold this at a maximum of 1, as we can't
+    # have a FDR of greater than 100%
+
+    foreach (my $i = 0; $i < @realHypotheses; $i++){
+
+	if (exists $realHypotheses[$i]->{FDR_OBSERVATIONS}){
+
+	    # the rate is the average number in the simulations that
+	    # are better than this pvalue, divided by the number that
+	    # are better in the real data
+
+	    $realHypotheses[$i]->{FDR_OBSERVATIONS} /= $numSimulations;
+
+	    $realHypotheses[$i]->{FDR_RATE} = $realHypotheses[$i]->{FDR_OBSERVATIONS} / ($i + 1);
+
+	    if ($realHypotheses[$i]->{FDR_RATE} > 1){
+
+		$realHypotheses[$i]->{FDR_RATE} = 1;
+
+	    }	    
+
+	}else{
+
+	    # a pvalue better than this wasn't observed in any
+	    # simulation - so the FDR should be 0
+
+	    $realHypotheses[$i]->{FDR_RATE} = 0;
+
+	    # and say that we never saw it
+
+	    $realHypotheses[$i]->{FDR_OBSERVATIONS} = 0;
+
+	}
+
+	# now based on the FDR, and the number of hypotheses that would
+	# be chosen at this point, we can calculate the expected number of
+	# false positives, as the FDR x the number of hypotheses
+
+	$realHypotheses[$i]->{EXPECTED_FALSE_POSITIVES} = $realHypotheses[$i]->{FDR_RATE} * ($i+1);	
+
+    }
+
+    # now restore the variables
+
+    $self->__restoreVariables($variables);
+
+    # finally we want to replace our real hypotheses with our local
+    # copy, as we've made some changes
+
+    $self->{$kPvalues} = \@realHypotheses;
+
+}
+
+############################################################################
+sub __addAnnotationsToPValues{
+############################################################################
+# This method looks through the annotated nodes, and adds in information
+# about which genes are annotated to them, so that the client can retrieve
+# that information.
+
+    my $self = shift;
+
+    # to do this, we can take advantage of the fact that all the
+    # nodes should have all their databaseIds cached, and we can
+    # retrieve them through the __allGOIDsForDatabaseId() method
+
+    # first go through the annotated nodes, and simply hash the goid to the
+    # entry in the pValues array
+
+    my %nodeToIndex;
+
+    for (my $i = 0; $i < @{$self->{$kPvalues}}; $i++){
+    
+	$nodeToIndex{$self->{$kPvalues}->[$i]->{NODE}->goid} = $i;
+
+    }
+
+    # now go through each databaseId, and add the information in
+
+    foreach my $databaseId ($self->genesDatabaseIds) {
+
+	# look at all goids for this database id
+
+	foreach my $goid (@{$self->__allGOIDsForDatabaseId($databaseId)}){
+	    
+	    next if (! exists $nodeToIndex{$goid}); # this node wasn't a hypothesis
+	    
+	    # if this goid was a hypothesis, we can annotate the
+	    # corresponding hypothesis with the gene
+
+	    $self->{$kPvalues}->[$nodeToIndex{$goid}]->{ANNOTATED_GENES}->{$databaseId} = $self->__origNameForDatabaseId($databaseId);
+	    
+	}
+
+    }
+
+}
+
+############################################################################
+sub __annotationProvider{
+############################################################################
+# This private method returns the annotationProvider that was used
+# during construction.
+
+    return $_[0]->{$kArgs}{annotationProvider};
+
+}
+
+############################################################################
+sub __ontologyProvider{
+############################################################################
+# This private methid returns the ontologyProvider that was used
+# during construction.
+
+    return $_[0]->{$kArgs}{ontologyProvider};
+
+}
+
+############################################################################
+sub aspect{
+############################################################################
+=pod
+
+=head2 aspect
+
+Returns the aspect with the the GO::TermFinder object was constructed.
+
+Usage:
+
+    my $aspect = $termFinder->aspect;
+
+=cut
+
+    return $_[0]->{$kArgs}{aspect};
+
+}
+
+1; # to make perl happy
+
+
+__END__
+
+#####################################################################
+#
+#  Additional POD Documentation from here on down
+#
+#####################################################################
+
+=pod
+
+=head1 Authors
+
+    Gavin Sherlock; sherlock@genome.stanford.edu
+    Elizabeth Boyle; ell@mit.edu
+    Ihab Awad; ihab@genome.stanford.edu
+
+=cut
diff --git a/www/lib/GO/TermFinderReport/Html.pm b/www/lib/GO/TermFinderReport/Html.pm
new file mode 100644
index 0000000..3d8c595
--- /dev/null
+++ b/www/lib/GO/TermFinderReport/Html.pm
@@ -0,0 +1,419 @@
+package GO::TermFinderReport::Html;
+
+=pod
+
+=head1 NAME
+
+GO::TermFinderReport::Html - prints an html table of the results of GO::TermFinder
+
+=head1 DESCRIPTION
+
+This print() method of this Perl module receives a reference to an the
+array that is the return value from the findTerms method of
+GO::TermFinder, the aspect for which terms were found, the number of
+genes that were used to generate the terms, and the number of genes
+that were said to be in the genome.  It will then generate an html
+table that summarizes those results.  Optionally, filehandle, p-value
+cutoff, gene URL, and GOID URL arguments may also be passed in.  Url
+links should have the string <REPLACE_THIS> to indicate where the gene
+name, or GOID should be put.
+
+=head1 SYNOPSIS
+
+    use GO::TermFinder;
+    use GO::TermFinderReport::Html;
+
+    .
+    .
+    .
+
+    my @pvalues = $termFinder->findTerms(genes=>\@genes);
+
+    my $report  = GO::TermFinderReport::Html->new();
+
+    open (HTML, ">blah.html");
+
+    print HTML "<html><body>";
+
+    my $numRows = $report->print(pvalues  => \@pvalues,
+                                 aspect   => $aspect,
+                                 numGenes => scalar(@genes),
+                                 totalNum => $totalNum,
+                                 fh       => \*HTML,
+                                 cutoff   => 0.01,
+                                 geneUrl  => 'https://www.yeastgenome.org/locus/<REPLACE_THIS>',
+                                 goidUrl  => 'https://www.yeastgenome.org/go/GO:0000916=<REPLACE_THIS>');
+
+    print HTML "</body></html>";
+
+    close HTML;
+
+=cut
+
+use strict;
+use warnings;
+use diagnostics;
+
+use vars qw ($VERSION);
+
+$VERSION = 0.12;
+
+use CGI qw/:all :html3/;
+
+######################################################################################
+sub new{
+######################################################################################
+
+=head2 new
+
+This is the constructor.
+
+Usage:
+
+    my $report = GO::TermFinderReport::Html->new();
+
+A GO::TermFinderReport::Html object is returned.
+
+=cut
+
+######################################################################################
+
+    my $self = {};
+
+    bless $self, shift;
+
+    return $self;
+
+}
+
+######################################################################################
+sub print{
+######################################################################################
+
+=head2 print
+
+This method prints out the report, in the form of an html table.  The
+table is ordered in ascending order of p-value (i.e. most significant
+first), and will print out the GO node, the frequency of use of that
+node within the selected group of genes, and the population as a
+whole, the corrected p-value of that, and a list of the genes
+annotated to that node.  If the FDR was calculated, the FDR will also
+be printed.  It returns the number of annotation rows in the table
+that exceed the provided p-value cutoff (which may even be zero, in
+which case nothing is printed).
+
+Usage:
+
+    my $numRows = $report->print(pvalues      => \@pvalues,
+				 aspect       => $aspect,   # P, C, or F
+				 numGenes     => scalar(@genes),
+				 totalNum     => $totalNum,
+				 fh           => \*HTML,
+				 pvalueCutOff => 0.01,
+				 geneUrl      => 'https://www.yeastgenome.org/locus/<REPLACE_THIS>',
+				 goidUrl      => 'https://www.yeastgenome.org/go/GO:0000916<REPLACE_THIS>');
+
+Required arguments:
+
+    pvalues   :  A reference to the array returned by the findTerms() method of GO::TermFinder
+
+    aspect    :  The aspect of the Gene Ontology for which terms were found (C, F or P)
+
+    numGenes  :  The number of genes that were in the list passed to the findTerms method
+
+    totalNum  :  The total number of genes that were indicated to be in the genome for finding terms.
+
+Optional arguments:
+
+
+    fh       : A reference to a file handle to which the table should be
+               printed.  Defaults to standard out.
+
+    pvalueCutOff   : The p-value cutoff, above which p-values and associated
+                     information will not be printed.  Default is no cutoff.
+
+    geneUrl  : A url to which you want genes linked.  Must contain the
+               text '<REPLACE_THIS>', which will be replaced with the
+               gene name.
+
+    goidUrl  : A url to which you want the GOIDs linked.  Must contain the
+               text '<REPLACE_THIS>', which will be replaced with the
+               goid.
+
+=cut
+
+######################################################################################
+
+    my ($self, %args) = @_;
+
+    if (!exists($args{'pvalues'})){
+
+	die "You must supply a pvalues argument to the print method.";
+
+    }
+
+    if (!exists($args{'aspect'})){
+
+	die "You must supply a aspect argument to the print method.";
+
+    }
+    
+
+    if (!exists $args{'numGenes'}){
+
+	die "You must supply a numGene argument to the print method.";
+
+    }
+
+    if (!exists $args{'totalNum'}){
+
+	die "You must supply a totalNum argument to the print method.";
+
+    }
+
+    my $pvalues  = $args{'pvalues'};
+    my $aspect   = $args{'aspect'};
+    my $numGenes = $args{'numGenes'};
+    my $totalNum = $args{'totalNum'};
+    my $fh       = $args{'fh'}           || \*STDOUT;
+    my $cutoff   = $args{'pvalueCutOff'} || 1;
+    my $geneUrl  = $args{'geneUrl'};
+    my $goidUrl  = $args{'goidUrl'};
+    my $annoFile = $args{'annoFile'};
+    my $correction = $args{'correction'};  # -- mark
+    $correction = "bonferroni" if (!$correction);
+    my $replacementText = "<REPLACE_THIS>";
+    my $replacementText2 = "<REPLACE_DBID>";  # added because some sites may not allow gene name - mark
+
+    my $rows;
+    my $numRows = 0;
+
+    my $hasFdr = 0;
+
+    foreach my $pvalue (@{$pvalues}){
+
+	# skip if above cutoff
+
+	# this is the same as the uncorrected PVALUE if no correction method used -- mark
+	next if ($pvalue->{CORRECTED_PVALUE} > $cutoff);
+
+	# now generate a list of loci annotated to this node, with
+	# links if requested
+
+	my @loci;
+	
+	foreach my $databaseId (keys %{$pvalue->{ANNOTATED_GENES}}){
+	    
+	    my $gene = $pvalue->{ANNOTATED_GENES}->{$databaseId};
+	    if (defined $geneUrl) {
+		
+		my $url = $geneUrl;
+
+		$url =~ s/$replacementText/$gene/;
+		$url =~ s/$replacementText2/$databaseId/;
+		$gene = a({-href=>$url,
+			   -target=>'infowin'}, $gene);
+
+	    }
+
+	    push (@loci, $gene);
+		
+	}
+
+	my $loci = join(", ", @loci);
+	
+	# now calculate the frequency for annotation for the list and the genome
+
+	my $frequencyPercent = sprintf("%.1f", ($pvalue->{NUM_ANNOTATIONS}/$numGenes) * 100);
+  
+#	my $frequency = $pvalue->{NUM_ANNOTATIONS}." out of $numGenes genes, $frequencyPercent\%";
+	my $frequency = $pvalue->{NUM_ANNOTATIONS}." of $numGenes genes, $frequencyPercent\%";
+
+        my $geneFrequencyPercent = sprintf("%.1f", ($pvalue->{TOTAL_NUM_ANNOTATIONS}/$totalNum) * 100);
+
+#	my $genomeFrequency = $pvalue->{TOTAL_NUM_ANNOTATIONS}." out of $totalNum genes, $geneFrequencyPercent\%";
+	my $genomeFrequency = $pvalue->{TOTAL_NUM_ANNOTATIONS}." of $totalNum genes, $geneFrequencyPercent\%";
+
+	# now format the p-value	
+
+	my $value = $pvalue->{CORRECTED_PVALUE};
+
+	# if it's in scientific notation, we want up to two of the decimal places
+
+	$value =~ s/^(.*\.[0-9]{2}).*(e.+)$/$1$2/;
+	
+	# otherwise, we'll take up to five decimal places
+
+	$value =~ s/^(0\.[0-9]{5})[0-9]*$/$1/;
+
+	if (defined ($pvalue->{NUM_OBSERVATIONS}) && $pvalue->{NUM_OBSERVATIONS} == 0){
+
+	    # simulations were used to generate the corrected p-value.
+	    # If we never saw anything better than this p-value in the
+	    # simulations, then prepend a less than sign to the
+	    # corrected p-value
+
+	    $value = "&lt;".$value;
+
+	}
+
+	# now deal with the GOID column
+
+	my $goColumn;
+
+	if (defined $goidUrl){
+
+	    my $url  = $goidUrl;
+	    my $goid = $pvalue->{NODE}->goid;
+
+	    $url =~ s/$replacementText/$goid/;
+
+	    # make link with name of term as the text
+
+	    $goColumn = a({-href=>$url,
+			   -target=>'infowin'}, $pvalue->{NODE}->term);
+
+	}else{
+
+	    # if no link, just use term, and parenthetical GOID
+
+	    $goColumn = $pvalue->{NODE}->term." (".$pvalue->{NODE}->goid.")";
+
+	}
+
+	# deal with FDR
+
+	my ($fdr, $falsePositives);
+
+	if (exists ($pvalue->{FDR_RATE})){
+
+	    $hasFdr = 1;
+
+	    $fdr = sprintf ("%.2f%%", $pvalue->{FDR_RATE} * 100);
+
+	    $falsePositives = sprintf ("%.2f", $pvalue->{EXPECTED_FALSE_POSITIVES});
+
+	}
+
+	$rows .= $self->_oneRow($goColumn, $frequency,
+				$genomeFrequency, $value, $loci, $fdr,
+				$falsePositives);
+
+	$numRows++;
+
+    }
+
+    # print the table out, if there were any rows
+
+    $self->_printTable($fh, $rows, $aspect, $annoFile, $cutoff, $hasFdr, $args{evidenceCodes}, $args{evidenceCodesNot}, $correction) if ($numRows > 0);
+
+    return $numRows;
+
+}
+
+###################################################################
+sub _oneRow{
+###################################################################
+# This protected method simply returns a row from the html table,
+# based on what was passed in.
+
+    my ($self, $goColumn, $frequency, $genomeFrequency, $pvalue,
+	$loci, $fdr, $falsePositives) = @_;
+
+    my $row = td($goColumn).
+	      td($frequency).
+	      td($genomeFrequency).
+	      td($pvalue);
+
+    if (defined($fdr)){
+
+	$row .= td($fdr).td($falsePositives);
+
+    }
+	      td($loci);
+
+    $row .= td($loci);
+
+    return Tr($row);
+
+}
+
+###################################################################
+sub _printTable{
+###################################################################
+# This method prints out the actual html table
+
+    my ($self, $fh, $rows, $aspect, $annoFile, $cutoff, $hasFdr, $evidenceCodes, $evidenceCodesNot, $correction) = @_;
+
+    $aspect =~ s/^F/Function/i;
+    $aspect =~ s/^P/Process/i;
+    $aspect =~ s/^C/Component/i;
+
+    print $fh a({-name=>'table'});
+    print $fh center(h3("Result Table")), p;
+
+    print $fh "<table align=center border=1 cellpadding=2 width=400>";
+#    print $fh table({-align       => 'center',
+#		     -border      => 1,
+#		     -cellpadding => 2,
+#		     -width       => 400},
+    print $fh Tr(td({-bgcolor => '#FFCC99',
+		     -align   => 'center',
+		     -width   => '100%',
+		     -nowrap  => undef},
+#			  b("Terms from the $aspect Ontology with p-value as good or better than $cutoff"))));
+		    b("Terms from the $aspect Ontology ",$annoFile? "of $annoFile " : "", "with p-value <= $cutoff")));
+
+    if ($evidenceCodes) {
+	print $fh Tr(td({-bgcolor => '#CCBBAA',
+			 -align   => 'center',
+			 -width   => '100%',
+			 -nowrap  => undef},
+			b("Evidence code filter included: ".join(" ", sort { $a cmp $b } keys(%{$evidenceCodes})))));
+    } elsif ($evidenceCodesNot) {
+	print $fh Tr(td({-bgcolor => '#CCBBAA',
+			 -align   => 'center',
+			 -width   => '100%',
+			 -nowrap  => undef},
+			b("Evidence codes filtered out: ".join(" ", sort { $a cmp $b } keys(%{$evidenceCodesNot})))));
+    }
+
+    my $headings = th({-align => 'center'}, "Gene Ontology term").
+		   th({-align => 'center'}, "Cluster frequency").
+#		   th({-align => 'center'}, "Genome frequency of use").
+		   th({-align => 'center'}, "Genome frequency");
+    if ($correction ne "none") {  # change column header depending on whether correction is enabled -- mark
+	$headings .= th({-align => 'center'}, "Corrected<br>P-value");
+    } else {
+	$headings .= th({-align => 'center'}, "Uncorrected<br>P-value");
+    }
+
+
+    if ($hasFdr){
+
+	$headings .= th({-align => 'center'}, "FDR").
+	    th({-align => 'center'}, "False<br>Positives");
+
+    }
+
+    $headings .= th({-align => 'center'}, "Genes annotated to the term");
+
+    print $fh table({-align  => 'center',
+		     -border => 2},
+		    Tr({-bgcolor  => '#CCCCFF'},
+		       $headings).
+		    $rows), p;
+
+}
+    
+1; # to keep Perl happy
+
+=pod
+
+=head1 AUTHOR
+
+Gavin Sherlock
+
+sherlock@genome.stanford.edu
+
+=cut
diff --git a/www/lib/GO/TermFinderReport/Text.pm b/www/lib/GO/TermFinderReport/Text.pm
new file mode 100644
index 0000000..a3ec0f4
--- /dev/null
+++ b/www/lib/GO/TermFinderReport/Text.pm
@@ -0,0 +1,283 @@
+package GO::TermFinderReport::Text;
+
+=pod
+
+=head1 NAME
+
+GO::TermFinderReport::Text - prints results of GO::TermFinder as a text report
+
+=head1 DESCRIPTION
+
+This print() method of this Perl module receives a reference to an the
+array that is the return value from the findTerms method of
+GO::TermFinder, the number of genes that were used to generate the
+terms, and the number of genes that were said to be in the genome.  It
+will then generate a text report that summarizes those results.
+Optionally, filehandle and p-value cutoff arguments may also be passed
+in.  It will return the 
+
+=head1 SYNOPSIS
+
+    use GO::TermFinder;
+    use GO::TermFinderReport::Text;
+
+    .
+    .
+    .
+
+    my @pvalues = $termFinder->findTerms(genes=>\@genes);
+
+    my $report  = GO::TermFinderReport::Text->new();
+
+    open (OUT, ">report.text");
+
+    my $numHypotheses = $report->print(pvalues  => \@pvalues,
+                                       aspect   => $aspect,
+                                       numGenes => scalar(@genes),
+                                       totalNum => $totalNum,
+                                       cutoff   => 0.01,
+                                       fh       => \*OUT);
+
+    close OUT;
+
+=cut
+
+use strict;
+use warnings;
+use diagnostics;
+
+use vars qw ($VERSION);
+
+$VERSION = 0.10;
+
+######################################################################################
+sub new{
+######################################################################################
+
+=head2 new
+
+This is the constructor.
+
+Usage:
+
+    my $report = GO::TermFinderReport::Text->new();
+
+A GO::TermFinderReport::Text object is returned.
+
+=cut
+
+######################################################################################
+
+    my $self = {};
+
+    bless $self, shift;
+
+    return $self;
+
+}
+
+######################################################################################
+sub print{
+######################################################################################
+
+=head2 print
+
+This method prints out the text report of the passed in hypotheses.
+The report is ordered in ascending order of p-value (i.e. most
+significant first).  If the FDR was calculated, the FDR will also be
+printed.  It returns the number of hypotheses that had corrected
+p-values as good or better than the passed in cutoff.
+
+Usage:
+
+    my $numHypotheses = $report->print(pvalues  => \@pvalues,
+				       numGenes => scalar(@genes),
+				       totalNum => $totalNum,
+				       cutoff   => 0.01,
+				       fh       => \*OUT,
+                                       table    => 0 );
+
+Required arguments:
+
+pvalues  : A reference to the array returned by the findTerms() method
+           of GO::TermFinder
+
+numGenes : The number of genes that were in the list passed to the
+           findTerms method
+
+totalNum : The total number of genes that were indicated to be in the
+           genome for finding terms.
+
+Optional arguments:
+
+fh       : A reference to a file handle to which the table should be
+           printed.  Defaults to standard out.
+
+cutoff   : The p-value cutoff, above which p-values and associated
+           information will not be printed.  Default is no cutoff.
+
+table    : 0 for standard output, 1 for tab delimited table.  Default is 0
+
+=cut
+
+######################################################################################
+
+    my ($self, %args) = @_;
+
+    if (!exists($args{'pvalues'})){
+
+	die "You must supply a pvalues argument to the print method.";
+
+    }
+
+    if (!exists $args{'numGenes'}){
+
+	die "You must supply a numGene argument to the print method.";
+
+    }
+
+    if (!exists $args{'totalNum'}){
+
+	die "You must supply a totalNum argument to the print method.";
+
+    }
+
+    my $pvalues  = $args{'pvalues'};
+    my $numGenes = $args{'numGenes'};
+    my $totalNum = $args{'totalNum'};
+    my $fh       = $args{'fh'}     || \*STDOUT;
+    my $cutoff   = $args{'cutoff'} || 1;
+    my $table    = $args{'table'}  || 0;
+
+    my $correction = $args{'correction'};   # -- mark
+    $correction = "bonferroni" if (!$correction);
+
+    my $rows;
+    my $numRows = 0;
+
+    my $hasFdr = 0;
+
+    my $hypothesis = 1;
+
+    my @header = ("GOID", "TERM", "CORRECTED_PVALUE",
+		  "UNCORRECTED_PVALUE", "NUM_LIST_ANNOTATIONS",
+		  "LIST_SIZE", "TOTAL_NUM_ANNOTATIONS",
+		  "POPULATION_SIZE", "FDR_RATE",
+		  "EXPECTED_FALSE_POSITIVES", "ANNOTATED_GENES");
+
+    print $fh join("\t", @header), "\n" if ($table);
+
+    foreach my $pvalue (@{$pvalues}){
+
+	# skip if above cutoff
+
+	next if ($pvalue->{CORRECTED_PVALUE} > $cutoff);
+	
+	# now format the p-value	
+
+	my $value = $pvalue->{CORRECTED_PVALUE};
+
+	# if it's in scientific notation, we want up to two of the decimal places
+
+	$value =~ s/^(.*\.[0-9]{2}).*(e.+)$/$1$2/;
+	
+	# otherwise, we'll take up to five decimal places
+
+	$value =~ s/^(0\.[0-9]{5})[0-9]*$/$1/;
+
+	if (defined ($pvalue->{NUM_OBSERVATIONS}) && $pvalue->{NUM_OBSERVATIONS} == 0){
+
+	    # simulations were used to generate the corrected p-value.
+	    # If we never saw anything better than this p-value in the
+	    # simulations, then prepend a less than sign to the
+	    # corrected p-value
+
+	    $value = "<".$value;
+
+	}
+	
+	if (!$table){
+
+	    print $fh 
+
+		"-- $hypothesis of ", scalar @{$pvalues}, " --\n",
+		"GOID\t", $pvalue->{NODE}->goid, "\n",
+		"TERM\t", $pvalue->{NODE}->term, "\n",
+		($correction ne "none")? "CORRECTED P-VALUE\t".$pvalue->{CORRECTED_PVALUE}."\n" : "",  # -- mark
+		"UNCORRECTED P-VALUE\t", $pvalue->{PVALUE}, "\n";
+
+	}else{
+
+	    print $fh join("\t", ($pvalue->{NODE}->goid, 
+				  $pvalue->{NODE}->term,
+				  ($correction ne "none")? $pvalue->{CORRECTED_PVALUE} : "-",
+				  $pvalue->{PVALUE},
+				  $pvalue->{NUM_ANNOTATIONS},
+				  $numGenes,
+				  $pvalue->{TOTAL_NUM_ANNOTATIONS},
+				  $totalNum)), "\t";
+
+	}
+	
+	# deal with FDR
+
+	my ($fdr, $falsePositives);
+
+	if (exists ($pvalue->{FDR_RATE})){
+
+	    $fdr = sprintf ("%.2f%%", $pvalue->{FDR_RATE} * 100);
+
+	    $falsePositives = sprintf ("%.2f", $pvalue->{EXPECTED_FALSE_POSITIVES});
+
+	    if(!$table){
+
+		print $fh 
+
+		    "FDR_RATE\t", $fdr, "\n",
+		    "EXPECTED_FALSE_POSITIVES\t", $falsePositives, "\n";
+
+	    }else{
+
+	      print $fh $fdr, "\t", $falsePositives, "\t";
+
+	    }
+
+	}else{
+
+	    print $fh "\t\t" if ($table); # Gotta fill in the blanks
+
+	}
+
+	if (!$table){
+	
+	    print $fh "NUM_ANNOTATIONS\t"; 
+	    print $fh $pvalue->{NUM_ANNOTATIONS};
+	    print $fh " of $numGenes in the list, vs ";
+	    print $fh $pvalue->{TOTAL_NUM_ANNOTATIONS};
+	    print $fh " of $totalNum in the genome\n";
+	    print $fh "The genes annotated to this node are:\n";;
+
+	}
+
+	print $fh join(", ", values(%{$pvalue->{ANNOTATED_GENES}})), "\n";
+	print $fh "\n" if (!$table);
+	
+	$hypothesis++;
+	
+    }
+
+    return ($hypothesis - 1);
+
+}
+    
+1; # to keep Perl happy
+
+=pod
+
+=head1 AUTHOR
+
+Gavin Sherlock
+
+sherlock@genome.stanford.edu
+
+=cut
diff --git a/www/lib/GO/Utils/General.pm b/www/lib/GO/Utils/General.pm
new file mode 100644
index 0000000..b347046
--- /dev/null
+++ b/www/lib/GO/Utils/General.pm
@@ -0,0 +1,112 @@
+package GO::Utils::General;
+
+=head1 NAME
+
+GO::Utils::General - provides some general utilities for clients of other GO classes
+
+=head1 SYNOPSIS
+
+  use GO::Utils::General qw (CategorizeGenes);
+
+  CategorizeGenes(annotation  => $annotation,
+		  genes       => \@genes,
+		  ambiguous   => \@ambiguous,
+		  unambiguous => \@unambiguous,
+		  notFound    => \@notFound);
+
+=cut
+
+use strict;
+use warnings;
+use diagnostics;
+
+use vars qw (@ISA @EXPORT_OK $VERSION);
+use Exporter;
+@ISA = ('Exporter');
+@EXPORT_OK = qw (CategorizeGenes);
+
+$VERSION = 0.11;
+
+my @kRequiredArgs = qw (annotation genes ambiguous unambiguous notFound);
+
+##########################################################################
+sub CategorizeGenes{
+##########################################################################
+
+=head2 CategorizeGenes
+
+CategorizeGenes categorizes a set of genes into three categories, whether they
+are ambiguous, whether they are not found, or whether they are unambiguous.  The 
+category to which they belong is determined by using an annotation provider.
+
+Usage:
+
+  use GO::Utils::General qw (CategorizeGenes);
+
+  CategorizeGenes(annotation  => $annotation,
+		  genes       => \@genes,
+		  ambiguous   => \@ambiguous,
+		  unambiguous => \@unambiguous,
+		  notFound    => \@notFound);
+
+All the above named arguments are required:
+
+annotation : A GO::AnnotationProvider concrete subclass instance
+genes      : A reference to an array of gene names
+
+The remaining arguments should be empty arrays, passed in by reference, that
+will be populated by this function.
+
+=cut
+
+##########################################################################
+
+    my (%args) = @_;
+
+    foreach my $arg (@kRequiredArgs){
+
+	if (!exists $args{$arg} || !defined $args{$arg}){
+
+	    die "You must provide a $arg argument to CategorizeGenes";
+
+	}
+
+    }
+
+    my $annotation     = $args{'annotation'};
+    my $genesRef       = $args{'genes'};
+    my $ambiguousRef   = $args{'ambiguous'};
+    my $unambiguousRef = $args{'unambiguous'};
+    my $notFoundRef    = $args{'notFound'};
+
+    foreach my $gene (@{$genesRef}){
+
+	if ($annotation->nameIsAmbiguous($gene)){
+
+	    if ($annotation->nameIsStandardName($gene)) {
+		if ($annotation->databaseIdByStandardName($gene)) {
+		    push(@{$unambiguousRef}, $gene);
+		    next;
+		}
+	    }
+	    push(@{$ambiguousRef}, $gene);
+	    next;
+	}
+
+	my $name = $annotation->standardNameByName($gene);
+
+	if (defined $name){
+
+	    push(@{$unambiguousRef}, $gene);
+
+	}else{
+
+	    push(@{$notFoundRef}, $gene);
+
+	}
+
+    }
+
+}
+
+1; # keep Perl happy
diff --git a/www/lib/GO/View.pm b/www/lib/GO/View.pm
new file mode 100644
index 0000000..01c23aa
--- /dev/null
+++ b/www/lib/GO/View.pm
@@ -0,0 +1,2809 @@
+package GO::View;
+
+#########################################################################
+# Module Name  :  View.pm
+#
+# Date created :  Oct. 2003
+# 
+# Cared for by Shuai Weng <shuai@genome.stanford.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#########################################################################
+
+# POD documentation - main docs before the code
+
+# TODO
+
+# have a much better options handling set of code, probably based on Getopt::Long
+# see http://www.perl.com/pub/a/2007/07/12/options-and-configuration.html
+
+=pod
+
+=head1 NAME
+
+GO::View - Creates a gif or png image for visualizing the GO DAG
+
+
+=head1 DESCRIPTION
+
+This perl module generates a graphic that displays the parent and child 
+relationships of a selected GO term. It also provides the visualization 
+for the GO::TermFinder perl module created by the Stanford Microarray 
+Database (SMD). This module is useful when analyzing experimental or 
+computational results that produce a set of gene products that may have 
+a common function or process.
+
+=head1 SYNOPSIS
+
+    use GO::View;
+
+    my $goView = 
+
+       GO::View->new(-goid               => $goid,
+		     -ontologyProvider   => $ontology,
+		     -annotationProvider => $annotation,
+		     -termFinder         => \@pvalues,
+		     -aspect             => 'P',
+		     -configFile         => $confFile,
+		     -imageDir           => "/tmp",
+		     -imageUrlRoot       => "http://www.ABC.com/tmp",
+		     -imageName          => "GOview.88.png",
+		     -tree               => 'up',
+		     -nodeUrl            => $goUrl,
+                     -geneUrl            => $geneUrl,
+		     -pvalueCutOff       => '0.01',
+		     -imageLabel         => "SGD");
+				  
+
+    argument              required             expect data and type
+    -------------------------------------------------------------------------
+    -goid                 No          A gene ontology ID (GOID).
+                                      If nothing is passed in, the module 
+                                      will use the top goid of each ontology 
+                                      branch (i.e, goid for 
+				      molecular_function, biological_process,
+				      or cellular_component)
+
+    -ontologyProvider	  Yes         An ontology provider instance.
+
+    -annotationProvider   No          An annotation provider instance. It is
+                                      required for creating tree for GO Term
+                                      Finder result.
+    
+    -termFinder           No          An array of hash references returned 
+                                      from 'findTerms' method of 
+                                      GO::TermFinder module. It is required
+                                      for creating tree for GO Term Finder 
+                                      result. 
+
+    -aspect               No          <P|C|F>. The aspect of the ontology 
+                                      provider. It is required for creating 
+                                      tree for GO Term Finder result.
+    
+    -configFile           Yes         The configuration file for setting the
+                                      general variables for the graphic 
+                                      display. 
+				  
+    -imageDir             Yes         The directory for storing the newly 
+                                      created image file. It must be 
+                                      world (nobody) readable and writable
+                                      if you want to display the image to 
+                                      the web.
+ 
+    -imageUrlRoot         No          The url root for the -imageDir. It is
+                                      required if you want to display the
+                                      image to the web.
+
+    -imageName            No          The image file name. By default, the 
+                                      name will be something like 
+                                      'GOview.xxxx.png'. The 'xxxx' will be
+                                      the process id.  A suffix for the image (.png
+                                      or .gif) should not be provided, as that will
+                                      be determined at run time, depending on the
+                                      capabilities of the GD library.
+
+    -treeType             No          <up|down>. The tree type. 
+                                      
+                                      1. up   => display the ancestor tree 
+                                                 for the given goid.
+                                      2. down => display the descendant tree
+                                                 for the given goid.
+                                      By default, it will display the 
+                                      descendant tree.
+
+    -geneUrl              No          The URL for each Gene to link to.
+                                      It needs to have the text <REPLACE_THIS> in 
+                                      the url which will be substituted 
+                                      by the real goid for a node.
+
+    -nodeUrl              No          The url for each GO node to link to.
+                                      It needs to have the text <REPLACE_THIS> in 
+                                      the url which will be substituted 
+                                      by the real goid for a node.
+
+    -pvalueCutOff         No          The p-value cutoff for displaying
+                                      the graphic for GO Term Finder. 
+                                      The default is 0.01
+
+    -imageLabel           No          The image label which will appear at
+                                      the left bottom corner of the map.
+
+    -maxTopNodeToShow     No          This argument is used to limit the
+                                      amount of the graph that might be
+                                      shown, for the sake of reducing run-
+                                      time.  The default is 6.
+
+    ------------------------------------------------------------------------
+
+    To display the image on the web:
+
+         $goView->showGraph;
+    
+    To create and return image file name with full path:
+    
+         my $imageFile = $goView->createImage;
+
+
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Bug reports can be submitted via email 
+
+  shuai@genome.stanford.edu
+
+=head1 AUTHOR
+
+Shuai Weng, shuai@genome.stanford.edu
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Stanford University. All Rights Reserved.
+This module is free software; you can redistribute it and/or 
+modify it under the same terms as Perl itself.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the public methods.
+
+=cut
+
+use strict;
+use warnings;
+use GD;
+use GD::Polyline;   # we use polylines for edges -- mark
+use GraphViz;
+use IO::File;
+
+use GO::View::GD;
+
+use vars qw ($PACKAGE $VERSION);
+
+$PACKAGE = 'GO::View';
+$VERSION = 0.15;
+
+my $kReplacementText = "<REPLACE_THIS>";
+
+#########################################################################
+
+=head1 METHODS
+
+=cut
+
+#########################################################################
+sub new {
+#########################################################################
+
+=head2 new
+
+ Title   : new
+ Function: Initializes the GO::View object. 
+         : Recognized named parameters are -goid, -ontologyProvider,
+           -annotationProvider, -termFinder, -aspect, -configFile, 
+           -imageDir, -imageUrlRoot, -imageName, -treeType, -nodeUrl, 
+           -imageLabel
+ Returns : a new object
+ Args    : named parameters
+
+=cut
+
+#########################################################################
+    my ($class, %args) = @_;
+
+    my $self = bless {}, $class;
+
+    $self->_init(%args);
+
+    return $self;
+
+}
+
+#################################################################
+sub graph {
+#################################################################
+
+=head2 graph
+
+ Title   : graph
+ Usage   : my $graph = $goView->graph;
+ Function: Gets the newly created Graphviz instance.   
+ Returns : a new Graphviz instance.
+ 
+=cut
+
+################################################################
+ 
+    return $_[0]->{GRAPH};
+
+}
+
+################################################################
+sub showGraph {
+################################################################
+
+=head2 showGraph
+
+ Title   : showGraph
+ Usage   : $goView->showGraph;
+ Function: Creates the image and print the map image to a file.  
+ Returns : the name of the file to which the image was written
+ Throws  : Exception if the imageUrlRoot is not passed to the object.  
+
+=cut
+ 
+#########################################################################
+    my ($self) = @_;
+
+    if ($self->graph) {
+
+	$self->_createAndShowImage;
+    
+    }
+
+    return $self->{IMAGE_FILE};
+
+}
+
+#########################################################################
+sub imageFile {
+#########################################################################
+
+=head2 imageFile
+
+ Title   : imageFile
+ Usage   : my $imageFile = $goView->imageFile;
+ Function: Gets the newly created image file name (with full path).  
+ Returns : image file name.
+
+=cut
+    
+#########################################################################
+
+    my ($self) = @_;
+
+    return $self->{IMAGE_FILE};
+
+
+}
+
+################################################################
+sub createImage {
+################################################################
+
+=head2 createImage
+
+ Title   : createImage
+ Usage   : $goView->createImage; 
+ Function: Creates the GO tree image file. Calls it only if you 
+           want to create the image file only and do not want to
+           display the image.  
+ Returns : The newly created image file name with full path.
+
+=cut
+    
+#########################################################################
+    
+    my ($self) = @_;
+
+    if ($self->graph) {
+
+	$self->{CREATE_IMAGE_ONLY} = 1;
+
+	return $self->_createAndShowImage;
+	
+    }
+
+}
+
+################################################################
+sub imageMap{
+################################################################
+
+=head2 imageMap
+
+ Title    : imageMap
+ Usage    : my $map = $goView->imageMap;
+ Function : returns the text that constitutes an image map for the
+            created image.
+ Returns  : a string
+
+=cut
+
+#########################################################################
+
+    return $_[0]->{IMAGE_MAP};
+
+}
+
+################################################################
+sub _goid {
+################################################################
+#
+# =head2 _goid 
+#
+#  Title   : _goid
+#  Usage   : my $goid = $self->_goid;
+#  Function: Gets the goid that client interface passed in.
+#  Returns : GOID
+#  Args    : none    
+# 
+# =cut
+# 
+#########################################################################
+
+    my ($self) = @_;
+
+    return $self->{GOID};
+
+}
+
+#########################################################################
+sub _ontologyProvider {
+#########################################################################
+#
+# =head2 _ontologyProvider 
+#
+#  Title   : _ontologyProvider
+#  Usage   : my $ontology = $self->_ontologyProvider;
+#  Function: Gets the ontology provider instance which is passed in by 
+#            client interface.
+#  Returns : ontology provider instance 
+#  Args    : none    
+# 
+# =cut
+# 
+#########################################################################
+
+    my ($self) = @_;
+
+    return $self->{ONTOLOGY};
+
+}
+
+#########################################################################
+sub _annotationProvider {
+#########################################################################
+#
+# =head2 _annotationProvider 
+#
+#  Title   : _annotationProvider
+#  Usage   : my $annotation = $self->_annotationProvider;
+#  Function: Gets the annotation provider instance which is passed in by 
+#            client interface.
+#  Returns : annotation provider instance 
+#  Args    : none    
+# 
+# =cut
+# 
+#########################################################################
+
+    my ($self) = @_;
+
+    return $self->{ANNOTATION};
+
+}
+
+#########################################################################
+sub _termFinder {
+#########################################################################
+#
+# =head2 _termFinder 
+#
+#  Title   : _termFinder
+#  Usage   : my $termFinder = $self->_termFinderProvider;
+#  Function: Gets the term finder result arrayref which is passed in by 
+#            client interface.
+#  Returns : term finder result arrayref 
+#  Args    : none    
+# 
+# =cut
+# 
+#########################################################################
+
+    my ($self) = @_;
+
+    return $self->{TERM_FINDER};
+
+}
+
+#########################################################################
+sub _init {
+#########################################################################
+#
+# =head2 _init
+#
+# Title   : _init
+# Usage   : n/a; automatically called by new()
+# Function: Initializes the variables required for creating the map. 
+# Returns : void 
+# Args    : n/a
+# Throws  : Exception if ontology provider instance or tmp image 
+#           directory for storing the image file are not passed 
+#           to this object.
+#
+# =cut
+#
+#########################################################################
+
+    my ($self, %args) = @_;
+
+    # first do some sanity checks
+
+    if (!$args{-ontologyProvider} || !$args{-configFile} || 
+	!$args{-imageDir}) {
+
+	die "Can't build a $PACKAGE object without the ontologyProvider, configuration file, and tmp image directory name for storing the image file.";
+		
+    }
+
+    $self->{ONTOLOGY} = $args{-ontologyProvider};
+
+    if ($args{-goid} && $args{-goid} !~ /^[0-9]+$/ && 
+	$args{-goid} !~ /^GO:[0-9]+$/) {
+
+	die "The goid ($args{-goid}) passed to $PACKAGE is is not in a recognized format, such as GO:0000166.";
+
+    }
+	
+    my $goid = $args{-goid};
+
+    # fix format of the GOID, so it is padded preceded with GO: and properly padded
+
+    if ($goid && $goid !~ /^GO:[0-9]+$/) { 
+
+	$goid = $self->_formatGoid($goid); 
+
+    }
+
+    # if we have no goid passed in, then we're likely creating an
+    # image based upon the output from GO::TermFinder.  In this case
+    # we just set the goid to the node corresponding to the aspect
+    # that is being dealt with
+
+    if (!$goid) {
+
+	# set top goid for the given ontology (molecular_function, 
+	# biological_process, or cellular_component).
+
+	$goid = $self->_initGoidFromOntology; 
+
+    }
+
+    # now store the goid
+
+    $self->{GOID} = $goid;
+
+    # work out the image name and url - note that they will both
+    # receive a suffix later on that indicates the type of image that
+    # has been output (png or gif)
+
+    $self->{IMAGE_DIR} = $args{-imageDir};
+
+    if ($self->{IMAGE_DIR} !~ /\/$/) {  
+
+	$self->{IMAGE_DIR} .= "/"; 
+
+    }
+
+    my $suffix;
+
+    if (GD::Image->can('png')){
+
+	$suffix = 'png';
+
+    }else{
+
+	$suffix = 'gif';
+
+    }
+
+    my $imageName;
+
+    if (exists $args{-imageName} && defined $args{-imageName} && $args{-imageName} ne ""){
+
+	$imageName = $args{-imageName};
+
+    }else{
+
+	my $id = $$;
+
+	# now keep incrementing $id, until the name
+	# doesn't clash with an existing file
+
+	while (-e $self->{IMAGE_DIR}."GOview.$id.$suffix"){
+
+	    $id++;
+     
+	}
+
+	$imageName = "GOview.$id";
+
+    }
+
+    $imageName .= ".$suffix";
+
+    $self->{IMAGE_FILE} = $self->{IMAGE_DIR}.$imageName;
+
+    if ($args{-imageUrlRoot}) {
+	
+	$self->{IMAGE_URL} = $args{-imageUrlRoot};
+ 
+	if ($self->{IMAGE_URL} !~ /\/$/) { $self->{IMAGE_URL} .= "/"; }
+
+	$self->{IMAGE_URL} .= $imageName;
+
+    }else{
+
+	# if we didn't get a root url, we just assume that the image will
+	# be in the same directory as the image
+
+	$self->{IMAGE_URL} = "./".$imageName;
+
+    }
+
+    # now set the TREE_TYPE, which can be up or down.
+
+    $self->{TREE_TYPE} = $args{-treeType} || 'down';
+
+    my $count;
+
+    $self->{MAX_TOP_NODE_TO_SHOW} = $args{-maxTopNodeToShow} || 6;
+
+    if ($args{-annotationProvider} && $args{-termFinder} && 
+	$args{-aspect}) {
+
+	# we're dealing with GO::TermFinder output, so we'll
+	# store and initialize all the information we need
+
+	$self->{PVALUE_CUTOFF} = $args{-pvalueCutOff} || 0.01;
+
+        #LINDA'S ADDITION: 
+#        $self->{MAX_TOP_NODE_TO_SHOW} = $args{-maxTopNodeToShow};
+        $self->{MAX_CHILD_GOID_NUM} = $args{-maxChildGoidNum};
+
+	$self->{ANNOTATION} = $args{-annotationProvider};
+
+	$self->{TERM_FINDER} = $args{-termFinder};
+
+	$self->{ASPECT} = $args{-aspect};
+
+	$count = $self->_initPvaluesGeneNamesDescendantGoids;
+
+    }elsif ($args{-annotationProvider} || $args{-termFinder} || 
+	    $args{-aspect}) {
+
+	die "You have to pass annotation provider and term finder instances and GO aspect ([P|F|C]) to $PACKAGE if you want to display graphic for term finder result.";
+
+    }
+  
+    # store some more variables    
+
+    $self->{IMAGE_LABEL} = $args{-imageLabel};
+
+    $self->{GENE_URL} = $args{-geneUrl};
+
+    $self->{GO_URL} = $args{-nodeUrl};
+
+    $self->_initVariablesFromConfigFile($args{-configFile});
+
+    # only make the graph if we had at least one node passing our p value cutoff
+
+    $self->_createGraph if ($count > 0);
+
+    # support these new options -- mark
+    $self->{MAKE_PS} = $args{-makePS};
+    $self->{MAKE_SVG} = $args{-makeSVG};
+
+}
+
+################################################################
+sub _createGraph {
+################################################################
+#
+# =head2 _createGraph
+#
+# Title   : _createGraph
+# Usage   : $self->_createGraph; 
+#           automatically called by _init()
+# Function: To create the GraphViz instance and add each descendant/ 
+#           ancestor goid into the Graph tree.
+# Returns : newly created GraphViz instance.
+#
+# =cut
+#
+#########################################################################
+
+    my ($self) = @_;
+
+    # If client does not ask for up (ancestor) tree and this is not
+    # for the special tree paths client asked for the given goid to
+    # its specified descendants (i.e. for GO Term Finder), then we
+    # need to determine how many generations of the descendants we can
+    # display.
+
+    if ($self->{TREE_TYPE} !~ /up/i && 
+	!$self->{DESCENDANT_GOID_ARRAY_REF}) {
+	
+	$self->_setGeneration($self->_goid);
+
+    }
+
+    # If this is for the ancestor tree, we need to determine up to
+    # which ancestor we want to display the tree paths. We will
+    # display tree path up to $self->{TOP_GOID}
+
+    if ($self->{TREE_TYPE} =~ /up/i) {
+
+	$self->_setTopGoid;
+
+	if (!$self->{TOP_GOID}) { return; }
+	    
+	$self->{NODE_NUM} = $self->_descendantCount($self->{TOP_GOID},
+						    $self->{GENERATION});
+
+    }
+    
+    my $goid;
+
+    if ($self->{TOP_GOID}) {
+
+	$goid = $self->{TOP_GOID};
+
+    }else {
+
+	$goid = $self->_goid;
+
+    }
+
+    $self->_createGraphObject;
+
+    my %foundNode;
+    my %foundEdge;
+    
+    # add the top node to the graph
+
+    $self->_addNode($goid,
+		    fillcolor => $self->_colorForNode($goid),
+		    color     => $self->_colorForNode($goid));
+
+    # and record that we've seen it
+
+    $foundNode{$goid}++;
+
+    # draw go_path for ancestor goid ($self->{GOID}) to each
+    # descendant goid in @{$self->{DESCENDANT_GOID_ARRAY_REF}}.  The
+    # DESCENDANT_GOID_ARRAY_REF is only created if we were dealing
+    # with output from GO::TermFinder
+
+    if ($self->{DESCENDANT_GOID_ARRAY_REF}) {
+	
+	# get the top node
+
+	my $topAncestorNode = $self->_ontologyProvider->nodeFromId($self->_goid);
+#	my $topAncestorNode = $self->_ontologyProvider->nodeFromId($goid);  #!?
+
+	# and record its term
+
+	$self->{TERM_HASH_REF_FOR_GOID}{$self->_goid} = $topAncestorNode->term;
+
+	my $i;
+
+	# now go through every GO Node to which our list of genes is
+	# directly annotated that contibuted to the gene count of a
+	# node that passed the cutoff
+
+	foreach my $goid (@{$self->{DESCENDANT_GOID_ARRAY_REF}}) {
+
+	    my $childNode = $self->_ontologyProvider->nodeFromId($goid);
+
+	    # get the list of paths that link from this node up to the
+	    # top - the first node in each path is the root, and the
+	    # final node is the immediate parent of $childNode
+
+	    my @path = $childNode->pathsToAncestor($topAncestorNode);
+
+	    # now add that path to the graph
+
+	    my $found = $self->_addAncestorPathToGraph($childNode, 
+						       \@path, 
+						       \%foundNode, 
+						       \%foundEdge); 
+
+	    # we can skip to the next goid if no new nodes were added
+	    # to the graph
+
+	    next if (!$found);
+
+	    $i++;
+
+	    # now add the genes that are annotated to this node
+
+	    if ($self->{GENE_NAME_HASH_REF_FOR_GOID} &&
+		$self->{GENE_NAME_HASH_REF_FOR_GOID}->{$goid}) {
+
+		my $loci = $self->{GENE_NAME_HASH_REF_FOR_GOID}->{$goid};
+
+		$loci =~ s/:/ /g;
+
+		$self->_addNode($loci,
+				fillcolor => 'grey65',
+				color     => 'grey65',
+				fontcolor => 'blue');
+
+		$self->_addEdge($goid, $loci);
+
+	    }
+
+	}
+
+	return;
+
+    } 
+
+    # draw part of the tree, and it can go up and down the tree.
+
+    # draw up tree and only show ancestors in the paths from the given
+    # goid to the ancestor goid $self->{TOP_GOID} since there are too
+    # many nodes...
+
+    if ($self->{TREE_TYPE} =~ /up/i && 
+	$self->{NODE_NUM} > $self->{MAX_NODE}) {
+	
+	my $childNode = $self->_ontologyProvider->nodeFromId($self->_goid);
+
+	my $topAncestorNode = $self->_ontologyProvider->nodeFromId($goid);
+
+	my @path = $childNode->pathsToAncestor($topAncestorNode);
+
+	$self->_addAncestorPathToGraph($childNode, 
+				       \@path, 
+				       \%foundNode, 
+				       \%foundEdge); 
+
+	return;
+
+    }
+		
+    ##### draw down tree
+
+    my $node = $self->_ontologyProvider->nodeFromId($goid);
+
+    $self->_addChildOfTheNodeToGraph($node, 
+				     \%foundNode,
+				     \%foundEdge);
+
+}
+
+################################################################
+sub _addChildOfTheNodeToGraph {
+################################################################
+#
+# =head2 _addChildOfTheNodeToGraph
+#
+# Title   : _addChildOfTheNodeToGraph
+# Usage   : $self->_addChildOfTheNodeToGraph($node, 
+#                                            $foundNodeHashRef,
+#                                            $foundEdgeHashRef);
+#           
+# Function: To add each unique descendant of the given node to the 
+#           graph tree.
+# Returns : void
+#
+# =cut
+#
+#########################################################################
+
+# This is only called when we are dealing with a 'down' tree.  It is
+# not called when dealing with GO::TermFinder output
+
+    my ($self, $node, $foundNodeHashRef, $foundEdgeHashRef,
+	$generation) = @_;
+
+    if (!$generation) { $generation = 1; }
+    
+    my @childNodes = $node->childNodes;
+
+    foreach my $childNode ($node->childNodes) {
+
+	my $parentGoid = $node->goid;
+
+	my $childGoid = $childNode->goid;
+
+	if (!$foundNodeHashRef->{$parentGoid}) {
+
+	    $self->_addNode($parentGoid);
+
+	    $foundNodeHashRef->{$parentGoid}++;
+
+	}
+
+	if (!$foundNodeHashRef->{$childGoid}) {
+
+	    $self->_addNode($childGoid);
+
+	    $foundNodeHashRef->{$childGoid}++;
+
+	}
+	if (!$foundEdgeHashRef->{$parentGoid."::".$childGoid}) {
+
+	    $self->_addEdge($parentGoid, $childGoid);
+
+	    $foundEdgeHashRef->{$parentGoid."::".$childGoid}++;
+
+        }
+
+	if ($generation < $self->{GENERATION}) {
+
+	    $self->_addChildOfTheNodeToGraph($childNode, 
+					     $foundNodeHashRef, 
+					     $foundEdgeHashRef,
+					     $generation++);
+	}
+
+    }
+
+}
+
+################################################################
+sub _addAncestorPathToGraph {
+################################################################
+#
+# =head2 _addAncestorPathToGraph
+#
+# Title   : _addAncestorPathToGraph
+# Usage   : $self->_addAncestorPathToGraph($node,
+#                                          $ancestorPathArrayRef, 
+#                                          $foundNodeHashRef,
+#                                          $foundEdgeHashRef);
+#           
+# Function: To add each unique ancestor of the given node to the 
+#           graph tree.
+# Returns : void
+#
+# =cut
+#
+#########################################################################
+
+# This is only called when we are dealing with an 'up' tree, or when
+# dealing with GO::TermFinder output
+
+    my ($self, $childNode, $ancestorPathArrayRef, 
+	$foundNodeHashRef, $foundEdgeHashRef) = @_; 
+
+    my $found;
+
+    # go through each path back to the root
+
+    foreach my $ancestorNodeArrayRef (@{$ancestorPathArrayRef}) {
+	
+	# add the child node to the path, so it gets included too
+		
+	push(@{$ancestorNodeArrayRef}, $childNode);
+
+	# reverse the order, so that we have the child node first, and
+	# the root last
+
+	@{$ancestorNodeArrayRef} = reverse(@{$ancestorNodeArrayRef});
+
+	# now go through the path
+
+	for (my $i = 0; $i < @{$ancestorNodeArrayRef}; $i++) {
+
+	    my ($goid1, $goid2);
+
+	    # get the goid for the current node, and store it's term
+
+	    if (defined $ancestorNodeArrayRef->[$i]) {
+
+		$goid1 = $ancestorNodeArrayRef->[$i]->goid;
+
+		$self->{TERM_HASH_REF_FOR_GOID}{$goid1} 
+		        = $ancestorNodeArrayRef->[$i]->term;
+
+	    }
+
+	    # get the goid for the next node (the current node's
+	    # parent), and store it's term too
+
+	    if (defined $ancestorNodeArrayRef->[$i+1]) {
+
+		$goid2 = $ancestorNodeArrayRef->[$i+1]->goid;
+
+		$self->{TERM_HASH_REF_FOR_GOID}{$goid2} 
+		        = $ancestorNodeArrayRef->[$i+1]->term;
+		
+	    }
+    
+	    # if the current node isn't already on the graph, add it
+
+	    if ($goid1 && !$foundNodeHashRef->{$goid1}) {
+
+		$self->_addNode($goid1,
+				fillcolor => $self->_colorForNode($goid1),
+				color     => $self->_colorForNode($goid1));
+
+		# record that we've seen it
+
+		$foundNodeHashRef->{$goid1}++;
+
+	    }
+
+	    # if we have a parent, and we haven't yet recorded an edge
+	    # between the child and parent, let's add that
+
+	    if ($goid1 && $goid2 && 
+		!$foundEdgeHashRef->{$goid2."::".$goid1}) {
+
+		$self->_addEdge($goid2, $goid1);
+
+		# record that we've added the edge
+
+		$foundEdgeHashRef->{$goid2."::".$goid1}++;
+
+	    }
+
+	}
+
+	# record that something has been added to the graph (as it's
+	# possible that we may end up finding we've added everything
+
+	$found++;
+
+    }
+
+    # return whether something was added to the graph
+
+    return $found;
+    
+}
+
+################################################################
+sub _createAndShowImage {
+################################################################
+#
+# =head2 _createAndShowImage
+#
+# Title   : _createAndShowImage
+# Usage   : $self->_createAndShowImage();
+#           automatically called by showGraph() and createImage().
+# Function: To create the graph tree image. It will print the image to
+#           stdout if it is called by showGraph().
+# Returns : returns graphText file if the text format is changed. 
+#           returns image file name if called by createImage().
+#
+# =cut
+#
+#########################################################################
+
+    my ($self) = @_;
+
+    my ($width, $height);
+
+    # first thing we do is get the contents of the graph in text form.
+    # We will then use this text to create a gif or png image, with
+    # various boxes etc that have the coordinates as indicated by the
+    # text from the graph image.
+
+    # the actual contents of the text string will be something like:
+    
+    # digraph test {
+    #        node [label="\N", shape=box];
+    #        graph [bb="0,0,662,1012"];
+    #        node1 [label=" biological_process\nGO:GO:0008150", pos="342,986", width="1.97", height="0.50"];
+    #        node2 [label="pre-replicative\ncomplex\nformation and\n maintenance\nGO:GO:0006267", pos="162,122", width="1.69", height="1.17"];
+    #        node3 [label="DNA-dependent\n DNA replication\nGO:GO:0006261", pos="353,226", width="1.86", height="0.72"];
+    #        node4 [label=" DNA replication\nGO:GO:0006260", pos="353,306", width="1.86", height="0.50"];
+    #        node5 [label="DNA replication\nand chromosome\n cycle\nGO:GO:0000067", pos="299,394", width="1.83", height="0.94"];
+    #        node6 [label=" cell cycle\nGO:GO:0007049", pos="271,482", width="1.72", height="0.50"];
+    #        node7 [label="cell\n proliferation\nGO:GO:0008283", pos="271,586", width="1.67", height="0.72"];
+    #        node8 [label="cell growth\nand/or\n maintenance\nGO:GO:0008151", pos="271,706", width="1.61", height="0.94"];
+    #        node9 [label="cellular\nphysiological\n process\nGO:GO:0050875", pos="272,810", width="1.67", height="0.94"];
+    #        node10 [label="cellular\n process\nGO:GO:0009987", pos="272,906", width="1.69", height="0.72"];
+    #        node11 [label="physiological\n process\nGO:GO:0007582", pos="412,906", width="1.69", height="0.72"];
+    #        node12 [label=" DNA metabolism\nGO:GO:0006259", pos="422,482", width="1.97", height="0.50"];
+    #        node13 [label="nucleobase,\nnucleoside,\nnucleotide and\nnucleic acid\n metabolism\nGO:GO:0006139", pos="421,586", width="1.64", height="1.39"];
+    #        node14 [label=" metabolism\nGO:GO:0008152", pos="420,706", width="1.64", height="0.50"];
+    #        node15 [label=" 1: MCM4 MCM3 CDC6 MCM2", pos="119,26", width="3.31", height="0.50"];
+    #        node16 [label=" DNA unwinding\nGO:GO:0006268", pos="353,122", width="1.92", height="0.50"];
+    #        node17 [label=" 2: MCM4 MCM3 MCM2", pos="353,26", width="2.69", height="0.50"];
+    #        node18 [label="DNA replication\n initiation\nGO:GO:0006270", pos="534,122", width="1.78", height="0.72"];
+    #        node19 [label=" 3: MCM4 MCM3 MCM2", pos="565,26", width="2.69", height="0.50"];
+    #        node5 -> node4 [pos="e,342,324 320,360 325,351 331,341 337,332"];
+    #        node13 -> node12 [pos="e,422,500 421,536 421,527 422,517 422,509"];
+    #        node12 -> node4 [pos="e,360,324 415,464 402,433 377,369 363,333"];
+    #        node4 -> node3 [pos="e,353,252 353,288 353,280 353,271 353,262"];
+    #        node3 -> node2 [pos="e,223,155 305,200 283,187 256,173 232,160"];
+    #        node3 -> node16 [pos="e,353,140 353,200 353,184 353,165 353,149"];
+    #        node3 -> node18 [pos="e,489,148 399,200 423,186 454,168 480,153"];
+    #        node2 -> node15 [pos="e,127,44 143,80 139,70 135,61 131,52"];
+    #        node16 -> node17 [pos="e,353,44 353,104 353,90 353,69 353,53"];
+    #        node18 -> node19 [pos="e,559,44 542,96 547,82 552,66 556,53"];
+    #        node6 -> node5 [pos="e,288,428 277,464 279,457 282,447 285,438"];
+    #        node11 -> node14 [pos="e,419,724 413,880 415,842 418,773 419,734"];
+    #        node11 -> node9 [pos="e,322,844 374,880 361,871 345,860 330,850"];
+    #        node1 -> node11 [pos="e,389,932 358,968 365,959 374,949 382,940"];
+    #        node1 -> node10 [pos="e,295,932 326,968 319,959 310,949 302,940"];
+    #        node8 -> node7 [pos="e,271,612 271,672 271,656 271,638 271,621"];
+    #        node14 -> node13 [pos="e,420,636 420,688 420,677 420,661 420,647"];
+    #        node7 -> node6 [pos="e,271,500 271,560 271,544 271,525 271,509"];
+    #        node10 -> node9 [pos="e,272,844 272,880 272,872 272,863 272,854"];
+    #        node9 -> node8 [pos="e,271,740 272,776 272,768 271,758 271,749"];
+    # }
+
+    # a description of the dot language can be found at:
+    #
+    # http://www.research.att.com/~erg/graphviz/info/lang.html
+
+    #! why is this always done? -- mark
+    if (1){#defined $self->{MAKE_PS} && $self->{MAKE_PS}){
+
+	my $file = $self->{IMAGE_FILE};
+
+	$file =~ s/\.\w+$/\.ps/;
+
+	my $fh = IO::File->new($file, q{>} )|| die "Cannot create $file : $!";
+
+	print $fh $self->graph->as_ps;
+
+	$fh->close;
+
+    }
+
+    # support SVG output -- mark
+    if (defined $self->{MAKE_SVG} && $self->{MAKE_SVG}){
+	my $file = $self->{IMAGE_FILE};
+	$file =~ s/\.\w+$/\.svg/;
+	my $fh = IO::File->new($file, q{>} )|| die "Cannot create $file : $!";
+	print $fh $self->graph->as_svg;
+	$fh->close;
+    }
+
+    # hence we can determine the size of the image, the positions and sizes
+    # of every box, and how to draw the edges between the boxes 
+
+    my $graphText = $self->graph->as_text;
+#    {
+#    my $fh = IO::File->new("/tmp/gtf.dot", q{>} )|| die "Cannot create /tmp/gtf.dot : $!";
+#    print $fh $graphText;
+#    $fh->close;
+#    }
+
+    # the following line *may* fix reported problems when running on
+    # Windows, that I think are a result of dot.exe using CRLF line
+    # endings.
+
+    $graphText =~ s/\015?\012/\n/g;
+
+    # remove line continuations
+    $graphText =~ s/\\\n//g;
+    # replace any newlines not following ';' or '{' with space
+    $graphText =~ s/([^\;\{])\n\s*/$1 /g;
+#    print $graphText;
+
+    # if we can get the height and width, we'll get them
+
+#    if ($graphText =~ /graph \[bb=\"0,0,([0-9]+),([0-9]+)\" *\]\;/) {
+    if ($graphText =~ /graph \[bb=\"0,0,([0-9\.]+),([0-9\.]+)\"/) {
+       
+	$width  = $1 * $self->{WIDTH_DISPLAY_RATIO};  # round up?
+
+	$height = $2 * $self->{HEIGHT_DISPLAY_RATIO}; # round up?
+
+    }else {
+
+	# otherwise, we simply create a png image and we're done
+
+	# this seems to be undocumented - I'm not sure under what
+	# circumstances we can't actually get the height and width
+
+	$self->graph->as_png($self->{IMAGE_DIR}."goPath.$$.png");
+
+        return $self->{IMAGE_DIR}."goPath.$$.png";
+
+    }
+
+    my @graphLine = split(/\n/, $graphText);
+
+    # add borders sizes to the height and width
+
+    my $border = 25;
+    
+    my $mapWidth  = $width  + 2 * $border;
+
+    my $mapHeight = $height + 2 * $border;
+
+    my $keyY;
+
+    # now modify the height and width according to unclear rules....
+
+    if ($self->{PVALUE_HASH_REF_FOR_GOID} || !$self->{GENE_NAME_HASH_REF_FOR_GOID}) {
+
+	$keyY = $mapHeight;
+
+	# make the width to be at least the minimum acceptable width
+
+	if ($mapWidth < $self->{MIN_MAP_WIDTH}) { 
+
+	    $mapWidth = $self->{MIN_MAP_WIDTH}; 
+
+	}
+
+	# modify the height 
+
+	if (!$self->{GENE_NAME_HASH_REF_FOR_GOID}) {
+
+	    # if there are no genes annotated to nodes, use some
+	    # complex, opaque and unclear rule to change the height
+
+	    $mapHeight += int((length($self->{MAP_NOTE})*6/($mapWidth-100))*15) + 65;
+
+	}else {
+
+	    # otherwise just add 50, the 'magic number'...
+
+	    $mapHeight += 50;
+
+	}
+
+    }
+
+    # now create a GD image of the appropriate height and width
+
+    my $gd = GO::View::GD->new(width  => $mapWidth,
+			       height => $mapHeight);
+
+    # add a border, with a label and a date
+
+    $self->_drawFrame($gd, $mapWidth, $mapHeight);
+
+    my (@nodeLine, @edgeLine);
+
+    # now go through each line of the output from the graph->as_text method
+
+    foreach my $line (@graphLine) {
+
+	# in case line was continuated
+	# sometimes the break is just before ',', in which case the extra space needs to be removed
+	$line =~ s/\\(\s)/$1/g;
+	$line =~ s/\s,/,/g;
+
+	# now store type of entity (nodes vs edges) in different
+	# arrays
+
+#	if ($line =~ / *node[0-9]+ *\[.*(label=\".+\")/i) {
+	if ($line =~ /^\s*node[0-9]+\s*\[/) {
+
+	    # it's a node
+	    push(@nodeLine, $line);
+
+#	}elsif ($line  =~ / *node[0-9]+ *-> *node[0-9]+ \[pos=\"(.+)\"/i) {
+	}elsif ($line  =~ /^\s*node[0-9]+\s*->\s*node[0-9]+\s*\[pos=\"(.+)\"/i) {
+
+	    # it's an edge
+
+	    push(@edgeLine, $1);
+
+	}
+
+    }
+
+    # add the keys, which are either keys for the p-value colors, or a
+    # general description about GO terms and their annotations
+
+    if (exists $self->{PVALUE_HASH_REF_FOR_GOID} && 
+	$height > $self->{MIN_MAP_WIDTH_FOR_ONE_LINE_KEY}) {
+
+	### draw keys on the top of the map
+	if ($width < $self->{MIN_MAP_WIDTH_FOR_ONE_LINE_KEY}) {
+
+	    $self->{MOVE_Y} = 15;
+
+	}
+
+#	$self->_drawKeys($gd, $mapWidth, 5, 'isTop');
+
+    }
+
+    if ($self->{PVALUE_HASH_REF_FOR_GOID} || 
+	!$self->{GENE_NAME_HASH_REF_FOR_GOID}) {
+
+	$self->_drawKeys($gd, $mapWidth, $keyY);
+
+    }
+
+    # now draw the actual edges and nodes
+
+    # do the edges first
+    
+    foreach my $line (@edgeLine) {
+
+	$self->_drawEdge($gd, $height, $border, $line);
+
+    }
+
+    # and now the nodes
+
+    foreach my $line (@nodeLine) {
+
+	$self->_drawNode($gd, $height, $border, $line);
+
+    }
+
+    # now output the image to a file
+
+    my $imageFile = $self->{IMAGE_FILE}; 
+    my $imageUrl  = $self->{IMAGE_URL};
+
+    my $fh = IO::File->new($imageFile, q{>} )|| die "Cannot create $imageFile : $!";
+
+    binmode $fh;
+
+    if ($gd->im->can('png')) {
+
+	print $fh $gd->im->png;
+
+    }else {
+
+	print $fh $gd->im->gif;
+
+    }
+
+    $fh->close;
+
+    if ($self->{CREATE_IMAGE_ONLY}) {
+	
+	return $imageFile;
+
+    }else{
+
+	my $map = $gd->imageMap;
+
+	if (defined ($map)){
+
+	    $self->{IMAGE_MAP} = 
+		
+		"<MAP NAME='goPathImage'>\n".
+		$gd->imageMap.
+		"</MAP>".
+		"<center><img src='$imageUrl' usemap='#goPathImage'></center><p>\n";
+
+	}
+
+    }
+
+}
+
+######################################################################
+sub _drawNode {
+######################################################################
+#
+# =head2 _drawNode
+#
+# Title   : _drawNode
+# Usage   : $self->_drawNode($gd, $height, $border, $line);
+#           automatically called by _createAndShowImage().
+# Function: To draw each node.
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $gd, $height, $border, $line) = @_;
+
+    # let's let's extract the information from the line, e.g. which may look like:
+    #
+    #        label="MCM4 MCM3 MCM2", pos="565,26", width="2.69", height="0.50"
+    #
+    # or:
+    #
+    #        label=MET28, color=grey65, fillcolor=grey65, fontcolor=blue, pos="735,561", width="0.92", height="0.50"
+    #
+    # or:
+    #
+    #        label=" DNA unwinding\nGO:GO:0006268", pos="353,122", width="1.92", height="0.50"
+    #
+    # depending on whether it's genes annotating a GO node, or a box
+    # representing a GO Node itself
+
+    # get the label - may or may not be surrounded in quotes
+
+    my $label;
+
+    if ($line =~ /label=\"([^\"]*)\"/){
+    
+	$label = $1;
+
+    }elsif ($line =~ /label=(.+?), /){
+
+	$label = $1;
+	
+    }
+
+    my @label = split(/\\n/, $label);
+
+    # get the width and height, and convert to number of pixels - I
+    # don't know where the use of the value '60' comes from.  The documentation for graphviz says that
+    # the default scale is actually 72 pixels per inch...
+
+    $line =~ /width=\"?([\d\.]+)/;
+
+    my $boxW = $1 * 60 + 5;  # tweak - mark
+
+    $line =~ /height=\"?([\d\.]+)/;
+
+    my $boxH = $1 * 55;   # tweak - mark
+
+    # remove some arbitrary amount based on the label - presumably
+    # this is because we're removing the GO id from the label?
+
+    if (!$self->{MOVE_Y}) { 
+
+	$self->{MOVE_Y} = 0;
+
+    }
+
+    # now get the coordinates of the center of box for the node, and
+    # use that to work out the coordinate of the bounding box for the
+    # node.
+
+    $line =~ /pos=\"([0-9\.]+),([0-9\.]+)\"/;
+
+    my $x1 = int($1 * $self->{WIDTH_DISPLAY_RATIO}-$boxW/2 + $border);
+
+    my $y1 = int($height - $2 * $self->{HEIGHT_DISPLAY_RATIO} - $boxH/2 + $border + $self->{MOVE_Y});
+
+    my $x2 = $x1 + $boxW;
+
+    my $y2 = $y1 + $boxH;
+
+    # Retrieve stored goid, which we now save in
+    # a hash, rather than in the label. -- mark
+    my $goid = $self->{LABEL_TO_GOID}->{$label};
+
+    my $geneNum;
+    my $totalGeneNum;
+    my $barColor;
+    my $outline;
+    my $linkUrl;
+
+    # now work out the color for the box, and work out URL links
+    # for the nodes
+    
+    if ($self->{PVALUE_HASH_REF_FOR_GOID} && $goid) {
+	
+	# we must be dealing with GO::TermFinder output for a goid
+	
+	$barColor = $self->_getBoxColor($gd, $goid);
+	
+	if (!$self->{CREATE_IMAGE_ONLY}) {
+	    
+	    $linkUrl = $self->{GO_URL};
+	    
+	    $linkUrl =~ s/$kReplacementText/$goid/ if $linkUrl;
+	    
+	}
+	
+    }elsif ($goid) {
+	
+	# non GO::TermFinder output for a GOID
+	
+	my $node = $self->_ontologyProvider->nodeFromId($goid);
+	
+	if ($node && $node->childNodes) {
+	    
+	    $barColor = $gd->lightBlue;
+	    
+	    if (!$self->{CREATE_IMAGE_ONLY}) {
+		
+		$linkUrl = $self->{GO_URL};
+		
+		$linkUrl =~ s/$kReplacementText/$goid/ if $linkUrl;
+		
+	    }
+	    
+	}else {
+	    
+	    # the box isn't representing a GOID, e.g. annotating genes
+	    
+	    $barColor = $gd->grey;
+	    
+	}
+	
+    }else { 
+	
+	$barColor = $gd->grey;
+	
+    }
+    
+    
+    my $onInfoText;
+    
+    if (( $self->{TOP_GOID} && $goid && $goid eq $self->{TOP_GOID}) ||
+	(!$self->{TOP_GOID} && $goid && $goid eq $self->_goid)) {
+	
+	$self->_drawUpArrow($gd, $goid, ($x1+$x2)/2-7, 
+			    ($x1+$x2)/2+7, $y1-15, 10, 
+			    $linkUrl);
+	
+    }
+    
+    # now draw the box itself
+    $gd->drawBar(barColor   => $barColor,
+		 numX1      => $x1,
+		 numX2      => $x2,
+		 numY       => $y1,
+		 linkUrl    => $linkUrl,
+		 barHeight  => $boxH,
+		 outline    => $outline,
+		 onInfoText => $onInfoText);
+	
+    
+    # and now add the label to the box, one line at a time
+    
+    my $i = 0;
+    my $y3 = $y1 + int(($boxH - 12*(@label)) / 2) - 2;  # center - mark
+    
+    foreach my $label (@label) {
+	
+	# skip if it's a GOID or is blank
+	
+	next if (!$label || $label =~ /^GO:/i);
+
+	if (!$goid) {
+	    
+	    $label =~ s/[0-9]+://i;
+	    
+	}
+	
+	my $nameColor = $gd->black;
+	
+	if (!$goid) {
+	    
+	    $nameColor = $gd->blue;
+	    
+	}elsif ($goid eq $self->_goid) {
+	    
+	    $nameColor = $gd->red;
+	    
+	}
+	
+	my $startPixel = int(($boxW - length($label)*6)/2);
+	my $numX1 = $x1 + $startPixel;
+#	my $numY1 = $y1 + $i*12;
+	my $numY1 = $y3 + $i*12;
+	
+	if ($goid) {
+	    
+	    # the box we're labeling is for a goid
+	    
+	    $gd->drawName(name      => $label,
+			  nameColor => $nameColor,  
+			  numX1     => $numX1,
+			  numY      => $numY1);
+	    
+	}else {
+	    
+	    # we're adding in a list of genes
+	    
+	    my @gene = split(' ', $label);
+	    
+	    # add in each one - the image map being generated by
+	    # the GO::View::GD object will have the relevant
+	    # information added to support linking genes to it.
+
+	    # go through each gene
+	    
+	    foreach my $gene(@gene) {
+		
+		my $linkUrl;
+		
+		if (!$self->{CREATE_IMAGE_ONLY} && $self->{GENE_URL}) {
+		    
+		    $linkUrl = $self->{GENE_URL};
+		    
+		    $linkUrl =~ s/$kReplacementText/$gene/;
+		    
+		}
+		
+		# add the gene name
+		
+		$gd->drawName(name      => $gene,
+			      nameColor => $nameColor,
+			      linkUrl   => $linkUrl,
+			      numX1     => $numX1,
+			      numY      => $numY1);
+		
+		$numX1 += (length($gene)+1)*6;
+		
+	    }
+	    
+	}
+	
+	$i++;
+	
+    }
+    
+    # I think this is supposed to say something about how many
+    # genes are annotated to a given node, but am not sure that
+    # $geneNum ever gets defined...
+    
+    if ($geneNum) {
+	
+	my $label = $geneNum." gene";
+	
+	if ($totalGeneNum != 1) {
+	    
+	    $label .= "s";
+	    
+	}
+	
+	my $startPixel = int(($boxW - length($label)*6)/2);
+	
+	my $numX1 = $x1 + $startPixel+2;
+	
+	my $numY1 = $y1 + $i*10+2;
+	
+	$gd->drawName(name      => $label,
+		      nameColor => $gd->maroon,  
+		      numX1     => $numX1,
+		      numY      => $numY1);
+	
+    }
+    
+}
+
+######################################################################
+sub _drawEdge {
+######################################################################
+#
+# =head2 _drawEdge
+#
+# Title   : _drawEdge
+# Usage   : $self->_drawEdge($gd, $height, $border, $line);
+#           automatically called by _createAndShowImage().
+# Function: To draw each edge.
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $gd, $height, $border, $line) = @_;
+
+    # $line will contain something like:
+    #
+    # 342,324 320,360 325,351 331,341 337,332
+    #
+    # where each pair of coordinates defines a point on the line.
+    # Thus to draw the line, we simply connect the points.
+
+    # rather than crooked edges, draw splines -- mark
+    my @points = split(/ /, $line);
+    my $polyline = GD::Polyline->new;
+    my $spline;
+
+    for (my $p = 0; $p <= $#points; $p++) {
+
+	my $point = $points[$p];
+
+	my ($x, $y) = split(/\,/, $point);
+	
+	# sometimes we get trailing '\' etc.
+	my $x1 = $x;
+	my $y1 = $y;
+
+	$x =~ s/[^\d\.]//g;
+	$y =~ s/[^\d\.]//g;
+
+#	print "BEFORE '$x1', '$y1', AFTER '$x', '$y'\n" if (($x1 ne $x) || ($y1 ne $y));
+
+	$x *= $self->{WIDTH_DISPLAY_RATIO};
+
+	$x += $border;
+
+	if (!defined $self->{MOVE_Y}) { $self->{MOVE_Y} = 0; }
+
+#	$y = $height - $y*$self->{HEIGHT_DISPLAY_RATIO} + $border +
+#	     $self->{MOVE_Y} + 5;
+	$y = $height - $y*$self->{HEIGHT_DISPLAY_RATIO} + $border +
+	    $self->{MOVE_Y};
+
+	$polyline->addPt($x, $y);
+    }
+
+    if ($#points % 3 == 0) {
+	$spline = $polyline->toSpline();
+	$gd->im->polydraw($spline, $gd->black);
+    } else {
+	$gd->im->polydraw($polyline, $gd->black);
+    }
+
+# this is the original routine -- mark
+#    my @point = split(/ /, $line);
+#
+#    # get rid of everything prior to the first point
+#    
+#    my ($preX, $preY);
+#
+#    # now go through each point
+#
+#    foreach my $point (@point) {
+#
+#	my ($x, $y) = split(/\,/, $point);
+#
+#	# modify the x coorfinate to take account of the border and
+#	# scaling factor
+#
+#	$x *= $self->{WIDTH_DISPLAY_RATIO};
+#
+#	$x += $border;
+#
+#	if (!defined $self->{MOVE_Y}) { $self->{MOVE_Y} = 0; }
+#
+#	# modify the y coordinate based on the scaling factor, the border
+#	# the 'MOVE_Y' (whatever that is) and an arbitrary 5
+#
+#	$y = $height - $y*$self->{HEIGHT_DISPLAY_RATIO} + $border +
+#	     $self->{MOVE_Y} + 5;
+#
+#	# now if we have a prior x and y coordinate, we can draw a
+#	# line from it to the current point
+#
+#	if ($preX && $preY) {
+#
+#	    $gd->im->line($x, $y, $preX, $preY, $gd->black);
+#
+#	}
+#
+#	# remember these coordinates for the next time through the loop
+#
+#	$preX = $x;
+#
+#	$preY = $y;
+#
+#    }
+
+}
+
+#################################################################
+sub _drawUpArrow {
+#################################################################
+#
+# =head2 _drawUpArrow
+#
+# Title   : _drawUpArrow
+# Usage   : $self->_drawUpArrow($gd, $goid, $X1, $X2, $Y, $barHeight,
+#                               $linkUrl);
+#           automatically called by _drawNode().
+# Function: To draw an up arrow on the tree map. 
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $gd, $goid, $X1, $X2, $Y, $barHeight, 
+	$linkUrl) = @_;
+
+    my $node = $self->_ontologyProvider->nodeFromId($goid);
+
+    my $maxGenerationUp = $node->lengthOfLongestPathToRoot;
+
+    if ($maxGenerationUp <= 1) { return; } 
+
+    if (!$self->{CREATE_IMAGE_ONLY} && $linkUrl) {
+    
+	$linkUrl .= "&tree=up";
+    
+    }
+    $gd->drawBar(barColor  => $gd->blue,
+		 numX1     => $X1,
+		 numX2     => $X2,
+		 numY      => $Y,
+		 linkUrl   => $linkUrl,
+		 barHeight => $barHeight,
+		 outline   => 1,
+		 arrow     => 'up');
+
+}
+
+######################################################################
+sub _drawKeys {
+######################################################################
+#
+# =head2 _drawKeys
+#
+# Title   : _drawKeys
+# Usage   : $self->_drawKeys($gd, $mapWidth, $keyY, $isTop);
+#           automatically called by _createAndShowImage().
+# Function: To draw the display keys on the top or bottom of the tree map. 
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $gd, $mapWidth, $keyY, $isTop) = @_;
+    
+    if (!$self->{GENE_NAME_HASH_REF_FOR_GOID}) {
+	
+	my $y = $keyY;
+	
+	my $boxH = 10;
+	
+	my $startX = 50;
+	
+	$gd->drawBar(barColor  => $gd->lightBlue,
+		     numX1     => $startX,
+		     numX2     => $startX + 20,
+		     numY      => $y,
+		     barHeight => $boxH);
+
+	my $numX1 = $startX + 20;
+
+	$gd->drawName(name      => " = GO term with child(ren)",
+		      nameColor => $gd->black,  
+		      numX1     => $numX1,
+		      numY      => $y-2);
+
+	$y += 15;
+
+	$gd->drawBar(barColor  => $gd->grey,
+		     numX1     => $startX,
+		     numX2     => $startX + 20,
+		     numY      => $y,
+		     barHeight => $boxH);
+
+	$gd->drawName(name      => " = GO term with no child(ren)",
+		      nameColor => $gd->black,  
+		      numX1     => $numX1,
+		      numY      => $y-2);
+
+	my $maxTextLen = int(($mapWidth-2*$startX)/6);
+
+	my $text = $self->_processLabel($self->{MAP_NOTE}, $maxTextLen);
+
+	my @geneNumExample = split(/\12/, $text);
+	
+	$y += 15;
+
+	foreach my $text (@geneNumExample){
+
+	    $text =~ s/^ *//;
+
+	    $gd->drawName(name      => $text,
+			  nameColor => $gd->black,  
+			  numX1     => $startX,
+			  numY      => $y-2);
+
+	    $y += 15;
+
+	}
+ 
+	return;
+
+    }
+
+    my $y1 = $keyY;
+
+    my $boxH = 15;
+
+    my $boxW = 88;
+
+    my $startX = 48 + ($mapWidth - $boxW*6 - 35 - 48)/2;
+    
+    my $twoLine;
+
+    if ($startX < 48) {
+
+	$startX = 48 + ($mapWidth - $boxW*3 - 15 - 48)/2;
+
+	$twoLine = 1;
+
+	if (!$isTop) { $y1 -= 10; }
+
+    }
+
+    ####### new code
+
+    if ($isTop) {
+
+	#$startX = 48;
+
+    }
+
+    $gd->drawName(name      => 'pvalue:',
+		  nameColor => $gd->black,  
+		  numX1     => 10,
+		  numY      => $y1+1);
+    
+    my $i;
+    my $preX2 = $startX;
+
+    foreach my $name ('<=1e-10', '1e-10 to 1e-8', '1e-8 to 1e-6', 
+			'1e-6 to 1e-4', '1e-4 to 1e-2', '>0.01') {
+	
+	$i++;
+
+	my $pvalue = $name;
+
+	$pvalue =~ s/^<=//;
+
+	$pvalue =~ s/^>0.01/1/;
+
+	$pvalue =~ s/^.+ to (.+)$/$1/;
+
+	my $barColor = $self->_color4pvalue($gd, $pvalue);
+
+	if ($i == 4 && $twoLine) {
+	    $preX2 = $startX;
+	    $y1 += 20;
+	}
+
+	my $x1 = $preX2 + 5; 
+
+	my $x2 = $x1 + $boxW;
+
+	$gd->drawBar(barColor  => $barColor,
+	 	     numX1     => $x1,
+	 	     numX2     => $x2,
+	 	     numY      => $y1,
+	 	     barHeight => $boxH);
+
+	my $numX1 = $x1 + ($boxW-length($name)*6)/2;
+
+        my $numY1 = $y1 + 2;
+
+	$gd->drawName(name      => $name,
+		      nameColor => $gd->black,  
+		      numX1     => $numX1,
+		      numY      => $numY1);
+
+	$preX2 = $x2;
+
+    }
+
+}
+
+######################################################################
+sub _drawFrame {
+######################################################################
+#
+# =head2 _drawFrame
+#
+# Title   : _drawFrame
+# Usage   : $self->_drawFrame($gd, $width, $height);
+#           automatically called by _createAndShowImage().
+# Function: To draw a frame around the image map with date and label 
+#           on the bottom corner. 
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $gd, $width, $height) = @_;
+
+    $gd->drawFrameWithLabelAndDate(width  => $width,
+				   height => $height,
+				   text   => $self->{IMAGE_LABEL});
+
+}
+
+################################################################
+sub _createGraphObject {
+################################################################
+#
+# =head2 _createGraphObject
+#
+# Title   : _createGraphObject
+# Usage   : my $self->_createGraphObject();
+#           automatically called by _createGraph().
+# Function: Gets newly created empty GraphViz instance. 
+# Returns : newly created empty GraphViz instance.
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self) = @_;
+
+    my %args;
+
+    if (defined $self->{MAKE_PS} && $self->{MAKE_PS}){
+
+	%args = (width => 7.5,
+		 height => 10,
+		 pagewidth => 8.5, 
+		 pageheight => 11.5);
+
+    }
+
+    $self->{GRAPH} = GraphViz->new(node => { shape => 'box',
+					     style => 'filled' },
+				   edge => { arrowhead => 'none' },
+				   overlap => 'false',
+				   name => "GOTermFinder",
+#				   mindist => 20,
+				   %args);
+
+}
+
+################################################################
+sub _addNode {
+################################################################
+#
+# =head2 _addNode 
+#
+# Title   : _addNode 
+# Usage   : $self->_addNode($goid);
+#           automatically called by _createGraph().
+# Function: Adds node to the GraphViz instance. 
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $goid, %args) = @_;
+    
+    my $label;
+
+    my $stdGoid;  # moved up in scope -- mark
+
+    if ($goid !~ /^GO:[0-9]+$/) {
+
+	# if the label is not a goid (i.e. it's a list of genes that
+	# annotate a term), we'll process it, so that there are no
+	# lines longer than 30 characters
+
+	$label = $self->_processLabel($goid, 30);
+
+    }else{
+
+	# otherwise, we'll get the term for the goid
+	
+	$label = $self->{TERM_HASH_REF_FOR_GOID}->{$goid};
+	
+	if (!$label) {
+	    
+	    # if we didn't get a term, we'll go back to the
+	    # ontologyProvider to get one
+	    
+	    my $node = $self->_ontologyProvider->nodeFromId($goid);
+	    
+	    $label = $node->term;
+	    
+	}
+	
+	# reformat the goid
+	
+	if (!$self->{PVALUE_HASH_REF_FOR_GOID}) {
+	    
+	    $stdGoid = $self->_formatGoid($goid);
+	    
+	}else {
+	    
+	    $stdGoid = $goid;
+	    
+	}
+	
+	# append the goid to the processed label
+	
+	if ($label) {
+	    
+# See below for why this was changed. -- mark
+	    $label = $self->_processLabel($label);
+#	    $label = $self->_processLabel($label)."\n".$stdGoid;
+	    
+	}else { 
+	    
+	    $label = $stdGoid;
+	    
+	}
+
+    }
+
+    if ($goid eq $self->_goid) {
+	# remove the underscore from the root term
+	$label =~ s/_/ /;
+    }
+   
+# As seen above, previously the goid was stored in the
+# node label, which then appeared in SVG output.
+# Rather than storing information in the label,
+# we save it in a hash. -- mark
+#! this assumes all labels will be unique
+    my $modlabel = $label;
+    $modlabel =~ s/\n/\\n/g;
+    $self->{LABEL_TO_GOID}->{$modlabel} = $stdGoid;
+
+    # now add the node to the graph, with the appropriate label
+    
+    # helvetica 15 closely matches gdSmallFont
+    $self->graph->add_node($goid,
+			   label => $label,
+			   fontsize => 15,
+			   fontname => "helvetica",
+			   %args);
+  
+
+}
+
+################################################################
+sub _addEdge {
+################################################################
+#
+# =head2 _addEdge 
+#
+# Title   : _addEdge 
+# Usage   : $self->_addEdge($parentGoid, $childGoid);
+#           automatically called by _createGraph().
+# Function: Adds edge to the GraphViz instance. 
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $parentGoid, $childGoid) = @_;
+
+    $self->graph->add_edge($parentGoid => $childGoid);
+
+}
+
+########################################################################
+sub _descendantCount {
+########################################################################
+#
+# =head2 _descendantCount
+#
+# Title   : _descendantCount
+# Usage   : my $nodeCount = 
+#                $self->_descendantCount($goid, $generationDown);
+#           automatically called by _createGraph().
+# Function: Gets total descendant node number down to a given generation. 
+# Returns : The total descendant node number down to a given generation.
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $goid, $generationDown) = @_;
+
+    my $node = $self->_ontologyProvider->nodeFromId($goid);
+
+    my %descendantCount4generation;
+
+    $self->_descendantCount4generation($node, \%descendantCount4generation); 
+
+    my $nodeNum = 0;
+    
+    foreach my $generation (1..$generationDown) {
+
+	$nodeNum += $descendantCount4generation{$generation};
+	
+    }
+
+    return $nodeNum;
+    
+}
+
+########################################################################
+sub _setGeneration {
+#######################################################################
+#
+# =head2 _setGeneration
+#
+# Title   : _setGeneration
+# Usage   : $self->_setGeneration($goid);
+#           automatically called by _createGraph().
+# Function: Sets the maximum generation number it will display.
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $goid) = @_;
+ 
+    my $node = $self->_ontologyProvider->nodeFromId($goid);
+
+    my %descendantCount4generation;
+
+    $self->_descendantCount4generation($node, \%descendantCount4generation); 
+    
+    my $nodeNum = 0; 
+    my $preNodeNum = 0;
+    
+    foreach my $generation (sort {$a<=>$b} (keys %descendantCount4generation)) {
+
+	$nodeNum += $descendantCount4generation{$generation};
+
+	if ($nodeNum == $self->{MAX_NODE}) { 
+
+	    $self->{GENERATION} = $generation;
+
+	    $self->{NODE_NUM} = $nodeNum;
+
+	    last;
+
+	}
+
+	if ($nodeNum > $self->{MAX_NODE}) {
+
+	    $self->{GENERATION} = $generation-1;
+
+	    $self->{NODE_NUM} = $preNodeNum;
+
+	    last;
+
+	}
+	
+        $preNodeNum = $nodeNum;
+
+    }
+
+    if (!$self->{GENERATION} || $self->{GENERATION} < 1) { 
+
+	$self->{GENERATION} = 1;
+
+	if (!$node->childNodes) {
+
+	    $self->{TREE_TYPE} = 'up';
+ 
+	}
+
+    }
+
+}
+
+########################################################################
+sub _descendantCount4generation {
+########################################################################
+#
+# =head2 _descendantCount4generation
+#
+# Title   : _descendantCount4generation
+# Usage   : $self->_descendantCount4generation($node, 
+#                                              $nodeCountHashRef,
+#                                              $generation);
+#           automatically called by _descendantCount(),
+#                                   _setGeneration(), and itself.
+# Function: Gets the descebdant count for each generation.
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $node, $nodeCountHashRef, $generation) = @_;
+    
+    if (!$generation) { $generation = 1; }
+    
+    my @childNodes = $node->childNodes;
+
+    $nodeCountHashRef->{$generation} += scalar(@childNodes);
+
+    foreach my $childNode ($node->childNodes) {
+
+	$self->_descendantCount4generation($childNode, 
+					   $nodeCountHashRef, 
+					   $generation++);
+
+    }
+
+}
+
+################################################################
+sub _setTopGoid {
+################################################################
+#
+# =head2 _setTopGoid
+#
+# Title   : _setTopGoid
+# Usage   : $self->_setTopGoid();
+#           automatically called by _createGraph().
+# Function: Sets the top ancestor goid. We want to display the 
+#           tree up to this goid.  
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self) = @_;
+
+    my $node = $self->_ontologyProvider->nodeFromId($self->_goid);
+
+    my $maxGenerationUp = $node->lengthOfLongestPathToRoot - 1;
+
+    my @pathsToRoot = $node->pathsToRoot;
+
+    my %count4goid;
+    my %generation4goid;
+
+    my $pathNum = scalar(@pathsToRoot);
+
+    foreach my $path (@pathsToRoot) {
+
+	my @nodeInPath = reverse(@{$path});
+
+	my $generation = 0;
+
+	foreach my $node (@nodeInPath) {
+
+	    $count4goid{$node->goid}++;
+
+	    $generation++;
+
+	    if (!$generation4goid{$node->goid} || 
+		$generation4goid{$node->goid} < $generation) {
+
+		$generation4goid{$node->goid} = $generation;
+
+	    }
+
+	    if ( $pathNum == $count4goid{$node->goid} ) {
+		### the same goid appears on all the paths.
+		
+		$self->{TOP_GOID} = $node->goid;
+		
+		$self->{GENERATION} = $generation4goid{$node->goid};
+
+		last;
+
+	    }
+ 
+	}
+
+    }
+
+}
+
+################################################################
+sub _initGoidFromOntology {
+################################################################
+#
+# =head2 _initGoidFromOntology
+#
+# Title   : _initGoidFromOntology
+# Usage   : my $goid = $self->_initGoidFromOntology;
+#           automatically called by _init().
+# Function: Gets the top goid for the given ontology  
+#           (biological_process, molecular_function, or 
+#           cellular_component)
+# Returns : goid
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self) = @_;
+
+    # gene_ontology super node
+
+    my $rootNode = $self->_ontologyProvider->rootNode;
+
+    # node for molecular_function, biological_process, or 
+    # cellular_component 
+
+    my ($topNode) = $rootNode->childNodes;
+
+    return $topNode->goid;
+
+}
+
+#######################################################################
+sub _initPvaluesGeneNamesDescendantGoids {
+#######################################################################
+#
+# =head2 _initPvaluesGeneNamesDescendantGoids
+#
+# Title   : _initPvaluesGeneNamesDescendantGoids
+# Usage   : $self->_initPvaluesGeneNamesDescendantGoids;
+#           automatically called by _init().
+# Function: Sets $self->{PVALUE_HASH_REF_FOR_GOID}, 
+#           $self->{GENE_NAME_HASH_REF_FOR_GOID},
+#           and $self->{DESCENDANT_GOID_ARRAY_REF} 
+# Returns : void
+#           
+# =cut
+#
+#########################################################################
+
+# This method reads through all of the hypotheses that came from the
+# TermFinder analysis, and records various information that is
+# subsequently used to construct the image.
+
+# TODO - naming of the $pvalue variable and _termFinder methods is
+# very poor, and misleading.  This should be changed.
+#
+# TODO - currently, when we compare the maxTopNodeToShow, we shouldn't
+# increment the count when a node that is being added is the parent or
+# child of a node already on the graph.
+
+    my ($self) = @_;
+ 
+    my %foundGoid;
+    my %foundGoidGene;
+
+    my @directAnnotatedGoid;
+    my %loci4goid;
+    my %pvalue4goid;
+
+    # $maxTopNodeToShow is used to limit the size of the output graph.
+    # The default value (set during initialization is 6, but can be
+    # modified via a user argument
+
+    # LINDA'S CHANGES
+#    my $maxTopNodeToShow = $self->{MAX_TOP_NODE_TO_SHOW};
+    my $maxChildGoidNum = $self->{MAX_CHILD_GOID_NUM};
+
+    my $maxTopNodeToShow = $self->{MAX_TOP_NODE_TO_SHOW};
+
+    my $count = 0;
+
+    # go through each hypothesis
+
+    foreach my $pvalue (@{$self->_termFinder}){
+
+	# map the goid to the corrected p-value for later retrieval
+	# when deciding on what color a node should have
+
+	$pvalue4goid{$pvalue->{NODE}->goid} = $pvalue->{CORRECTED_PVALUE};
+
+	# skip if it has a p-value worse than our threshold note
+
+	next if ($pvalue->{CORRECTED_PVALUE} > $self->{PVALUE_CUTOFF});
+        
+	# skip if we've exceeded the maxTopNodeToShow value
+	#
+	# NB, we do 'next', rather than 'last', so that the mapping of the
+	# goid to p-value still gets recorded for any subsequent nodes, which
+	# ensures that their colors will be correct
+
+	next if ($count >= $maxTopNodeToShow);
+
+	# grab the GO::Node object for this hypothesis
+
+	my $ancestorNode = $pvalue->{NODE};
+
+	# now we go through the list of genes directly annotated to
+	# this node and get every goid to which any of those genes are
+	# annotated - thus we'll build up a list of all nodes to which
+	# the genes are annotated.  We'll also record which nodes are
+	# directly annotated by the genes
+
+	foreach my $databaseId (keys %{$pvalue->{ANNOTATED_GENES}}) {
+
+	    # get every goid that the gene maps to.
+
+	    my $goidArrayRef = $self->_annotationProvider->goIdsByDatabaseId(databaseId => $databaseId,
+									     aspect     => $self->{ASPECT});
+
+	    # get the name of the gene, as it was passed to TermFinder
+
+	    my $gene = $pvalue->{ANNOTATED_GENES}->{$databaseId};
+	
+	    # now go through each goid that the gene was annotated to
+
+	    foreach my $goid (@{$goidArrayRef}) {
+
+		# get a GO::Node object for it
+
+		my $node = $self->_ontologyProvider->nodeFromId($goid);
+
+		# the following check is probably superfluous, but
+		# there may be cases where a node in the annotation
+		# provider is not in the ontology provider, so we just
+		# ignore them
+
+		next if (!$node);
+
+		# We only want to keep information about genes
+		# directly annotated to this node if it is a
+		# descendant of the '$ancestorNode', which we know
+		# passes the p-value cut-off, or if it is the node
+		# itself.  In this way, we only record nodes to which
+		# our genes of interest are annotated if they have
+		# contributed to the count associated with the
+		# significant '$ancestorNode', and thus prune the
+		# tree, as we don't show all nodes to which any of the
+		# genes are annotated.
+
+		next unless ($node->isADescendantOf($ancestorNode) || $goid eq $ancestorNode->goid);
+
+		# now record some information about the this goid and the gene
+
+		if (!exists $foundGoidGene{$goid."::".$gene}) {  
+
+		    # record the genes in the list that are annotated
+		    # to this node
+
+		    $loci4goid{$goid} .= ":".$gene;
+
+		    # and remember that we've recorded this annotation
+
+		    $foundGoidGene{$goid."::".$gene}++;
+
+		}
+
+		# skip if we've already seen the goid
+
+		next if ($foundGoid{$goid});
+
+		# record the goid as directly annotated by a gene of interest
+
+#		push(@directAnnotatedGoid, $goid);
+		# -- mark (from Linda)
+		# shouldn't this be if !defined(max) || (# < max)?
+		if ((defined($maxChildGoidNum)) && (scalar(@directAnnotatedGoid) < $maxChildGoidNum)) {
+		    push(@directAnnotatedGoid, $goid);
+		}
+	    
+		# and remember that we've seen it
+
+		$foundGoid{$goid}++;
+
+	    }
+
+	}
+
+	$count++; # keep a count of the number of nodes that exceed the cutoff
+   
+    }
+
+    # now record all of our information within ourselves
+
+    $self->{DESCENDANT_GOID_ARRAY_REF}   = \@directAnnotatedGoid;
+    $self->{PVALUE_HASH_REF_FOR_GOID}    = \%pvalue4goid;
+    $self->{GENE_NAME_HASH_REF_FOR_GOID} = \%loci4goid;
+
+    # and return the number of top nodes recorded, which is used to
+    # decide whether to print the graph or not.
+
+    return $count;
+
+}
+
+##########################################################################
+sub _initVariablesFromConfigFile {
+##########################################################################
+    my ($self, $configFile) = @_;
+
+    my $fh = IO::File->new($configFile, q{<}) || die "Can't open '$configFile' for reading : $!";
+
+    while(<$fh>) {
+
+	chomp;
+
+	# skip comments, blank, and whitespace only lines
+
+	if (/^\#/ || /^\s*$/) { next; }
+	
+	my ($name, $value) = split(/=/);
+
+	$value =~ s/^ *(.+) *$/$1/;
+	
+	if ($name =~ /^maxNode/i) {
+
+	    $self->{MAX_NODE} = $value;
+
+	}elsif ($name =~ /^maxNodeNameWidth/i) {
+
+	    $self->{MAX_NODE_NAME_WIDTH} = $value;
+
+	}elsif ($name =~ /^widthDisplayRatio/i) {
+
+	    $self->{WIDTH_DISPLAY_RATIO} = $value;
+
+	}elsif ($name =~ /^heightDisplayRatio/i) {
+
+	    $self->{HEIGHT_DISPLAY_RATIO} = $value;
+
+        }elsif ($name =~ /^minMapWidth\b/i) { # need the \b, as it is a substring of minMapWidth4OneLineKey
+
+	    $self->{MIN_MAP_WIDTH} = $value;
+
+	}elsif ($name =~ /^minMapHeight4TopKey/i) {
+
+	    $self->{MIN_MAP_HEIGHT_FOR_TOP_KEY} = $value;
+
+	}elsif ($name =~ /^minMapWidth4OneLineKey/i) {
+
+	    $self->{MIN_MAP_WIDTH_FOR_ONE_LINE_KEY} = $value;
+
+	}elsif ($name =~ /^mapNote/i) {
+
+	    $self->{MAP_NOTE} = $value;
+    
+	}elsif ($name =~ /^binDir/i) {
+
+	    $ENV{PATH} .= ":".$value;
+
+	}elsif ($name =~ /^libDir/i) {
+
+	    $ENV{LD_LIBRARY_PATH} .= ":".$value;
+
+	}elsif ($name =~ /^makePs/i){
+
+	    $self->{MAKE_PS} = $value;
+
+	}elsif ($name =~ /^makeSvg/i){   # add SVG support -- mark
+
+	    $self->{MAKE_SVG} = $value;
+
+	}
+    
+    }
+
+    $fh->close;
+
+}
+
+################################################################
+sub _getBoxColor {
+################################################################
+#
+# =head2 _getBoxColor
+#
+# Title   : _getBoxColor
+# Usage   : my $boxColor = $self->_getBoxColor($gd, $goid);
+#           automatically called by _drawNode().
+# Function: Gets the color for the node box in the display.
+# Returns : gd color
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $gd, $goid) = @_;
+    
+    if ($self->{PVALUE_HASH_REF_FOR_GOID} && 
+	$self->{PVALUE_HASH_REF_FOR_GOID}->{$goid}) {
+
+	return $self->_color4pvalue($gd, 
+				    $self->{PVALUE_HASH_REF_FOR_GOID}->{$goid});
+     
+    }
+
+    return $gd->tan;
+
+}
+
+################################################################
+sub _color4pvalue {
+################################################################
+#
+# =head2 _color4pvalue
+#
+# Title   : _color4pvalue
+# Usage   : my $boxColor = $self->_color4pvalue($gd, $pvalue);
+#           automatically called by _drawKeys() and _getBoxColor().
+# Function: Gets the color for the node box in the display.
+# Returns : gd color
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $gd, $pvalue) = @_;
+
+#! Different packages have different ideas of what these named
+#! colors are. For example, GraphViz and GD do not agree. It
+#! would probably be prudent to use RGB values instead of names.
+#! -- mark
+
+    if ($pvalue <= 1e-10) {
+
+	return $gd->orange; 
+
+    }elsif ($pvalue <= 1e-8) {
+
+	return $gd->yellow; 
+
+    }elsif ($pvalue <= 1e-6) {
+
+	return $gd->green4;
+
+    }elsif ($pvalue <= 1e-4) {
+
+	return $gd->lightBlue;
+
+    }elsif ($pvalue <= 1e-2) {
+
+	return $gd->blue4;
+
+    }else {
+
+	return $gd->tan;
+
+    }
+
+}
+
+################################################################
+sub _colorForNode{
+################################################################
+
+    my ($self, $goid) = @_;
+    
+    if ($self->{PVALUE_HASH_REF_FOR_GOID} && 
+	$self->{PVALUE_HASH_REF_FOR_GOID}->{$goid}) {
+
+	my $pvalue = $self->{PVALUE_HASH_REF_FOR_GOID}->{$goid};
+
+	if ($pvalue <= 1e-10) {
+	    
+	    return 'orange'; 
+	    
+	}elsif ($pvalue <= 1e-8) {
+	    
+	    return 'yellow'; 
+	    
+	}elsif ($pvalue <= 1e-6) {
+	    
+	    return 'green';
+	    
+	}elsif ($pvalue <= 1e-4) {
+	    
+	    return 'cyan';
+	    
+	}elsif ($pvalue <= 1e-2) {
+	    
+	    return 'royalblue1';
+	    
+	}else {
+	    
+	    return 'burlywood2';
+	    
+	}
+
+	
+    }
+    
+    return 'burlywood2';
+    
+}
+
+################################################################
+sub _processLabel {
+################################################################
+#
+# =head2 _processLabel
+#
+# Title   : _processLabel
+# Usage   : my $newLabel = $self->_processLabel($label,
+#                                               $maxLabelLen);
+#           automatically called by _drawKeys() and _addNode().
+# Function: Splits the label into multiple lines if the label is too 
+#           long. 
+# Returns : new label string
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $label, $maxLabelLen) = @_;
+    
+    if (!$maxLabelLen) { 
+
+	$maxLabelLen = $self->{MAX_NODE_NAME_WIDTH} || 15;
+
+    }
+
+    # separate the label into its constituent words
+
+    my @words = split(/ /, $label);
+
+    my (@lines, $line);
+
+    # go through each word
+
+    foreach my $word (@words) {
+
+	# if the line we're building up is too long already, or it'll
+	# be too long when we add the next word, add it to the array
+	# of lines, and start a new one
+
+	if ((defined $line && length($line) >= $maxLabelLen) ||
+	    (defined $line && (length($line)+length($word) > $maxLabelLen)) ) {
+
+	    # get rid of leading space
+
+	    $line =~ s/^ +//;
+
+	    push (@lines, $line);
+
+	    undef $line;
+
+	}
+
+	# add the current word onto the line
+
+	$line .= " ".$word;
+
+    }
+
+    # add in a final line if there is one
+
+    if (defined $line){
+
+	$line =~ s/^ +//;
+
+	push (@lines, $line);
+
+    }
+
+    return join("\n", @lines);
+
+}
+
+################################################################
+sub _formatGoid {
+################################################################
+#
+# =head2 _formatGoid
+#
+# Title   : _formatGoid
+# Usage   : my $goid = $self->_formatGoid($goid); 
+#           automatically called by _init() and _addNode().
+# Function: Reformats the goid (plain number) to STD GOID format 
+#           (GO:0000388)
+# Returns : std GOID
+#           
+# =cut
+#
+#########################################################################
+
+    my ($self, $goid) = @_;
+
+    my $len = length($goid);
+
+    for (my $i = 1; $i <= 7 - $len; $i++) {
+
+	$goid = "0".$goid;
+
+    }
+
+    $goid = "GO:".$goid;
+
+    return $goid;
+
+}
+
+#######################################################################
+sub DESTROY {
+#######################################################################
+
+    # nothing needs to be done
+
+}
+
+#######################################################################
+1;
+#######################################################################
+