#!/usr/local/bin/perl -w

# blast2adjLists_ed1.pl, version ed1.
# copyright Manuel J. Gómez, CNB-CSIC

# USAGE: blast2adjLists_ed1.pl blast_output.file e-value
# EXAMPLE: blast2adjLists_ed1.pl Myco.blast 1e-80


# This script parses a BLAST output in table format, and constructs adjacency
# lists by considering as neighbours those pairs of sequences that
# have been identified as similar with e-values under a certain value.

# For each parsed line that fulfils the following two conditions: i) the line
# does not refer to a self-match, and ii) the e-value is under the given
# threshold, the two entries in the line are entered as first and second keys
# in a hash of hashes, in both orders, forcing the construction of a symmetrical
# matrix. The advantage of doing this in Perl is that, even so, the result is
# not a complete matrix, because only the existing pairs of neighbours are
# stored. The e-values are also stored, and if there are several hits for the
# same pair of sequences with different e-values, only the lowest one is kept.


use strict;


use Scalar::Util qw(looks_like_number);

# Zero-based indices of the tab-separated fields this script reads from
# each line of the BLAST table.
my $QUERY_COL   = 0;
my $SUBJECT_COL = 1;
my $EVALUE_COL  = 10;


# gi_number($identifier)
# Returns the digits found between "gi|" and the next "|" in $identifier,
# or undef when the identifier contains no such pattern. Returning the
# capture only on a successful match matters: after a failed match $1 still
# holds the capture of the previous successful match, which would silently
# attribute the hit to the wrong sequence.
sub gi_number {
        my ($identifier) = @_;
        return $identifier =~ /gi\|(\d+)\|/ ? $1 : undef;
}


# read_adjacency($path, $max_evalue)
# Parses the BLAST table in $path and returns a reference to a hash of
# hashes, $adj->{$a}{$b} = lowest e-value seen for the pair, stored in both
# directions so that the matrix is symmetrical. Only hits with an e-value
# strictly below $max_evalue are kept, and self-matches are ignored.
# Dies if the file can not be opened or closed; lines that can not be
# interpreted are skipped (with a warning when they look like data lines).
sub read_adjacency {
        my ($path, $max_evalue) = @_;
        my %adMat;

        open (my $fh, '<', $path) or die "Can not open $path: $!\n";

        while (my $line = <$fh>) {
                chomp $line;
                my @column = split (/\t/, $line);

                # Comment, blank or truncated lines do not have an e-value field.
                next if @column <= $EVALUE_COL;

                my ($query, $subject, $evalue)
                        = @column[$QUERY_COL, $SUBJECT_COL, $EVALUE_COL];

                next if $query eq $subject;             # self-match

                if (not looks_like_number($evalue)) {
                        warn "Skipping line $.: e-value '$evalue' is not a number\n";
                        next;
                }
                next if not $evalue < $max_evalue;

                my $prot1 = gi_number($query);
                my $prot2 = gi_number($subject);
                if (not defined $prot1 or not defined $prot2) {
                        warn "Skipping line $.: no gi|NUMBER| identifier found\n";
                        next;
                }

                # Different identifier strings may still carry the same gi
                # number; that is a self-match too.
                next if $prot1 eq $prot2;

                # Keep only the lowest e-value for each pair, in both directions.
                if (not exists $adMat{$prot1}{$prot2}
                    or $adMat{$prot1}{$prot2} > $evalue) {
                        $adMat{$prot1}{$prot2} = $evalue;
                        $adMat{$prot2}{$prot1} = $evalue;
                }
        }

        close ($fh) or die "Can not close $path: $!\n";

        return \%adMat;
}


# print_adjacency_lists($adMat)
# Prints a header line followed by one line per node: the node, a tab, and
# its neighbours, each followed by a single space. Nodes and neighbours are
# listed in string (lexical) sort order.
sub print_adjacency_lists {
        my ($adMat) = @_;

        print "Nodes\t\tNeighbours\n";

        foreach my $first (sort keys %{$adMat}) {
                my @neighbours = sort keys %{$adMat->{$first}};
                print "$first\t", (map { "$_ " } @neighbours), "\n";
        }
        return;
}


# Main program: check the command line, parse the BLAST output, and print
# the adjacency lists.

my $usage = "USAGE: $0 blast_output.file e-value\n";

@ARGV >= 2 or die $usage;

my ($blast_file, $threshold) = @ARGV;   # $threshold: maximum e-value allowed.

looks_like_number($threshold)
        or die "The e-value '$threshold' is not a number\n$usage";

print_adjacency_lists(read_adjacency($blast_file, $threshold));
                
#############################################################################