#!/usr/local/bin/perl -w

# adja2cluster_v2.pl, version 2.
# copyright Manuel J. Gómez, CNB-CSIC

# This script parses a standard adjacency list produced by blast2adjLists.pl,
# this is, without weights, and identifies cluster of connected nodes.

use strict;

if ((@ARGV) < 1) {
	print "adja2cluster.pl adjacency_list\n";
	exit;
}

my ($i);
my (@column);
my (%adMat);

my ($numCluster,$numPila,$node,$newNode);
my (@nodeList,@cola,@cluster);
my (%visited);


parsAdjTab();		# Parses an adjacency list in standard format.
findClusters();		# Finds clusters or connected components.
printClusters();	# Prints clusters.


##########################################################################

sub parsAdjTab {
	open (FILE , "$ARGV[0]") or die "Can not open $ARGV[0]\n";
	<FILE>;
	while (<FILE>){
		chomp;
		@column = split (/\s/,$_);
			$adMat{$column[0]}=();
		for ($i=1 ; $i < @column ; $i++) {
			$adMat{$column[0]}{$column[$i]} = 1;
		}
	}
}


##########################################################################

sub findClusters {
	$numCluster=0;
	@nodeList = (sort keys %adMat);
	foreach $node (@nodeList) {
		if (not $visited{$node}) {
			push (@cola, $node);
			$numPila = @cola;
			search();
			$numCluster++;
		}
	}	
}

sub search {
	while ($numPila > 0) {
		$node = shift @cola;
		$visited{$node} = 1;
		push (@{$cluster[$numCluster]}, $node);
		$numPila = @cola;		
		foreach $newNode (sort keys %{$adMat{$node}}) {
			if (not $visited{$newNode}) {
				push (@cola, $newNode);
				$numPila = @cola;
				search();
			}
		}
		
	}	
}


##########################################################################
			

sub printClusters {
	foreach (sort { @{$b} <=> @{$a}} @cluster){	# Sorts array of arrays by size of arrays.
		print "@{$_}\n";			# Prints each array of the array of arrays.
	}
}		


##########################################################################
