/********************************************
main.c
Gclust
This program reads a list file (generated from BLAST result by bl2ls.pl),
and assembles similarity groups. Output is a *.grp file.
This is a C version of the lsort2b.pl.
  Copyright Naoki Sato 2002.

Added functionality of using catenated list file. June 2, 2002.
Added msort, lnkann. June 16, 2002.
Added SQlist. June 17, 2002.
Added lnkdb. June 25, 2002.
Selection of master entry. June 25, 2002.
Added hom. July 2, 2002.
Added Unit. July 3, 2002.
Modified input format. July 4, 2002.
Matrix is now float. July 5, 2002.
Matrix is now int. July 6, 2002.
Added sublist5. July 7, 2002.
Added sublist6. sublist5 changed to reflist5. July 10, 2002.
Phase 1. July 19, 2002.
R,R1,R2 are made global. Aug. 9, 2002.
u is now global. Sept. 8, 2002.
R is now simple pointer. Sept. 9, 2002.
Bug fix for matrix. Sept. 10, 2002.
Bug fix for Unit matrix. Sept. 30, 2002.
Added mode homsub. Oct. 18, 2002.
Reconstruction. April 30, 2003.
Only hom and homsub are available.
Read and write of gclust2 format enabled.
score is now double. May 14, 2003.
Recurrent clustering. May 16, 2003.
Added print_N4_table switch. Sept. 16, 2004.
Clique mode. July, 2005. version 352.
Use of org information. July, 2005. version 352e13.
Tapering mode. Aug. 2005. version 352e14.
MergeDistantRelatives. Sept, 2005. version 352e16.
Modification in SearchBridge. Oct, 2005. version 352f1.
Reading of the -m8 table of BLAST. May 29, 2007. version 354.
*******************************************/

#ifndef __MAIN
#define __MAIN
#endif
#include "defines.h"
#include "defvar.h"
#include "gclust.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Prototype declaration */
/* Both functions are defined later in this file. */
void GetTime(void);
void PrintOutput(Node **a,unsigned nodes,Relatives *r);


/* Name given as the first command-line argument.  main() later appends the
   threshold text and regroup suffixes to it, and PrintOutput() uses it as
   the base of the output file names. */
char listfile[MAXLEN]="";
/* Copy of listfile kept while a temporarily suffixed name is in use. */
char listfile_bak[MAXLEN]="";
/* Copy of listfile taken before suffixes are added; printed as the input file name. */
char listfile_org[MAXLEN]="";
/* Program version printed in the start-up banner and in every output file. */
char version_string[20]=VERSION;
/* Copy of time_string taken at program start. */
char start_time[100];
/* Value of the HOST environment variable, written to the output header. */
char host_info[100];
char *dmstring;		/* domain list information, 352e */
/* Index into optVar[] of the mode selected by the second argument. */
unsigned swVar=0;
/* Name of the output file PrintOutput() is currently writing. */
char hom[MAXLEN];
char out_mode[MAXLEN]="x";	/* output mode: 1, r, or s. */
/* The single character of out_mode currently being processed, as a string. */
char om[2]="1";
/* Points into argv at the option text currently being parsed. */
char *sw1;
/* Work matrix obtained from InitMatrix()/ReallocMatrix(). */
int *matrix;
/* Dimension handed to the matrix routines for the group being processed. */
unsigned dim;
/* Result of the latest PrintSub() call in PrintOutput(); reset to 0 per node. */
char print_sub = 0;
Boolean exhaustive_output=FALSE;	/* exhaustive output of group tables in clique mode. 352c5 */
Boolean regroup=FALSE;	/* re-grouping 352f12 */
Boolean regroup_bak=FALSE;	/* re-grouping 352f12 */

/* State filled in by GetTime(). */
char time_string[50];	/* current local time as text, without trailing newline */
time_t systime;		/* raw value returned by time() */
struct tm *currtime;	/* broken-down local time, or NULL if conversion failed */

/********************************************************/
/*     GetTime     */
/********************************************************/
/*
 * Record the current time.
 *
 * Stores time(NULL) in systime, its local-time breakdown in currtime and
 * the asctime() text, with the trailing newline removed, in time_string.
 * When the time cannot be converted (localtime() or asctime() returns
 * NULL), time_string is set to "unknown time" instead of dereferencing a
 * null pointer.
 */
void GetTime(void)
{
	const char *text = NULL;
	size_t len;

	systime=time(NULL);
	currtime=localtime(&systime);
	if(currtime != NULL) text = asctime(currtime);
	if(text == NULL) text = "unknown time";

	/* bounded copy; asctime() text is 25 characters, well within the buffer */
	snprintf(time_string,sizeof time_string,"%s",text);

	/* strip the newline that asctime() appends, but only if it is there */
	len = strlen(time_string);
	if(len > 0 && time_string[len - 1] == '\n') time_string[len - 1] = '\0';

	return;
}
/********************************************************/

/*********************************************************/
/*      Main      */
/*********************************************************/
int main(int argc,char **argv)
{
	char gclust_variables_file[20]=GCLUST_VARIABLES_FILE;
	char gclust_variables_file_new[20];
	char hom2[MAXLEN];
	char table[MAXLEN]="";
	char thr_char[MAXLEN]="";
	char mat_opt[MAXLEN]="1";	/* matrix option: 1 or M. */
	unsigned ct=0;
	int it=0;	/* counter for iteration cycle */
	int kaisuu=0;	/* counter for iteration */

	FILE *fout;
	FILE *fout2;
	FILE *fin;
	FILE *ftable;
	FILE *missing;
	Node **a;
	unsigned nodes=0;	/* number of menbers of a */
	unsigned i,j,k,k1;
	unsigned maxn3;
	unsigned maxn3i;
	unsigned maxn3_bak=0;
	int count=2;	/* counter for argc */
	Boolean print_table=FALSE;
	char newtable[]=NEWTABLE;
	char missingtable[]=MISSINGTABLE;
	Boolean read_mode=FALSE;	/* read saved data for list1 and list2*/
	Boolean save_mode=FALSE;	/* save data */
	Boolean write_variables=FALSE;	/* write varibles file */
	Boolean nocheck_mode=FALSE;	/* skip checking nodes */
	Boolean m8_mode=FALSE;	/* m8 mode. May 29, 2007 */
	double thr_bak=0.998;
	char org_list_file[20];
	int xysize;
	Relatives *r;
	int regroup_level_max=0;
	


	printf("gclust version %s.\n",version_string);
	GetTime();
	printf("Started at %s.\n",time_string);
	strcpy(start_time,time_string);
	strcpy(org_list_file,ORG_LIST_FILE);

	thr_list[0]=1e-40;
	thr_list[1]=1e-30;
	thr_list[2]=1e-20;
	thr_list[3]=1e-12;
	thr_list[4]=1e-8;

	if((uu.s = (SQlist*)calloc(1,sizeof(SQlist)))==NULL){
		fprintf(stderr,"Error in initializing uu.\n");
		exit(1);
	}
	uu.n = 0;

	maxn3 = 0;
	R = InitRegion(maxn3);
	R1 = InitRegion(maxn3);
	R2 = InitRegion(maxn3);

	for(i=0;i<GN_MAX;i++){
		gi[i].begin = 0;
		strcpy(gi[i].name,"");
	}

/** input of arguments **/

	if(argc<2){
		fprintf(stderr,"Specify name of the list file.\n");
		print_usage();
		exit(1);
	}
	if(!strncmp(argv[1],"-read=",6)) {
		sw1=&argv[1][6];
		strcpy(listfile,sw1);	/* data file */
		read_mode=TRUE;
	} else strcpy(listfile,argv[1]);	/* list file */
	if(!strcmp(listfile,"")){
		fprintf(stderr,"Specify name of the list file.\n");
		print_usage();
		exit(1);
	}

/* mode */
	if(argc>=3){
		if(argv[2][0]=='-'){
			sw1=&argv[2][1];
			for(swVar=0;swVar<maxVar;swVar++) {
				if(!strcmp(optVar[swVar],sw1)) break;
			}
			if(swVar>=maxVar) {
				print_usage();
				exit(1);
			}
		}
	}
	printf("Mode: %s.\n",optVar[swVar]);
	if(swVar<4){
		fprintf(stderr,"Only available modes are nocalc, hom, homsub, and save.\n");
		exit(1);
	}
	if(swVar == 4) no_sub=TRUE;
	else if(swVar == 5) no_sub=FALSE;

/* options */
	if(argc>=3 && swVar>=1){
		for(count=3;count<argc;count++){
			if(!strncmp(argv[count],"-tab=",5)) {
				sw1=&argv[count][5];
				strcpy(table,sw1);
			} else if(!strncmp(argv[count],"-thr=",5)) {
				sw1=&argv[count][5];
				strcpy(thr_char,sw1);
				thr=atof(thr_char);
				fprintf(stderr,"Threshold is %10.3e.\n",thr);
			} else if(!strncmp(argv[count],"-out=",5)) {
				sw1=&argv[count][5];
				if(strchr(sw1,'1')==NULL && strchr(sw1,'r')==NULL &&\
				strchr(sw1,'s')==NULL){
					fprintf(stderr,"Strange output option.\n");
					print_usage();
					exit(1);
				}
				strcpy(out_mode,sw1);
			} else if(!strncmp(argv[count],"-mat=",5)) {
				sw1=&argv[count][5];
				if(strcmp(sw1,"1") && strcmp(sw1,"M")){
					fprintf(stderr,"Strange matrix option.\n");
					print_usage();
					exit(1);
				}
				strcpy(mat_opt,sw1);
			} else if(!strcmp(argv[count],"-t")) {
				print_table=TRUE;
			} else if(!strcmp(argv[count],"-v")) {
				write_variables=TRUE;
			} else if(!strcmp(argv[count],"-repeat")) {
				repeat_mode=TRUE;
			} else if(!strcmp(argv[count],"-clique")) {		/* 350 */
				clique_mode=TRUE;
				repeat_mode=FALSE;
			} else if(!strcmp(argv[count],"-printN4")) {
				print_N4_table=TRUE;
			} else if(!strcmp(argv[count],"-2Dtable")) {
				print_2D_tables=TRUE;
			} else if(!strcmp(argv[count],"-verbous")) {
				print_N4_table=TRUE;
				print_2D_tables=TRUE;
				verbous_mode=TRUE;
			} else if(!strcmp(argv[count],"-save")) {
				save_mode=TRUE;
			} else if(!strcmp(argv[count],"-nocheck")) {
				nocheck_mode=TRUE;
			} else if(!strcmp(argv[count],"-org")) {		/* 352 */
				use_org=TRUE;
			} else if(!strncmp(argv[count],"-regroup",8)) {		/* 352f12 */
				regroup=TRUE;
				sw1=&argv[count][8];
				if(strlen(sw1) > 0){
					_regroup_level = atoi(sw1);		/* 353f */
					if(_regroup_level > 10 || _regroup_level < 0) _regroup_level = 5;
				} else _regroup_level = 5;
				fprintf(stderr,"Regroup level is %d.\n",_regroup_level);
			} else if(!strcmp(argv[count],"-taper") || !strcmp(argv[count],"-ashikiri")) {		/* 352 */
				ashikiri=TRUE;	
			} else if(!strcmp(argv[count],"-exh") || !strcmp(argv[count],"-exhaustive")) {		/* 352c5 */
				exhaustive_output=TRUE;
			} else if(!strcmp(argv[count],"-m8")) {			/* 354 */
				m8_mode = TRUE;
			} else {
				fprintf(stderr,"Strange command line arguments.\n");
				print_usage();
				exit(1);
			}
		}
	}

	if(swVar==6) save_mode=TRUE;
	else if(save_mode && !strcmp(out_mode,"x")) swVar=6;
	if((repeat_mode || clique_mode)&& !read_mode){		/* 350 */
		fprintf(stderr,"The current mode is only available with read mode.\n");
		exit(1);
	}
	strcpy(listfile_org,listfile);
	if(!repeat_mode && !clique_mode) verbous_mode=FALSE;	/* 350 */

	/* 352f13 */
	printf("Homology matrix. Out_mode: %s. Options: ",om);
	if(clique_mode) printf("-clique ");
	else if(repeat_mode) printf("-repeat ");
	if(use_org) printf("-org ");
	if(regroup){
		printf("-regroup ");
		printf("level %d",_regroup_level);
	}
	printf("\n");
/** end of input of arguments **/
/** reading from file **/

	if((fin=fopen(gclust_variables_file,"r"))!=NULL){
		if(ReadVariables2(fin)){
			fprintf(stderr,"Error in reading variables.\n");
			help_var_list();
		}
		fclose(fin);
	}

	if(write_variables){
		sprintf(gclust_variables_file_new,"%s.new",gclust_variables_file);
		if((fout=fopen(gclust_variables_file_new,"w"))!=NULL){
			PrintVariables2(fout);
		}
		fclose(fout);
	}

	if(swVar == 7) {
		printf("This is nocalc mode. Operation finished.\n");
		exit(0);
	}
	
    printf("\nList of variables used in this analysis.\n\n");
    PrintVariables2(stdout);
    printf("\n");
	fflush(stdout);
	num_org = (int)number_of_genomes;

	if(read_mode){
		printf("\nReading data file: %s.\n",listfile);
		if((fin=fopen(listfile,"r"))==NULL){
			fprintf(stderr,"\nUnable to open data file %s.\n",listfile);
			exit(1);
		}
		if((a=ReadData(fin,a,&nodes,listfile,table))==NULL){
			fprintf(stderr,"Error in reading data file.\n");
			exit(1);
		}
		fclose(fin);
    } else {
		if(swVar>=2){
			if(!strcmp(table,"")){
				printf("Table name: %s.\n",table);
				print_usage();
				exit(1);
			}
			if((ftable=fopen(table,"r"))==NULL){
				fprintf(stderr,"Unable to open table file %s.\n",table);
				exit(1);
			}
		}

		printf("\nReading table file: %s.\n",table);
		if((a=ReadAnnot2(a,&nodes,ftable))==NULL){
			fprintf(stderr,"Error in reading annotation table.\n");
			exit(1);
		}	
		fclose(ftable);

		if(!gclust3) {
			printf("\nTotal genomes: max_gi=%u.\n",max_gi);
			if(m8_mode){		/* 354 */
				fprintf(stderr,"The m8 mode is only used with the annotation table in the gclust3 format.\n");
				exit(1);
			}
		}
	
		printf("\nReading list file: %s.\n",listfile);
		if((fin=fopen(listfile,"r"))==NULL){
			fprintf(stderr,"\nUnable to open list file %s.\n",listfile);
			exit(1);
		}
		if(m8_mode){		/* 354 */
			if((a=ReadNodeM8(fin,a,&nodes))==NULL){
				fprintf(stderr,"Read error.\n");
				exit(1);
			}
		} else {
			if((a=ReadNode2(fin,a,&nodes))==NULL){
				fprintf(stderr,"Read error.\n");
				exit(1);
			}
		}
		fclose(fin);

/* If no valid homology entry is found, n1 is set to 1. */
		for(i=1;i<=nodes;i++){
			if(a[i]->n1==0 && strcmp(a[i]->name,"") && a[i]->sqlist1[0].Sstart != 0) a[i]->n1=1;

/* If there is really no valid entry, an alert is announced.*/
			if(a[i]->n1 < 1) printf("a[%u]->n1=%u\n",i,a[i]->n1);
		}

		SetItem0(a,nodes);
	
		if(print_table){
			if((ftable=fopen(newtable,"w"))==NULL){
				fprintf(stderr,"\nUnable to open new table file %s.\n",newtable);
			} else {
				printf("\nPrinting a new table.\n");
				if(gclust3){
					PrintTable3(ftable,a,nodes);
				} else {
					PrintTable(ftable,a,nodes);
				}
				fclose(ftable);
			}
		}
		printf("\nnodes = %u.\n",nodes);

		if(ashikiri){		/* 352e14 */
			printf("\nTapering mode. Removing low level homology .. \n");
			fflush(stdout);
			LimitScore(a,nodes);
		}
		printf("\n");

		printf("\nAdding citation list ...\n");
		fflush(stdout);

		if(!print_table) missing=stderr;
		else {
			if((missing=fopen(missingtable,"w"))==NULL){
				fprintf(stderr,"\nUnable to open missing table file %s.\n",missingtable);
				exit(1);
			}
		}
		if((a=AddList2(a,nodes,missing))==NULL){
			exit(1);
		}
		if(print_table) fclose(missing);
	
		if(save_mode){

			/* New in version 308. The flag active is set at this moment. */

			printf("\nThe flag 'active' is being set.\n");
			thr=0.998;
			for(i=1;i<=nodes;i++){
				if(a[i]->domain != 0) a[i]->active = FALSE;
			}

			printf("\nRemoving redundancies ...\n");
			fflush(stdout);
			if((a=CleanList(a,nodes))==NULL) exit(1);

			for(i=1;i<=nodes;i++){
				a[i]->n1b = a[i]->n1;
			}

		/* Initialize domain list */
			InitDomain(a,nodes);

			MakeIDlist(a,nodes);
			SearchBridge(a,nodes);

			PrintData(a,nodes,listfile,table);
			printf("\nData were saved.\n");
			if(swVar==6) exit(0);
		}
   	}

	InitClist(nodes);

/* backup n1 and n2 */
	for(i=1;i<=nodes;i++){
		a[i]->n1b = a[i]->n1;
		a[i]->n2b = a[i]->n2;
	}

/* Initialize domain list */

	if(!clique_mode) InitDomain(a,nodes);

/* Setting up thr_list */ /* 350 */

	thr_bak=thr;
	last_thr=thr;
	if(repeat_mode){
		if(thr_bak > thr_list[num_thr-1]){
			thr_list[num_thr]=thr_bak;
			thr_list[num_thr+1]=thr_bak;
		} else {
			thr_list[num_thr]=DEFAULT_THR;
			thr_list[num_thr+1]=DEFAULT_THR;
		}
		last_thr = thr_list[num_thr-1];
	} 


/* 350 Clique mode */

	if(clique_mode){

		thr = 0.998;
		thr_list[num_thr]=thr_bak;
		thr_list[num_thr+1]=thr_bak;

		if((xy=(int*)calloc((num_thr+1)*11+1,sizeof(int)))==NULL){
			fprintf(stderr,"Memory allocation error in making xy.\n");
			exit(1);
		}
		if((z=(int*)calloc((num_thr+1)*11+1,sizeof(int)))==NULL){
			fprintf(stderr,"Memory allocation error in making z.\n");
			exit(1);
		}
		if((d=(int*)calloc((num_thr+1)*11+1,sizeof(int)))==NULL){
			fprintf(stderr,"Memory allocation error in making d.\n");
			exit(1);
		}

		/* 352 organism list */
		if(use_org){
			if((fin=fopen(org_list_file,"r"))==NULL){
				fprintf(stderr,"\nUnable to open org_list file %s.\n",org_list_file);
				exit(1);
			}
			if((org = ReadOrgFile(fin))==NULL){
				fprintf(stderr,"Unable to read from org_list file %s.\n",org_list_file);
				exit(1);
			}
			fclose(fin);
			printf("\nOrganism list\tNumber of species: %d\n\n",num_org_all);
			for(i=1;i<=num_entries;i++){
				printf("%s\t%s\t%d\t%d\n",org[i].prefix,org[i].name,org[i].species,org[i].kingdom);
			}
			printf("\n");
			printf("org_list has been read.\n");
			fflush(stdout);

			xysize = 11 * (num_thr + 1);
			if((sp=(int*)calloc(xysize * (num_org_all+2) + 1, sizeof(int)))==NULL){
				fprintf(stderr,"Memory allocation error in making sp.\n");
				exit(1);
			}

			printf("\nAssigning organism information ...");
			AssignOrg(a,nodes);
			printf("done.\n\n");
			fflush(stdout);
		}
		
/*	this is read from data.out

		printf("\nRemoving redundancies ...\n");
		fflush(stdout);
		if((a=CleanList(a,nodes))==NULL) exit(1);
*/

	/* globals: R, R1, R2, mat_size, initial_mat_size */

		maxn3_bak = maxn3;
		maxn3i = GetMax_n3i(a,nodes);
		maxn3 = a[maxn3i]->n3;

/*		maxn3 = 5 * GetMax_n4(a,nodes);
*/
/*		if(maxn3==0){
			R = InitRegion(maxn3);
			R1 = InitRegion(maxn3);
			R2 = InitRegion(maxn3);
		} else {
*/
			R = ReallocRegion(R,maxn3_bak,maxn3);
			R1 = ReallocRegion(R1,maxn3_bak,maxn3);
			R2 = ReallocRegion(R2,maxn3_bak,maxn3);
/*		}
*/
		mat_size = maxn3; 
		if(initial_mat_size==0){
			if((matrix=InitMatrix(mat_size))==NULL){
				printf("Error of initializing matrix for node %u.\n",maxn3i);
			}
		} else if(initial_mat_size < mat_size){
			matrix = ReallocMatrix(matrix,mat_size);
		}
		if(initial_mat_size < mat_size){
			initial_mat_size = mat_size;
		}

		printf("\nBinary interaction data are being calculated ... \n");
		fflush(stdout);
		if((a = BinaryInteractions(a,nodes))==NULL) exit(1);
	
		for(i=1;i<=nodes;i++){
			if(a[i]->domain == 0){
				clearIDlist(a,nodes,i);	
				a[i]->n4 = 0;
			}else if(a[i]->domain & 0x04){
				a[i]->n4 = 1;
				a[i]->idlist4[0] = a[i]->n0;
			}
		}

		printf("\nAssembling clusters ...\n");
		fflush(stdout);
		if((a=CreateCliques(a,nodes))==NULL) exit(1);

		for(i=1;i<=nodes;i++){
			if(a[i]->domain & 0x01){
				a[i]->n4b = a[i]->n4;
				a[i]->n4 = 1;
			}else if(a[i]->domain & 0x04){
				a[i]->n4 = 1;
				a[i]->idlist4[0] = a[i]->n0;
			}
		}

		CleanupIDall(a,nodes);

		SortList(a,nodes,TRUE);
		printf("SortList completed.\n");
		SelectNode2(a,nodes);

		CleanupIDall(a,nodes);

	/* for debug */
	PrintN4(a,nodes,1);
		
		printf("\nMerging clusters  ...\n");
		fflush(stdout);
		if((a=MergeCliques(a,nodes))==NULL) exit(1);

	/* for debug */
	PrintN4(a,nodes,2);
		
		CleanupIDall(a,nodes);

		if(!nocheck_mode){
			ClearClist(nodes);
			CheckNode(a,nodes);
		}
	/* for debug */
	PrintN4(a,nodes,3);

		WriteParent(a,nodes);
		fflush(stdout);
		
		printf("\nAdding singletons to clusters  ...\n");
		fflush(stdout);
		if((a=AssignSingletons(a,nodes))==NULL) exit(1);

		SortList(a,nodes,TRUE);
		printf("SortList completed.\n");
		SelectNode2(a,nodes);

	/* for debug */
	PrintN4(a,nodes,4);

		printf("\nSecond check for nodes  ...\n");
		if(!nocheck_mode){
			ClearClist(nodes);
			CheckNode(a,nodes);
		}


	} else {


/* Beginning of iteration */

/* 350 This part is done before. 
	thr_bak=thr;
	last_thr=thr;
	if(repeat_mode){
		if(thr_bak > thr_list[num_thr-1]){
			thr_list[num_thr]=thr_bak;
			thr_list[num_thr+1]=thr_bak;
		} else {
			thr_list[num_thr]=DEFAULT_THR;
			thr_list[num_thr+1]=DEFAULT_THR;
		}
		last_thr = thr_list[num_thr-1];
	} 
*/

	kaisuu = 0;
	for(it=0;it<=num_thr+1;it++){
		if(!repeat_mode) {
			it=num_thr+1;
			thr=thr_bak;
		}
		else {
			if(kaisuu <= 0) kaisuu = 1 - kaisuu;
			else kaisuu = 1;
			
			thr=thr_list[it];
		}

		if(repeat_mode){
			printf("\nIteration %d of cycle %d with thr = %10.2e ...\n\n",kaisuu,it+1,thr);
			GetTime();
			printf("%s\n",time_string);
		}

		if(repeat_mode && it>0){

			clearN3list(a,nodes);

			for(i=1;i<=nodes;i++){
				a[i]->n1 = a[i]->n1b;
				a[i]->n2 = a[i]->n2b;
			}
		}

		printf("\nRemoving redundancies ...\n");
		fflush(stdout);
		if((a=CleanList(a,nodes))==NULL) exit(1);

		printf("\nAssembling clusters ...\n");
		fflush(stdout);
		if((a=MergeList(a,nodes))==NULL) exit(1);
		printf("Changed %u items.\n",a[0]->n3);

		MakeIDlist(a,nodes);

		if(!nocheck_mode){
			ClearClist(nodes);
			CheckNode(a,nodes);
		}

		SortList(a,nodes,TRUE);
		printf("SortList completed.\n");

		SelectNode2(a,nodes);

		if(repeat_mode){
			maxn3_bak = maxn3;
			maxn3 = GetMax_n3(a,nodes);
/*
			if(maxn3==0){
				R = InitRegion(maxn3);
				R1 = InitRegion(maxn3);
				R2 = InitRegion(maxn3);
			} else {
*/

				R = ReallocRegion(R,maxn3_bak,maxn3);
				R1 = ReallocRegion(R1,maxn3_bak,maxn3);
				R2 = ReallocRegion(R2,maxn3_bak,maxn3);
/*			}
*/

			mat_size = a[1]->n4 + 2; 	/* This is the largest. */

			if(initial_mat_size==0){
				matrix=InitMatrix(mat_size);
			} else if(initial_mat_size < mat_size){
				matrix = ReallocMatrix(matrix,mat_size);
			}

			if(initial_mat_size < mat_size){
				initial_mat_size = mat_size;
			}

			if(cmpIDlist(a,nodes,it)){
				it--;
				kaisuu = 0 - kaisuu;
				continue;
			}
		}

		for(i=1;i<=nodes;i++) ReassignDomainID(a,nodes,i);

		PrintN4(a,nodes,it);
	}



	}		/* end of else for if(clique_mode) */


	if(repeat_mode){
		SortList(a,nodes,TRUE);
		for(i=1;i<=nodes;i++){
			if(a[i]->final_thr < thr_bak){ 
				a[i]->n1 = a[i]->n1b;
				a[i]->n2 = a[i]->n2b;
			}
		}
	} else if(clique_mode){			/* 350 */
		for(i=1;i<=nodes;i++){
			if(a[i]->domain & 4){
				a[i]->n3 = 1;
				a[i]->n4 = 1;
				a[i]->idlist4[0] = a[i]->n0;
			}
		}
	} else {
		for(i=1;i<=nodes;i++){		/* 308 */
			if(a[i]->active && (a[i]->domain & 4)) {
				a[i]->n3 = 1;
				for(j=1;j<a[i]->n3b;j++){
					clearSQlist(&a[i]->sqlist3b[j]);
				}
				a[i]->n3b=1;
				recoverIDlist(a,nodes,i);
			}
		}
	}


/* End of iteration */



	strcpy(listfile_org,listfile);
	if(strcmp(thr_char,"")) strcat(listfile,thr_char);

	WriteParent(a,nodes);
	fflush(stdout);

	maxn3_bak = maxn3;
	maxn3 = GetMax_n3(a,nodes);

/*
	if(maxn3==0){
		R = InitRegion(maxn3);
		R1 = InitRegion(maxn3);
		R2 = InitRegion(maxn3);
	} else {
*/

		R = ReallocRegion(R,maxn3_bak,maxn3);
		R1 = ReallocRegion(R1,maxn3_bak,maxn3);
		R2 = ReallocRegion(R2,maxn3_bak,maxn3);
/*	}
*/

/* homsub */
	if(swVar==5){
		printf("\nMaking Sub-list...\n");
		ct=0;
		for(i=1;i<=nodes;i++){
			PrintProgress(i,10,1000);
			if(a[i]->n3 != 0){
				ct += 1;
				MakeSublist(a,nodes,i);
			}
		}
	}

/* for debug */

	if(!repeat_mode) PrintN4(a,nodes,it-1);

/* Matrix for homology */
	printf("\nMaking homology matrix ...\n");
	fflush(stdout);

	if(a[1]->n5 == 0){
		mat_size = a[1]->n4; 	/* This is the largest. */
		for(i=2;i<=nodes;i++){
			if(a[i]->n3==0 || a[i]->n4 < 2) continue;
			if(mat_size < a[i]->n4) mat_size = a[i]->n4;
		}
	}else{
		mat_size=0;
		for(i=1;i<=nodes;i++){
			if(a[i]->n3==0 || a[i]->n4 < 2) continue;
			j=FindNode(a,nodes,a[i]->reflist5[0]);
			if(j==0) continue;
			if(mat_size < a[j]->n6) mat_size = a[j]->n6;
			if(mat_size < a[j]->n1) mat_size = a[j]->n1;
		}
		if(mat_size == 0) mat_size = a[1]->n4;
	}
	mat_size += 10;

	if(initial_mat_size == 0) matrix=InitMatrix(mat_size);
	else if(initial_mat_size < mat_size) matrix = ReallocMatrix(matrix,mat_size);

	if(initial_mat_size < mat_size){
		initial_mat_size = mat_size;
	}

/* Modification Phase */
	ct=0;
	subgrpno=0;
	printf("Ordering matrices ...\n");
	for(i=1;i<=nodes;i++){
		PrintProgress(i,10,1000);
		if(!a[i]->active) continue;		/* 308 */
		if(a[i]->n3 == 0) continue;
		print_sub=0;
		ct += 1;
		if(!PrintSub(a,nodes,i)){
			if(a[i]->n4 == 0) continue;
			dim = a[i]->n4 + 2;
			if(dim>mat_size){
				matrix=ReallocMatrix(matrix,dim);
				mat_size=dim;
			}
			ClearMatrix(matrix,dim);
			WriteMatrix(a,nodes,i,matrix,dim);
			DiagonalMatrix(a,nodes,i,matrix,dim,4);
			continue;
		}

		j=FindNode(a,nodes,a[i]->reflist5[0]);
		if(j==0) continue;
		dim = a[j]->n1 + 2;
		if(dim>mat_size){
			matrix=ReallocMatrix(matrix,dim);
			mat_size=dim;
		}
		ClearMatrix(matrix,dim);

		for(j=0;j<a[i]->n4;j++){
			k=FindNode(a,nodes,a[i]->idlist4[j]);
			if(k==0 || a[k]->n1==0) continue;
			if(a[k]->n1 > mat_size){
				mat_size = a[k]->n1 + 2;
				matrix=ReallocMatrix(matrix,mat_size);
				ClearMatrix(matrix,mat_size);
			}
			WriteSubMatrix(a,nodes,k,matrix,a[k]->n1);
			for(k1=0;k1<10;k1++){
				if(RemoveUnrelated(a,nodes,k,matrix,a[k]->n1)==0) break;
			}
			EvaluateMatrix(a,nodes,k,matrix,a[k]->n1);
			RenewList6(a,nodes,k);
		}
		for(j=0;j<a[i]->n4;j++){
			k=FindNode(a,nodes,a[i]->idlist4[j]);
			if(k==0 || a[k]->n1==0) continue;
			WriteSubMatrix(a,nodes,k,matrix,a[k]->n1);
			DiagonalMatrix(a,nodes,k,matrix,a[k]->n1,6);
		}
	}


/* 352c clique mode. Assignment of group number. */
	if(clique_mode) {
/*		ct = 0;
		for(i=1;i<=nodes;i++){
			if(a[i]->n3 != 0){
				ct += 1;
				a[i]->grpno = ct;
			}
		}
		ListRelatives(a,nodes);
*/
PrintN4(a,nodes,5);

/*
	for(i=1;i<nodes;i++){
		if(a[i]->n3 == 0) continue;
		j = (a[i]->relatives)->number;
		printf("number=%u:",j);
		for(k=0;k<j;k++){
			printf(" %u",(a[i]->relatives)->rel_list[k]);
		}
		printf("\n");
	}
*/

/* Adding singletons to related groups 
		for(i=1;i<=nodes;i++){
			if(a[i]->n3 == 0) continue;
			if(a[i]->n4 == 1) {
				if(a[i]->nR == 1){
					j = a[i]->relatives[0].pID;
					if((k = FindNode(a,nodes,j)) == 0) continue;
					if(a[k]->domain != 0) continue;
					n4 = a[k]->n4;
					if((a[k]->idlist4=(unsigned*)realloc(a[k]->idlist4,(n4 + 1)*sizeof(unsigned)))==NULL){
						fprintf(stderr,"Memory allocation error for idlist4.\n");
						exit(1);
					}

					a[k]->idlist4[n4] = a[i]->n0;
					a[k]->n4 += 1;

					a[i]->n3 = 0;
					a[i]->pID = a[k]->n0;
					a[i]->grpno = 0;
				
				}
			}
		}
*/

		SortList(a,nodes,TRUE);
		printf("SortList again.\n");
		SelectNode2(a,nodes);
		SetGrpNo(a,nodes);
		ListRelatives(a,nodes);
		AddGrpno(a,nodes);
		SortRelatives(a,nodes);

		if(regroup){
			strcpy(listfile_bak,listfile);
			strcat(listfile,"_noRegroup");
			regroup_bak = regroup;
			regroup = FALSE;
			PrintOutput(a,nodes,r);
			strcpy(listfile,listfile_bak);
			regroup = regroup_bak;
		}

		regroup_level_max = _regroup_level;
		
		if(regroup){
			for(_regroup_level=1;_regroup_level<=regroup_level_max;_regroup_level++){

				MergeDistantRelatives(a,nodes);
	
				SortList(a,nodes,TRUE);
				printf("SortList again.\n");
				SelectNode2(a,nodes);
				SetGrpNo(a,nodes);
				ListRelatives(a,nodes);
				AddGrpno(a,nodes);
				SortRelatives(a,nodes);
	
				/* second time */
	
				MergeDistantRelatives(a,nodes);
	
				SortList(a,nodes,TRUE);
				printf("SortList again.\n");
				SelectNode2(a,nodes);
				SetGrpNo(a,nodes);
				ListRelatives(a,nodes);
				AddGrpno(a,nodes);
				SortRelatives(a,nodes);

				if(_regroup_level < regroup_level_max){
					strcpy(listfile_bak,listfile);
					sprintf(listfile,"%s_regroup_%d",listfile_bak,_regroup_level);
					PrintOutput(a,nodes,r);
					strcpy(listfile,listfile_bak);
				}
			}
		}
		if(_regroup_level > regroup_level_max) _regroup_level = regroup_level_max;
	}

/* Final output */

	printf("\nFinal output\n");
/*
	printf("out_mode=%s, listfile=%s, listfile_org=%s, number_of_genomes=%u, num_org=%d, num_org_all=%d.\n",\
			out_mode,listfile,listfile_org,number_of_genomes,num_org,num_org_all);
*/

	PrintOutput(a,nodes,r);

	if(swVar==5){
		sprintf(hom2,"%s.hom.md",listfile);
		if((fout2=fopen(hom2,"w"))==NULL){
			fprintf(stderr,"Unable to open output file %s.\n",hom2);
			exit(1);
		}
		PrintMultiDomain(fout2,a,nodes);
		fclose(fout2);
	}

	return 0;
}

/*************** end of main ***************************/


/*********************************************************************/
/*
 * Write the clustering result files.
 *
 * For every character c of the global out_mode one file is written:
 * "<listfile>.hom.<c>" when swVar is 4, "<listfile>.homsub.<c>" when
 * swVar is 5 (any other swVar terminates the program).  Each file starts
 * with a header (version, start/output time, host, options, variables,
 * thresholds, input file) followed by one record per node with n3 != 0.
 *
 * a      node table; entries 1..nodes are scanned.  Some fields are
 *        updated while printing (active, grpno).
 * nodes  number of entries in a.
 * r      not read: it is overwritten with a[i]->relatives before any use,
 *        so the caller's value does not matter.
 *
 * Exits with status 1 if an output name does not fit or a file cannot be
 * opened.
 */
void PrintOutput(Node **a, unsigned nodes, Relatives *r)
{
	FILE *fout;
	const char *host;
	unsigned i,j,k,k1,k1_bak,k2,k3,nR;
	unsigned count,count2,ct;
	unsigned totaloutput=0;	/* defined even when out_mode is empty */
	size_t n_modes=strlen(out_mode);
	int a1;
	int len;

	for(count=0;count<n_modes;count++){

		om[0] = out_mode[count];

		if(swVar==4){
			len = snprintf(hom,sizeof hom,"%s.hom.%s",listfile,om);
		} else if(swVar==5){
			len = snprintf(hom,sizeof hom,"%s.homsub.%s",listfile,om);
		} else exit(1);
		if(len < 0 || (size_t)len >= sizeof hom){
			fprintf(stderr,"Output file name is too long.\n");
			exit(1);
		}

		printf("\nPrinting %s ...\n",hom);

		if((fout=fopen(hom,"w"))==NULL){
			fprintf(stderr,"Unable to open output file %s.\n",hom);
			exit(1);
		}
		ct = 0;
		totaloutput=0;

		/* HOST is not exported by every shell: getenv() may return NULL */
		host = getenv("HOST");
		if(host == NULL) host = "unknown";
		snprintf(host_info,sizeof host_info,"%s",host);

		GetTime();
		fprintf(fout,"Output of gclust version %s.\n",version_string);
		fprintf(fout,"Started: %s. Output: %s.\n",start_time,time_string);
		fprintf(fout,"Host name: %s\n",host_info);
		fprintf(fout,"Homology matrix. Out_mode: %s. Options: ",om);
		if(clique_mode) fprintf(fout,"-clique ");
		else if(repeat_mode) fprintf(fout,"-repeat ");
		if(use_org) fprintf(fout,"-org ");
		if(regroup){
			fprintf(fout,"-regroup ");
			fprintf(fout,"level %d ",_regroup_level);
		}
		fprintf(fout,"\n");

		fprintf(fout,"List of variables used in this analysis.\n\n");
		PrintVariables2(fout);
		fprintf(fout,"\n");
		if(repeat_mode){
			fprintf(fout,"	repeat mode with %d levels of threshold: ",num_thr);
			for(a1=0;a1<num_thr;a1++) fprintf(fout,"%6.3e ",thr_list[a1]);
			fprintf(fout,"\n");
		} else if(clique_mode){
			fprintf(fout,"	clique mode with %d levels of threshold: ",num_thr);
			for(a1=0;a1<num_thr;a1++) fprintf(fout,"%6.3e ",thr_list[a1]);
			fprintf(fout,"\n");
		}
		fprintf(fout,"Input file is %s.\n",listfile_org);
		fprintf(fout,"Threshold is %10.3e.\n\n",thr);

		for(i=1;i<=nodes;i++){
			if(a[i]->n3 == 0) continue;

			print_sub=0;
			subgrpno = 1;
			ct += 1;
			PrintProgress(ct,10,1000);
			if((print_sub = PrintSub(a,nodes,i)) == 0) dim = a[i]->n4;
			else{
				/* NOTE(review): j is not checked for 0 here, unlike the
				   matching lookup in main() - confirm a[0] is a valid slot */
				j=FindNode(a,nodes,a[i]->reflist5[0]);
				dim = a[j]->n1;
			}
			if(dim>mat_size){
				matrix=ReallocMatrix(matrix,dim);
				mat_size=dim;
			}
			ClearMatrix(matrix,dim);
			grpno = ct;

			if(!print_sub){
				/* the group is printed as a single matrix */
				if(om[0]=='1'){
					WriteMatrix(a,nodes,i,matrix,dim);
				} else {
					WriteUnitMatrix(a,nodes,i,matrix,dim,om);
				}
				fprintf(fout,"Group %u: %u sequences.",ct,dim);
				if((repeat_mode && a[i]->final_thr < DEFAULT_THR) || clique_mode){
					fprintf(fout," Final thr = %10.2e.",a[i]->final_thr);
				}
				if(a[i]->domain != 0){
					if(a[i]->domain & 0x2) fprintf(fout," Fragment.");
					if(a[i]->domain & 0x4) fprintf(fout," Large protein.");
					if(a[i]->domain & 0x1){
						fprintf(fout," Multidomain protein with %u domains.",a[i]->DMnum);
						/* Now multidomain proteins are allowed to form groups. 355q */
						if(a[i]->n4 > 1 && a[i]->n4 < nodes) a[i]->active = TRUE;
					}
				}
				fprintf(fout,"\n");
				fprintf(fout,"\n");
				PrintMatrix(fout,a,nodes,i,matrix,dim,om);
				fflush(fout);
				if(a[i]->active) totaloutput += a[i]->n4;
				else totaloutput += 1;

				if(clique_mode){	/* 352c */
					r = a[i]->relatives;
					if((nR = a[i]->nR) != 0){
						fprintf(fout,"  Related groups:");
						if(exhaustive_output && a[i]->domain == 0) fprintf(fout,"\n");
						k1_bak = 0;

						if(a[i]->domain & 0x01){
							reassignDMlist(a,nodes,i);

							printDomainList(fout,a,nodes,i);

							dmstring = RepeatedDomains(a,nodes,i);
							if(strcmp(dmstring,"")){
								fprintf(fout,"\n        Repeated domains\n");
								fprintf(fout,"\t                 %s\n\n",dmstring);
							}

							fprintf(fout,"        Group (sequences)  domain similarity     name\n");
						}

						for(j=0;j<nR;j++){
							k1 = a[i]->relatives[j].grpno;
							k = FindNode(a,nodes,a[i]->relatives[j].ID);
							k2 = FindNode(a,nodes,a[k]->pID);
							if(k == 0 || k1 == 0 || k2 == 0){
								/* unresolved relative: report on stdout and skip it */
								printf("%s k=%u k1=%u k2=%u\n",a[i]->name,k,k1,k2);
								continue;
							}

						/* For each multidomain protein, domain homology data are provided. */
							if(a[i]->domain == 1){
								dmstring = DomainInfo(a,nodes,i,j);
								fprintf(fout,"\n\t%7u (%5u)  %s  %s",k1,a[k2]->n4,dmstring,a[k]->name);
								continue;
							} else if(a[i]->domain != 0){
								fprintf(fout,"\n\t%7u (%5u) %s",k1,a[k2]->n4,a[k]->name);
								continue;
							}

							if(exhaustive_output){

								/* print each related group once: skip a repeat
								   of the group number just printed */
								if(k1 == k1_bak) continue;
								else k1_bak = k1;

								fprintf(fout,"\n");

								dim = a[k2]->n4;
								if(dim > mat_size){
									matrix = ReallocMatrix(matrix,dim);
									mat_size=dim;
								}
								ClearMatrix(matrix,dim);
								if(om[0]=='1'){
									WriteMatrix(a,nodes,k2,matrix,dim);
								} else {
									WriteUnitMatrix(a,nodes,k2,matrix,dim,om);
								}
								fprintf(fout,"Related group %u: %u sequences.",a[k2]->grpno,dim);
								fprintf(fout," Final thr = %10.2e.",a[k2]->final_thr);
								if(a[k2]->domain != 0){
									if(a[k2]->domain & 0x2) fprintf(fout," Fragment.");
									if(a[k2]->domain & 0x4) fprintf(fout," Large protein.");
									if(a[k2]->domain & 0x1){
										fprintf(fout," Multidomain.");
										k3=FindNode(a,nodes,a[k2]->pID);
										fprintf(fout," Parent node: Node %u, %s.",a[k3]->pID,a[k3]->name);
									}
								}
								fprintf(fout,"\n");
								fprintf(fout,"\n");
								PrintMatrix(fout,a,nodes,k2,matrix,dim,om);
								fflush(fout);

							} else {
								fprintf(fout,"\n\t%u(%u): %s",k1,a[k2]->n4,a[k]->name);
							}
						}
						fprintf(fout,"\n");
						fprintf(fout,"  END Related groups.\n");
						fprintf(fout,"\n");
					}
				}
				fprintf(fout,"\n");
			} else {
				/* the group is printed as a list of subgroups */
				fprintf(fout,"Group %u: %u sequences.\n\n",ct,a[i]->n4);
				count2=1;
				for(j=0;j<a[i]->n4;j++){
					k=FindNode(a,nodes,a[i]->idlist4[j]);
					if(k==0 || a[k]->n1==0) continue;
					/* group numbers are assigned only while the first output mode is written */
					if(count==0 && a[k]->grpno==0) a[k]->grpno=grpno;
					subgrpno = count2;
					if(om[0]=='1'){
						WriteSubMatrix(a,nodes,k,matrix,a[k]->n1);
					} else {
						WriteSubUnitMatrix(a,nodes,k,matrix,a[k]->n1,om);
					}
					if(a[k]->n6==0) continue;
					fprintf(fout,"   Subgroup %d: %u sequences. ",count2++,a[k]->n6);
					if(a[k]->domain & 0x1) fprintf(fout," Multidomain.");
					if(a[k]->domain & 0x4) fprintf(fout," Large protein.");
					fprintf(fout,"\n\n");
					PrintSubMatrix(fout,a,nodes,k,matrix,a[k]->n1,om);
					fflush(fout);
					totaloutput += a[k]->n6;
				}
			}
		}
		printf("\n");

	}
	/* totaloutput is the count from the last output mode written */
	printf("Total output = %u, while total nodes = %u.\n",totaloutput,nodes);
	printf("   hom printed.\n");

	return;

}
/***************** end of PrintOutput ***************************/


/*    end of file main.c    */
/**************************************************************************************/ 
