/*********************************************************/
void MergeDistantRelatives(Node **a,unsigned nodes)
/* In the clique mode, two cliques are merged if the smaller
 * one is largely included in the larger one.
 * This is done at the last stage. Changes are made on
 * only idlist4 but not on sqlist3.
 *
 * For every node i (1..nodes) and every relative of that node,
 * the node n that the relative's pID resolves to is a candidate
 * for being merged into node i. The tests are applied in the
 * following order:
 * 1. Nodes with a non-zero domain field (node i as well as the
 *    candidate) are not processed.
 * 2. The final size of the merged cluster is limited: node i is
 *    skipped if its n4 exceeds max_cluster, and a candidate is
 *    skipped if count + n4 exceeds max_cluster. max_cluster is
 *    2 x number_of_genomes unless _regroup_level is positive.
 * 4. The candidate is merged if count >= level1 x (its n4), where
 *    count is the number of relatives of node i having the
 *    candidate's pID (level1 is 0.75 by default).
 * 5. The candidate is merged if all relatives of node i carry the
 *    candidate's pID and every relative of the candidate has an
 *    ID equal to n0 of node i.
 * 3. If neither 4 nor 5 selected the pair, CountOrgInIDlist()
 *    decides; a positive return value means merge, zero or a
 *    negative (error) value means no merge.
 *
 * On merge, the idlist4 entries of the candidate whose nodes have
 * domain == 0 are appended to idlist4 of node i and the pID of
 * those nodes is set to n0 of node i. n4/n3 of the candidate are
 * saved in n4b/n3b and then zeroed. Remaining relatives of node i
 * with the same pID are removed from its relatives list.
 * Exits the program if realloc fails. */
/* Regroup level 1 (weak) - 10 (hard), 5 is standard.*/ 
/*********************************************************/
{
	unsigned i,j,j2,k,n,nj;
	unsigned n4, nR1, nR2, ID, IDj, n4x, added;
	unsigned *grown;
	int count, org_check;
	double level1 = 0.75;
	unsigned max_cluster = 2 * number_of_genomes;
	Boolean to_merge = FALSE;


	/* 353f */
	if(_regroup_level > 0){
		level1 = 0.95 - 0.04 * (double)_regroup_level;
		max_cluster = (1.25 + 0.15 * (double)_regroup_level) * (double)number_of_genomes;
	}

	printf("Merging distant relatives.\n");
	for(i=1;i<=nodes;i++){
		if(a[i]->n3 == 0 || a[i]->n4 == 0) continue;
		if(a[i]->domain != 0) continue;		/* criterion 1 */
		nR1 = a[i]->nR;
		n4 = a[i]->n4;
		if(n4 > max_cluster) continue;		/* criterion 2 */
		for(j=0;j<nR1;j++){
			to_merge = FALSE;
			ID = a[i]->relatives[j].pID;
			n = FindNode(a,nodes,ID);
			/* Merging a node into itself would zero its own n4 and n3 below. */
			if(n == i) continue;
			if((n4x = a[n]->n4) == 0) continue;
			if(a[n]->domain != 0) continue;		/* criterion 1 */

			/* number of relatives of node i that point to the same parent */
			count = 0;
			for(k=0;k<nR1;k++) if(ID == a[i]->relatives[k].pID) count++;
			if(count + n4 > max_cluster) continue;		/* criterion 2 */

			if(count >= level1 * (double)n4x) to_merge = TRUE;	/* criterion 4 */

			/* criterion 5 */
			if(count+0 >= nR1){	/* 353g */
				nR2 = a[n]->nR;
				for(j2=0;j2<nR2;j2++){
					if(a[n]->relatives[j2].ID != a[i]->n0) break;
				}
				/* loop ran to completion: no relative of a[n] differs from a[i]->n0 */
				if(j2+0 >= nR2) to_merge = TRUE;	/* 353g */
			}

			/* criterion 3: evaluated once; only a positive result enables the merge */
			if(!to_merge){
				org_check = CountOrgInIDlist(a,nodes,i,n);
				if(org_check > 0) to_merge = TRUE;
			}

			if(to_merge){

				/* integrating a[n] into a[i] */
				grown = (unsigned*)realloc(a[i]->idlist4,((size_t)n4 + n4x + 1)*sizeof(unsigned));
				if(grown == NULL){
					fprintf(stderr,"Memory allocation error in MergeDistantRelatives1.\n");
					exit(1);
				}
				a[i]->idlist4 = grown;
				printf("Merging group %u %s to group %u %s.\n",a[n]->n0,a[n]->name,a[i]->n0,a[i]->name);

				/* Append compactly: skipped members must not leave unwritten
				 * slots in idlist4, and n4 grows only by what was copied. */
				added = 0;
				for(k=0;k<n4x;k++){
					IDj = a[n]->idlist4[k];
					nj = FindNode(a,nodes,IDj);
					if(a[nj]->domain != 0) continue;
					a[i]->idlist4[n4 + added] = IDj;
					added++;
					a[nj]->pID = a[i]->n0;
				}
				n4 += added;
				a[i]->n4 = n4;

				/* keep the old sizes of the absorbed node, then empty it */
				a[n]->n4b = a[n]->n4;
				a[n]->n4 = 0;
				a[n]->n3b = a[n]->n3;
				a[n]->n3 = 0;

				/* Drop the remaining relatives that pointed to the absorbed node.
				 * The list is unordered, so the last valid entry is moved into
				 * the freed slot; nR1 is decremented first so that
				 * relatives[nR1] is that last valid entry. */
				for(k=j+1;k<nR1;k++){
					IDj = a[i]->relatives[k].ID;
					nj = FindNode(a,nodes,IDj);
					if(a[nj]->domain != 0) continue;
					if(a[i]->relatives[k].pID == ID){
						nR1--;
						a[i]->relatives[k].ID=a[i]->relatives[nR1].ID;
						a[i]->relatives[k].pID=a[i]->relatives[nR1].pID;
						a[i]->relatives[k].grpno=a[i]->relatives[nR1].grpno;
						k--;	/* re-examine the entry just moved into slot k */
					}
				}
				a[i]->nR = nR1;
			}
		}
	}

}
/*********************************************************/


/**********************************************************************************/
int CountOrgInIDlist(Node **a,unsigned nodes,unsigned i,unsigned j)
/* This routine counts the number of organisms that are represented in the
 * idlist4 of nodes i and j, and estimates the increment in the number of
 * organisms upon fusion of nodes i and j.
 * If j = 0, node j contributes no members, so only node i is counted.
 *
 * The global array sp is used as scratch space and is overwritten: each
 * organism owns xysize consecutive slots, of which slot 0 holds the member
 * count for node i, slot 1 the count for node j and slot 2 their sum.
 *
 * Return value is TRUE if the two nodes should be merged and FALSE if they
 * should not be merged. -1 indicates an error: use_org is not set, i or j
 * exceeds nodes, i equals j, or a node involved has n4 == 0 or n3 == 0. */
/* Regroup level (version 3.5.3f) 1 (weak) - 10 (hard). 5 is standard.*/
/**********************************************************************************/
{
	int s,m;
	unsigned n,k,offset;
	unsigned n4i;
	unsigned n4j = 0;	/* stays 0 when j == 0, so the node-j loop is skipped */
	int xysize = (num_thr + 1) * 11;
	/* All five counters are accumulated below and must start from zero. */
	int count0 = 0, count1 = 0, count2 = 0;
	int count0and1 = 0, count0xor1 = 0;
	int in_i, in_j;
	int org_measure = 3;
	double allowance_level = 0.8;

	/* 353f */	
	if(_regroup_level > 0 && _regroup_level <= 10){
		org_measure = 5 - 0.4 * (double)_regroup_level;	/* truncated to int */
		allowance_level = 0.65 + 0.03 * (double)_regroup_level;
	}

	if(!use_org) return -1;	
	if(i > nodes || j > nodes || i == j) return -1;
	if((n4i = a[i]->n4) == 0 || a[i]->n3 == 0) return -1;
	if(j > 0 && ((n4j = a[j]->n4) == 0 || a[j]->n3 == 0)) return -1;

	for(m=0;m<xysize * num_org;m++) sp[m] = 0;

	/* positions 0, 1 and 2 are used for node i, node j and combined node i and j. */

	offset = 0;
	for(k=0;k<n4i;k++){
		n = FindNode(a,nodes,a[i]->idlist4[k]);
		sp[(xysize * a[n]->species) + offset] += 1;
	}

	offset = 1;
	for(k=0;k<n4j;k++){
		n = FindNode(a,nodes,a[j]->idlist4[k]);
		sp[(xysize * a[n]->species) + offset] += 1;
	}

	for(s=0;s<num_org;s++){
		sp[(xysize * s) + 2] = sp[(xysize * s)] + sp[(xysize * s) + 1];
	}

	/* count0/count1/count2: organisms present in node i / node j / either;
	 * count0and1: present in both; count0xor1: present in exactly one. */
	for(s=0;s<num_org;s++){
		in_i = (sp[(xysize * s)] > 0);
		in_j = (sp[(xysize * s) + 1] > 0);
		if(in_i) count0 += 1;
		if(in_j) count1 += 1;
		if(sp[(xysize * s) + 2] > 0) count2 += 1;
		if(in_i && in_j) count0and1 += 1;
		if(in_i != in_j) count0xor1 += 1;
	}

	if(count0and1 <= 1 && count0 > 1 && count1 > 0) return TRUE;	/* no overlap in species */
	else if((count2 - count0) < (num_org - count0) * org_measure /num_org) return FALSE;	/* increase should be large enough */
	/* NOTE(review): a 353g remark at this point says that OR was changed to AND
	 * in the next two tests, yet both still use ||. Confirm which is intended. */
	else if(count0and1 > allowance_level * count0 || count0and1 > allowance_level * count1) return FALSE; /* too much overlap in species */
	else if(count0xor1 < (num_org - count0) * org_measure /num_org || count0xor1 < (num_org - count1) * org_measure /num_org) return FALSE;
	else return TRUE;

}
/**********************************************************************************/

