#!/usr/bin/perl # The first line might be #!/usr/local/bin/perl etc # depending on the system. # # Copyright. Naoki Sato 2006 University of Tokyo # # Do not delete this comment.##################### if ( $#ARGV < 0 ) { print "usage: $0 input_file\n"; print "This program reads tab-delimited text file describing cell lineages\n"; print "of an Anabaena filament, and writes a tree file.\n"; exit 1; } $max = 10; # Size of initial cell name $| = 1; $infile = $ARGV[0]; # input file $tree = $infile . ".tre"; # output tree file $to_skip = 1; $first_line = 0; $count1 = -1; # counter for $num1 $Hflag = 0; open(INFILE, $infile) || die "Unable to open file: $infile.\n"; open(TREE, "> $tree") || die "Unable to open file: $tree.\n"; while(){ chomp($_); @line = split(/\t/,$_); if($to_skip == 1){ # print $line[0] . "\n"; if($line[0] eq "") { next; } else { if($line[0] == 0){ printf STDERR "Add total time in the first line.\n"; exit 1; } else { $total_time = $line[0]; print "Total time=", $total_time, "\n"; } $to_skip = 0; $first_line = 1; next; } } $max = $#line + 1; # Setting file format if($first_line == 1){ for($i = 0; $i < $max; $i++){ if($line[$i] eq ""){ $i--; last; } else { $A[$i] = $line[$i]; } } $len1 = $i + 1; for($i = $len1+1; $i < $max; $i++){ if($line[$i] eq ""){ $i--; last; } else { $B[$i] = $line[$i]; } } $len2 = $i - $len1 - 1; $num1 = $num2 = 1; for($i = 0; $i < $len1; $i++){ $num1 *= 2; } for($i = 0; $i < $len2; $i++){ $num2 *= 2; } print "num1=" . $num1 . ", num2=" . $num2 . "\n"; $first_line = 0; } # end of first_line if($line[0] ne ""){ $count1++; $Hflag = 0; if($count1 >= $num1){ printf STDERR "File format error. Count1=" . $count1 . ", Count2=" . $count2 . "\n"; exit 1; } $count2 = 0; if($line[$max-1] eq "H"){ $Hflag = 1; } next; } elsif($line[$len1+1] ne ""){ $count2++; if($count2 >= $num2){ printf STDERR "File format error 2. Count1=" . $count1 . ", Count2=" . $count2 . "\n"; exit 1; } if($line[$max-1] eq "H"){ $Hflag = 1; } next; } else{ if($count2 == $num2 - 1){ next; } $flag = 0; for($i = $len1+$len2+2; $i < $max; $i++){ if($line[$i] ne ""){ if($flag == 0){ $C1 = $line[$i]; $flag = 1; }else{ $C2 = $line[$i]; $flag = 0; last; } } } if($C2 eq "H" && $C1 == -1){ $C1 = -999; $C2 = ""; } elsif($Hflag == 1){ $C1 = $C1 + 900; $Hflag = 0; } $D[$count1*($num2-1)+$count2] = $C1; # print "Count1=", $count1, ", Count2=", $count2, ", C1=", $C1, "\n"; } } close INFILE; $num1 = $count1 + 1; #for($i = 0; $i < $num1; $i++){ # for($j = 0; $j < $num2-1; $j++){ # print $i, " ", $j, " ", $D[$i*($num2-1)+$j], "\n"; # } #} print TREE "(\n"; for($i = 0; $i < $num1; $i++){ $name1 = 0; for($j = 0; $j < $num2/2; $j++){ $num2_half = $num2/2; $name2 = $name1 + 1; $bin_name1 = ""; $bin_name2 = ""; $temp = $name1; for($k = 0; $k < $len2; $k++){ $bin_name1 = $temp % 2 . $bin_name1; $temp /= 2; } $temp = $name2; for($k = 0; $k < $len2; $k++){ $bin_name2 = $temp % 2 . $bin_name2; $temp /= 2; } $temp = $name2/2; $node_name = ""; for($k = 0; $k < $len2 - 1; $k++){ $node_name = $temp % 2 . $node_name; $temp /= 2; } $time1 = $total_time; $time2 = $total_time; $time3 = $D[$i*($num2-1)+$name1]; if($time3 > 900){ $time3 -= 900; $bin_name1 = "H"; } $time3b = $D[$i*($num2-1)+$name1+1]; if($time3b > 900){ $time3b -= 900; $bin_name2 = "H"; $D[$i*($num2-1)+$name1] = $time3b; } $time1 -= $time3; $time2 -= $time3; if($time3 > 0){ $node[$j] = "($bin_name1:$time1,$bin_name2:$time2):"; }elsif($time3 == -999){ $node[$j] = "H:"; }else{ $node[$j] = "$node_name:"; } $node_time[$j] = $time3; $name1 += 2; } # end of j for($m = 2; $m < 5; $m++){ #for test m=2 - 4 for($n = 0; $n < $num2_half; $n++){ $counter = (2**$m)*$n + 2**($m-1) - 1; $counter_half = ($counter - 1)/2; if($counter >= $num2 - 1){ last; } $delta = 2**($m-2); $counter_half1 = $counter_half + $delta; $new_node_time = $D[$i*($num2-1)+$counter]; if($new_node_time < 0){ $node_time[$counter_half] = $new_node_time; $node_time[$counter_half1] = $new_node_time; } else { if($node_time[$counter_half] < 0){ $node_time[$counter_half] = $total_time - $new_node_time; } else { $node_time[$counter_half] -= $new_node_time; } if($node_time[$counter_half1] < 0){ $node_time[$counter_half1] = $total_time - $new_node_time; } else { $node_time[$counter_half1] -= $new_node_time; } } if($new_node_time == -1){ # Feb. 26 # $node[$counter_half1] = "C:"; chop($node[$counter_half1]); chop($node[$counter_half1]); $node[$counter_half1] .= ":"; $node[$counter_half] = ""; }elsif($new_node_time == -999){ $node[$counter_half1] = "H:"; $node[$counter_half] = ""; }else{ $node[$counter_half] = "(" . $node[$counter_half] . $node_time[$counter_half]; $node[$counter_half1] = $node[$counter_half1] . $node_time[$counter_half1] . "):"; $node_time[$counter_half] = $new_node_time; $node_time[$counter_half1] = $new_node_time; $node[$counter_half1] = $node[$counter_half] . ", " . $node[$counter_half1]; $node[$counter_half] = ""; } } if($counter >= $num2 - 1 && $n == 0){ last; } } $m--; $counter = 2**($m-1) - 1; $counter_half = ($counter - 1)/2; $delta = 2**($m-2); $counter_half1 = $counter_half + $delta; if($node_time[$counter_half1] < 0){ $node[$counter_half1] = $node[$counter_half1] . $total_time; } else { $node[$counter_half1] = $node[$counter_half1] . $node_time[$counter_half1]; } if($i < $num1 - 1) { $node[$counter_half1] .= ","; } for($j = 0; $j < $num2/2; $j++){ print TREE $node[$j], "\n"; } } print TREE ");\n"; close TREE; exit 0;