#!/usr/bin/perl

use Math::Complex;
use POSIX;

my (@dist);
my (@minlen);
my (@skeleton);
my ( %maxcon, %atmcon );
my (%paths);
my ($i);
my ( $limit, $maxval ) = ( 0, 0 );

my ($debug) = 0;
my ($a2t)   = 0;

#double of the smallest bond lenght
my ($blimit) = 2;

#10% of the average of same type of bonds.
my ($percentmed) = 0.1;

my ( @atom, @nuatom );

my (@name);
my ($natom);

@name = split( "/", $0 );

print " $name[$#name] v0.5 (c)GAFit toolkit -  2010-2013\n";
print "    collects sets of equivalent atoms\n";
print "    input: any geometries input file\n";

%maxcon = (
    'AR' => 0,
    'XE' => 0,
    'LI' => 1,
    'F'  => 1,
    'H'  => 1,
    'O'  => 2,
    'N'  => 3,
    'SI' => 4,
    'C'  => 4,
    'P'  => 5,
    'S'  => 6,
    'AU' => 6,
);

foreach $k ( keys %maxcon ) {
    $atmcon{$k} = 0;
}

( $inputfile, $nprox, $nproxset, $debug ) = Arguments();

$i = 0;

if ($inputfile) {
    unless ( -e $inputfile ) {
        print "\n\tFile $inputfile doesn't exist!\n";
        exit;
    }
    open( INPUT, "<", $inputfile ) or die $!;
}
else { exit; }

while (<INPUT>) {
    last if ( $_ + 0 == $fin );
    ( $nada, $atom[$i], $x[$i], $y[$i], $z[$i] ) = split( /\s+/, " " . $_ );
    $fin = $_ + 0 if ( $i == 0 );
    $nuatom[$i] = $atom[$i];
    $atom[$i]   = uc( $atom[$i] );
    $i++;
}

while ( !$z[0] ) {
    splice( @atom,   0, 1 );
    splice( @nuatom, 0, 1 );
    splice( @x,      0, 1 );
    splice( @y,      0, 1 );
    splice( @z,      0, 1 );
}

foreach $k (@atom) {
    $atmcon{$k}++;
}

if ($debug) {
    print "\n";
    print "input:\n";
    print "-" x 30, "\n";
    for ( $j = 0 ; $j < @atom ; $j++ ) {
        print "$atom[$j]_$j $x[$j] $y[$j] $z[$j]\n";
    }
    print "\natoms:\n";
    foreach $k ( keys %atmcon ) {
        print "$k => $atmcon{$k}\n" if ( $atmcon{$k} != 0 );
    }
}

$natom = @atom;

DistMatrix();

if ($debug) {
    print "\n";
    print "Distance Matrix\n";
    print "-" x 30, "\n";
    print "    ";
    for ( $i = 0 ; $i < @atom ; $i++ ) {
        print "\t$atom[$i]_$i";
    }
    print "\n";
    for ( $i = 0 ; $i < @atom ; $i++ ) {
        print "$atom[$i]_$i\t";
        for ( $j = 0 ; $j < @atom ; $j++ ) {
            printf( " %6.4f\t", $dist[$i][$j] );
        }
        print "\n";
    }
    print "PASS 0\n";
}

foreach $i ( values %maxcon ) {
    $maxval = $i if ( $i > $maxval );
}

$maxval = @atom * $maxval;
$limit  = ceil( @atom * ( @atom - 2 ) / 2 );
$limit  = $maxval < $limit ? $maxval : $limit;

seamin();
minsea();

if ($debug) {
    print "maxval:$limit\n";
    print "\n";
    print "PASS 1\n";
    for ( $i = 0 ; $i < @minlen ; $i++ ) {
        print "$i minlen: $minlen[$i]\n";
    }
    print "PASS 2\n";
}

#set blimit with the smallest bond
setBlimit( $minlen[0] );

#---------------------------------------------
foreach $k ( sort { $maxcon{$a} cmp $maxcon{$b} } keys %maxcon ) {

    if ( $atmcon{$k} > 0 ) {
        if ($debug) {
            print "---------------------\n";
            print "*" x 50, "\n";
            print "KEY:$k $atmcon{$k}\n";
        }
        RevertAtom($k);
        SelectBonds($k);
        RefineSkel();
        ordermin();
        pinta() if ($debug);
    }
}

if ($debug) {
    pinta();
    print "PASS 3\n";
}

DistMatrix();

#check if there are broken bonds beetween atoms---
BondRadar();
if ($debug) {
    pinta();
    print "PASS 4\n";
}

#---------------------------------------------------
# building skeleton end here.
# now start building strings of connected atoms
#---------------------------------------------------

pskel() if ($debug);

#---------------------------------------------------
# build strings of connected atoms and compare them
# to find types of atoms.
#---------------------------------------------------
my ( %links, @anypath, @anystring );
my ($m);
my ( $a1, $a2, $a3 );
my ($any) = 1;
my ( @branch, @chbran, $i,     $k );
my ( @slist1, @slist2, @elist, $seen );

for ( $m = 0 ; $m < @skeleton ; $m++ ) {
    ( $a1, $a2, $a3 ) = splitit( $skeleton[$m] );
    print "links: $a1 - $a2\n" if ($debug);
    addlinks( $a1, $a2 );
}
foreach $k ( keys %links ) {
    print " $k->$links{$k}\n" if ($debug);
    @branch = ungluep( $links{$k} );
    for ( $i = 0 ; $i < @branch ; $i++ ) {
        push( @anypath, gluep( $k, $branch[$i] ) );
    }
}

panyp();

$m = 0;
while ($any) {
    $watermark = $#anypath;
    $any       = 0;
    for ( $i = $m ; $i < $watermark ; $i++ ) {
        @branch = ungluep( $links{ lastp( $anypath[$i] ) } );
        foreach $k (@branch) {
            if ( !isawyou( $anypath[$i], $k ) ) {
                push( @anypath, gluep( $anypath[$i], $k ) );
                $any = 1;
            }
        }
    }
    $m = $watermark;
}

panyp();

$seen  = "";
@elist = ();

for ( $i = 0 ; $i < @atom ; $i++ ) {
    print "$i (seen:$seen)\n" if ($debug);
    $elist[$i] = "";
    @slist1 = ();
    if ( !isawyou( $seen, $i ) ) {
        $elist[$i] = "$i";
        for ( $m = 0 ; $m < @anypath ; $m++ ) {
            @branch = ungluep( $anypath[$m] );
            if ( $branch[0] == $i ) {
                print "\t $anypath[$m] ->$anystring[$m]\n" if ($debug);
                push( @slist1, $anystring[$m] );
            }
        }
        for ( $j = $i + 1 ; $j < @atom ; $j++ ) {

            if ( $atom[$i] eq $atom[$j] ) {
                @slist2 = ();

                for ( $m = 0 ; $m < @anypath ; $m++ ) {
                    @branch = ungluep( $anypath[$m] );
                    if ( $branch[0] == $j ) {
                        print "\t\t $anypath[$m] ->$anystring[$m]\n"
                          if ($debug);
                        push( @slist2, $anystring[$m] );
                    }
                }
                if ( areequal( \@slist1, \@slist2, $i, $j ) ) {
                    $seen = gluep( $seen, $j );
                    print "$atom[$j]_$j equal $atom[$i]_$i\n" if ($debug);
                    $elist[$i] = gluep( $elist[$i], $j );
                }
            }

        }
    }
}

upelist();
pequal() if ($debug);

#-----PROGRAM OUTPUT-----------------------
print "\n";
print "Number(Atom)\n";
for ( $i = 0 ; $i < @elist ; $i++ ) {
    if ( $elist[$i] ) {
        $j      = $i + 1;
        @branch = ungluep( $elist[$i] );
        print "$j. ";
        for ( $k = 0 ; $k < @branch ; $k++ ) {
            print "$branch[$k](" . $nuatom[ $branch[$k] - 1 ] . ") ";
        }
        print "\n";
    }
}
print "\n";
print "Results:\n";
for ( $i = 0 ; $i < @elist ; $i++ ) {
    print "$elist[$i]\n" if ( $elist[$i] );
}

print "\n";
if ($nprox) {
    my ( $ptypes, $stypes, $ctypes, $din, @vector, @v, $i, $j );
    my ($datyp) = 0;
    my ( $tmpp, $tmps );
    $ptypes = 0;
    $stypes = 0;
    $ctypes = 0;
    for ( $i = 0 ; $i < @elist ; $i++ ) {
        if ( $elist[$i] ) {
            $tmpp = 0;
            $tmps = 0;
            $datyp++;
            $vector[0] = "nop";
            @branch = ungluep( $elist[$i] );
            for ( $j = 0 ; $j < @branch ; $j++ ) {
                if ( $branch[$j] <= $nprox ) {
                    $tmpp++;
                }
                else {
                    $tmps++;
                }
                $vector[ $branch[$j] ] = $datyp;
            }
            $ctypes++ if ( $tmpp > 0 && $tmps > 0 );
            $ptypes++ if ( $tmpp > 0 );
            $stypes++ if ( $tmps > 0 );
        }
    }
    print "Fragment A atoms:$nprox\n";
    print "There are $datyp different atom types.";
    print " Fragment A:$ptypes, Fragment B:$stypes, Common types:$ctypes\n";
    $din = $ptypes * $stypes - $ctypes * ( $ctypes - 1 ) / 2.0;
    print "Total diff interactions: a vector of $din coefs, X(k) \n";
    print "Vector Atom2Type:\n";

    print "Atom2Type(i)={";
    for ( $i = 1 ; $i < @vector ; $i++ ) {
        print "$vector[$i] ";
    }
    print "}\n";
    if ($a2t) {
        open( OUTPUT, ">atom2type.txt" ) or die $!;
        print OUTPUT " $nprox $natom ";
        print OUTPUT "\n";
        for ( $i = 1 ; $i < @vector ; $i++ ) {
            printf OUTPUT " %4d   %2s   %4d \n", $i, $nuatom[ $i - 1 ],
              $vector[$i];
        }
        print OUTPUT "\n";
        close(OUTPUT);

        open( OUTPUT, ">charges.txt" ) or die $!;
        for ( $i = 0 ; $i < $datyp ; $i++ ) {
            printf OUTPUT " %4d   %lf\n", $i + 1, 0;
        }
        close(OUTPUT);
        print "two files created: atom2type.txt and charges.txt\n";
    }
}

exit;
#############PROGRAM FINISH HERE####################

sub trim {
    my ($l) = @_;
    $l =~ s/^\s+|\s+$//g;
    return $l;
}

sub pequal {
    my ($i);
    for ( $i = 0 ; $i < @elist ; $i++ ) {
        print "$i->$elist[$i]\n";
    }
}

sub upelist {
    my ( $i, @branch, $j );
    for ( $i = 0 ; $i < @elist ; $i++ ) {
        @branch = ungluep( $elist[$i] );
        for ( $j = 0 ; $j < @branch ; $j++ ) {
            $branch[$j]++;
        }
        $elist[$i] = gluea(@branch);
    }
}

sub canystring {
    my ( $i, $j );
    @anystring = ();
    for ( $i = 0 ; $i < @anypath ; $i++ ) {
        @branch = ungluep( $anypath[$i] );
        $anystring[$i] = $atom[ $branch[0] ];
        for ( $j = 1 ; $j < @branch ; $j++ ) {
            $anystring[$i] = gluep( $anystring[$i], $atom[ $branch[$j] ] );
        }
    }
}

sub panyp {
    my ($i);
    canystring();
    for ( $i = 0 ; $i < @anypath ; $i++ ) {
        print "$anypath[$i] -> $anystring[$i]\n" if ($debug);
    }
    print "----------------------\n" if ($debug);
}

sub areequal {
    my ( $ra, $rb, $u, $v ) = @_;
    my ( @copy1, @copy2 );
    my ( $i,     $j );
    @copy1 = sort @$ra;
    @copy2 = sort @$rb;
    return 0 if ( $#copy1 != $#copy2 );
    return 0 if ( "@copy1" ne "@copy2" );
    return 1;
}

sub isawyou {
    my ( $a, $b ) = @_;
    my ($k);
    my (@temp) = ungluep($a);
    foreach $k (@temp) {
        return 1 if ( $k == $b );
    }
    return 0;
}

sub lastp {
    my ($a) = @_;
    my (@temp);
    @temp = ungluep($a);
    return $temp[$#temp];
}

sub addlinks {
    my ( $a, $b ) = @_;
    addonelink( $a, $b );
    addonelink( $b, $a );
}

sub addonelink {
    my ( $a, $b ) = @_;
    print "addone link: key:$a link:$links{$a}\n" if ($debug);
    if ( $links{$a} || $links{$a} eq '0' ) {
        $links{$a} = gluep( $links{$a}, $b );
    }
    else {
        $links{$a} = $b;
    }
}

#---------------------------------------------

sub gluep {
    my ( $a, $b ) = @_;
    return "$a $b";
}

sub ungluep {
    my ($a) = trim(@_);
    return split( / /, $a );
}

sub gluea {
    my (@ar) = @_;
    return join( ' ', @ar );
}

sub splitit {
    my ($i) = @_;
    my ( $n1, $n2, $n3 ) = split( /-/, $i );
    return ( $n1, $n2, $n3 );
}

sub glueit {
    my ( $a, $b, $c ) = @_;
    return ("$a-$b-$c");
}

#---------------------------------------------
# broken bonds (aka 'not catched bonds') generic routine.
# see below

sub Radar {
    my ( $a1, $a2 ) = @_;
    my ( $med, $ct, $i, $j, $k );
    my ( $a, $b, $c );
    my ( @first, @second );
    my ($delta);

    $med = 0;
    $ct  = 0;

    for ( $i = 0 ; $i < @skeleton ; $i++ ) {
        ( $a, $b, $c ) = skbond($i);
        if (   ( $atom[$a] eq $a1 && $atom[$b] eq $a2 )
            || ( $atom[$b] eq $a1 && $atom[$a] eq $a2 ) )
        {
            $ct++;
            $med += $c;
        }
    }
    if ($debug) {
        print "\n";
        print "Detecting $a1-$a2 bonds...\n";
    }
    return if ( $ct == 0 );
    $med /= $ct;
    if ($debug) {
        print "med=$med\n";
        print "-" x 30, "\n";
    }
    for ( $i = 0 ; $i < @atom ; $i++ ) {
        for ( $j = $i + 1 ; $j < @atom ; $j++ ) {
            $di    = $dist[$i][$j];
            $delta = abs( $di - $med );
            if ( $delta < $percentmed * $med ) {
                if (   ( $atom[$i] eq $a1 && $atom[$j] eq $a2 )
                    || ( $atom[$j] eq $a1 && $atom[$i] eq $a2 ) )
                {
                    print
"considering $atom[$i]_$i - $atom[$j]_$j with delta $delta\n"
                      if ($debug);

                    push( @first,  $i );
                    push( @second, $j );
                    push( @third,  $di );
                }
            }
        }
    }
    for ( $i = 0 ; $i < @skeleton ; $i++ ) {
        ( $a, $b, $c ) = skbond($i);
        for ( $j = 0 ; $j < @first ; $j++ ) {
            if (   ( $a == $first[$j] && $b == $second[$j] )
                || ( $b == $first[$j] && $a == $second[$j] ) )
            {
                splice( @first,  $j, 1 );
                splice( @second, $j, 1 );
                splice( @third,  $j, 1 );
                last;
            }
        }
    }
    for ( $i = 0 ; $i < @first ; $i++ ) {
        $toadd = glueit( $first[$i], $second[$i], $third[$i] );
        print
"adding $atom[$first[$i]]_$first[$i] - $atom[$second[$i]]_$second[$i]\n"
          if ($debug);
        push( @skeleton, $toadd );
    }
}

# these search for broken bonds.
#------------------------------

sub BondRadar {
    my ( @disatm, $k, $l );
    foreach $k ( keys %atmcon ) {
        if ( $atmcon{$k} > 0 ) {
            push( @disatm, $k );
        }
    }
    foreach ( $k = 0 ; $k < @disatm ; $k++ ) {
        foreach ( $l = $k ; $l < @disatm ; $l++ ) {
            Radar( $disatm[$k], $disatm[$l] );
        }
    }
}

#------------------------------

sub RefineSkel {
    my ( $i, $a, $b, $c, $u, $v, $w );
    my ($count);

    orderskel();

    for ( $i = 0 ; $i < @skeleton ; $i++ ) {
        ( $a, $b, $c ) = skbond($i);
        $count = 1;
        for ( $j = $i + 1 ; $j < @skeleton ; $j++ ) {
            ( $u, $v, $w ) = skbond($j);
            if ( $u ne '' ) {
                if ( $a == $u || $a == $v ) {
                    $count++;
                    if ( $maxcon{ $atom[$a] } < $count ) {
                        push( @minlen, glueit( $u, $v, $w ) );
                        deletesk($j);
                        redo;
                    }
                }
            }
        }
    }
}

sub SelectBonds() {
    my ($hmny)  = 0;
    my ($which) = @_;
    my ( $i, $a, $b, $c );
    for ( $i = 0 ; $i < @minlen ; $i++ ) {
        ( $a, $b, $c ) = bond($i);
        if ( $hmny < $atmcon{$which} ) {
            if ( $c < $blimit ) {
                if ( $atom[$a] eq $which ) {
                    if ( $minlen[$i] ) {
                        $hmny++;
                        push( @skeleton, $minlen[$i] );
                        deleteit($i);
                        redo;
                    }
                    else {
                        last;
                    }
                }
            }
        }
        else { last; }
    }
}

sub RevertAtom {
    my ($what) = @_;
    my ( $i, $a, $b, $c );
    for ( $i = 0 ; $i < @minlen ; $i++ ) {
        ( $a, $b, $c ) = bond($i);
        if ( $atom[$b] eq $what ) {
            swap($i);
        }
    }
}

#------------------------------------

sub deleteit {
    my ($what) = @_;
    my (@removed);
    @removed = splice( @minlen, $what, 1 );
}

sub deletesk {
    my ($what) = @_;
    my (@removed);
    @removed = splice( @skeleton, $what, 1 );
}

sub swap {
    my ($i) = @_;
    my ( $a, $b, $c );
    ( $a, $b, $c ) = bond($i);
    $minlen[$i] = glueit( $b, $a, $c );
    return ( $b, $a );
}

sub bond {
    my ($i) = @_;
    my ( $n1, $n2, $n3 );
    ( $n1, $n2, $n3 ) = splitit( $minlen[$i] );
    return ( $n1, $n2, $n3 );
}

sub skbond {
    my ($i) = @_;
    my ( $n1, $n2, $n3 );
    ( $n1, $n2, $n3 ) = splitit( $skeleton[$i] );
    return ( $n1, $n2, $n3 );
}

sub seamin {
    my ( $ii, $jj );
    my ( $min, $imin, $jmin );
    for ( $ii = 0 ; $ii < @atom ; $ii++ ) {
        print " $ii" if ($debug);
        for ( $jj = $ii + 1 ; $jj < @atom ; $jj++ ) {
            push( @minlen, glueit( $dist[$ii][$jj], $ii, $jj ) );
            @minlen = sort { $a <=> $b } @minlen;
            if ( @minlen > $limit ) {
                pop(@minlen);
            }
        }
    }
}

sub bylenght {
    my ( $n1, $n2, $n3 ) = splitit($a);
    my ( $n4, $n5, $n6 ) = splitit($b);
    return $n3 <=> $n6;
}

sub ordermin {
    @minlen = sort bylenght @minlen;
}

sub orderskel {
    @skeleton = sort bylenght @skeleton;
}

sub minsea {
    my ( $k, $a, $b, $c );
    for ( $k = 0 ; $k < @minlen ; $k++ ) {
        ( $a, $b, $c ) = splitit( $minlen[$k] );
        $minlen[$k] = glueit( $b, $c, $a );
    }
}

sub cdist {
    my ( $a, $b ) = @_;
    return
      sqrt( ( $x[$a] - $x[$b] )**2 +
          ( $y[$a] - $y[$b] )**2 +
          ( $z[$a] - $z[$b] )**2 );
}

sub DistMatrix {
    for ( $i = 0 ; $i < @atom ; $i++ ) {
        for ( $j = $i + 1 ; $j < @atom ; $j++ ) {
            $dist[$i][$j] = cdist( $i, $j );
        }
    }
}

sub setBlimit {
    my ( $a, $b, $c ) = splitit(@_);
    $blimit = $blimit * $c;
    print "bond limit: $blimit $c\n" if ($debug);
}

#---debug routines---
sub pinta {
    my ( $i, $a, $b, $c );
    print "****start print****\n";
    print "*" x 50, "\n";
    print "labels\n";
    print "-" x 30, "\n";
    patom();
    pminlen();
    print "***end pinta***\n";
}

sub pminlen {
    my ( $i, $a, $b, $c );
    print "\n";
    print "remained\n";
    print "-" x 30, "\n";

    for ( $i = 0 ; $i < @minlen ; $i++ ) {
        ( $a, $b, $c ) = bond($i);
        print "$i.\t$atom[$a]_$a - $atom[$b]_$b ($c)\n";
    }
    pskel();
}

sub patom {
    my ($i);
    for ( $i = 0 ; $i < @atom ; $i++ ) {
        print "$i=$atom[$i] ";
    }
    print "\n";
}

sub pskel {
    my ($i);
    my ( $a, $b, $c );
    my ( $d, $e );
    my ($f) = 0;
    print "\n";
    print "skeleton\n";
    print "-" x 30, "\n";
    for ( $i = @skeleton - 1 ; $i >= 0 ; $i-- ) {
        ( $a, $b, $c ) = skbond($i);
        $d = $a;    #+ 1;
        $e = $b;    #+ 1;
        print "$atom[$a]_$d - $atom[$b]_$e ($c)\n";
        $f++;
    }
    print "$f bonds\n";
}

#---ARGUMENTS AND HELP----
sub Arguments {
    my ( $nprox, $nproxset, $debug ) = ( 0, 0, 0 );
    my ($k);
    my (@temp) = @ARGV;
    while ( $k = shift(@temp) ) {
        if ( $k eq "-d" ) {
            $debug = 1;
            next;
        }
        if ( $k eq "-p" ) {
            $nprox   = shift(@temp);
            $proxset = 1;
            next;
        }
        if ( $k eq "-h" ) {
            Help();
            exit;
        }
        if ( $k eq "-o" ) {
            $a2t = 1;
            next;
        }
        $inputfile = $k;
    }
    return ( $inputfile, $nprox, $nproxset, $debug );
}

sub Help {
    print "\t-d  \t debug\n";
    print "\t-p N\tfragment A atoms\n";
    print "\t-o  \tcreates needed files\n";
}

