#! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh rtheory.h <<'END_OF_rtheory.h' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "rtheory.h" definitions for the 1-3-5-State machine X */ X#define fwd 0 X#define rev 1 X X#define adaptive 0 X#define fixed 1 X X#define BIG -1e7 X X/* X * definition for the 1State machine X */ Xtypedef struct { X float MesLen ; /* r-theory message length */ X float match, change, indel ; /* weighted frequency counters */ X } cell ; X Xtypedef struct { X float match, change, indel ; X } OneStatePBlock ; X Xfloat OS_messagelength() ; X X/*------------------------------------------------------------------------- X * definitions for 3 State machine X */ Xtypedef struct { X float D, H, V ; /* probs that the machine is in these states */ X float DM, DC, DH, DV ; /* probs of the arcs */ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X} ThreeStatePBlock ; X Xtypedef struct { X float D, H, V ; /* cost of the three states */ X float DM, DC, DH, DV ; /*weighted frequency counters*/ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X} TScell ; X Xtypedef struct { X float D, H, V ; X} TSpcell ; X X#define FREE 0 X#define SYMM 1 X/* X * the free adapting version and the symmetric version (where H = V) X */ X#define ThreeStateML(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy) \ XThreeStateMesLength(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy,FREE) X X#define ThreeStateMLS(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy)\ XThreeStateMesLength(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy,SYMM) X X X/*---------------------------------------------------------------------------- X * declarations for 5-State machine with rabit ears long indel . "5State.c" X */ Xtypedef struct { X float D, H, V, A, B ; /* probs that the machine is in these states */ X float DM, DC, DH, DV, DA, DB ; /* probs of the arcs */ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X float AD, AA ; X float BD, BB ; X} FiveStatePBlock ; X Xtypedef struct { X float D, H, V, A, B ; /* cost of the three states */ X float DM, DC, DH, DV, DA, DB ; /*weighted frequency counters*/ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X float AD, AA ; X float BD, BB ; X} FScell ; X Xtypedef struct { X float D, H, V, A, B ; X} FSpcell ; X X#define FIXED 1 /* fix the probs of the long indels */ X#define FiveStateML(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy) \ X FiveStateMesgLength(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy,FREE) X#define FiveState2ML(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy) \ X FiveStateMesgLength(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy,FIXED) END_OF_rtheory.h if test 3630 -ne `wc -c examplestrings <<'END_OF_examplestrings' X>example 1 Xexample string 1 XCAACCAAC TGCA XTTAGCT X>example 2 Xexample string 2 XACCAACCA TGCA XGGGG XTTAGCT END_OF_examplestrings if test 103 -ne `wc -c scale.c <<'END_OF_scale.c' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "scale.c" X * X * This program convert the matrix generated by the rtheory machines X * (c.f. 1State.c 3State.c, 5State.c) into a matrix of grey sale X * values 0..255. This output from this program can be piped to X * pimg (c.f. printimage.c) which transform it into postscript X * that can be display on a lazer writer X * X * "usage: scale l(og) [contrast] | p | m [treshold] X * X * The 'l' option is to display the matrix in logaritmetic in ML. X * The optional 'constrast' factor controls the sharpness of the X * high probability region. The default value is 1. A high value X * (e.g. 4) gives a sharper picture while a low value (e.g. 0.2) X * includes more ML's in the high intensity region. X * X * The 'p' option displays the image linear in probability (exp2 of ML). X * X * The 'm' option display the image linear in ML. If the optional 'treshold' X * value is supplied then the ground (or 'white')level is set to X * min ML + treshold. All values higher than that will appear white X * on the plot. X */ X#include X#include X#include "macro.h" X#include "Strings.h" X X#define Nkeys 8 /* # of stops in key row */ X Xfloat M[MATRIXSIZE][MATRIXSIZE] ; Xint m, n ; Xfloat contrastfactor, treshold ; X XFILE *fopen() ; X Xstatic findminmax(minv,maxv) X float *minv, *maxv; X{ X int i, j ; X float maxval,minval ; X X maxval = 0.0 ; X minval = MAXINT ; X for (i=0; i <= m ; i++) X for (j=0; j <= n ; j++) { X if (M[i][j] > maxval) maxval = M[i][j] ; X if (M[i][j] < minval) minval = M[i][j] ; X } X *minv = minval ; X *maxv = maxval ; X} X X X/* X * linear in probabilities X */ Xlinearprobs() X{ X int i, j ; X float minval, maxval ; X int c ; X X findminmax(&minval,&maxval) ; X X for (i=0; i <= m ; i++) { X for (j=0; j <= n ; j++) { X if (M[i][j] - minval > 20) c = 255 ; X else c = (int) ((1-exp2((double)minval-M[i][j]))*255+0.5) ; X printf(" %3d",c) ; X } X putchar('\n') ; X } X printf("Probs\n") ; X printf("%d ",Nkeys) ; X for ( i = 0; i <= Nkeys; i++) X printf("%3.2f ", 1.0-(float)i/Nkeys) ; X putchar('\n') ; X} X X/* X * Change matrix into grey scale value ... in linear scale X */ Xlinearscale() X{ X int i, j ; X float minval, maxval, scale ; X int c ; X X findminmax(&minval,&maxval) ; X maxval -= minval ; X treshold = fmin2(treshold,maxval) ; X X scale = 255.0/treshold ; X X for (i=0; i <= m ; i++) { X for (j=0; j <= n ; j++) { X if (M[i][j] - minval > treshold) c = 255 ; X else c = (int) ((M[i][j]-minval)*scale+0.5) ; X printf(" %3d",c) ; X } X putchar('\n') ; X } X printf("Mesg Length(bits)\n") ; X printf("%d ",Nkeys) ; X for ( i = 0; i <= Nkeys; i++) X printf("%d ",(int)(255*i/Nkeys/scale+0.5)) ; X putchar('\n') ; X} X X/* X * Change matrix into grey scale value ... in log scale X */ Xlogscale() X{ X int i, j ; X float minval, maxval, scale ; X int c ; X X findminmax(&minval,&maxval) ; X maxval -= minval ; X X scale = (float)(255.0/log((double)maxval*contrastfactor+1)) ; X for (i=0; i <= m ; i++) { X for (j=0; j <= n ; j++) { X c = (int) (log((double)(M[i][j]-minval)*contrastfactor+1)*scale+0.5) ; X printf(" %3d",c) ; X } X putchar('\n') ; X } X printf("Mesg Length(bits)\n") ; X printf("%d ",Nkeys) ; X for ( i = 0; i <= Nkeys; i++) X printf("%d ",(int)((exp((double)255*i/Nkeys/scale)-1)/contrastfactor+0.5)) ; X putchar('\n') ; X} X Xreadln() { while (getchar() != '\n') ; } X X/* X * read in the sequences and the density matrix X */ XReadInput() X{ X int i, j ; X char c ; X X scanf("%d %d",&m, &n) ; /* length of strings */ X readln() ; X if (m > MATRIXSIZE || n > MATRIXSIZE) { X printf("ERROR: array too big\n") ; X exit() ; X } X X /* size of matrix, one larger than length of strings */ X printf("%d %d\n", m+1,n+1) ; X X for (i=1; i<=m ; ) /* the first sequence */ X if ( (c = getchar()) != '\n') { putchar(c) ; i++ ;} X putchar('\n') ; X readln() ; X X for (i=1; i<=n ; ) /* the second sequence */ X if ( (c = getchar()) != '\n') {putchar(c) ; i++ ;} X putchar('\n') ; X readln() ; X X for ( i = 0; i <= m; i++) { /* the density matrix */ X for ( j = 0; j <= n ; j++) X scanf("%f",&M[i][j]) ; X readln() ; X } X} X Xmain(argc, argv) X int argc ; X char *argv[] ; X{ X char c ; X X if (argc < 2 || argc > 3) { X printf("usage: scale l(og) [contrast] | p | m [treshold] \n") ; X exit(0) ; X } X switch (*argv[1]) { X case 'l' : X if (argc == 3) sscanf(argv[2],"%f",&contrastfactor) ; X else contrastfactor = 1.0 ; X ReadInput() ; X logscale() ; X break ; X case 'p' : X ReadInput() ; X linearprobs() ; X break ; X case 'm' : X if (argc == 3) sscanf(argv[2],"%f",&treshold) ; X else treshold = 1000000.0 ; /* should be large enough */ X ReadInput() ; X linearscale() ; X break ; X default : X printf("usage: scale l(og) [contrast] | p | m [treshold] \n") ; X exit(0) ; X break ; X } X X /* the (optional) lines of caption */ X while ((c = getchar()) != '\n' && c != EOF) putchar(c) ; X putchar('\n') ; X while ((c = getchar()) != '\n' && c != EOF) putchar(c) ; X putchar('\n') ; X} END_OF_scale.c if test 6039 -ne `wc -c sumlg.c <<'END_OF_sumlg.c' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "sumlg.c" translated from Chris Wallace's fortran program X * X * entry : float sumlg(a, b) X * float a,b ; X * X * Returns log (2 ** a + 2 ** b) (log is to base 2) X * X * Overall error is less than 10 ** -6 X * Table of polynomial coefficients for the piecewize approximation X * of the function X * X * log (1 + 2 ** -x) (log is to base 2) (x .ge. 0) X * X * To use the table, form y = 8 * x. If y .ge. 239.5, fun = 0 X * Otherwise, form k = nint(y), z = y-k. Then k is in 0 ... 239, X * z is in -0.5 ... 0.5. X * Fun is approx given by the poly in z whose coeffs are in d() X * d(4k+3) is the constant term, d(4k) is the coeff of z ** 3 X * X * sumlg(a,b,c) float a,b,c ; X * same as above, but summing for 3 numbers X * X */ X#include X Xfloat d[] = {0.0, .13536972E-02, -.62500000E-01, .10000000E+01, -.16893029E-05, X.13511604E-02, -.59794086E-01, .93885338E+00, -.33533859E-05, .13435879E-02, X-.57098298E-01, .88040845E+00, -.49678251E-05, .13310923E-02, -.54422609E-01, X.82465008E+00, -.65097463E-05, .13138572E-02, -.51776695E-01, .77155331E+00, X-.79584962E-05, .12921317E-02, -.49169801E-01, .72108368E+00, -.92961979E-05, X.12662229E-02, -.46610610E-01, .67319779E+00, -.10508169E-04, .12364866E-02, X-.44107143E-01, .62784388E+00, -.11583186E-04, .12033178E-02, -.41666667E-01, X.58496250E+00, -.12513594E-04, .11671395E-02, -.39295628E-01, .54448739E+00, X-.13295268E-04, .11283925E-02, -.36999607E-01, .50634623E+00, -.13927446E-04, X.10875249E-02, -.34783295E-01, .47046159E+00, -.14412443E-04, .10449824E-02, X-.32650484E-01, .43675180E+00, -.14755287E-04, .10011996E-02, -.30604088E-01, X.40513181E+00, -.14963300E-04, .95659234E-03, -.28646166E-01, .37551412E+00, X-.15045644E-04, .91155179E-03, -.26777971E-01, .34780956E+00, -.15012861E-04, X.86643939E-03, -.25000000E-01, .32192809E+00, -.14876432E-04, .82158347E-03, X-.23312062E-01, .29777954E+00, -.14648369E-04, .77727702E-03, -.21713345E-01, X.27527422E+00, -.14340846E-04, .73377669E-03, -.20202483E-01, .25432356E+00, X-.13965887E-04, .69130274E-03, -.18777638E-01, .23484058E+00, -.13535104E-04, X.65003996E-03, -.17436565E-01, .21674036E+00, -.13059496E-04, .61013916E-03, X-.16176683E-01, .19994038E+00, -.12549294E-04, .57171928E-03, -.14995143E-01, X.18436087E+00, -.12013863E-04, .53486986E-03, -.13888889E-01, .16992500E+00, X-.11461638E-04, .49965371E-03, -.12854710E-01, .15655907E+00, -.10900103E-04, X.46610978E-03, -.11889298E-01, .14419266E+00, -.10335795E-04, .43425598E-03, X-.10989285E-01, .13275867E+00, -.97743347E-05, .40409200E-03, -.10151288E-01, X.12219341E+00, -.92204730E-05, .37560197E-03, -.93719401E-02, .11243655E+00, X-.86781515E-05, .34875702E-03, -.86479201E-02, .10343109E+00, -.81505692E-05, X.32351757E-03, -.79759752E-02, .95123352E-01, -.76402552E-05, .29983546E-03, X-.73529412E-02, .87462840E-01, -.71491413E-05, .27765586E-03, -.67757568E-02, X.80402187E-01, -.66786341E-05, .25691896E-03, -.62414760E-02, .73897027E-01, X-.62296846E-05, .23756141E-03, -.57472763E-02, .67905876E-01, -.58028526E-05, X.21951762E-03, -.52904640E-02, .62390013E-01, -.53983672E-05, .20272082E-03, X-.48684784E-02, .57313341E-01, -.50161820E-05, .18710399E-03, -.44788924E-02, X.52642258E-01, -.46560237E-05, .17260060E-03, -.41194129E-02, .48345522E-01, X-.43174364E-05, .15914520E-03, -.37878788E-02, .44394119E-01, -.39998197E-05, X.14667395E-03, -.34822581E-02, .40761128E-01, -.37024627E-05, .13512500E-03, X-.32006450E-02, .37421601E-01, -.34245719E-05, .12443874E-03, -.29412550E-02, X.34352432E-01, -.31652967E-05, .11455802E-03, -.27024202E-02, .31532241E-01, X-.29237492E-05, .10542834E-03, -.24825848E-02, .28941260E-01, -.26990216E-05, X.96997868E-04, -.22802991E-02, .26561222E-01, -.24902006E-05, .89217511E-04, X-.20942142E-02, .24375262E-01, -.22963787E-05, .82040916E-04, -.19230769E-02, X.22367813E-01, -.21166633E-05, .75424428E-04, -.17657239E-02, .20524515E-01, X-.19501841E-05, .69327039E-04, -.16210765E-02, .18832131E-01, -.17960983E-05, X.63710311E-04, -.14881354E-02, .17278461E-01, -.16535950E-05, .58538288E-04, X-.13659759E-02, .15852267E-01, -.15218980E-05, .53777395E-04, -.12537425E-02, X.14543201E-01, -.14002674E-05, .49396332E-04, -.11506448E-02, .13341737E-01, X-.12880012E-05, .45365959E-04, -.10559527E-02, .12239110E-01, -.11844351E-05, X.41659190E-04, -.96899224E-03, .11227255E-01, -.10889428E-05, .38250872E-04, X-.88914186E-03, .10298756E-01, -.10009353E-05, .35117676E-04, -.81582831E-03, X.94467930E-02, -.91986005E-06, .32237983E-04, -.74852332E-03, .86650971E-02, X-.84519970E-06, .29591781E-04, -.68674022E-03, .79479062E-02, -.77647093E-06, X.27160557E-04, -.63003084E-03, .72899258E-02, -.71322290E-06, .24927201E-04, X-.57798261E-03, .66862912E-02, -.65503574E-06, .22875907E-04, -.53021586E-03, X.61325338E-02, -.60151899E-06, .20992084E-04, -.48638132E-03, .56245491E-02, X-.55231000E-06, .19262271E-04, -.44615772E-03, .51585678E-02, -.50707228E-06, X.17674055E-04, -.40924967E-03, .47311287E-02, -.46549402E-06, .16215995E-04, X-.37538560E-03, .43390540E-02, -.42728646E-06, .14877552E-04, -.34431593E-03, X.39794263E-02, -.39218250E-06, .13649018E-04, -.31581130E-03, .36495674E-02, X-.35993519E-06, .12521458E-04, -.28966098E-03, .33470191E-02, -.33031642E-06, X.11486648E-04, -.26567138E-03, .30695254E-02, -.30311559E-06, .10537021E-04, X-.24366472E-03, .28150155E-02, -.27813839E-06, .96656197E-05, -.22347768E-03, X.25815895E-02, -.25520564E-06, .88660445E-05, -.20496035E-03, .23675038E-02, X-.23415218E-06, .81324129E-05, -.18797505E-03, .21711583E-02, -.21482583E-06, X.74593183E-05, -.17239540E-03, .19910852E-02, -.19708646E-06, .68417919E-05, X-.15810538E-03, .18259377E-02, -.18080506E-06, .62752689E-05, -.14499849E-03, X.16744802E-02, -.16586289E-06, .57555557E-05, -.13297700E-03, .15355790E-02, X-.15215071E-06, .52788004E-05, -.12195122E-03, .14081944E-02, -.13956805E-06, X.48414657E-05, -.11183882E-03, .12913722E-02, -.12802251E-06, .44403033E-05, X-.10256426E-03, .11842375E-02, -.11742917E-06, .40723310E-05, -.94058249E-04, X.10859876E-02, -.10770994E-06, .37348107E-05, -.86257182E-04, .99588610E-03, X-.98793095E-07, .34252290E-05, -.79102715E-04, .91325773E-03, -.90612716E-07, X.31412790E-05, -.72541319E-04, .83748303E-03, -.83108248E-07, .28808431E-05, X-.66523887E-04, .76799382E-03, -.76224072E-07, .26419783E-05, -.61005368E-04, X.70426899E-03, -.69909115E-07, .24229012E-05, -.55944435E-04, .64583060E-03, X-.64116472E-07, .22219753E-05, -.51303179E-04, .59224027E-03, -.58803078E-07, X.20376992E-05, -.47046825E-04, .54309597E-03, -.53929402E-07, .18686952E-05, X-.43143476E-04, .49802898E-03, -.49459147E-07, .17136993E-05, -.39563876E-04, X.45670113E-03, -.45359005E-07, .15715518E-05, -.36281187E-04, .41880228E-03, X-.41598404E-07, .14411888E-05, -.33270797E-04, .38404801E-03, -.38149277E-07, X.13216344E-05, -.30510129E-04, .35217747E-03, -.34985876E-07, .12119932E-05, X-.27978479E-04, .32295144E-03, -.32084575E-07, .11114440E-05, -.25656855E-04, X.29615052E-03, -.29423691E-07, .10192334E-05, -.23527840E-04, .27157354E-03, X-.26983326E-07, .93467034E-06, -.21575462E-04, .24903598E-03, -.24745241E-07, X.85712105E-06, -.19785069E-04, .22836864E-03, -.22692675E-07, .78600413E-06, X-.18143227E-04, .20941634E-03, -.20810286E-07, .72078633E-06, -.16637613E-04, X.19203679E-03, -.19083958E-07, .66097859E-06, -.15256927E-04, .17609948E-03, X-.17500773E-07, .60613233E-06, -.13990805E-04, .16148476E-03, -.16048879E-07, X.55583613E-06, -.12829744E-04, .14808286E-03, -.14717388E-07, .50971266E-06, X-.11765027E-04, .13579316E-03, -.13496324E-07, .46741588E-06, -.10788662E-04, X.12452336E-03, -.12376538E-07, .42862839E-06, -.98933174E-05, .11418884E-03, X-.11349635E-07, .39305914E-06, -.90722717E-05, .10471197E-03, -.10407912E-07, X.36044116E-06, -.83193599E-05, .96021588E-04, -.95443109E-08, .33052966E-06, X-.76289288E-05, .88052428E-04, -.87523500E-08, .30310010E-06, -.69957940E-05, X.80744637E-04, -.80260849E-08, .27794660E-06, -.64152012E-05, .74043330E-04, X-.73600818E-08, .25488033E-06, -.58827905E-05, .67898178E-04, -.67493276E-08, X.23372812E-06, -.53945637E-05, .62263025E-04, -.61892483E-08, .21433117E-06, X-.49468545E-05, .57095548E-04, -.56756451E-08, .19654384E-06, -.45363005E-05, X.52356935E-04, -.52046455E-08, .18023258E-06, -.41598184E-05, .48011593E-04, X-.47727402E-08, .16527492E-06, -.38145808E-05, .44026886E-04, -.43766643E-08, X.15155854E-06, -.34979949E-05, .40372883E-04, -.40134538E-08, .13898044E-06, X-.32076829E-05, .37022140E-04, -.36803900E-08, .12744617E-06, -.29414645E-05, X.33949488E-04, -.33749545E-08, .11686911E-06, -.26973401E-05, .31131849E-04, X-.30948728E-08, .10716983E-06, -.24734762E-05, .28548057E-04, -.28380288E-08, X.98275488E-07, -.22681914E-05, .26178705E-04, -.26024959E-08, .90119291E-07, X-.20799438E-05, .24005996E-04, -.23865142E-08, .82639982E-07, -.19073195E-05, X.22013611E-04, -.21884521E-08, .75781390E-07, -.17490219E-05, .20186583E-04, X-.20068331E-08, .69492003E-07, -.16038620E-05, .18511189E-04, -.18402825E-08, X.63724583E-07, -.14707495E-05, .16974844E-04, -.16875519E-08, .58435815E-07, X-.13486846E-05, .15566008E-04, -.15474937E-08, .53585975E-07, -.12367503E-05, X.14274099E-04, -.14190630E-08, .49138637E-07, -.11341060E-05, .13089412E-04, X-.13012954E-08, .45060396E-07, -.10399806E-05, .12003048E-04, -.11932939E-08, X.41320621E-07, -.95366703E-06, .11006847E-04, -.10942581E-08, .37891226E-07, X-.87451708E-06, .10093327E-04, -.10034417E-08, .34746447E-07, -.80193616E-06, X.92556241E-05, -.92016329E-09, .31862667E-07, -.73537909E-06, .84874470E-05, X-.84379219E-09, .29218223E-07, -.67434593E-06, .77830251E-05, -.77376258E-09, X.26793253E-07, -.61837822E-06, .71370671E-05, -.70954478E-09, .24569541E-07, X-.56705556E-06, .65447207E-05, -.65065992E-09, .22530385E-07, -.51999244E-06, X.60015365E-05, -.59665551E-09, .20660469E-07, -.47683533E-06, .55034342E-05, X-.54713947E-09, .18945746E-07, -.43726007E-06, .50466722E-05, -.50172730E-09, X.17373335E-07, -.40096937E-06, .46278195E-05, -.46008804E-09, .15931427E-07, X-.36769063E-06, .42437297E-05, -.42190440E-09, .14609190E-07, -.33717387E-06, X.38915178E-05, -.38688749E-09, .13396692E-07, -.30918988E-06, .35685380E-05, X-.35477896E-09, .12284826E-07, -.28352842E-06, .32723641E-05, -.32533169E-09, X.11265239E-07, -.25999676E-06, .30007714E-05, -.29833101E-09, .10330273E-07, X-.23841812E-06, .27517197E-05, -.27357304E-09, .94729061E-08, -.21863042E-06, X.25233383E-05, -.25086665E-09, .86866944E-08, -.20048500E-06, .23139116E-05, X-.23004341E-09, .79657369E-08, -.18384558E-06, .21218664E-05, -.21095432E-09, X.73046151E-08, -.16858716E-06, .19457602E-05, -.19344609E-09, .66983633E-08, X-.15459513E-06, .17842701E-05, -.17739265E-09, .61424275E-08, -.14176437E-06, X.16361830E-05, -.16266415E-09, .56326317E-08, -.12999852E-06, .15003865E-05, X-.14916607E-09, .51651462E-08, -.11920917E-06, .13758605E-05, -.13678463E-09, X.47364600E-08, -.10931530E-06, .12616697E-05, -.12543471E-09, .43433546E-08, X-.10024258E-06, .11569563E-05, -.11502303E-09, .39828747E-08, -.91922860E-07, X.10609336E-05, -.10547508E-09, .36523130E-08, -.84293639E-07, .97288043E-06, X-.96718548E-10, .33491858E-08, -.77297613E-07, .89213532E-06, -.88696527E-10, X.30712163E-08, -.70882226E-07, .81809171E-06, -.81335019E-10, .28163188E-08, X-.64999291E-07, .75019343E-06, -.74584160E-10, .25825760E-08, -.59604616E-07, X.68793042E-06, -.68392434E-10, .23682339E-08, -.54657676E-07, .63083499E-06, X-.62717860E-10, .21716795E-08, -.50121311E-07, .57847825E-06, -.57508249E-10, X.19914388E-08, -.45961447E-07, .53046690E-06, -.52736629E-10, .18261582E-08, X-.42146834E-07, .48644030E-06, -.48361701E-10, .16745935E-08, -.38648818E-07, X.44606773E-06, -.44349294E-10, .15356095E-08, -.35441123E-07, .40904591E-06, X-.40669920E-10, .14081606E-08, -.32499653E-07, .37509676E-06, -.37287787E-10, X.12912891E-08, -.29802315E-07, .34396525E-06, -.34197228E-10, .11841170E-08, X-.27328844E-07, .31541753E-06, -.31359936E-10, .10858397E-08, -.25060660E-07, X.28923915E-06, -.28754146E-10, .99571978E-09, -.22980728E-07, .26523347E-06, X-.26368332E-10, .91307848E-09, -.21073421E-07, .24322017E-06, -.24182253E-10, X.83729790E-09, -.19324411E-07, .22303388E-06, -.22173679E-10, .76780541E-09, X-.17720564E-07, .20452297E-06, -.20329254E-10, .70408002E-09, -.16249830E-07, X.18754839E-06, -.18648231E-10, .64564515E-09, -.14901159E-07, .17198264E-06, X-.17100008E-10, .59205772E-09, -.13664423E-07, .15770877E-06, -.15676626E-10, X.54291998E-09, -.12530332E-07, .14461958E-06, -.14379446E-10, .49786044E-09, X-.11490364E-07, .13261674E-06, -.13184171E-10, .45653932E-09, -.10536711E-07, X.12161009E-06, -.12092111E-10, .41864947E-09, -.96622065E-08, .11151694E-06, X-.11088230E-10, .38390185E-09, -.88602822E-08, .10226149E-06, -.10167978E-10, X.35204006E-09, -.81249149E-08, .93774199E-07, -.93231372E-11, .32282125E-09, X-.74505800E-08, .85991320E-07, -.85466576E-11, .29602889E-09, -.68322124E-08, X.78854389E-07, -.78430499E-11, .27146002E-09, -.62651653E-08, .72309793E-07, X-.71920919E-11, .24893069E-09, -.57451820E-08, .66308373E-07, -.65920864E-11, X.22827149E-09, -.52683558E-08, .60805046E-07, -.60427079E-11, .20932384E-09, X-.48311040E-08, .55758473E-07, -.55427286E-11, .19195185E-09, -.44301417E-08, X.51130745E-07, -.50830090E-11, .17602049E-09, -.40624576E-08, .46887100E-07, X-.46605884E-11, .16141109E-09, -.37252901E-08, .42995661E-07, -.42723486E-11, X.14801400E-09, -.34161063E-08, .39427195E-07, -.39191577E-11, .13572956E-09, X-.31325832E-08, .36154897E-07, -.35926979E-11, .12446535E-09, -.28725916E-08, X.33154187E-07, -.32936758E-11, .11413530E-09, -.26341785E-08, .30402523E-07, X-.30237219E-11, .10466238E-09, -.24155515E-08, .27879237E-07, -.27747129E-11, X.95975927E-10, -.22150704E-08, .25565373E-07, -.25438724E-11, .88009797E-10, X-.20312283E-08, .23443550E-07, -.23312751E-11, .80706000E-10, -.18626451E-08, X.21497830E-07, -.21371551E-11, .74007456E-10, -.17080531E-08, .19713597E-07, X-.19605597E-11, .67865236E-10, -.15662916E-08, .18077448E-07, -.17973297E-11, X.62233131E-10, -.14362958E-08, .16577093E-07, -.16454510E-11, .57067650E-10, X-.13170897E-08, .15201262E-07, -.15108803E-11, .52331644E-10, -.12077758E-08, X.13939618E-07, -.13859696E-11, .47987964E-10, -.11075357E-08, .12782686E-07, X-.12719362E-11, .44004899E-10, -.10156142E-08, .11721775E-07, -.11666183E-11, X.40352548E-10, -.93132250E-09, .10748915E-07, -.10671906E-11, .37003728E-10, X-.85402705E-09, .98567986E-08, -.97929913E-12, .33933071E-10, -.78314584E-09, X.90387241E-08, -.89629723E-12, .31117019E-10, -.71814840E-09, .82885466E-08, X-.82272553E-12, .28534731E-10, -.65854486E-09, .76006306E-08, -.75780781E-12, X.26165369E-10, -.60388740E-09, .69698091E-08, -.69396551E-12, .23994435E-10, X-.55376780E-09, .63913430E-08, -.63833578E-12, .22002903E-10, -.50780658E-09, X.58608875E-08, -.57996077E-12, .20176274E-10, -.46566179E-09, .53744575E-08, X-.53220838E-12, .18501864E-10, -.42701400E-09, .49283992E-08, -.48866885E-12, X.16966989E-10, -.39157296E-09, .45193619E-08, -.44716790E-12, .15558963E-10, X-.35907424E-09, .41442731E-08, -.41038205E-12, .14267819E-10, -.32927247E-09, X.38003151E-08, -.38029086E-12, .13082685E-10, -.30194323E-09, .34849045E-08, X-.34363437E-12, .11997218E-10, -.27688444E-09, .31956714E-08, -.31778095E-12, X.11001451E-10, -.25390376E-09, .29304437E-08, -.28998039E-12, .10089043E-10, X-.23283090E-09, .26872286E-08, -.26945258E-12, .92509322E-11, -.21350646E-09, X.24641995E-08, -.24572137E-12, .84834944E-11, -.19578601E-09, .22596809E-08, X-.22358395E-12, .77803874E-11, -.17953712E-09, .20721364E-08, -.20755869E-12, X.71334563E-11, -.16463573E-09, .19001575E-08, -.18916471E-12, .65417954E-11, X-.15097165E-09, .17424521E-08, -.16944952E-12, .59981559E-11, -.13844273E-09, X.15978357E-08, -.16027742E-12, .55007257E-11, -.12695141E-09, .14652218E-08}; X Xfloat sumlg (a,b) X float a, b ; X{ X float x, y, apr, abm ; X int i, k ; X X x = (a - b) ; X if (x > 0) X abm = a ; X else { X abm = b ; X x = - x ; X } X/* X * abm is max (a,b), x is abs (a-b) X */ X y = 8.0 * x ; X if (y+0.5 >= 240) return abm ; X X k = (int)(y + 0.5) ; /* k is integer rounding of y */ X X y -= k ; X k *= 4 ; X apr = d[k] ; X for (i = 1; i <= 3; i++) { X k++ ; X apr = apr * y + d[k] ; X } X return abm + apr ; X} X Xfloat sumlg3(a,b,c) X float a,b,c ; X{ return sumlg(a, sumlg(b,c)) ; } X Xfloat sumlg5(a,b,c,d,e) X float a,b,c,d,e ; X{ return sumlg(sumlg3(a,b,c), sumlg(d,e)) ; } END_OF_sumlg.c if test 17414 -ne `wc -c 1StateR0.c <<'END_OF_1StateR0.c' X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "1StateR0.c" -- X * computes a best R-model alignment usign Hirschberg's recursive algorithm X * X * Adapted from D. S. Hirschberg X * 'A linear space algorithm for computing maximal common subsequences.' X * CACM V18(6) p341-343 June 1975 (ie. LCS) X * X * Entries : X * X * RAlignString( AStr, BStr, Alignment, AlignLen, Matches, DiffFunct ) X * string *AStr, *BStr ; X * int *AlignLen, *Matches; X * align Alignment[] ; X * int (*DiffFunct)() ; X * X * RAlignSequence(ASeq, BSseq, Alen, Blen, Alignment, AlignLen, P, DiffFunct ) X * X * R0MesgLength(Alignment, Alignlength, A, B, Msglength, Table) X * align Alignment[] ; X * int Alignlength ; X * symbol A[], B[] ; X * AlphabetTable *Table ; X * float *Msglength ; X * X * computes the message length of an R-alignment using the X * (=, <>, indel) adaptive coding X * X * X * ROptAlignString(A,B,Alignment,AlignLen,msglength,Matches,Niteration,P,Table) X * string *A, *B ; X * int *AlignLen, *Matches, *Niteration ; X * align Alignment[] ; X * OneStatePBlock *P ; X * float *msglength ; X * AlphabetTable *Table ; X * X * iterates on the parameters until an fixed point is reached X * the resultant probabolities and number of iteration is returned X */ X#include X#include "macro.h" X#include "Strings.h" X#include "rtheory.h" X Xstatic symbol *A, *B ; Xstatic align *AlignStr ; Xstatic int AlignLength, NMatches ; X Xstatic float l1[MAXSEQLENGTH+2], l2[MAXSEQLENGTH+2] ; Xstatic float (*Diff)() ; X X/***************************************************************************/ Xstatic copy(s1, s2, lo, hi) X float s1[], s2[] ; X int lo, hi ; X{ X int i ; X X for (i = lo; i <= hi ; i++) s1[i] = s2[i] ; X} X Xstatic printarray(s,lo,hi) X float s[] ; X int lo, hi ; X{ X int i ; X X for (i = lo; i <= hi ; i++) printf("%5.1f ",s[i]) ; X putchar('\n') ; X} X X Xstatic code(i, j ) X int i, j ; X{ X AlignStr[++AlignLength].a = i ; X AlignStr[AlignLength].b = j ; X if (i != 0 && j != 0 && A[i] == B[j]) NMatches++ ; X} X X X float c[2][MAXSEQLENGTH+2]; Xstatic XAlgB( loA, hiA, loB, hiB, D, l) X int loA, hiA, loB, hiB, D ; X float l[] ; X{ X int i, j ; X float *cost0, *cost1, *temp ; X X cost1 = c[1] ; X cost0 = c[0] ; X X switch (D) { X case fwd: X cost1[loB-1] = 0.0 ; /* (*Diff)(NULLSymbol, NULLSymbol);*/ X for (j = loB; j <= hiB ; j++) X cost1[j] = cost1[j-1] + (*Diff)(NULLSymbol, B[j]); X for (i = loA; i <= hiA; i++) { X swap(cost0, cost1) ; X cost1[loB-1] = cost0[loB-1] + (*Diff)(A[i], NULLSymbol); X for (j=loB ; j <= hiB; j++) X cost1[j]= fmin3(cost0[j-1]+(*Diff)(A[i],B[j]), X cost0[j] +(*Diff)(A[i], NULLSymbol), X cost1[j-1]+(*Diff)(NULLSymbol, B[j])) ; X } ; X break ; X case rev: X cost1[hiB+1] = 0.0; /* (*Diff)(NULLSymbol,NULLSymbol);*/ X for (j = hiB ; j >= loB ; j--) X cost1[j] = cost1[j+1] + (*Diff)(NULLSymbol, B[j]); X for (i = hiA ; i >= loA; i--) { X swap(cost0,cost1) ; X cost1[hiB+1] = cost0[hiB+1] + (*Diff)(A[i], NULLSymbol) ; X for (j = hiB; j >= loB; j --) X cost1[j]= fmin3(cost0[j+1]+(*Diff)(A[i], B[j]), X cost0[j] + (*Diff)(A[i], NULLSymbol), X cost1[j+1] + (*Diff)(NULLSymbol, B[j])) ; X } X break ; X } /*case*/; X X copy(l, cost1, loB-1, hiB+1) ; X} /*AlgB*/; X Xstatic XAlgC(mincost, loA, hiA, loB, hiB) X float *mincost ; X int loA, hiA, loB, hiB ; X{ X int midA, midB, i, j, k ; X float dontcare, skipB ; X X debug(printf("AlgC %d %d %d %d\n", loA, hiA, loB, hiB) ;) X if (hiB < loB) /*B is empty*/ X for (i=loA ; i <= hiA; i++) code( i, NULLSymbol ) ; X X else /*hiB >=loB*/ if (hiA == loA) /* |A| = 1 */ { X k=loB-1; X skipB = 0.0 ; X for (j=loB; j <= hiB; j++) skipB += (*Diff)(NULLSymbol, B[j]) ; X *mincost = (*Diff)(A[loA], NULLSymbol) + skipB; X for (j = loB; j <= hiB; j++) X if ((skipB - (*Diff)(NULLSymbol,B[j]) + (*Diff)(A[loA],B[j])) < *mincost ) { X k = j ; X *mincost = skipB - (*Diff)(NULLSymbol,B[j]) + (*Diff)(A[loA],B[j]) ; X } X for (j= loB; j <= k-1; j++) code(NULLSymbol, j); X if (k < loB) code( loA, NULLSymbol ) ; X else /*k>=loB*/ code( loA, k ); X for (j= k+1; j <= hiB; j++) code(NULLSymbol, j) ; X } /*hiA=loA*/ X X else { /* hiA > loA so divide and conquer*/ X midA = (int) ((loA + hiA) / 2) ; /* loA <= midA < hiA */ X AlgB( loA, midA, loB, hiB, fwd, l1); X AlgB(midA+1, hiA, loB, hiB, rev, l2); X debug( X if (loB == 1 ) { X printarray(l1,loB-1,hiB) ; printarray(l2,loB,hiB+1) ; } X ) X midB = loB-1; X *mincost = l1[loB-1]+l2[loB]; X for (j = loB ; j <= hiB ; j++) X if (l1[j]+l2[j+1] < *mincost) { X midB = j; X *mincost = l1[j]+l2[j+ 1] ; X } X AlgC(&dontcare, loA, midA, loB, midB) ; X AlgC(&dontcare, midA+1, hiA, midB+1, hiB) ; X } X debug(printf(" AlgC returns %d %d %d %d\n", loA, hiA, loB, hiB) ;) X} /*AlgC*/; X X XRAlignSequence(Aseq, Bseq, Alen, Blen, Alignment, AlignLen, Matches, DiffFunct) X symbol *Aseq, *Bseq ; X int *AlignLen, *Matches; X align Alignment[] ; X float (*DiffFunct)() ; X{ X float mincost ; X X A = Aseq ; B = Bseq ; X AlignStr = Alignment ; X Diff = DiffFunct ; X NMatches = AlignLength = 0 ; X X AlgC(&mincost, 1, Alen, 1, Blen) ; X X *AlignLen = AlignLength ; X *Matches = NMatches ; X} X XRAlignString(A, B, Alignment, AlignLen, Matches, DiffFunct ) X string *A, *B ; X int *AlignLen, *Matches; X align Alignment[] ; X float (*DiffFunct)() ; X{ X RAlignSequence(A->sequence, B->sequence, A->length, B->length, Alignment, AlignLen, Matches, DiffFunct) ; X} X X/*----------------------------------------------------------------------------*/ X X#define MAXITERATION 15 X Xtypedef struct { X float cost ; X float PMatches, PChanges; X } ihist ; X Xihist iterationhistory[MAXITERATION] ; Xstatic int iterations ; X X/* X * maintains iteration history and detect iteration cycle X */ Xstatic bool Xrepeat(cost,PMatches,PChanges) X float cost,PMatches,PChanges ; X{ X int i ; X X for ( i = iterations-1 ; i >= 0 ; i--) { X if ((PMatches == iterationhistory[i].PMatches) X && (PChanges == iterationhistory[i].PChanges)) { X break ; X } X } X if (i < 0) {/* not found, register this iteration */ X iterationhistory[iterations].cost = cost ; X iterationhistory[iterations].PMatches = PMatches ; X iterationhistory[iterations].PChanges = PChanges ; X return FALSE ; X } else X return TRUE ; X} X Xstatic Xlookformin(where,PMatches,PChanges) X int *where ; X float *PMatches, *PChanges ; X{ X int i ; X *where = iterations - 1 ; X for ( i = iterations-2 ; i >= 0; i-- ) X if (iterationhistory[i].cost < iterationhistory[*where].cost) X *where = i ; X *PMatches = iterationhistory[*where].PMatches ; X *PChanges = iterationhistory[*where].PChanges ; X} X Xstatic float CostMatch, CostChange, CostInsDel ; X Xstatic float XInitRCostFunction(P) X OneStatePBlock *P ; X{ X CostMatch = (float)- log2((double)P->match) + 2 ; X CostChange = (float)-log2((double)P->change) + log2((double)12) ; X CostInsDel = (float)- log2((double)(P->indel)) + 3 ; X} X Xstatic float XRCostFunct(a, b) X symbol a, b ; X{ X if (a == b) return CostMatch ; X if (a == NULLSymbol || b == NULLSymbol) return CostInsDel ; X else return CostChange ; X} X X/* X * iterates on the parameters until an fixed point is reached X */ XROptAlignString(A,B,Alignment,AlignLen,msglength,Niteration,P,Table) X string *A, *B ; X int *AlignLen, *Niteration ; X align Alignment[] ; X float *msglength ; X OneStatePBlock *P ; X AlphabetTable *Table ; X{ X int where, M ; X X iterations = 0 ; X while (TRUE) { X InitRCostFunction(P) ; X RAlignString(A, B, Alignment, AlignLen, &M, RCostFunct) ; X X R0MesgLength(Alignment,*AlignLen,A->sequence,B->sequence,msglength,Table); X OS_computeparameters(Alignment,*AlignLen,A->sequence,B->sequence,P) ; X printf(" r-0 :%1.3f %1.3f %1.3f ML=%5.2f AlignLen : %4d\n",P->match,P->change,P->indel,*msglength,*AlignLen); X fflush(stdout) ; X X debug( X PrintAlignment(Alignment,*AlignLen,A->sequence,B->sequence,Table) ; X ) X if (repeat(*msglength,P->match,P->change)) { X break ; X } X iterations++ ; X } X lookformin(&where,&P->match,&P->change) ; X if (where != iterations-1) { X P->indel = 1 - P->match - P->change ; X InitRCostFunction(P) ; X RAlignString(A, B, Alignment, AlignLen, &M, RCostFunct) ; X R0MesgLength(Alignment,*AlignLen,A->sequence,B->sequence,msglength,Table); X } X *Niteration = iterations ; X} X X/**********************************************************************/ X/* X * compute the message length of an alignment using the X * (=,<>,Indel) coding X */ XR0MesgLength(Alignment, Alignlength, A, B, Msglength, Table) X align Alignment[] ; X int Alignlength ; X symbol A[], B[] ; X AlphabetTable *Table ; X float *Msglength ; X{ X int i, NInsDel, NMatches, NChanges ; X float msglength, loga, logab ; X X NInsDel = NMatches = NChanges = 0 ; X msglength = 0.0 ; X X loga = (float) log2((double)Table->size) ; X logab = (float) log2((double)(Table->size * (Table->size-1))) ; X X for ( i = 1; i <= Alignlength; i++) { X if ((Alignment[i].a == 0) || (Alignment[i].b == 0 )) { X msglength += (float) -log2((double)++NInsDel/(i+2)) + 1 + loga ; X } else if (A[Alignment[i].a] == B[Alignment[i].b]) { X msglength += (float)-log2((double)++NMatches/(i+2)) + loga ; X } else { X msglength += (float)-log2((double)++NChanges/(i+2)) + logab ; X } X } X *Msglength = msglength ; X} X X/* X * this routine computes the relative frequencies of X * Matches, Chnages and InsDel of an One State R0-alignment X */ XOS_computeparameters(Alignment, Alignlength, A, B, P ) X align Alignment[] ; X int Alignlength ; X symbol A[], B[] ; X OneStatePBlock *P ; X{ X int i, NInsDel, NMatches, NChanges ; X X NInsDel = NMatches = NChanges = 0 ; X X for ( i = 1; i <= Alignlength; i++) { X if ((Alignment[i].a == 0) || (Alignment[i].b == 0 )) NInsDel++ ; X else if (A[Alignment[i].a] == B[Alignment[i].b]) NMatches++ ; X else NChanges ++ ; X } X changetoprobability3(NMatches,NInsDel,NChanges,&P->match,&P->indel,&P->change) ; X} X X/*----------------------------------------------------------------------------*/ X Xstruct element { X int M, C, ID ; X float ways ; X } ; X Xstatic float Xcost(c) X struct element *c ; X{ X return c->M * CostMatch + c->C * CostChange + c->ID * CostInsDel ; X} X Xstatic bool Xclose(a, b) X float a, b ; X{ X if (a != 0 || b !=0) return (flabs((a-b)/(a+b)) < 0.0001) ; X return TRUE ; X} X X/* X * returns log2 the number of ways of reaching X * a particular optimal alignment X */ Xfloat XRCount(AStr, BStr, P) X string *AStr, *BStr ; X OneStatePBlock *P ; X{ X symbol *A, *B ; X int Alength, Blength, i, j ; X struct element l1[MAXSEQLENGTH], l2[MAXSEQLENGTH], X left, top, diag, *c1, *c2, *temp ; X float leftcost,topcost,diagcost,mincost ; X float ways ; X X InitRCostFunction(P) ; X X A = AStr->sequence ; B = BStr->sequence ; X Alength = AStr->length ; Blength = BStr->length ; X c1 = l1 ; c2 = l2 ; X X /* boundary conditions */ X for ( j = 0 ; j <= Blength ; j++) { X c1[j].M = c1[j].C = 0 ; X c1[j].ID = j ; X c1[j].ways = 0 ; X } X X for ( i = 1 ; i <= Alength; i++) { X c2[0].M = c2[0].C = 0 ; X c2[0].ID = i ; X c2[0].ways = 0 ; X X for (j = 1 ; j <= Blength; j++) { X left.M = c2[j-1].M ; left.C = c2[j-1].C; left.ID = c2[j-1].ID+1; X top.M = c1[j].M ; top.C = c1[j].C; top.ID = c1[j].ID+1; X if (A[i] == B[j]) { X diag.M = c1[j-1].M+1 ; diag.C = c1[j-1].C; diag.ID = c1[j-1].ID; X } else { X diag.M = c1[j-1].M ; diag.C = c1[j-1].C+1; diag.ID = c1[j-1].ID; X } X X leftcost = cost(&left) ; X topcost = cost(&top) ; X diagcost = cost(&diag) ; X X mincost = fmin3(leftcost,topcost,diagcost) ; X ways = -1999 ; X if (close(mincost, leftcost)) { X c2[j] = left ; X ways = sumlg(ways, c2[j-1].ways) ; X } X if (close(mincost, diagcost)) { X c2[j] = diag ; X ways = sumlg(ways,c1[j-1].ways) ; X } X if (close(mincost, topcost)) { X c2[j] = top ; X ways = sumlg(ways,c1[j].ways) ; X } X c2[j].ways = ways ; X } X swap(c1,c2) ; X } X debug( X ) X printf(" Rcount - cost %1.2f ways %5.3f\n",mincost, c1[Blength].ways) ; X return c1[Blength].ways ; X} X END_OF_1StateR0.c if test 12916 -ne `wc -c <1StateR0.c`; then echo shar: \"1StateR0.c\" unpacked with wrong size! fi # end of overwriting check fi if test -f 3StateR0.c -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"3StateR0.c\" else echo shar: Extracting \"3StateR0.c\" \(7933 characters\) sed "s/^X//" >3StateR0.c <<'END_OF_3StateR0.c' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* "3StateR0.c" best alignment under the 3State model X * X * Compute the best alignment of 2 strings under 3 state R-theory X * Requires some routines in 3State.c X * X * Entries: X * X * ThreeStateR0ML(A,B,MesgLength,MatrixPart,P,Symmetric) X * string *A, *B ; X * int Symmetric ; - if true then force state H and V to have same parameters X * float *MesgLength,*MatrixPart ; - the message length, and the matrix part of the ML X * X */ X#include X#include X#include X#include "macro.h" X#include "Strings.h" X#include "rtheory.h" X X#define MAXITERATION 40 X X/* X * some global variables X */ Xstatic symbol *a, *b ; Xstatic int m,n ; X Xstatic float costDM, costDC, costDH, costDV ; Xstatic float costHM, costHC, costHH, costHV ; Xstatic float costVM, costVC, costVH, costVV ; X X#define Horiz 0 X#define Vert 1 X#define Diag 2 X Xstatic TSR0_DPA(P,Alignment,Alignlength,MatrixPart) X ThreeStatePBlock *P ; X align Alignment[] ; X int *Alignlength ; X float *MatrixPart ; X{ X char wherefrom[MATRIXSIZE][MATRIXSIZE] ; X TSpcell c1[MAXSEQLENGTH], c2[MAXSEQLENGTH] ; X TSpcell *d1, *d2, *d, *temp ; X int i, j ; X int x, y, tempa, tempb, mid ; X X TS_ProbToCosts(P,&costDM,&costDC,&costDH,&costDV,&costHM,&costHC,&costHH,&costHV,&costVM,&costVC,&costVH,&costVV) ; X X d1 = c1 ; X d2 = c2 ; X /* Boundary Conditions */ X d1[0].D = d1[0].H = d1[0].V = 0 ; X X for (j = 1 ; j <= n; j++) { X d1[j].H = d1[j-1].H + costHH ; X d1[j].D = d1[j].V = -infinity ; X wherefrom[0][j] = Horiz ; X } X X for (i = 1 ; i <= m ; i++) { /* General Step */ X d2[0].V = d1[0].V + costVV ; X d2[0].D = d2[0].H = -infinity ; X wherefrom[i][0] = Vert ; X for (j = 1; j <= n; j++) { X X d = &d1[j-1] ; X if (a[i] == b[j]) X d2[j].D =fmax3(d->D+costDM,d->V+costVM,d->H+costHM); X else X d2[j].D = fmax3(d->D+costDC,d->V+costVC,d->H+costHC) ; X X d = &d2[j-1] ; X d2[j].H = fmax3(d->D+costDH,d->H+costHH,d->V+costVH) ; X X d = &d1[j] ; X d2[j].V = fmax3(d->D+costDV,d->H+costHV,d->V+costVV) ; X X if (d2[j].D >= d2[j].H && d2[j].D >= d2[j].V) X wherefrom[i][j] = Diag ; X else if (d2[j].H >= d2[j].D && d2[j].H >= d2[j].V) X wherefrom[i][j] = Horiz ; X else X wherefrom[i][j] = Vert ; X } /* for j */ X swap(d1, d2) ; X } /* for i */ X *MatrixPart = -fmax3(d1[n].D, d1[n].H, d1[n].V) ; X X /* X * trace alignment X */ X x = m; y = n ; X *Alignlength = 0 ; X while (x != 0 || y != 0) { X (*Alignlength) ++ ; X if (wherefrom[x][y] == Diag) { X Alignment[*Alignlength].a = x ; X Alignment[*Alignlength].b = y ; X x--; y-- ; X } else if (wherefrom[x][y] == Horiz) { X Alignment[*Alignlength].a = 0 ; X Alignment[*Alignlength].b = y ; X y-- ; X } else { X Alignment[*Alignlength].a = x ; X Alignment[*Alignlength].b = 0 ; X x-- ; X } X } X /* reverse direction */ X mid = *Alignlength / 2 ; X for (i = 1, j= *Alignlength ; i <= mid; i++, j--) { X tempa = Alignment[i].a ; X tempb = Alignment[i].b ; X Alignment[i].a = Alignment[j].a ; X Alignment[i].b = Alignment[j].b ; X Alignment[j].a = tempa ; X Alignment[j].b = tempb ; X } X X} /*TSR0_DPA*/ X XTSR0_ComputeParameters(Alignment,Alignlength,A,B,P,MesgLength) X align Alignment[] ; X int Alignlength ; X symbol A[], B[] ; X ThreeStatePBlock *P ; X float *MesgLength ; X{ X int i,laststate ; X float DM,DC,DH,DV,HM,HC,HH,HV,VM,VC,VH,VV ; X float ML ; X float log12 ; X X log12 = log2((double)12) ; X DM=DC=DH=DV=HM=HC=HH=HV=VM=VC=VH=VV=0 ; X ML = 0 ; X laststate = Diag ; X for (i=1; i <= Alignlength ; i++) { X if (Alignment[i].a != 0 && Alignment[i].b != 0) { /* diag move */ X switch (laststate) { X case Diag : X if (A[Alignment[i].a] == B[Alignment[i].b]) { X DM++ ; X ML += -log2((double) DM/(DM+DC+DH+DV+3)) +2 ; X } else { X DC ++ ; X ML += -log2((double) DC/(DM+DC+DH+DV+3)) + log12 ; X } X break ; X case Horiz : X if (A[Alignment[i].a] == B[Alignment[i].b]) { X HM ++ ; X ML += -log2((double) HM/(HM+HC+HH+HV+3)) +2 ; X } else { X HC ++ ; X ML += -log2((double) HC/(HM+HC+HH+HV+3)) + log12 ; X } X break ; X case Vert : X if (A[Alignment[i].a] == B[Alignment[i].b]) { X VM ++ ; X ML += -log2((double) VM/(VM+VC+VH+VV+3)) +2 ; X } else { X VC ++ ; X ML += -log2((double) VC/(VM+VC+VH+VV+3)) + log12 ; X } X break ; X } X laststate = Diag ; X } else if (Alignment[i].a == 0 ) { /* Horiz move */ X switch (laststate) { X case Diag : X DH++ ; X ML += -log2((double) DH/(DM+DC+DH+DV+3)) +2 ; X break ; X case Horiz : X HH++ ; X ML += -log2((double) HH/(HM+HC+HH+HV+3)) +2 ; X break ; X case Vert : X VH++ ; X ML += -log2((double) VH/(VM+VC+VH+VV+3)) +2 ; X break ; X } X laststate = Horiz ; X } else { X switch (laststate) { X case Diag : X DV++ ; X ML += -log2((double) DV/(DM+DC+DH+DV+3)) +2 ; X break ; X case Horiz : X HV++ ; X ML += -log2((double) HV/(HM+HC+HH+HV+3)) +2 ; X break ; X case Vert : X VV++ ; X ML += -log2((double) VV/(VM+VC+VH+VV+3)) +2 ; X break ; X } X laststate = Vert ; X } X } X DH=DV=(DH+DV)/2 ; X HM=VM=(HM+VM)/2 ; X HC=VC=(HC+VC)/2 ; X HH=VV=(HH+VV)/2 ; X HV=VH=(HV+VH)/2 ; X fchangetoprobability4(DM,DC,DH,DV,&P->DM,&P->DC,&P->DH,&P->DV) ; X fchangetoprobability4(HM,HC,HH,HV,&P->HM,&P->HC,&P->HH,&P->HV) ; X fchangetoprobability4(VM,VC,VH,VV,&P->VM,&P->VC,&P->VH,&P->VV) ; X *MesgLength = ML ; X} /* TSR0_ComputeParameters() */ X X X/* X * look through the stack to detect cycle X */ Xbool found(val, stack, maxstack) X float val, stack[] ; X int maxstack ; X{ X int i ; X X for (i=0; i< maxstack; i++) X if (stack[i] == val) return TRUE ; X return FALSE ; X} X/*-------------------------------------------------------------------------- X * Computes the 3 states R-theory message length, and return the optimal probs X */ XThreeStateR0ML(A,B,Alignment,Alignlength,MesgLength,P) X string *A, *B ; X float *MesgLength ; X align Alignment[] ; X int *Alignlength ; X ThreeStatePBlock *P ; X{ X float MatrixPart ; X int iterations ; X float stack[MAXITERATION] ; /* for cycle detection */ X X a = A->sequence ; b = B->sequence ; X m = A->length ; n = B->length ; X X iterations = 0 ; X X printf(" Iteration 0 :\n") ; X *MesgLength = infinity ; X do { X TS_printparameters(P) ; X TSR0_DPA(P,Alignment,Alignlength,&MatrixPart) ; X TSR0_ComputeParameters(Alignment,*Alignlength,a,b,P,MesgLength) ; X printf(" Matrix Part = %7.2f ML = %7.2f\n",MatrixPart,*MesgLength) ; X X fflush(stdout) ; X stack[iterations] = *MesgLength ; X iterations++ ; X if (found(*MesgLength,stack,iterations-1)) {/* cycle deteted */ X if (*MesgLength != stack[iterations-2]) X printf("....CYCLE DETECTED....\n") ; X break ; X } X if (iterations > MAXITERATION) { X printf("3State R-0 : max iteration exceeded\n") ; X break ; X } X printf(" Iteration %2d :\n",iterations) ; X } while (TRUE) ; X} END_OF_3StateR0.c if test 7933 -ne `wc -c <3StateR0.c`; then echo shar: \"3StateR0.c\" unpacked with wrong size! fi # end of overwriting check fi echo shar: End of archive 4 \(of 4\). cp /dev/null ark4isdone MISSING="" for I in 1 2 3 4 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 4 archives. rm -f ark[1-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0