#! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh 1State.c <<'END_OF_1State.c' X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* "1State.c" X * X * Compute One-State r-theory: all weighted alignments X * after idea of CSW X * X */ X#include X#include X#include "macro.h" X#include "Strings.h" X#include "rtheory.h" X X#define K 3 /* degree of freedom = no. arcs - 1 */ X X/* X * some global variables X */ Xstatic symbol *a, *b ; /* the two sequences */ Xstatic int m,n ; /* length of a and b respectively */ Xstatic float costMatch, costChange, costIns, costDel ; XAlphabetTable *Table = &DNASymbol ; X X/* X * some routines to compute the no. of bits to send the parametes, X * length etc. X */ X X/* X * Gives log2 of gamma(n) X */ Xstatic float LogGamma(n) X float n ; X{ X double sum ; X X sum = 0 ; X while (n > 2) X sum += log2((double) --n) ; X if (n <= 2) X if ((n>1.25) && (n<1.75)) sum += log2(sqrt((double)3.141593)/2.0) ; X return (float) sum ; X X} X Xdouble GammaApprox(n) /*very rough !*/ X float n ; X/* Gamma(N) = (N-1)! Gamma continuous fn */ X{ X if (n <=2) { X if ((n>1.25) && (n<1.75)) return sqrt((double)3.141593)/2 ; X else return (double) 1 ; X } else /*n>2*/ return (n-1)*GammaApprox(n-1) ; X} X XOS_printparameters(P) X OneStatePBlock *P ; X{ printf("PM=%3.3f PC=%3.3f PID=%3.3f",P->match,P->change,P->indel) ;} X X/* X * Calculate cost of sending one alignment by an adaptive code, X * estimate for fractional chars! using gamma instead of factorial X */ Xstatic float AdaptiveML(/*frequencies:*/ fm/*atch*/, fc/*hange*/, fid/*indels*/) X float fm, fc, fid ; X{ X float N ; X X N=fm+fc+fid; X X return ((float)( LogGamma(N+K)-LogGamma((float)K) X - LogGamma(fm+1) X - LogGamma(fc+1) X - LogGamma(fid+1) X + fm*2 + fc*log2((double)12) + fid*3 /*the chars*/ X + U(fm + fc + fid) X )) ; X} X X/*---------------------------------------------------------------------------*/ Xstatic float sum( C ) X cell *C ; X{ return C->match+C->change+C->indel ; } X Xfloat OS_messagelength(C) X cell *C ; X{ X return (float)(paramcost3(C->match, C->change, C->indel) /*to state params*/ X + C->MesLen /*the union of all alignments by fixed code*/ X + U(C->match+C->change+C->indel) ); X} X Xstatic plot(A,B,D, result, ofile) X string *A, *B ; X float D[][MATRIXSIZE] ; X cell *result ; X FILE *ofile ; X{ X int i, j ; X float ml, rth, nullth, oddsratio, pcost, lencost, charcost ; X char ch ; X X printf("Fwd[i,j]+Rev[i+1,j+1]-MML\n"); X rth = OS_messagelength(result); X nullth = (float)U((float)m+n)+DiffCode(m,n)+(m+n)*2; X lencost =(float)U( sum(result) ); X charcost=result->match*2+result->change*log2((double)12)+result->indel*3; X printf(" 1 avg algn=%8.3f ( - chars - len = %8.3f )\n", X AdaptiveML(result->match,result->change,result->indel), X AdaptiveML(result->match,result->change,result->indel) X -charcost-lencost ); X X printf(" null-theory= %6.2f = %6.2f(sumlen) + %6.2f(difflen) + %6.2f(chars) bits\n", nullth, U((float)m+n), DiffCode(m,n), (m+n)*2.0) ; X pcost = paramcost3(result->match, result->change, result->indel); X printf(" OneState= %6.2f = %6.2f(pars) + %6.2f(algn) + %6.2f(len)\n", X OS_messagelength(result), pcost, result->MesLen, lencost) ; X printf("algn - chars= %6.2f\n", result->MesLen-charcost); X X oddsratio = (float) exp2((double) nullth - rth ); X printf("probability related=%8.4f\n", oddsratio/(1+oddsratio)) ; X X if (msequence ; b = B->sequence ; X m = A->length ; n = B->length ; X X DPA(rev, fixed, P, &result, Drev); X DPA(fwd, fixed, P, &result, Dfwd); X SumMatrix(Dfwd,Drev,D,m,n) ; X plot(A,B,D, &result, outfile) ; X } X} X X XOS_ProbToCosts(P) X OneStatePBlock *P ; X{ X costMatch = -(float)log2((double)P->match) + 2 ; X costChange = -(float)log2((double)P->change) + (float) log2((double)12) ; X costDel = costIns = -(float)log2((double)P->indel) + 3 ; X} X X XOS_initcell(c) cell *c ; { c->MesLen=infinity; c->match=c->change=c->indel=0; } X X Xstatic OS_AdaptiveCost(left,diag,top,equalchar) X cell *left, *diag, *top ; X bool equalchar ; X{ X costIns = -log2((double)(left->indel+1)/(sum(left)+3)) + 3 ; X costDel = -log2((double)(top->indel+1)/(sum(top)+3)) + 3 ; X if (equalchar) X costMatch = -log2((double)(diag->match+1)/(sum(diag)+3)) + 2 ; X else X costChange = -log2((double)(diag->change+1)/(sum(diag)+3)) + log2((double)12) ; X} X Xstatic weightedfreq(current, left, diag, top, MLleft, MLdiag, MLtop,equalchar) X cell *current, *left, *diag, *top ; X float MLleft, MLdiag, MLtop ; X bool equalchar ; /* true if the current characters in A and B are equal */ X{ X float MLmin, Pleft, Pdiag, Ptop, Psum ; X X MLmin = fmin3(MLleft, MLdiag, MLtop) ; X X Pleft = exp2((double)MLmin-MLleft) ; X Pdiag = exp2((double)MLmin-MLdiag) ; X Ptop = exp2((double)MLmin-MLtop) ; X Psum = Pleft+Pdiag+Ptop ; X X current->match = ((equalchar+diag->match)*Pdiag + left->match*Pleft + top->match*Ptop)/Psum ; X current->change = (((!equalchar)+diag->change)*Pdiag + left->change*Pleft + top->change*Ptop)/Psum ; X current->indel = (diag->indel*Pdiag + (left->indel+1)*Pleft + (top->indel+1)*Ptop)/Psum ; X} X XOS_ComputeCurrentCell(current,left,diag,top,equalchar) X cell *current, *left, *diag, *top ; X bool equalchar ; X{ X float MLvert, MLhori, MLdiag ; X X MLvert = top->MesLen + costDel ; X MLhori = left->MesLen + costIns ; X if (equalchar) MLdiag = diag->MesLen + costMatch ; X else MLdiag = diag->MesLen + costChange ; X X current->MesLen = -sumlg3(-MLdiag, -MLvert, -MLhori) ; X X weightedfreq(current,left,diag,top,MLhori,MLdiag,MLvert,equalchar); X} X X/* X * M points to the space to store the matrix of message length X * If M is null then it is ignored X */ Xstatic DPA(dirn, mode, P, result, M) X int dirn, mode ; X OneStatePBlock *P ; X cell *result ; X float M[][MATRIXSIZE] ; X{ X cell c1[MAXSEQLENGTH], c2[MAXSEQLENGTH], boundary ; X cell *d1, *d2, *temp ; X int i, iloop, j, jloop, step, abdry, bbdry ; X X if (mode == fixed) OS_ProbToCosts(P) ; X X d1 = c1 ; d2 = c2 ; X if (dirn == fwd) { abdry = bbdry = 0; step = 1 ;} X else { abdry = m+1; bbdry = n+1 ; step = -1 ;} X X OS_initcell(&boundary) ; X OS_initcell(&d1[bbdry]) ; X d1[bbdry].MesLen = 0.0 ; X X i = abdry ; X j = bbdry ; X for (jloop = 1 ; jloop <= n; jloop++) { X j += step ; X if (mode == adaptive) X OS_AdaptiveCost(&d1[j-step],&boundary,&boundary,FALSE) ; X OS_ComputeCurrentCell(&d1[j],&d1[j-step],&boundary,&boundary,FALSE) ; X if (M) M[i][j] = d1[j].MesLen ; X } X X for (iloop = 1 ; iloop <= m ; iloop++) { /* General Step */ X i += step ; X j = bbdry ; X if (mode == adaptive) X OS_AdaptiveCost(&boundary,&boundary,&d1[j],FALSE) ; X OS_ComputeCurrentCell(&d2[j],&boundary,&boundary,&d1[j],FALSE) ; X if (M) M[i][j] = d2[j].MesLen ; X X for (jloop = 1 ; jloop <= n; jloop++) { X j += step ; X if (mode == adaptive) X OS_AdaptiveCost(&d2[j-step],&d1[j-step],&d1[j],a[i]==b[j]) ; X OS_ComputeCurrentCell(&d2[j],&d2[j-step],&d1[j-step],&d1[j],a[i]==b[j]) ; X if (M) M[i][j] = d2[j].MesLen ; X } /* for j */ X swap(d1, d2) ; X } /* for i */ X *result = d1[n] ; X} /*DPA*/ X X X/*-------------------------------------------------------------------------- X * Computes the OneState r-theory message length, and return the optimal probs X * If mode = adaptive then the first run is an adaptive run to estimate X * the starting iteration parameters. Otherwise the parameters in P X * is used for the 1st iterations. If Outfile is not null (and is a legal X * pointer to a file) the density plot will be generated and output to it X */ XOneStateML(A, B, MesgLength, MatrixPart, mode, outfile, P, Accurarcy) X string *A, *B ; X OneStatePBlock *P ; X int mode ; X FILE *outfile ; X float Accurarcy, *MesgLength, *MatrixPart ; X{ X float SumFreq, ML ; X int iterations ; X cell result ; X X a = A->sequence ; b = B->sequence ; X m = A->length ; n = B->length ; X X if (outfile && (m > MATRIXSIZE || n > MATRIXSIZE)) { X printf("OneState: string length too long, no density plot produced\n") ; X outfile = NULL ; X } X X *MesgLength = infinity ; /* any big number will do */ X iterations = 1 ; X X if (mode == fixed) { X printf("iteration 0: ",iterations) ; OS_printparameters(P) ; X putchar('\n') ; X } X X do { X ML = *MesgLength ; X DPA(fwd, mode, P, &result, (float **) NULL); X X mode = fixed; X SumFreq = sum(&result); X X *MesgLength = OS_messagelength(&result) ; X *MatrixPart = result.MesLen ; X fchangetoprobability3(result.match,result.change,result.indel,&P->match,&P->change,&P->indel) ; X printf("iteration %2d: ",iterations) ; OS_printparameters(P) ; X printf(" ML=%8.3f (%8.3f) SumFq:%7.1f\n", *MesgLength, result.MesLen,SumFreq) ; X fflush(stdout) ; X iterations++ ; X } while (flabs((ML-*MesgLength)/ *MesgLength) > Accurarcy) ; X X printf("iteration %2d:\n",iterations) ; X OS_densityplot( A, B, P, outfile) ; X} X X/*--------------------------------------------------------------------------*/ X X#define M 1 X#define C 2 X#define I 3 X#define D 4 X Xstatic int Xnextstate(P) X OneStatePBlock *P ; X{ X float t ; X int event ; X X t = (float)(rand() % 100000) / 100000 ; /* 0 <= t < 1 */ X if ((t -= P->match) < 0) event = M ; X else if ((t -= P->change) < 0) event = C ; X else if ((t -= (P->indel/2)) < 0) event = I ; X else event = D ; X X return event ; X} X X/* X * A One-State machine to generate a pair of strings according to X * the given set of parameters. X * Length refer to the average length of the two strings X * The actual generative alignment is also returned X */ XOneStateGen(A, B, length, P, Alignment, Alignlength) X string *A, *B ; X OneStatePBlock *P ; X align Alignment[] ; X int *Alignlength ; X{ X int event, i ; X int alen, blen ; X symbol *s ; X symbol a[MAXSEQLENGTH], b[MAXSEQLENGTH] ; X X *Alignlength = alen = blen = 0 ; X while ((alen + blen) < 2*length) { X event = nextstate(P) ; X ++(*Alignlength) ; X if (event == M) { X a[++alen] = b[++blen] = randomsymbol(&DNASymbol) ; X Alignment[*Alignlength].a = alen ; X Alignment[*Alignlength].b = blen ; X } else if (event == C) { X randomsymbolpair(&DNASymbol, &a[++alen], &b[++blen] ) ; X Alignment[*Alignlength].a = alen ; X Alignment[*Alignlength].b = blen ; X } else if (event == I) { X b[++blen] = randomsymbol(&DNASymbol) ; X Alignment[*Alignlength].a = 0 ; X Alignment[*Alignlength].b = blen ; X } else { X a[++alen] = randomsymbol(&DNASymbol) ; X Alignment[*Alignlength].a = alen ; X Alignment[*Alignlength].b = 0 ; X } X X if (alen > MAXSEQLENGTH || blen > MAXSEQLENGTH) { X printf("OneStateGen : string too long\n") ; X exit(99) ; X } X } X s = A->sequence = (symbol *) malloc((unsigned)(alen+1)*sizeof(symbol)) ; X A->length = alen ; X s[0] = NULLSymbol ; X for (i = 1; i <= alen; i++) s[i] = a[i] ; X X s = B->sequence = (symbol *) malloc((unsigned)(blen+1)*sizeof(symbol)) ; X B->length = blen ; X s[0] = NULLSymbol ; X for (i = 1; i <= blen; i++) s[i] = b[i] ; X} END_OF_1State.c if test 12434 -ne `wc -c <1State.c`; then echo shar: \"1State.c\" unpacked with wrong size! fi # end of overwriting check fi if test -f 1State.h -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"1State.h\" else echo shar: Extracting \"1State.h\" \(1532 characters\) sed "s/^X//" >1State.h <<'END_OF_1State.h' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X#define fwd 0 X#define rev 1 X X#define adaptive 0 X#define fixed 1 X Xtypedef struct { X float MesLen ; /* r-theory message length */ X float match, change, indel ; /* weighted frequency counters */ X } cell ; X Xtypedef struct { X float match, change, indel ; X } OneStatePBlock ; X Xfloat OS_messagelength() ; END_OF_1State.h if test 1532 -ne `wc -c <1State.h`; then echo shar: \"1State.h\" unpacked with wrong size! fi # end of overwriting check fi if test -f 3State.c -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"3State.c\" else echo shar: Extracting \"3State.c\" \(22797 characters\) sed "s/^X//" >3State.c <<'END_OF_3State.c' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* "3State.c" prefix TS X * X * Compute the prob of 2 strings being related under 3 state R-theory X * X */ X#include X#include X#include X#include "macro.h" X#include "Strings.h" X#include "rtheory.h" X Xfloat sumD(), sumH(), sumV() ; /* forward declarations */ X X/* X * some global variables X */ Xstatic symbol *a, *b ; Xstatic int m,n ; X Xfloat costDM, costDC, costDH, costDV ; Xfloat costHM, costHC, costHH, costHV ; Xfloat costVM, costVC, costVH, costVV ; X X/*--------------------------------------------------------------------------- X * Computes the corresponding edit cost from a set of arcs probabilities X */ XTS_ProbToCosts(P,CostDM,CostDC,CostDH,CostDV,CostHM,CostHC,CostHH,CostHV,CostVM,CostVC,CostVH,CostVV) X ThreeStatePBlock *P ; X float *CostDM,*CostDC,*CostDH,*CostDV,*CostHM,*CostHC,*CostHH,*CostHV,*CostVM,*CostVC,*CostVH,*CostVV ; X{ X float log12 ; X X log12 = (float) log2((double)(12)) ; X X *CostDM = (float)log2((double)P->DM) - 2 ; X *CostDC = (float)log2((double)P->DC) - log12 ; X *CostDH = (float)log2((double)P->DH) - 2 ; X *CostDV = (float)log2((double)P->DV) - 2 ; X X *CostHM = (float)log2((double)P->HM) - 2 ; X *CostHC = (float)log2((double)P->HC) - log12 ; X *CostHH = (float)log2((double)P->HH) - 2 ; X *CostHV = (float)log2((double)P->HV) - 2 ; X X *CostVM = (float)log2((double)P->VM) - 2 ; X *CostVC = (float)log2((double)P->VC) - log12 ; X *CostVH = (float)log2((double)P->VH) - 2 ; X *CostVV = (float)log2((double)P->VV) - 2 ; X} X X/*---------------------------------------------------------------------------- X * The following two routines is a O(n^4) brute force algo to compute the X * density plot for 3 state model --- for debugging and comparison only. X * The fwd matrix is not neccessary, but will save half the time X */ Xstatic DensityDPA(start, result, m1, n1) X int n1, m1 ; /* the mid starting point */ X TSpcell *start ; /* the initial condition */ X float *result ; X{ X TSpcell e1[MAXSEQLENGTH], e2[MAXSEQLENGTH] ; X TSpcell *d1, *d2, *temp ; X int i, j ; X X d1 = e1 ; X d2 = e2 ; X /* Boundary Conditions */ X d1[n1] = *start ; X X for (j = n1+1 ; j <= n; j++) { X if (j == n1+1) X d1[j].H = sumlg3(d1[n1].D+costDH, d1[n1].H+costHH, d1[n1].V+costVH); X else X d1[j].H = d1[j-1].H + costHH ; X X d1[j].D = d1[j].V = BIG ; X } X X for (i= m1+1 ; i<= m ; i++) { /* General Step */ X if (i == m1+1) X d2[n1].V = sumlg3(d1[n1].D+costDV,d1[n1].H+costHV,d1[n1].V+costVV); X else X d2[n1].V = d1[n1].V + costVV ; X d2[n1].D = d2[n1].H = BIG ; X X for (j = n1+1; j <= n; j++) { X if (a[i] == b[j]) d2[j].D= sumlg3(d1[j-1].D+costDM,d1[j-1].H+costHM,d1[j-1].V+costVM); X else d2[j].D = sumlg3(d1[j-1].D+costDC,d1[j-1].H+costHC,d1[j-1].V+costVC); X d2[j].H =sumlg3(d2[j-1].D+costDH,d2[j-1].H+costHH,d2[j-1].V+costVH); X d2[j].V = sumlg3(d1[j].D+costDV,d1[j].H+costHV, d1[j].V+costVV) ; X } /* for j */ X swap(d1, d2) ; X } /* for i */ X *result = -sumlg3(d1[n].D,d1[n].H,d1[n].V) ; X} /*DensityDPA*/ X Xstatic XBruteForceDensity(Fwd,Rslt,P) X TSpcell Fwd[][MATRIXSIZE] ; X float Rslt[][MATRIXSIZE] ; X ThreeStatePBlock *P ; X{ X int i, j ; X X TS_ProbToCosts(P,&costDM,&costDC,&costDH,&costDV,&costHM,&costHC,&costHH,&costHV,&costVM,&costVC,&costVH,&costVV) ; X X for (i = 0; i <= m; i++) X for (j = 0; j <= n ; j++) X DensityDPA(&Fwd[i][j],&Rslt[i][j],i,j) ; X} X X/*------------------------------------------------------------------------*/ X Xstatic float MessageLength(C) X TScell *C ; X{ X float t1,t2,t3,t4,t5 ; X t1 = paramcost4(C->DM, C->DC, C->DH, C->DV) ; X t2 = paramcost4(C->HM, C->HC, C->HH, C->HV) ; X t3 = paramcost4(C->VM, C->VC, C->VV, C->VH) ; X t4 = -sumlg3(C->D, C->H, C->V) ; X t5 = U(sumH(C) + sumV(C) + sumD(C)) ; X /* X printf(" %4.2f(parD) %4.2f(parH) %4.2f(parV) %4.2f(mat) %4.2f(U)\n" X ,t1 ,t2 ,t3 ,t4 ,t5) ; X */ X return (t1+t2+t3+t4+t5) ; X} X X/* X * when states H & V are identical X */ Xstatic float MessageLength2(C) X TScell *C ; X{ X float t1,t2,t3,t4 ; X X t1 = paramcost3(C->DM, C->DC, C->DH+C->DV) ; X t2 = paramcost4(C->HM+C->VM, C->HC+C->VC, C->HH+C->VV, C->HV+C->VH) ; X t3 = -sumlg3(C->D, C->H, C->V) ; X t4 = U(sumH(C) + sumV(C) + sumD(C)) ; X /* X printf(" %4.2f(parD) %4.2f(parHV) %4.2f(mat) %4.2f(U)\n" ,t1 ,t2 ,t3 ,t4) ; X */ X return (float)(t1 + t2 + t3 + t4) ; X} X X X/*-------------------------------------------------------------------------- X * Some general utility routines X */ Xstatic float sumD(C ) X TScell *C ; X{ return C->DM + C->DC + C->DH + C->DV ; } X Xstatic float sumH(C ) X TScell *C ; X{ return C->HM + C->HC + C->HH + C->HV ; } X Xstatic float sumV(C ) X TScell *C ; X{ return C->VM + C->VC + C->VH + C->VV ; } X Xstatic copy(p, q) X TScell *q ; TSpcell *p ; X{ p->D = q->D ; p->H = q->H ; p->V = q->V ; } X XTS_initcell(p) X TScell *p ; X{ X p->D = p->H = p->V = BIG ; X p->DM = p->DC = p->DH = p->DV = 0.0 ; X p->HM = p->HC = p->HH = p->HV = 0.0 ; X p->VM = p->VC = p->VH = p->VV = 0.0 ; X} X/*-------------------------------------------------------------------------*/ X X#ifdef debug Xstatic TS_printcell(i,j,c) X int i,j; X TScell *c ; X{ X printf("(%d,%d) %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f\n", X i,j,c->D,c->H,c->V,c->DM,c->DC,c->DH,c->DV,c->HM,c->HC,c->HH,c->HV,c->VM,c->VC,c->VH,c->VV) ; X} X#endif X XTS_printparameters(P) X ThreeStatePBlock *P ; X{ X printf(" M C H V\n") ; X printf(" D %5.3f %5.3f %5.3f %5.3f\n",P->DM,P->DC,P->DH,P->DV) ; X printf(" H %5.3f %5.3f %5.3f %5.3f\n",P->HM,P->HC,P->HH,P->HV) ; X printf(" V %5.3f %5.3f %5.3f %5.3f\n",P->VM,P->VC,P->VH,P->VV) ; X} X XTS_printparameters2(P,result,MesgLength) X ThreeStatePBlock *P ; X float MesgLength ; X TScell *result ; X{ X printf(" M C H V sumfq \n") ; X printf(" D %5.4f %5.3f %5.3f %5.3f %5.3f %5.1f\n",P->D,P->DM,P->DC,P->DH,P->DV, sumD(result)) ; X printf(" H %5.4f %5.3f %5.3f %5.3f %5.3f %5.1f\n",P->H,P->HM,P->HC,P->HH,P->HV, sumH(result)) ; X printf(" V %5.4f %5.3f %5.3f %5.3f %5.3f %5.1f\n",P->V,P->VM,P->VC,P->VH,P->VV, sumV(result)) ; X printf(" ML=%10.4f (matrix part=%10.4f)\n", MesgLength,-sumlg3(result->D,result->H,result->V)) ; X X} X XTS_statistics(P,result,MesgLength) X ThreeStatePBlock *P ; X float *MesgLength ; X TScell *result ; X{ X fchangetoprobability4(result->DM,result->DC,result->DH,result->DV,&P->DM,&P->DC,&P->DH,&P->DV) ; X fchangetoprobability4(result->HM,result->HC,result->HH,result->HV,&P->HM,&P->HC,&P->HH,&P->HV) ; X fchangetoprobability4(result->VM,result->VC,result->VH,result->VV,&P->VM,&P->VC,&P->VH,&P->VV) ; X X /* X the fractional time that the machine is in the various states X this is deternimed by the no. of times the machine enters X the various state, according to the weighted frequency counter X */ X fchangetoprobability3( X result->DM+result->HM+result->VM+result->DC+result->HC+result->VC, X result->DH+result->HH+result->VH, X result->DV+result->HV+result->VV, X &P->D,&P->H,&P->V) ; X X *MesgLength = MessageLength(result) ; X} X X/* X * this one make states 2 and 3 symmetric(equal) X */ XTS_statistics2(P,result,MesgLength) X ThreeStatePBlock *P ; X float *MesgLength ; X TScell *result ; X{ X fchangetoprobability3(result->DM,result->DC,result->DH+result->DV,&P->DM,&P->DC,&P->DH) ; X P->DH = P->DV = P->DH/2 ; X X fchangetoprobability4(result->HM+result->VM, result->HC+result->VC, result->HH+result->VV,result->HV+result->VH, &P->HM,&P->HC,&P->HH,&P->HV) ; X P->VM = P->HM ; P->VC = P->HC ; X P->VV = P->HH ; P->VH = P->HV ; X X /* probs of each state according to weighted count */ X fchangetoprobability2(result->DM+result->HM+result->VM+result->DC+result->HC+result->VC, result->DH+result->HH+result->VH + result->DV+result->HV+result->VV,&P->D,&P->H) ; X P->V = P->H ; X X *MesgLength = MessageLength2(result) ; X} X X/*------------------------------------------------------------------------*/ Xstatic printmatrix(M) X float M[][MATRIXSIZE] ; X{ X int i, j ; X X printf(" ") ; X for ( j = 1 ; j <= n ; j++) printf(" %c ",SymbTable->Symbol[b[j]]) ; X putchar('\n') ; X for ( i = 0 ; i <= m ; i++) { X if ( i != 0) printf(" %c ",SymbTable->Symbol[a[i]]) ; X else printf(" ") ; X for ( j = 0 ; j <= n ; j++) X printf(" %3d",(int) (M[i][j]+0.5)) ; X putchar('\n') ; X } X} X X/* X * combine Fwd and Rev X */ Xstatic combine(Fwd,Rev,DMatrix) X TSpcell Fwd[][MATRIXSIZE],Rev[][MATRIXSIZE] ; X float DMatrix[][MATRIXSIZE] ; X{ X int i, j ; X X for ( i = 0 ; i <= m ; i++) X for ( j = 0 ; j <= n ; j++) { X DMatrix[i][j] = -sumlg3(Fwd[i][j].D+Rev[i+1][j+1].D, X Fwd[i][j].H+Rev[i+1][j+1].H, X Fwd[i][j].V+Rev[i+1][j+1].V); X } X} X X/* X * Do a forward run then a reverse run using the parameters P X * then combine the fwd and rev result output into OFILe the X * resulting density plot X */ Xstatic DensityPlot(A,B,P,ofile) X string *A, *B ; X ThreeStatePBlock *P ; X FILE *ofile ; X{ X TSpcell Fwd[MATRIXSIZE][MATRIXSIZE], Rev[MATRIXSIZE][MATRIXSIZE] ; X float DMatrix[MATRIXSIZE][MATRIXSIZE] ; X int i, j ; X TScell result ; X X if (ofile || (m < SCREENHEIGHT && n < SCREENWIDTH)) { X X DPA(fixed,P,&result, Fwd) ; X DPArev(P,Rev) ; X X combine(Fwd,Rev,DMatrix) ; X scale(DMatrix,m,n) ; X if (m < SCREENHEIGHT && n < SCREENWIDTH) PlotMatrix(a,b,DMatrix,m,n,SymbTable) ; X X if (ofile) OutputMatrix(A,B,DMatrix,ofile,"Three-State R-Theory",SymbTable) ; X X } X} X X/*-------------------------------------------------------------------------- X * Computes the 3 states R-theory message length, and return the optimal probs X */ XThreeStateMesLength(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy,Symmetric) X string *A, *B ; X FILE *outfile ; /* if not NULL the density plot will be generated and X output to it */ X int mode ; /* if equal fixeed then use the supplied parameters */ X /* else use adaptive coding for the first run to guess a set X of good parameters */ X int Symmetric ; X float *MesgLength,*MatrixPart ; X float Accurarcy ; /* the stopping accurarcy */ X ThreeStatePBlock *P ; X{ X float OldML ; X int iterations ; X TScell result ; X X char filename[100] ; X X a = A->sequence ; b = B->sequence ; X m = A->length ; n = B->length ; X X if (outfile && (m > MATRIXSIZE || n > MATRIXSIZE)) { X printf("ThreeStateML() : wraning : string too long, no density plot produced\n") ; X outfile = (FILE *) NULL ; X } X X iterations = 1 ; X if (mode==adaptive) P->D = P->H = P->V = 1.0/3 ; X else { X printf("Iteration 0:\n") ; X TS_printparameters(P) ; X } X X *MesgLength = infinity ; X /* X * Matrix part always decreases, however the total message length (including par costs) X * may increase slightly. We should stop the iteration then X */ X do { X OldML = *MesgLength ; X DPA(mode,P,&result,(TSpcell *) NULL) ; X X if (Symmetric) TS_statistics2(P,&result,MesgLength) ; X else TS_statistics(P,&result,MesgLength) ; X X printf(" Iteration %2d :\n",iterations) ; X TS_printparameters2(P,&result,*MesgLength) ; X fflush(stdout) ; X X mode = fixed; X iterations++ ; X } while (((OldML-*MesgLength)/ *MesgLength) > Accurarcy) ; X X DensityPlot(A,B,P,outfile) ; X X *MatrixPart = -sumlg3(result.D,result.H,result.V) ; X} X X/*------------------------------------------------------------------------*/ X/* X * a count of 1 is distrubuted to the three counters S1, S2 and S3 X * weighted by the message length (log probabilities) L1, L2 and L3. X */ Xstatic scaledprobability3(L1,L2,L3,P1,P2,P3,Psum) X float L1,L2,L3,*P1,*P2,*P3,*Psum ; X{ X float Lmin ; X X Lmin = fmin3(L1,L2,L3) ; X *P1 = exp2((double)Lmin-L1) ; X *P2 = exp2((double)Lmin-L2) ; X *P3 = exp2((double)Lmin-L3) ; X *Psum = *P1 + *P2 + *P3 ; X} X Xstatic TS_weightedincrement(L1, L2, L3, S1, S2, S3) X float L1, L2, L3 ; X float *S1, *S2, *S3 ; X{ X float Lmin, P1, P2, P3, Psum ; X X scaledprobability3(L1,L2,L3,&P1,&P2,&P3,&Psum) ; X *S1 += P1/Psum ; X *S2 += P2/Psum ; X *S3 += P3/Psum ; X} X X/* X * Compute the average of the counters in left, diag and top weighted X * by the probabilities of enter from the corresponding directions X * X */ Xstatic TS_weightedaverage(current, left, diag, top) X TScell *current, *left, *diag, *top ; X{ X float Pleft, Pdiag, Ptop, Psum ; X X scaledprobability3(-current->H,-current->D,-current->V,&Pleft,&Pdiag,&Ptop,&Psum) ; X current->DM = (diag->DM*Pdiag + left->DM*Pleft + top->DM*Ptop)/Psum ; X current->DC = (diag->DC*Pdiag + left->DC*Pleft + top->DC*Ptop)/Psum ; X current->DH = (diag->DH*Pdiag + left->DH*Pleft + top->DH*Ptop)/Psum ; X current->DV = (diag->DV*Pdiag + left->DV*Pleft + top->DV*Ptop)/Psum ; X X current->HM = (diag->HM*Pdiag + left->HM*Pleft + top->HM*Ptop)/Psum ; X current->HC = (diag->HC*Pdiag + left->HC*Pleft + top->HC*Ptop)/Psum ; X current->HH = (diag->HH*Pdiag + left->HH*Pleft + top->HH*Ptop)/Psum ; X current->HV = (diag->HV*Pdiag + left->HV*Pleft + top->HV*Ptop)/Psum ; X X current->VM = (diag->VM*Pdiag + left->VM*Pleft + top->VM*Ptop)/Psum ; X current->VC = (diag->VC*Pdiag + left->VC*Pleft + top->VC*Ptop)/Psum ; X current->VH = (diag->VH*Pdiag + left->VH*Pleft + top->VH*Ptop)/Psum ; X current->VV = (diag->VV*Pdiag + left->VV*Pleft + top->VV*Ptop)/Psum ; X} X X/* X * compute the current cell from the three adjacent incomming cell X * this routine depends on the global variables costDH, costHH, ... etc X */ XComputeCurrentCell(current,hori,ddiag,vert,match) X TScell *current, *hori, *ddiag, *vert ; X bool match ; /* true if the current characters matches */ X{ X float CDD, CHD, CVD, CDH, CHH, CVH, CDV, CHV, CVV ; X TScell left,diag,top ; X X diag = *ddiag ; X left = *hori ; X top = *vert ; X X /* the costs of entrance from the diagonal direction */ X if (match) { X CDD = diag.D+costDM ; X CHD = diag.H+costHM ; X CVD = diag.V+costVM ; X } else { X CDD = diag.D+costDC ; X CHD = diag.H+costHC ; X CVD = diag.V+costVC ; X } X /* from the left */ X CDH = left.D+costDH ; X CHH = left.H+costHH ; X CVH = left.V+costVH ; X /* and from the right */ X CDV = top.D+costDV ; X CHV = top.H+costHV ; X CVV = top.V+costVV ; X X if (match) TS_weightedincrement(-CDD,-CHD,-CVD,&diag.DM,&diag.HM,&diag.VM) ; X else TS_weightedincrement(-CDD,-CHD,-CVD,&diag.DC,&diag.HC,&diag.VC) ; X X TS_weightedincrement(-CDH,-CHH,-CVH,&left.DH,&left.HH,&left.VH) ; X TS_weightedincrement(-CDV,-CHV,-CVV,&top.DV,&top.HV,&top.VV) ; X X current->H = sumlg3(CDH, CHH, CVH) ; X current->V = sumlg3(CDV, CHV, CVV) ; X current->D = sumlg3(CDD, CHD, CVD) ; X X TS_weightedaverage(current,&left,&diag,&top) ; X} X X#define K 4 X/* X * compute the various cost from adaptive coding X */ Xstatic TS_AdaptiveCost(left,diag,top,equalchar) X TScell *left,*diag,*top ; X bool equalchar ; X{ X float PDM,PHM,PVM,PDC,PHC,PVC,PDH,PVH,PHH,PDV,PHV,PVV ; X float log12 ; X X log12 = (float) log2((double)12) ; X if (equalchar) { X PDM = (diag->DM+1)/(sumD(diag)+K) ; X PHM = (diag->HM+1)/(sumH(diag)+K) ; X PVM = (diag->VM+1)/(sumV(diag)+K) ; X costDM = (float)log2((double)PDM) - 2 ; X costHM = (float)log2((double)PHM) - 2 ; X costVM = (float)log2((double)PVM) - 2 ; X } else { X PDC = (diag->DC+1)/(sumD(diag)+K) ; X PHC = (diag->HC+1)/(sumH(diag)+K) ; X PVC = (diag->VC+1)/(sumV(diag)+K) ; X costDC = (float)log2((double)PDC) - log12 ; X costHC = (float)log2((double)PHC) - log12 ; X costVC = (float)log2((double)PVC) - log12 ; X } X X PDH = (left->DH+1)/(sumD(left)+K) ; X PVH = (left->VH+1)/(sumV(left)+K) ; X PHH = (left->HH+1)/(sumH(left)+K) ; X costDH = (float)log2((double)PDH) - 2 ; X costHH = (float)log2((double)PHH) - 2 ; X costVH = (float)log2((double)PVH) - 2 ; X X PDV = (top->DV+1)/(sumD(top)+K) ; X PVV = (top->VV+1)/(sumV(top)+K) ; X PHV = (top->HV+1)/(sumH(top)+K) ; X costDV = (float)log2((double)PDV) - 2 ; X costHV = (float)log2((double)PHV) - 2 ; X costVV = (float)log2((double)PVV) - 2 ; X} X X X/* X * The last argument is the forward matrix. X * The calling routine can choose to supply NULL to this argument X * when the matrix is not required. X */ Xstatic DPA( mode, P, result, matrix) X int mode ; X TScell *result ; X ThreeStatePBlock *P ; X TSpcell matrix[][MATRIXSIZE] ; X{ X TScell c1[MAXSEQLENGTH], c2[MAXSEQLENGTH] ; X TScell *d1, *d2, *temp, boundarycell ; X int i, j ; X float log12 ; X X TS_initcell(&boundarycell) ; X log12 = (float) log2((double)(12)) ; X if (mode == fixed) X TS_ProbToCosts(P,&costDM,&costDC,&costDH,&costDV,&costHM,&costHC,&costHH,&costHV,&costVM,&costVC,&costVH,&costVV) ; X X d1 = c1 ; X d2 = c2 ; X /* Boundary Conditions */ X TS_initcell(&d1[0]) ; X d1[0].D = (float)log2((double)P->D) ; X d1[0].H = (float)log2((double)P->H) ; X d1[0].V = (float)log2((double)P->V) ; X if (matrix) copy(&matrix[0][0],&d1[0]) ; X debug(TS_printcell(0,0,&d1[0]) ;) X X for (j = 1 ; j <= n; j++) { X if (mode == adaptive) X TS_AdaptiveCost(&d1[j-1],&boundarycell,&boundarycell,FALSE) ; X ComputeCurrentCell(&d1[j],&d1[j-1],&boundarycell,&boundarycell,FALSE) ; X if (matrix != NULL) copy(&matrix[0][j],&d1[j]) ; X debug(TS_printcell(0,j,&d1[j]) ;) X } X X for (i = 1 ; i <= m ; i++) { /* General Step */ X if (mode == adaptive) X TS_AdaptiveCost(&boundarycell,&boundarycell,&d1[0],FALSE) ; X ComputeCurrentCell(&d2[0],&boundarycell,&boundarycell,&d1[0],FALSE) ; X if (matrix != NULL) copy(&matrix[i][0],&d2[0]) ; X debug(TS_printcell(i,0,&d2[0]) ;) X X for (j = 1; j <= n; j++) { X if (mode == adaptive ) X TS_AdaptiveCost(&d2[j-1],&d1[j-1],&d1[j],a[i]==b[j]) ; X ComputeCurrentCell(&d2[j],&d2[j-1],&d1[j-1],&d1[j],a[i]==b[j]) ; X if (matrix != NULL) copy(&matrix[i][j],&d2[j]) ; X debug(TS_printcell(i,j,&d2[j]) ;) X } /* for j */ X swap(d1, d2) ; X } /* for i */ X *result = d1[n] ; X} /*DPA*/ X X/* X * Fwd[i][j] given by DPA gives the prob that the machine ends in the X * various states after generating A[1..i], B[1..j]. X * This routine computes the matrix Rev[i][j] which gives the prob X * that FROM the various starting state the machine will X * generate A[i+1..m], B[j+1..n]. X * So Fwd[i][j] + Rev[i][j] gives the density plot. X */ Xstatic DPArev(P,matrix) X ThreeStatePBlock *P ; X TSpcell matrix[][MATRIXSIZE] ; X{ X TSpcell e1[MAXSEQLENGTH], e2[MAXSEQLENGTH] ; X TSpcell *d1, *d2, *temp ; X int i, j ; X X TS_ProbToCosts(P,&costDM,&costDC,&costDH,&costDV,&costHM,&costHC,&costHH,&costHV,&costVM,&costVC,&costVH,&costVV) ; X X d1 = e1 ; X d2 = e2 ; X /* Boundary Conditions */ X d1[n+1].D = d1[n+1].H = d1[n+1].V = 0 ; X matrix[m+1][n+1] = d1[n+1] ; X X for (j = n ; j >= 1; j--) { X d1[j].D = d1[j+1].H + costDH ; X d1[j].H = d1[j+1].H + costHH ; X d1[j].V = d1[j+1].H + costVH ; X matrix[m+1][j] = d1[j] ; X } X X for (i = m ; i >= 1 ; i--) { /* General Step */ X d2[n+1].D = d1[n+1].V + costDV ; X d2[n+1].H = d1[n+1].V + costHV ; X d2[n+1].V = d1[n+1].V + costVV ; X matrix[i][n+1] = d2[n+1] ; X X for (j = n; j >= 1; j--) { X d2[j].D = sumlg3(d2[j+1].H + costDH, X d1[j+1].D + (a[i]==b[j]?costDM:costDC), X d1[j].V + costDV) ; X d2[j].H = sumlg3(d2[j+1].H + costHH, X d1[j+1].D + (a[i]==b[j]?costHM:costHC), X d1[j].V + costHV) ; X d2[j].V = sumlg3(d2[j+1].H + costVH, X d1[j+1].D + (a[i]==b[j]?costVM:costVC), X d1[j].V + costVV) ; X matrix[i][j] = d2[j] ; X } /* for j */ X swap(d1, d2) ; X } /* for i */ X} /*DPArev*/ X X/*-------------------------------------------------------------------------- X * Computes the 3 state R-theory message length with a fixed set of parameters X */ XFixParmThreeState(A,B,P,result) X string *A, *B ; X ThreeStatePBlock *P ; X TScell *result ; X{ X a = A->sequence ; b = B->sequence ; X m = A->length ; n = B->length ; X DPA(fixed,P,result,(TSpcell *) NULL) ; X} X X X/*--------------------------------------------------------------------------*/ X X#define D 0 X#define H 1 X#define V 2 X#define M 3 X#define C 4 X Xstatic int Xtransition(PM, PC, PH) X float PM, PC, PH ; X{ X float t ; X int event ; X X t = (float)(rand() % 10000000) / 10000000 ; /* 0 <= t < 1 */ X if ( (t -= PM) < 0) event = M ; X else if ((t -= PC) < 0) event = C ; X else if ((t -= PH) < 0) event = H ; X else event = V ; X X return event ; X} X X/* X * A 3 State machine to generate a pair of strings according to X * the given set of parameters. X * Length refer to the average length of the two strings X * X * If Alignment is not NULL then it contains the generative X * alignment of the strings. X */ XThreeStateGen(A, B, length, P, Alignment, Alignlength) X string *A, *B ; X ThreeStatePBlock *P ; X align *Alignment ; X int *Alignlength ; X{ X int state, event, i, alignlen ; X int alen, blen ; X symbol *s ; X symbol a[MAXSEQLENGTH], b[MAXSEQLENGTH] ; X X alen = blen = 0 ; X state = D ; X alignlen = 0 ; X while ((alen + blen) < 2*length) { X switch ( state ) { X case D : X event = transition(P->DM, P->DC, P->DH) ; X break ; X case H : X event = transition(P->HM, P->HC, P->HH) ; X break ; X case V : X event = transition(P->VM, P->VC, P->VH) ; X break ; X } X if (event == M) { X state = D ; X a[++alen] = b[++blen] = randomsymbol(&DNASymbol) ; X if (Alignment) { X Alignment[++alignlen].a = alen ; X Alignment[alignlen].b = blen ; X } X } else if (event == C) { X state = D ; X randomsymbolpair(&DNASymbol, &a[++alen], &b[++blen] ) ; X if (Alignment) { X Alignment[++alignlen].a = alen ; X Alignment[alignlen].b = blen ; X } X } else if (event == H) { X state = H ; X a[++alen] = randomsymbol(&DNASymbol) ; X if (Alignment) { X Alignment[++alignlen].a = alen ; X Alignment[alignlen].b = 0 ; X } X } else { X state = V ; X b[++blen] = randomsymbol(&DNASymbol) ; X if (Alignment) { X Alignment[++alignlen].a = 0 ; X Alignment[alignlen].b = blen ; X } X } X X if (alen > MAXSEQLENGTH || blen > MAXSEQLENGTH) { X printf("ThreeStateGen : string too long\n") ; X exit(99) ; X } X } X s = A->sequence = (symbol *) malloc((unsigned)(alen+1)*sizeof(symbol)) ; X A->length = alen ; X s[0] = NULLSymbol ; X for (i = 1; i <= alen; i++) s[i] = a[i] ; X X s = B->sequence = (symbol *) malloc((unsigned)(blen+1)*sizeof(symbol)) ; X B->length = blen ; X s[0] = NULLSymbol ; X for (i = 1; i <= blen; i++) s[i] = b[i] ; X X if (Alignment) *Alignlength = alignlen ; X} X END_OF_3State.c if test 22797 -ne `wc -c <3State.c`; then echo shar: \"3State.c\" unpacked with wrong size! fi # end of overwriting check fi if test -f 3State.h -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"3State.h\" else echo shar: Extracting \"3State.h\" \(2234 characters\) sed "s/^X//" >3State.h <<'END_OF_3State.h' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "3States.h" declarations for 3 States R-theory. X */ X#define K 4 /* number of arcs in each state */ X X#define fwd 0 X#define rev 1 X X#define adaptive 0 X#define fixed 1 X X#define BIG -1e7 X Xtypedef struct { X float D, H, V ; /* probs that the machine is in these states */ X float DM, DC, DH, DV ; /* probs of the arcs */ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X} ThreeStatesPBlock ; X Xtypedef struct { X float D, H, V ; /* cost of the three states */ X float DM, DC, DH, DV ; /*weighted frequency counters*/ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X} cell ; X Xtypedef struct { X float D, H, V ; X} pcell ; X X#define FREE 0 X#define SYMM 1 X/* X * the free adapting version and the symmetric version X */ X#define ThreeStatesML(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy) \ XThreeStatesMesLength(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy,FREE) X X#define ThreeStatesMLS(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy)\ XThreeStatesMesLength(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy,SYMM) END_OF_3State.h if test 2234 -ne `wc -c <3State.h`; then echo shar: \"3State.h\" unpacked with wrong size! fi # end of overwriting check fi if test -f 5State.c -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"5State.c\" else echo shar: Extracting \"5State.c\" \(26352 characters\) sed "s/^X//" >5State.c <<'END_OF_5State.c' X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* "5State.c" prefix FS X * X * 5 States machine ... extension of the fully connected X * 3 States model with two 'ears' for long indels X * X */ X#include X#include X#include X#include "macro.h" X#include "Strings.h" X#include "rtheory.h" X X#define FREEADAPT /* let everything adapt freely on 1st adaptive run */ X#undef FREEADAPT X X#define ProbAA 0.99 /* the fixed probs for the long indels */ X#define ProbAD 0.01 X#define ProbDA 0.001 X X/* forward declarations */ Xfloat sumD(), sumH(), sumV(), sumA(), sumB() ; X X/* X * some global variables X */ Xstatic symbol *a, *b ; Xstatic int m,n ; X Xstatic float costDM, costDC, costDH, costDV, costDA, costDB ; Xstatic float costHM, costHC, costHH, costHV ; Xstatic float costVM, costVC, costVH, costVV ; Xstatic float costAD, costAA ; Xstatic float costBD, costBB ; X X/*--------------------------------------------------------------------------- X * Computes the corresponding edit cost from a set of arcs probabilities X */ XFS_ProbToCosts(P) X FiveStatePBlock *P ; X{ X float log12 ; X X log12 = (float) log2((double)(12)) ; X X costDM = (float)log2((double)P->DM) - 2 ; X costDC = (float)log2((double)P->DC) - log12 ; X costDH = (float)log2((double)P->DH) - 2 ; X costDV = (float)log2((double)P->DV) - 2 ; X costDA = (float)log2((double)P->DA) - 2 ; X costDB = (float)log2((double)P->DB) - 2 ; X X costHM = (float)log2((double)P->HM) - 2 ; X costHC = (float)log2((double)P->HC) - log12 ; X costHH = (float)log2((double)P->HH) - 2 ; X costHV = (float)log2((double)P->HV) - 2 ; X X costVM = (float)log2((double)P->VM) - 2 ; X costVC = (float)log2((double)P->VC) - log12 ; X costVH = (float)log2((double)P->VH) - 2 ; X costVV = (float)log2((double)P->VV) - 2 ; X X costAD = (float)log2((double)P->AD) - 2 ; X costAA = (float)log2((double)P->AA) - 2 ; X X costBD = (float)log2((double)P->BD) - 2 ; X costBB = (float)log2((double)P->BB) - 2 ; X} X X/*------------------------------------------------------------------------*/ Xstatic float MessageLength(C) X FScell *C ; X{ X float t1,t2,t3,t4 ; X X t1 = paramcost6(C->DM, C->DC, C->DH, C->DV, C->DA, C->DB) ; X t2 = paramcost4(C->HM, C->HC, C->HH, C->HV) + paramcost4(C->VM, C->VC, C->VV, C->VH) + paramcost2(C->AD, C->AA) + paramcost2(C->BD, C->BB) ; X t3 = -sumlg5(C->D, C->H, C->V, C->A, C->B) ; X t4 = U(sumH(C) + sumV(C) + sumD(C) + sumA(C) + sumB(C)) ; X return (t1+t2+t3+t4) ; X} X X/* X * Message length when the probs for the long indels are fixed, X * so need not be coded X */ Xstatic float MessageLength2(C) X FScell *C ; X{ X float t1,t2,t3 ; X X /* parameters cost */ X t1 = paramcost4(C->DM, C->DC, C->DH, C->DV) + X paramcost4(C->HM, C->HC, C->HH, C->HV) + X paramcost4(C->VM, C->VC, C->VV, C->VH) ; X /* the matrix part */ X t2 = -sumlg5(C->D, C->H, C->V, C->A, C->B) ; X /* number of instructions */ X t3 = U(sumH(C) + sumV(C) + sumD(C) + sumA(C) + sumB(C)) ; X return (t1+t2+t3) ; X} X X/*-------------------------------------------------------------------------- X * Some general utility routines X */ Xstatic float sumD(C) FScell *C ; { return C->DM + C->DC + C->DH + C->DV + C->DA + C->DB ; } Xstatic float sumH(C) FScell *C ; { return C->HM + C->HC + C->HH + C->HV ; } Xstatic float sumV(C) FScell *C ; { return C->VM + C->VC + C->VH + C->VV ; } Xstatic float sumA(C) FScell *C ; { return C->AD + C->AA ; } Xstatic float sumB(C) FScell *C ; { return C->BD + C->BB ; } X Xstatic copy(p, q) FScell *q; FSpcell *p ; X { p->D=q->D; p->H=q->H; p->V=q->V; p->A=q->A; p->B=q->B; } X Xstatic FS_initcell(p) X FScell *p ; X{ X p->D = p->H = p->V = p->A = p->B = BIG ; X p->DM = p->DC = p->DH = p->DV = p->DA = p->DB = 0.0 ; X p->HM = p->HC = p->HH = p->HV = 0.0 ; X p->VM = p->VC = p->VH = p->VV = 0.0 ; X p->AD = p->AA = 0.0 ; X p->BD = p->BB = 0.0 ; X} X Xstatic FS_initpcell(p) X FSpcell *p ; X{ p->D = p->H = p->V = p->A = p->B = BIG ; } X X/*-------------------------------------------------------------------------*/ X X#ifdef debug XTS_printcell(i,j,c) X int i,j; X FScell *c ; X{ X printf("(%2d,%2d) %4.2f %4.2f %4.2f %4.2f %4.2f", X i,j,c->D,c->H,c->V,c->A,c->B); X printf(" | %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f\n", X c->DM,c->DC,c->DH,c->DV,c->DA,c->DB) ; X printf(" (H) %4.2f %4.2f %4.2f %4.2f (V) %4.2f %4.2f %4.2f %4.2f (A) %4.2f %4.2f (B) %4.2f %4.2f\n", X c->HM,c->HC,c->HH,c->HV,c->VM,c->VC,c->VV,c->VH,c->AD,c->AA, c->BD, c->BB) ; X} X#endif X XFS_printparameters(P) X FiveStatePBlock *P ; X{ X printf(" D %5.3f(M) %5.3f(C) %5.3f(H) %5.3f(V) %5.3f(A) %5.3f(B)\n",P->DM,P->DC,P->DH,P->DV,P->DA,P->DB) ; X printf(" H %5.3f(M) %5.3f(C) %5.3f(H) %5.3f(V)\n",P->HM,P->HC,P->HH,P->HV) ; X printf(" V %5.3f(M) %5.3f(C) %5.3f(H) %5.3f(V)\n",P->VM,P->VC,P->VH,P->VV) ; X printf(" A %5.3f(D) %5.3f(A)\n",P->AD,P->AA) ; X printf(" B %5.3f(D) %5.3f(B)\n",P->BD,P->BB) ; X} X XFS_printparameters2(P,result,MesgLength) X FiveStatePBlock *P ; X float MesgLength ; X FScell *result ; X{ X printf(" D %5.4f %5.3f(M) %5.3f(C) %5.3f(H) %5.3f(V) %5.3f(A) %5.3f(B) %5.1f(sumfq)\n",P->D,P->DM,P->DC,P->DH,P->DV,P->DA,P->DB,sumD(result)) ; X printf(" H %5.4f %5.3f(M) %5.3f(C) %5.3f(H) %5.3f(V) %5.1f(sumfq)\n",P->H,P->HM,P->HC,P->HH,P->HV, sumH(result)) ; X printf(" V %5.4f %5.3f(M) %5.3f(C) %5.3f(H) %5.3f(V) %5.1f(sumfq)\n",P->V,P->VM,P->VC,P->VH,P->VV, sumV(result)) ; X printf(" A %5.4f %5.3f(D) %5.3f(A) %5.1f(sumfq)\n",P->A,P->AD,P->AA,sumA(result)) ; X printf(" B %5.4f %5.3f(D) %5.3f(B) %5.1f(sumfq)\n",P->B,P->BD,P->BB,sumB(result)) ; X printf(" ML=%10.4f (matrix part=%10.4f)\n", MesgLength,-sumlg5(result->D,result->H,result->V,result->A,result->B)) ; X X} X XFS_statistics(P,result,MesgLength) X FiveStatePBlock *P ; X float *MesgLength ; X FScell *result ; X{ X float sum ; X X fchangetoprobability6(result->DM,result->DC,result->DH,result->DV,result->DA X ,result->DB,&P->DM,&P->DC,&P->DH,&P->DV,&P->DA,&P->DB) ; X fchangetoprobability4(result->HM,result->HC,result->HH,result->HV,&P->HM,&P->HC,&P->HH,&P->HV) ; X fchangetoprobability4(result->VM,result->VC,result->VH,result->VV,&P->VM,&P->VC,&P->VH,&P->VV) ; X fchangetoprobability2(result->AD,result->AA,&P->AD,&P->AA) ; X fchangetoprobability2(result->BD,result->BB,&P->BD,&P->BB) ; X X /* prob that the machine is in the various states as determined by X the weighted count */ X sum = sumD(result)+sumH(result)+sumV(result)+sumA(result)+sumB(result); X P->D = (result->DM+result->DC+result->HM+result->HC+result->VM+result->VC+result->AD+result->BD+0.5)/(sum+2.5) ; X P->H = (result->DH+result->HH+result->VH+0.5)/(sum+2.5) ; X P->V = (result->DV+result->VV+result->HV+0.5)/(sum+2.5) ; X P->A = (result->DA+result->AA+0.5)/(sum+2.5) ; X P->B = (result->DB+result->BB+0.5)/(sum+2.5) ; X X *MesgLength = MessageLength(result) ; X} X X/* X * fix the probs for the long indels X */ XFS_statistics2(P,result,MesgLength) X FiveStatePBlock *P ; X float *MesgLength ; X FScell *result ; X{ X float sum, f ; X X fchangetoprobability4(result->DM,result->DC,result->DH,result->DV, X &P->DM,&P->DC,&P->DH,&P->DV) ; X fchangetoprobability4(result->HM,result->HC,result->HH,result->HV,&P->HM,&P->HC,&P->HH,&P->HV) ; X fchangetoprobability4(result->VM,result->VC,result->VH,result->VV,&P->VM,&P->VC,&P->VH,&P->VV) ; X X P->DA = P->DB = ProbDA ; X P->AA = P->BB = ProbAA ; X P->AD = P->BD = ProbAD ; X X /* P->DM+P->DC+P->DH+P->DV equals 1 now X * make adjustments so that P->DM+P->DC+P->DH+P->DV+P->DA+P->DB equals 1 X */ X f = 1 - 2*ProbDA ; X P->DM *= f ; X P->DC *= f ; X P->DH *= f ; X P->DV *= f ; X X /* prob that the machine is in the various states as determined by X the weighted count */ X sum = sumD(result)+sumH(result)+sumV(result)+sumA(result)+sumB(result); X P->D = (result->DM+result->DC+result->HM+result->HC+result->VM+result->VC+result->AD+result->BD+0.5)/(sum+2.5) ; X P->H = (result->DH+result->HH+result->VH+0.5)/(sum+2.5) ; X P->V = (result->DV+result->VV+result->HV+0.5)/(sum+2.5) ; X P->A = (result->DA+result->AA+0.5)/(sum+2.5) ; X P->B = (result->DB+result->BB+0.5)/(sum+2.5) ; X X *MesgLength = MessageLength2(result) ; X} X X X/*------------------------------------------------------------------------*/ Xstatic printmatrix(M) X float M[][MATRIXSIZE] ; X{ X int i, j ; X X printf(" ") ; X for ( j = 1 ; j <= n ; j++) printf(" %c ",SymbTable->Symbol[b[j]]) ; X putchar('\n') ; X for ( i = 0 ; i <= m ; i++) { X if ( i != 0) printf(" %c ",SymbTable->Symbol[a[i]]) ; X else printf(" ") ; X for ( j = 0 ; j <= n ; j++) X /* printf(" %3d",(int) (M[i][j]+0.5)) ;*/ X printf(" %5.0f", M[i][j]) ; X putchar('\n') ; X } X} X X/* X * combine Fwd and Rev X */ Xstatic combine(Fwd,Rev,DMatrix) X FSpcell Fwd[][MATRIXSIZE],Rev[][MATRIXSIZE] ; X float DMatrix[][MATRIXSIZE] ; X{ X int i, j ; X X for ( i = 0 ; i <= m ; i++) X for ( j = 0 ; j <= n ; j++) { X DMatrix[i][j] = -sumlg5(Fwd[i][j].D+Rev[i+1][j+1].D, X Fwd[i][j].H+Rev[i+1][j+1].H, X Fwd[i][j].V+Rev[i+1][j+1].V, X Fwd[i][j].A+Rev[i+1][j+1].A, X Fwd[i][j].B+Rev[i+1][j+1].B); X } X} X X/* X * Do a forward run then a reverse run using the parameters P X * then combine the fwd and rev result output into OFILe the X * resulting density plot X */ Xstatic DensityPlot(A,B,P,ofile) X string *A, *B ; X FiveStatePBlock *P ; X FILE *ofile ; X{ X FSpcell Fwd[MATRIXSIZE][MATRIXSIZE], Rev[MATRIXSIZE][MATRIXSIZE] ; X float DMatrix[MATRIXSIZE][MATRIXSIZE] ; X int i, j ; X FScell result ; X X if (ofile || (m < SCREENHEIGHT && n < SCREENWIDTH)) { X DPA(fixed,P,&result, Fwd) ; X DPArev(P,Rev) ; X X combine(Fwd,Rev,DMatrix) ; X scale(DMatrix,m,n); X if (m < SCREENHEIGHT && n < SCREENWIDTH) PlotMatrix(a,b,DMatrix,m,n,SymbTable) ; X X if (ofile) OutputMatrix(A,B,DMatrix,ofile,"FiveState R-Theory",SymbTable) ; X } X} X/*-------------------------------------------------------------------------- X * Computes the 5 states R-theory message length, and return the optimal probs X */ XFiveStateMesgLength(A,B,MesgLength,MatrixPart,mode,outfile,P,Accurarcy,flag) X string *A, *B ; X FILE *outfile ; /* if not NULL the density plot will be generated and X output to it */ X int mode ; /* if equal fixeed then use the supplied parameters */ X /* else use adaptive coding for the first run to guess a set X of good parameters */ X int flag ; /* if flag equals FREE then allow all parms to adapt freely */ X /* otherwise fix the probs for the long indels */ X float *MesgLength, *MatrixPart ; X float Accurarcy ; /* the stopping accurarcy */ X FiveStatePBlock *P ; X{ X float ML, minML, minMatrixPart ; X int iterations ; X FScell result ; X X char filename[100] ; X X a = A->sequence ; b = B->sequence ; X m = A->length ; n = B->length ; X X X if (outfile && (m > MATRIXSIZE || n > MATRIXSIZE)) { X printf("FiveStateML() : Warning : string too long, no density plot produced\n") ; X outfile = (FILE *) NULL ; X } X X X *MesgLength = minML = minMatrixPart = infinity ; X iterations = 1 ; X if (mode==adaptive) P->D = P->H = P->V = P->A = P->B = 1.0/5 ; X X do { X ML = *MesgLength ; X DPA(mode,P,&result,(FSpcell *) NULL) ; X X if (flag == FREE) FS_statistics(P,&result,MesgLength) ; X else FS_statistics2(P,&result,MesgLength) ; X X *MatrixPart = -sumlg5(result.D,result.H,result.V,result.A,result.B) ; X X printf("Iteration %2d :\n",iterations) ; X FS_printparameters2(P,&result,*MesgLength) ; X fflush(stdout) ; X X mode = fixed; X iterations++ ; X if (*MesgLength < minML) { X minML = *MesgLength ; X minMatrixPart = *MatrixPart ; X } X } while (flabs((ML-*MesgLength)/ *MesgLength) > Accurarcy) ; X X printf("minimun MesgLength is %7.2f\n",minML) ; X *MesgLength = minML ; X *MatrixPart = minMatrixPart ; X X DensityPlot(A,B,P,outfile) ; X} X X/*=======================================================================*/ X X/* X * prob of entering from each of the directions, scaled up by MLmin X * so that at least one of them equal 1 X */ Xstatic scaledprobability5(L1, L2, L3, L4, L5, P1, P2, P3, P4, P5, Psum) X float L1, L2, L3, L4, L5 ; X float *P1, *P2, *P3, *P4, *P5, *Psum ; X{ X float Lmin ; X Lmin = fmin5(L1, L2, L3, L4, L5) ; X *P1 = exp2((double)Lmin-L1) ; X *P2 = exp2((double)Lmin-L2) ; X *P3 = exp2((double)Lmin-L3) ; X *P4 = exp2((double)Lmin-L4) ; X *P5 = exp2((double)Lmin-L5) ; X *Psum = *P1+*P2+*P3+*P4+*P5 ; X} X X/* X * a count of 1 is distrubuted to the three counters S1, S2 and S3 X * weighted by the message length (log probabilities) L1, L2 and L3. X */ Xstatic scaledprobability3(L1,L2,L3,P1,P2,P3,Psum) X float L1,L2,L3,*P1,*P2,*P3,*Psum ; X{ X float Lmin ; X X Lmin = fmin3(L1,L2,L3) ; X *P1 = exp2((double)Lmin-L1) ; X *P2 = exp2((double)Lmin-L2) ; X *P3 = exp2((double)Lmin-L3) ; X *Psum = *P1 + *P2 + *P3 ; X} X X/* X * L1 corresponds to the log prob of self transition (HH or VV) X * L2 corresponds to the log prob of transition from D to H (or V), X * and L3, L4, L5 correspond to the transitions that have to done via state X * D (HA and VA). X */ Xstatic weightedincrement2(L1, L2, L3, L4, L5, S1, S2, S3, S4, S5) X float L1, L2, L3, L4, L5 ; X float *S1, *S2, *S3, *S4, *S5 ; X{ X float P1, P2, P3, P4, P5, Psum ; X X scaledprobability5(L1,L2,L3,L4,L5,&P1,&P2,&P3,&P4,&P5,&Psum) ; X *S1 += P1/Psum ; X *S2 += (P2+P3+P4+P5)/Psum ; X *S3 += P3/Psum ; X *S4 += P4/Psum ; X *S5 += P5/Psum ; X} X X/* X * similar to weightedincrement2 X * L1 corresponds to the log prob of self transition (AA or BB) X * L2 corresponds to the log prob of transition from D to H (or V or A or B), X * and L3 corresponds to the transition that have to done via state D X * (BA or AB). X */ Xstatic weightedincrement3(L1, L2, L3, S1, S2, S3) X float L1, L2, L3 ; X float *S1, *S2, *S3 ; X{ X float P1, P2, P3, Psum ; X X scaledprobability3(L1,L2,L3,&P1,&P2,&P3,&Psum) ; X *S1 += P1/Psum ; X *S2 += (P2+P3)/Psum ; X *S3 += P3/Psum ; X} X X/* X * transition into state D. If current state is H (or V or A or B) X * we have to make a transition from it to state D first X * then in all three cases we will make a definite transition of X * DM (if we have a match) or DC (if we have a change). X * Refer to how this routine is called. X */ Xstatic weightedincrement(L1, L2, L3, L4, L5, S1, S2, S3, S4, S5) X float L1, L2, L3, L4, L5 ; X float *S1, *S2, *S3, *S4, *S5 ; X{ X float P1, P2, P3, P4, P5, Psum ; X X scaledprobability5(L1,L2,L3,L4,L5,&P1,&P2,&P3,&P4,&P5,&Psum) ; X *S1 += 1 ; /* in all three cases we have to increment this counter */ X *S2 += P2/Psum ; X *S3 += P3/Psum ; X *S4 += P4/Psum ; X *S5 += P5/Psum ; X} X X/* X * current is the weighted average of the five set of counters X */ Xstatic weightedaverage(current, D, H, V, A, B) X FScell *current, *H, *D, *V, *A, *B ; X{ X float PD, PH, PV, PA, PB, Psum ; X X scaledprobability5(-current->D,-current->H,-current->V,-current->A,-current->B,&PD,&PH,&PV,&PA,&PB,&Psum) ; X X current->DM = (D->DM*PD + H->DM*PH + V->DM*PV + A->DM*PA + B->DM*PB)/Psum ; X current->DC = (D->DC*PD + H->DC*PH + V->DC*PV + A->DC*PA + B->DC*PB)/Psum ; X current->DH = (D->DH*PD + H->DH*PH + V->DH*PV + A->DH*PA + B->DH*PB)/Psum ; X current->DV = (D->DV*PD + H->DV*PH + V->DV*PV + A->DV*PA + B->DV*PB)/Psum ; X current->DA = (D->DA*PD + H->DA*PH + V->DA*PV + A->DA*PA + B->DA*PB)/Psum ; X current->DB = (D->DB*PD + H->DB*PH + V->DB*PV + A->DB*PA + B->DB*PB)/Psum ; X X current->HM = (D->HM*PD + H->HM*PH + V->HM*PV + A->HM*PA + B->HM*PB)/Psum ; X current->HC = (D->HC*PD + H->HC*PH + V->HC*PV + A->HC*PA + B->HC*PB)/Psum ; X current->HH = (D->HH*PD + H->HH*PH + V->HH*PV + A->HH*PA + B->HH*PB)/Psum ; X current->HV = (D->HV*PD + H->HV*PH + V->HV*PV + A->HV*PA + B->HV*PB)/Psum ; X X current->VM = (D->VM*PD + H->VM*PH + V->VM*PV + A->VM*PA + B->VM*PB)/Psum ; X current->VC = (D->VC*PD + H->VC*PH + V->VC*PV + A->VC*PA + B->VC*PB)/Psum ; X current->VH = (D->VH*PD + H->VH*PH + V->VH*PV + A->VH*PA + B->VH*PB)/Psum ; X current->VV = (D->VV*PD + H->VV*PH + V->VV*PV + A->VV*PA + B->VV*PB)/Psum ; X X current->AD = (D->AD*PD + H->AD*PH + V->AD*PV + A->AD*PA + B->AD*PB)/Psum ; X current->AA = (D->AA*PD + H->AA*PH + V->AA*PV + A->AA*PA + B->AA*PB)/Psum ; X X current->BD = (D->BD*PD + H->BD*PH + V->BD*PV + A->BD*PA + B->BD*PB)/Psum ; X current->BB = (D->BB*PD + H->BB*PH + V->BB*PV + A->BB*PA + B->BB*PB)/Psum ; X} X X/* X * compute the current cell from the three adjacent incomming cell X * this routine depends on the global variables costDH, costHH, ... etc X */ Xstatic FS_ComputeCurrentCell(current,hori,ddiag,vert,match) X FScell *current, *hori, *ddiag, *vert ; X bool match ; /* true if the current characters matches */ X{ X FScell H, D, V, A, B ; X float CDD, CDH, CDV, CDA, CDB ; /* costs from anywhere to everywhere else */ X float CHD, CHH, CHV ; X float CVD, CVH, CVV ; X float CAD, CAH, CAV, CAA, CAB ; X float CBD, CBH, CBV, CBA, CBB ; X X D = *ddiag ; X H = B = *hori ; X V = A = *vert ; X X /* the costs of entrance from the diagonal direction */ X if (match) { X CDD = D.D+costDM ; X CHD = D.H+costHM ; X CVD = D.V+costVM ; X CAD = D.A+costAD + costDM ; X CBD = D.B+costBD + costDM ; X } else { X CDD = D.D+costDC ; X CHD = D.H+costHC ; X CVD = D.V+costVC ; X CAD = D.A+costAD + costDC ; X CBD = D.B+costBD + costDC ; X } X /* short horizontal */ X CDH = H.D+costDH ; X CHH = H.H+costHH ; X CVH = H.V+costVH ; X CAH = H.A+costAD+costDH ; X CBH = H.B+costBD+costDH ; X X /* short vertical */ X CDV = V.D+costDV ; X CHV = V.H+costHV ; X CVV = V.V+costVV ; X CAV = V.A+costAD+costDV ; X CBV = V.B+costBD+costDV ; X X /* long horizontal */ X CDB = H.D+costDB ; X CAB = H.A+costAD + costDB ; X CBB = H.B+costBB ; X X /* long vertical */ X CDA = V.D+costDA ; X CAA = V.A+costAA ; X CBA = V.B+costBD + costDA ; X X if (match) { X weightedincrement(-CDD,-CHD,-CVD,-CAD,-CBD,&D.DM,&D.HM,&D.VM,&D.AD,&D.BD) ; X weightedincrement2(-CVV,-CDV,-CHV,-CAV,-CBV,&V.VV,&V.DV,&V.HM,&V.AD,&V.BD) ; X weightedincrement2(-CHH,-CDH,-CVH,-CAH,-CBH,&H.HH,&H.DH,&H.VM,&H.AD,&H.BD) ; X } else { X weightedincrement(-CDD,-CHD,-CVD,-CAD,-CBD,&D.DC,&D.HC,&D.VC,&D.AD,&D.BD) ; X weightedincrement2(-CVV,-CDV,-CHV,-CAV,-CBV,&V.VV,&V.DV,&V.HC,&V.AD,&V.BD) ; X weightedincrement2(-CHH,-CDH,-CVH,-CAH,-CBH,&H.HH,&H.DH,&H.VC,&H.AD,&H.BD) ; X } X weightedincrement3(-CAA,-CDA,-CBA,&A.AA,&A.DA,&A.BD) ; X weightedincrement3(-CBB,-CDB,-CAB,&B.BB,&B.DB,&B.AD) ; X X current->H = sumlg5(CDH, CHH, CVH, CAH, CBH) ; X current->V = sumlg5(CDV, CHV, CVV, CAV, CBV) ; X current->D = sumlg5(CDD, CHD, CVD, CAD, CBD) ; X current->A = sumlg3(CDA, CAA, CBA) ; X current->B = sumlg3(CDB, CAB, CBB) ; X X weightedaverage(current,&D,&H,&V,&A,&B) ; X} X X/* X * compute the various cost from adaptive coding X */ Xstatic FS_AdaptiveCost(left,diag,top,match) X FScell *left,*diag,*top ; X bool match ; X{ X float PDM,PDC,PDH,PDV,PDA,PDB ; X float PHM,PHC,PHH,PHV ; X float PVM,PVC,PVH,PVV ; X float PAD,PAA,PBD,PBB ; X float log12 ; X X log12 = (float) log2((double)12) ; X X#ifdef FREEADAPT X /* X * Let all the probs to adapt freely ... tends to give too high a prob X * to the long indels X */ X if (match) { X PDM = (diag->DM+1)/(sumD(diag)+5) ; X PHM = (diag->HM+1)/(sumH(diag)+4) ; X PVM = (diag->VM+1)/(sumV(diag)+4) ; X costDM = (float)log2((double)PDM) - 2 ; X costHM = (float)log2((double)PHM) - 2 ; X costVM = (float)log2((double)PVM) - 2 ; X } else { X PDC = (diag->DC+1)/(sumD(diag)+5) ; X PHC = (diag->HC+1)/(sumH(diag)+4) ; X PVC = (diag->VC+1)/(sumV(diag)+4) ; X costDC = (float)log2((double)PDC) - log12 ; X costHC = (float)log2((double)PHC) - log12 ; X costVC = (float)log2((double)PVC) - log12 ; X } X PBD = (diag->BD+1)/(sumB(diag)+2) ; X PAD = (diag->AD+1)/(sumA(diag)+2) ; X X PDH = (left->DH+1)/(sumD(left)+5) ; X PHH = (left->HH+1)/(sumH(left)+4) ; X PVH = (left->VH+1)/(sumV(left)+4) ; X X PDV = (top->DV+1)/(sumD(top)+5) ; X PHV = (top->HV+1)/(sumH(top)+4) ; X PVV = (top->VV+1)/(sumV(top)+4) ; X X PDB = (left->DB+1)/(sumD(left)+5) ; X PBB = (left->BB+1)/(sumB(left)+2) ; X X PDA = (top->DA+1)/(sumD(top)+5) ; X PAA = (top->AA+1)/(sumA(top)+2) ; X X#else X X /* X * fix the probs for the long indels X */ X PDM = (diag->DM+1)/(diag->DM+diag->DC+diag->DH+diag->DV+4) - 0.002; X PHM = (diag->HM+1)/(sumH(diag)+4) ; X PVM = (diag->VM+1)/(sumV(diag)+4) ; X costDM = (float)log2((double)PDM) - 2 ; X costHM = (float)log2((double)PHM) - 2 ; X costVM = (float)log2((double)PVM) - 2 ; X X PDC = (diag->DC+1)/(diag->DM+diag->DC+diag->DH+diag->DV+4) ; X PHC = (diag->HC+1)/(sumH(diag)+4) ; X PVC = (diag->VC+1)/(sumV(diag)+4) ; X costDC = (float)log2((double)PDC) - log12 ; X costHC = (float)log2((double)PHC) - log12 ; X costVC = (float)log2((double)PVC) - log12 ; X X PDA = PDB = 0.001 ; X PAD = PBD = 0.01 ; X PAA = PBB = 0.99 ; X X PDH = (left->DH+1)/(left->DM+left->DC+left->DH+left->DV+4) ; X PHH = (left->HH+1)/(sumH(left)+4) ; X PVH = (left->VH+1)/(sumV(left)+4) ; X X PDV = (top->DV+1)/(top->DM+top->DC+top->DH+top->DV+4) ; X PHV = (top->HV+1)/(sumH(top)+4) ; X PVV = (top->VV+1)/(sumV(top)+4) ; X X#endif X /*--------------------------------------------*/ X X costBD = (float)log2((double)PBD) - 2 ; X costAD = (float)log2((double)PAD) - 2 ; X X costDH = (float)log2((double)PDH) - 2 ; X costHH = (float)log2((double)PHH) - 2 ; X costVH = (float)log2((double)PVH) - 2 ; X X costDV = (float)log2((double)PDV) - 2 ; X costHV = (float)log2((double)PHV) - 2 ; X costVV = (float)log2((double)PVV) - 2 ; X X costDB = (float)log2((double)PDB) - 2 ; X costBB = (float)log2((double)PBB) - 2 ; X X costDA = (float)log2((double)PDA) - 2 ; X costAA = (float)log2((double)PAA) - 2 ; X} X X/* X * The last argument is the forward matrix. X * The calling routine can choose to supply NULL to this argument X * when the matrix is not required. X */ Xstatic DPA( mode, P, result, matrix) X int mode ; X FScell *result ; X FiveStatePBlock *P ; X FSpcell matrix[][MATRIXSIZE] ; X{ X FScell c1[MAXSEQLENGTH], c2[MAXSEQLENGTH] ; X FScell *d1, *d2, *temp, boundarycell ; X int i, j ; X X FS_initcell(&boundarycell) ; X if (mode == fixed) FS_ProbToCosts(P); X X d1 = c1 ; X d2 = c2 ; X /* Boundary Conditions */ X FS_initcell(&d1[0]) ; X d1[0].D = (float)log2((double)P->D) ; X d1[0].H = (float)log2((double)P->H) ; X d1[0].V = (float)log2((double)P->V) ; X d1[0].A = (float)log2((double)P->A) ; X d1[0].B = (float)log2((double)P->B) ; X if (matrix != NULL) copy(&matrix[0][0],&d1[0]) ; X debug(TS_printcell(0,0,&d1[0]) ;) X X for (j = 1 ; j <= n; j++) { X if (mode == adaptive) X FS_AdaptiveCost(&d1[j-1],&boundarycell,&boundarycell,FALSE) ; X FS_ComputeCurrentCell(&d1[j],&d1[j-1],&boundarycell,&boundarycell,FALSE) ; X if (matrix != NULL) copy(&matrix[0][j],&d1[j]) ; X debug(TS_printcell(0,j,&d1[j]) ;) X } X X for (i = 1 ; i <= m ; i++) { /* General Step */ X if (mode == adaptive) X FS_AdaptiveCost(&boundarycell,&boundarycell,&d1[0],FALSE) ; X FS_ComputeCurrentCell(&d2[0],&boundarycell,&boundarycell,&d1[0],FALSE); X if (matrix != NULL) copy(&matrix[i][0],&d2[0]) ; X debug(TS_printcell(i,0,&d2[0]) ;) X X for (j = 1; j <= n; j++) { X if (mode == adaptive ) X FS_AdaptiveCost(&d2[j-1],&d1[j-1],&d1[j],a[i]==b[j]) ; X FS_ComputeCurrentCell(&d2[j],&d2[j-1],&d1[j-1],&d1[j],a[i]==b[j]) ; X if (matrix != NULL) copy(&matrix[i][j],&d2[j]) ; X debug(TS_printcell(i,j,&d2[j]) ;) X } /* for j */ X swap(d1, d2) ; X } /* for i */ X *result = d1[n] ; X} /*DPA*/ X Xstatic FS_ComputePCell(current,horz,diag,vert,charequal) X FSpcell *current,*horz,*diag,*vert ; X bool charequal ; X{ X float costD ; X X costD = (charequal?costDM:costDC) ; X current->D = sumlg5( X costD + diag->D, X costDH + horz->H, X costDB + horz->B, X costDA + vert->A, X costDV + vert->V ) ; X current->H = sumlg3( X (charequal?costHM:costHC) + diag->D, X costHH + horz->H, X costHV + vert->V) ; X current->B = sumlg5( X costBD + costD + diag->D, X costBD + costDH + horz->H, X costBB + horz->B, X costBD + costDV + vert->V, X costBD + costDA + vert->A ) ; X current->V = sumlg3( X (charequal?costVM:costVC) + diag->D, X costVH + horz->H, X costVV + vert->V) ; X current->A = sumlg5( X costAD + costD + diag->D, X costAD + costDH + horz->H, X costAD + costDB + horz->B, X costAD + costDV + vert->V, X costAA + vert->A ) ; X} X/* X * Fwd[i][j] given by DPA gives the prob that the machine ends in the X * various states after generating A[1..i], B[1..j]. X * This routine computes the matrix Rev[i][j] which gives the prob X * that FROM the various starting state the machine will X * generate A[i+1..m], B[j+1..n]. X * So Fwd[i][j] + Rev[i][j] i=1..m, j = 1..n gives the density plot. X */ Xstatic DPArev(P,matrix) X FiveStatePBlock *P ; X FSpcell matrix[][MATRIXSIZE] ; X{ X FSpcell e1[MAXSEQLENGTH], e2[MAXSEQLENGTH] ; X FSpcell *d1, *d2, *temp, *horz, *vert, *diag, boundarycell ; X int i, j ; X X FS_initpcell(&boundarycell) ; X FS_ProbToCosts(P) ; X X d1 = e1 ; X d2 = e2 ; X /* Boundary Conditions */ X d1[n+1].D = d1[n+1].H = d1[n+1].V = d1[n+1].A = d1[n+1].B = 0.0 ; X matrix[m+1][n+1] = d1[n+1] ; X X for (j = n ; j >= 1; j--) { X FS_ComputePCell(&d1[j],&d1[j+1],&boundarycell,&boundarycell,FALSE) ; X matrix[m+1][j] = d1[j] ; X } X X for (i = m ; i >= 1 ; i--) { /* General Step */ X FS_ComputePCell(&d2[n+1],&boundarycell,&boundarycell,&d1[n+1],FALSE) ; X matrix[i][n+1] = d2[n+1] ; X for (j = n; j >= 1; j--) { X FS_ComputePCell(&d2[j],&d2[j+1],&d1[j+1],&d1[j],a[i]==b[j]) ; X matrix[i][j] = d2[j] ; X } X swap(d1, d2) ; X } X} /*DPArev*/ X X/*------------------------------------------------------------------------------ X * Computes the 5 states R-theory message length with a fixed set of parameters X */ XFixParmFiveState(A,B,P,result) X string *A, *B ; X FiveStatePBlock *P ; X FScell *result ; X{ X a = A->sequence ; b = B->sequence ; X m = A->length ; n = B->length ; X DPA(fixed,P,result,(FSpcell *) NULL) ; X} END_OF_5State.c if test 26352 -ne `wc -c <5State.c`; then echo shar: \"5State.c\" unpacked with wrong size! fi # end of overwriting check fi echo shar: End of archive 1 \(of 4\). cp /dev/null ark1isdone MISSING="" for I in 1 2 3 4 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 4 archives. rm -f ark[1-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0