GH

 view release on metacpan or  search on metacpan

GH/Sim4/sim4.2002-03-03/sim4b1.c  view on Meta::CPAN

              (void)strcpy((char *)tmp,END_SIG);
              (void)strncpy((char *)(tmp+2),(char *)(seq2+sig->pos2),
			    (size_t)N-sig->pos2);
              (void)strcpy((char *)(tmp+N-sig->pos2+2),START_SIG);
              new = fmatch(seq1+sig->pos1-1,tmp,
                           M-sig->pos1+1,N-sig->pos2+4,
                           sig->pos1-1,sig->pos2+1);
              if (new) {
                  tmp_block->to1 = sig->pos1;
                  tmp_block->to2 = sig->pos2;
                  new->next_exon = tmp_block->next_exon;
                  tmp_block->next_exon = new;
                  tmp_block->ori = (G_score>=abs(C_score)) ? 'G' : 'C';
              }
          }
      } 
  }

  /* Slide exon boundaries for optimal intron signals */
  sync_flag = get_sync_flag(Lblock, Rblock, 6);
  SLIDE_INTRON(sync_flag)(6,&Lblock,seq1,seq2);

  /* decreasingly; script will be in reverse order */
  flip_list(&Lblock, &Rblock); 
  pluri_align(dist_ptr,&(st->nmatches),Lblock,&Script_head); 
  flip_list(&Lblock, &Rblock);      /* increasingly */

  if (rs.poly_flag) 
      remove_poly(&Script_head,Lblock,seq1,seq2,N,pT,pA);
  else
      *pT = *pA = 0;

  get_stats(Lblock, st);

  *Exons = Lblock->next_exon;
  free(Lblock);
  if (!rs.ali_flag) {
     free_align(Script_head);
     return NULL;
  } else
     return Script_head;

}


struct hash_node {
        int ecode;             /* integer encoding of the word */
        int pos;                /* positions where word hits query sequence
 */
        struct hash_node *link; /* next word with same last 7.5 letters */
};

#define HASH_SIZE 32767        /* 2**15 - 1 */
#define GEN_LOG4_ENTRIES 45
#define CDNA_LOG4_ENTRIES 25
static struct hash_node *phashtab[HASH_SIZE+1];
static struct hash_node **hashtab;
static int mask;
static int *next_pos, *pnext_pos;

/* The log4 arrays were computed to mimick the behaviour of the log formula
   for computing the msp threshold in exon_cores(). For genomic_log4s,
   entry i stores the value for the length of a genomic sequence
   for which the contribution to the msp threshold is i/2, i.e.:
               1.4*log_4(3/4*len1) = i/2;  
   Similarly, cDNA_log4s entries store lengths of the cDNA sequence for which
   the contribution to the msp threshold is i/2, i.e.:
               1.4*log_4(len2) = i/2;
   Both arrays are sorted in increasing order, and can be searched with 
   binary search.
*/
static long genomic_log4s[]= {1, 2, 3, 5, 9, 15, 26, 42, 70, 114, \
            188, 309, 507, 832, 1365, 1365, 2240, 2240, 3675, 6029,\
            9892, 16231, 26629, 43690, 71681, \
            117606, 192953, 316573, 519392, 852152,
            1398101, 2293823, 3763409, 6174516, 10130347, \
            16620564, 27268873, 44739242, 73402365, 120429110, \
            197584514, 324171126, 531858072, 872603963, 1431655765
            };
static long cDNA_log4s[]= {1, 1, 2, 4, 7, 11, 19, 32, 52, 86, \
            141, 231, 380, 624, 1024, 1680, 2756, 4522, 7419, 12173, \
            19972, 32768, 53761, 88204, 144715
            };


static int get_msp_threshold(int len1, int len2)
{
    int i, j;

    i = find_log_entry(genomic_log4s, GEN_LOG4_ENTRIES, len1, 0);
    j = find_log_entry(cDNA_log4s, CDNA_LOG4_ENTRIES, len2, 0);

    if (!(i % 2)) return (int)(i/2+j/2);
    else if (!(j % 2)) return (int)(i/2+j/2);
    else return (int)(i/2+j/2+1);
}

static int find_log_entry(long *log4s, int n, int len, int offset)
{
    int a;

    a = n/2;  
    if ((len<log4s[a]) && (!a || (len>=log4s[a-1]))) 
                return max(0,(a-1))+offset;
    else if ((len>=log4s[a]) && ((a==n-1) || (len<log4s[a+1]))) 
                return min(n-1,(a+1))+offset;
    else if (len<log4s[a]) 
                return find_log_entry(log4s,a-1,len, offset);   
    else if (len>log4s[a])
                return find_log_entry(log4s+a+1,n-a-1,len, offset+a+1);
    return -1;
}

/* --------------------   exon_cores()   --------------------- */

static void  exon_cores(uchar *s1, uchar *s2, int len1, int len2, int offset1, int offset2, int flag, int in_W, int in_K, int type)
{
    int       i, W, last_msp, lower, upper;
    int      *allocated;
    Exon     *tmp_block;


    if (in_K<=0) {
        /* compute expected length of longest exact match .. */
        /* K = (int) (log(.75*(double)len1)+log((double)len2))/log(4.0); */
        /* .. and throw in a fudge factor */
        /* K *= 1.4; */

        K = get_msp_threshold(len1, len2);
        if (K>=0) K--; /* compensate for the rounding in the log formula */
/*      commented this to avoid fragmentation 
        if (flag) K = min(K, DEFAULT_C); second pass 
*/
    } else 
        K = in_K;

    numMSPs = 0;
    exon_list = NULL;

    allocated = ckalloc((len1+len2+1)*sizeof(int));
    lower = ((file_type==EST_GEN) || (file_type==GEN_EST && type==TEMP)) 
             ? -len1 : -len2;   
    upper = ((file_type==EST_GEN) || (file_type==GEN_EST && type==TEMP)) 
             ?  len2 :  len1;     
    diag_lev = allocated - lower;
    for (i=lower; i<=upper; ++i) diag_lev[i]=0;

    W = min(in_W,len2);
    switch (file_type) {
       case EST_GEN:  bld_table(s2,len2,W, type);
                      /* use longer sequence for permanent tables */
                      search(s1,s2,len1,len2,W);
                      break;
       case GEN_EST:  if (type!=TEMP) {
                      uchar *aux; int   auxi;

                      aux = s1; s1 = s2; s2 = aux; 
                      auxi = len1; len1 = len2; len2 = auxi;
                      }
                      bld_table(s2,len2,W, type);
                      /* use longer sequence for permanent tables */
                      search(s1,s2,len1,len2,W); 
                      if (type!=TEMP) {
                          register int   auxi; 
                          uchar *aux;
                          Msp_ptr mp; 

                          /* change s1 and s2 back */
                          aux = s1; s1 = s2; s2 = aux;
                          auxi = len1; len1 = len2; len2 = auxi;

                          for (mp=msp_list, i=0; i<numMSPs; i++) {
		      	      auxi = mp->pos1;
			      mp->pos1 = mp->pos2;
			      mp->pos2 = auxi;
			      mp = mp->next_msp;
                          }
                      }
                      break;
       default:       fatal("sim4b1.c: Invalid file type code.");
    }
  
    free(allocated);
    if (type==TEMP) {
        register struct hash_node *hptr, *tptr;
        register int    hval;
        
        free(next_pos);
        for (hval=0; hval<HASH_SIZE+1; hval++) {
             hptr = hashtab[hval];



( run in 0.828 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )