GH
view release on metacpan or search on metacpan
GH/Sim4/sim4.2002-03-03/sim4b1.c view on Meta::CPAN
(void)strcpy((char *)tmp,END_SIG);
(void)strncpy((char *)(tmp+2),(char *)(seq2+sig->pos2),
(size_t)N-sig->pos2);
(void)strcpy((char *)(tmp+N-sig->pos2+2),START_SIG);
new = fmatch(seq1+sig->pos1-1,tmp,
M-sig->pos1+1,N-sig->pos2+4,
sig->pos1-1,sig->pos2+1);
if (new) {
tmp_block->to1 = sig->pos1;
tmp_block->to2 = sig->pos2;
new->next_exon = tmp_block->next_exon;
tmp_block->next_exon = new;
tmp_block->ori = (G_score>=abs(C_score)) ? 'G' : 'C';
}
}
}
}
/* Slide exon boundaries for optimal intron signals */
sync_flag = get_sync_flag(Lblock, Rblock, 6);
SLIDE_INTRON(sync_flag)(6,&Lblock,seq1,seq2);
/* decreasingly; script will be in reverse order */
flip_list(&Lblock, &Rblock);
pluri_align(dist_ptr,&(st->nmatches),Lblock,&Script_head);
flip_list(&Lblock, &Rblock); /* increasingly */
if (rs.poly_flag)
remove_poly(&Script_head,Lblock,seq1,seq2,N,pT,pA);
else
*pT = *pA = 0;
get_stats(Lblock, st);
*Exons = Lblock->next_exon;
free(Lblock);
if (!rs.ali_flag) {
free_align(Script_head);
return NULL;
} else
return Script_head;
}
struct hash_node {
int ecode; /* integer encoding of the word */
int pos; /* positions where word hits query sequence
*/
struct hash_node *link; /* next word with same last 7.5 letters */
};
#define HASH_SIZE 32767 /* 2**15 - 1 */
#define GEN_LOG4_ENTRIES 45
#define CDNA_LOG4_ENTRIES 25
static struct hash_node *phashtab[HASH_SIZE+1];
static struct hash_node **hashtab;
static int mask;
static int *next_pos, *pnext_pos;
/* The log4 arrays were computed to mimick the behaviour of the log formula
for computing the msp threshold in exon_cores(). For genomic_log4s,
entry i stores the value for the length of a genomic sequence
for which the contribution to the msp threshold is i/2, i.e.:
1.4*log_4(3/4*len1) = i/2;
Similarly, cDNA_log4s entries store lengths of the cDNA sequence for which
the contribution to the msp threshold is i/2, i.e.:
1.4*log_4(len2) = i/2;
Both arrays are sorted in increasing order, and can be searched with
binary search.
*/
static long genomic_log4s[]= {1, 2, 3, 5, 9, 15, 26, 42, 70, 114, \
188, 309, 507, 832, 1365, 1365, 2240, 2240, 3675, 6029,\
9892, 16231, 26629, 43690, 71681, \
117606, 192953, 316573, 519392, 852152,
1398101, 2293823, 3763409, 6174516, 10130347, \
16620564, 27268873, 44739242, 73402365, 120429110, \
197584514, 324171126, 531858072, 872603963, 1431655765
};
static long cDNA_log4s[]= {1, 1, 2, 4, 7, 11, 19, 32, 52, 86, \
141, 231, 380, 624, 1024, 1680, 2756, 4522, 7419, 12173, \
19972, 32768, 53761, 88204, 144715
};
static int get_msp_threshold(int len1, int len2)
{
int i, j;
i = find_log_entry(genomic_log4s, GEN_LOG4_ENTRIES, len1, 0);
j = find_log_entry(cDNA_log4s, CDNA_LOG4_ENTRIES, len2, 0);
if (!(i % 2)) return (int)(i/2+j/2);
else if (!(j % 2)) return (int)(i/2+j/2);
else return (int)(i/2+j/2+1);
}
static int find_log_entry(long *log4s, int n, int len, int offset)
{
int a;
a = n/2;
if ((len<log4s[a]) && (!a || (len>=log4s[a-1])))
return max(0,(a-1))+offset;
else if ((len>=log4s[a]) && ((a==n-1) || (len<log4s[a+1])))
return min(n-1,(a+1))+offset;
else if (len<log4s[a])
return find_log_entry(log4s,a-1,len, offset);
else if (len>log4s[a])
return find_log_entry(log4s+a+1,n-a-1,len, offset+a+1);
return -1;
}
/* -------------------- exon_cores() --------------------- */
static void exon_cores(uchar *s1, uchar *s2, int len1, int len2, int offset1, int offset2, int flag, int in_W, int in_K, int type)
{
int i, W, last_msp, lower, upper;
int *allocated;
Exon *tmp_block;
if (in_K<=0) {
/* compute expected length of longest exact match .. */
/* K = (int) (log(.75*(double)len1)+log((double)len2))/log(4.0); */
/* .. and throw in a fudge factor */
/* K *= 1.4; */
K = get_msp_threshold(len1, len2);
if (K>=0) K--; /* compensate for the rounding in the log formula */
/* commented this to avoid fragmentation
if (flag) K = min(K, DEFAULT_C); second pass
*/
} else
K = in_K;
numMSPs = 0;
exon_list = NULL;
allocated = ckalloc((len1+len2+1)*sizeof(int));
lower = ((file_type==EST_GEN) || (file_type==GEN_EST && type==TEMP))
? -len1 : -len2;
upper = ((file_type==EST_GEN) || (file_type==GEN_EST && type==TEMP))
? len2 : len1;
diag_lev = allocated - lower;
for (i=lower; i<=upper; ++i) diag_lev[i]=0;
W = min(in_W,len2);
switch (file_type) {
case EST_GEN: bld_table(s2,len2,W, type);
/* use longer sequence for permanent tables */
search(s1,s2,len1,len2,W);
break;
case GEN_EST: if (type!=TEMP) {
uchar *aux; int auxi;
aux = s1; s1 = s2; s2 = aux;
auxi = len1; len1 = len2; len2 = auxi;
}
bld_table(s2,len2,W, type);
/* use longer sequence for permanent tables */
search(s1,s2,len1,len2,W);
if (type!=TEMP) {
register int auxi;
uchar *aux;
Msp_ptr mp;
/* change s1 and s2 back */
aux = s1; s1 = s2; s2 = aux;
auxi = len1; len1 = len2; len2 = auxi;
for (mp=msp_list, i=0; i<numMSPs; i++) {
auxi = mp->pos1;
mp->pos1 = mp->pos2;
mp->pos2 = auxi;
mp = mp->next_msp;
}
}
break;
default: fatal("sim4b1.c: Invalid file type code.");
}
free(allocated);
if (type==TEMP) {
register struct hash_node *hptr, *tptr;
register int hval;
free(next_pos);
for (hval=0; hval<HASH_SIZE+1; hval++) {
hptr = hashtab[hval];
( run in 0.828 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )