GH

 view release on metacpan or  search on metacpan

GH/Sim4/sim4.2002-03-03/sim4.init.c  view on Meta::CPAN

*
*       W gives the word size.
*       X gives the value for terminating word extensions.
*       K gives the MSP score threshold for the first pass.
*       C gives the MSP score threshold for the second pass. 
*       R direction of search; 0 - search the '+' strand only;
*         1 - search the '-' strand only; 2 - search both strands and
*         report the best match. (R=2)
*       D adjusts the range of diagonals in building the exons.
*       H adjusts the re-linking weight factor
*       A specifies the output format: exon endpoints only (A=0),
*         alignment text (A=1), alignment in lav format (A=2) or both
*         exon endpoints and alignment text (A=3, A=4). For A=3, positions
*         in sequence 1 are given in the original sequence, and for A=4 in
*         its reverse complement. A=5 prints the exon and CDS coordinates 
*         (the latter, if known) in the `exon file' format required by PipMaker.
*       N if !=0, highly accurate exon detection is expected, for highly
*         accurate sequence data. 
*       P remove polyA tails; if match on complementary strand, change
*         coordinates in sequence 1 according to the '+' strand and print
*         #lav alignment header for all alignment options.
*       B control the presence of ambiguity codes in sequence data. If
*         1 (default), allow ambiguity codes (ABCDGKMNRCTVWXY); if 0,  

GH/Sim4/sim4.2002-03-03/sim4.init.c  view on Meta::CPAN

       W  -  word size. (W=12)\n\
       X  -  value for terminating word extensions. (X=12)\n\
       K  -  MSP score threshold for the first pass. (e.g., K=16)\n\
       C  -  MSP score threshold for the second pass. (e.g., C=12)\n\
       R  -  direction of search; 0 - search the '+' (direct) strand only; \n\
             1 - search the '-' strand only; 2 - search both strands and \n\
             report the best match. (R=2)\n\
       D  -  bound for the range of diagonals within consecutive msps in an\n\
             exon. (D=10)\n\
       H  -  weight factor for MSP scores in relinking. (H=500)\n\
       A  -  output format: exon endpoints only (A=0), alignment text (A=1),\n\
             alignment in lav (block) format (A=2), or both exon endpoints\n\
             and alignment text (A=3, A=4). If complement match, A=0,1,2,3\n\
             give direct positions in the long sequence and complement \n\
             positions in the short sequence. A=4 gives direct positions in \n\
             the first sequence, regardless of the relative lengths.\n\
             A=5 prints the exon and CDS coordinates (the latter, if known)\n\
             in the `exon file' format required by PipMaker. To be used\n\
             with full-length mRNA sequences.\n\
       P  -  if not 0, remove poly-A tails; report coordinates in the \n\
             '+' (direct) strand for complement matches; use lav alignment \n\
             headers in all display options. (P=0) \n\

GH/Sim4/sim4.2002-03-03/sim4.init.c  view on Meta::CPAN

        
        /* determine the type of comparison */
        file_type = (len2<=len1) ? GEN_EST : EST_GEN;
        if (file_type== EST_GEN) {
            rf1 = seq_copy(sf1);
            rf1 = seq_revcomp_inplace(rf1);
            revseq1 = SEQ_CHARS(rf1);

            if (rs.ali_flag==5) {
                if (rs.CDS_to>len1) 
                   fatal("Command line CDS endpoint exceeds sequence length.");
                cds_gene = extract_tok(h1);
                if (cds_gene==NULL) {  /* no FastaA header */
                    cds_from = rs.CDS_from; cds_to = rs.CDS_to;
                } else {
                    line = strstr(h1, "CDS="); 
                    if (line && rs.S) {
                       fprintf(stderr, "Warning: Command line CDS specification overrides header CDS specification."); 
                       cds_from = rs.CDS_from; cds_to = rs.CDS_to;
                    } else if (line) {
                       cds_range(line+4, &cds_from, &cds_to); 
                    } else if (rs.S) {
                       cds_from = rs.CDS_from; cds_to = rs.CDS_to;
                    } else {
                       cds_from = cds_to = 0;
                    }
                }
                if (cds_to>len1) 
                   fatal("CDS endpoints exceed sequence length.");
            }
        }
        
        if (rs.poly_flag && file_type==EST_GEN)  {
            get_polyAT(seq1,len1,&pT,&pA,BOTH_AT);
        } else pT = pA = 0;

        bld_table(seq1-1+pT, len1-pA-pT, rs.W, INIT);
        
        count = 0; 

GH/Sim4/sim4.2002-03-03/sim4.init.c  view on Meta::CPAN

                       cds_range(line+4, &cds_from, &cds_to); 
                   }
               } else if (count) {
                   line = strstr(h2, "CDS=");
                   if (line) {
                       cds_range(line+4, &cds_from, &cds_to);
                    } else {
                       cds_from = cds_to = 0;
                    }
               }
               if (cds_to>len2) fatal("CDS endpoints exceed sequence length.");
           }

           if (rs.poly_flag && file_type==GEN_EST)  {
               get_polyAT(seq2, len2, &pT, &pA, BOTH_AT);
           }

           ++count; 
           init_stats(&st); init_stats(&rev_st);
           in_K = (rs.set_K==TRUE) ? rs.K:-1;
           in_C = (rs.set_C==TRUE) ? rs.C:-1;

GH/Sim4/sim4.2002-03-03/sim4.init.c  view on Meta::CPAN

        if (get_argval('B', &(args->B))) {
                 if (args->B && (args->B!=1))
                         fatal("B must be either 0 or 1.");
        } else  
                args->B = 1;
        
        if (get_strargval('S', &(args->S))) {
            cds_range(args->S, &(args->CDS_from), &(args->CDS_to));
            if ((args->CDS_from<=0) || (args->CDS_to<=0) || 
                (args->CDS_from>args->CDS_to))
                fatal("Illegal endpoints for the CDS region.");
        } else 
                args->S = NULL;

        if (args->S && (args->ali_flag!=5))
           fatal ("A=5 must accompany CDS specification.");

        return;
}

/* Extract the CDS endpoints from a specification of the form <n1>..<n2>.
 *
 *   line  - text positioned on the first digit of <n1>; in this file it is
 *           either the command line value of S or the text following
 *           "CDS=" in a sequence header.
 *   from  - receives <n1>.
 *   to    - receives <n2>.
 *
 * The specification must consist of one or more decimal digits, the two
 * characters "..", and one or more decimal digits, and must be followed by
 * either the end of the string or a whitespace character.  Every violation
 * is reported through fatal().  No ordering or range check is applied to
 * the two values here; that is left to the callers.
 */
static void cds_range(char *line, int *from, int *to)
{
     char *s = line;

     if (line == NULL) fatal ("NULL CDS specification.");

     /* The <ctype.h> classifiers are only defined for values representable
        as unsigned char (or EOF).  A plain char holding a byte >= 0x80 can
        be negative, so every character is converted through unsigned char
        before it is classified. */

     /* first endpoint: one or more digits */
     if (!isdigit((unsigned char)(*s)))
         fatal("Non-numerical value in the CDS specification.");
     while (isdigit((unsigned char)(*s))) s++;

     /* separator: exactly two consecutive dots */
     if (*s!='.') fatal ("Illegal CDS specification.");
     s++;
     if (*s!='.') fatal ("Illegal CDS specification.");
     s++;

     /* second endpoint: one or more digits */
     if (!isdigit((unsigned char)(*s)))
         fatal ("Non-numerical value in the CDS specification.");
     while (isdigit((unsigned char)(*s))) s++;

     /* only end-of-string or whitespace may follow the second endpoint */
     if (*s && !isspace((unsigned char)(*s)))
        fatal ("Garbage at the end of the CDS numerical specification.");

     /* now extract the CDS elements; the syntax has been validated above,
        so both conversions are expected to succeed.
        NOTE(review): %d does not detect values too large for an int. */
     if (sscanf(line, "%d..%d", from, to)!=2)
         fatal ("Error when reading the CDS endpoints.");

     return;
}

static void add_offset_exons(Exon *exons, int offset)
{
    Exon *t;

    if (!offset || !(exons)) return;
 

GH/Sim4/sim4.2002-03-03/sim4b1.c  view on Meta::CPAN

    s1 = seq1+f1+1;   /* bc at this stage, the msp pos do not have added +1 */
    s2 = seq2+f2+1;
    q1 = seq1+t1+1;
    q2 = seq2+t2+1;

    while (s1<=q1 && s2<=q2) { dist += (*s1!=*s2); s1++; s2++; } 
    
    return dist;
}

/* ----------------------  print endpoints of exons  --------------------*/

#ifdef AUXUTILS 
static void find_introns(Exon *eleft, Intron **Ilist)
{
  Exon   *tmp_exon, *tmp_exon1;
  Intron *new, *tail;
  int     GTAG_score, CTAC_score;

  *Ilist = tail = NULL;
  if (!eleft) fatal("sim4b1.c: Something wrong in the exon list.\n");



( run in 1.015 second using v1.01-cache-2.11-cpan-2b1a40005be )