GH
view release on metacpan or search on metacpan
GH/Sim4/sim4.2002-03-03/sim4.init.c view on Meta::CPAN
*
* W gives the word size.
* X gives the value for terminating word extensions.
* K gives the MSP score threshold for the first pass.
* C gives the MSP score threshold for the second pass.
* R direction of search; 0 - search the '+' strand only;
* 1 - search the '-' strand only; 2 - search both strands and
* report the best match. (R=2)
* D adjusts the range of diagonals in building the exons.
* H adjusts the re-linking weight factor
* A specifies the output format: exon endpoints only (A=0),
* alignment text (A=1), alignment in lav format (A=2) or both
* exon endpoints and alignment text (A=3, A=4). For A=3, positions
* in sequence 1 are given in the original sequence, and for A=4 in
* its reverse complement. A=5 prints the exon and CDS coordinates
* (the latter, if known) in the `exon file' format required by PipMaker.
* N if !=0, highly accurate exon detection is expected, for highly
* accurate sequence data.
* P remove polyA tails; if match on complementary strand, change
* coordinates in sequence 1 according to the '+' strand and print
* #lav alignment header for all alignment options.
* B control the presence of ambiguity codes in sequence data. If
* 1 (default), allow ambiguity codes (ABCDGHKMNRSTVWXY); if 0,
GH/Sim4/sim4.2002-03-03/sim4.init.c view on Meta::CPAN
W - word size. (W=12)\n\
X - value for terminating word extensions. (X=12)\n\
K - MSP score threshold for the first pass. (e.g., K=16)\n\
C - MSP score threshold for the second pass. (e.g., C=12)\n\
R - direction of search; 0 - search the '+' (direct) strand only; \n\
1 - search the '-' strand only; 2 - search both strands and \n\
report the best match. (R=2)\n\
D - bound for the range of diagonals within consecutive msps in an\n\
exon. (D=10)\n\
H - weight factor for MSP scores in relinking. (H=500)\n\
A - output format: exon endpoints only (A=0), alignment text (A=1),\n\
alignment in lav (block) format (A=2), or both exon endpoints\n\
and alignment text (A=3, A=4). If complement match, A=0,1,2,3\n\
give direct positions in the long sequence and complement \n\
positions in the short sequence. A=4 gives direct positions in \n\
the first sequence, regardless of the relative lengths.\n\
A=5 prints the exon and CDS coordinates (the latter, if known)\n\
in the `exon file' format required by PipMaker. To be used\n\
with full-length mRNA sequences.\n\
P - if not 0, remove poly-A tails; report coordinates in the \n\
'+' (direct) strand for complement matches; use lav alignment \n\
headers in all display options. (P=0) \n\
GH/Sim4/sim4.2002-03-03/sim4.init.c view on Meta::CPAN
/* determine the type of comparison */
file_type = (len2<=len1) ? GEN_EST : EST_GEN;
if (file_type== EST_GEN) {
rf1 = seq_copy(sf1);
rf1 = seq_revcomp_inplace(rf1);
revseq1 = SEQ_CHARS(rf1);
if (rs.ali_flag==5) {
if (rs.CDS_to>len1)
fatal("Command line CDS endpoint exceeds sequence length.");
cds_gene = extract_tok(h1);
if (cds_gene==NULL) { /* no FastaA header */
cds_from = rs.CDS_from; cds_to = rs.CDS_to;
} else {
line = strstr(h1, "CDS=");
if (line && rs.S) {
fprintf(stderr, "Warning: Command line CDS specification overrides header CDS specification.");
cds_from = rs.CDS_from; cds_to = rs.CDS_to;
} else if (line) {
cds_range(line+4, &cds_from, &cds_to);
} else if (rs.S) {
cds_from = rs.CDS_from; cds_to = rs.CDS_to;
} else {
cds_from = cds_to = 0;
}
}
if (cds_to>len1)
fatal("CDS endpoints exceed sequence length.");
}
}
if (rs.poly_flag && file_type==EST_GEN) {
get_polyAT(seq1,len1,&pT,&pA,BOTH_AT);
} else pT = pA = 0;
bld_table(seq1-1+pT, len1-pA-pT, rs.W, INIT);
count = 0;
GH/Sim4/sim4.2002-03-03/sim4.init.c view on Meta::CPAN
cds_range(line+4, &cds_from, &cds_to);
}
} else if (count) {
line = strstr(h2, "CDS=");
if (line) {
cds_range(line+4, &cds_from, &cds_to);
} else {
cds_from = cds_to = 0;
}
}
if (cds_to>len2) fatal("CDS endpoints exceed sequence length.");
}
if (rs.poly_flag && file_type==GEN_EST) {
get_polyAT(seq2, len2, &pT, &pA, BOTH_AT);
}
++count;
init_stats(&st); init_stats(&rev_st);
in_K = (rs.set_K==TRUE) ? rs.K:-1;
in_C = (rs.set_C==TRUE) ? rs.C:-1;
GH/Sim4/sim4.2002-03-03/sim4.init.c view on Meta::CPAN
if (get_argval('B', &(args->B))) {
if (args->B && (args->B!=1))
fatal("B must be either 0 or 1.");
} else
args->B = 1;
if (get_strargval('S', &(args->S))) {
cds_range(args->S, &(args->CDS_from), &(args->CDS_to));
if ((args->CDS_from<=0) || (args->CDS_to<=0) ||
(args->CDS_from>args->CDS_to))
fatal("Illegal endpoints for the CDS region.");
} else
args->S = NULL;
if (args->S && (args->ali_flag!=5))
fatal ("A=5 must accompany CDS specification.");
return;
}
/* Extract the CDS endpoints from a specification of the form <n1>..<n2>.
 *
 * line  - start of the specification text.  Used both for the command line
 *         'S' argument and for the text following "CDS=" in a sequence
 *         header, so it may contain arbitrary bytes.
 * from  - receives <n1>.
 * to    - receives <n2>.
 *
 * The text must be: one or more digits, the two characters "..", one or
 * more digits, and then either the end of the string or a whitespace
 * character.  Any violation is reported through fatal().  No ordering or
 * range check is made on the two values here; that is left to the caller.
 *
 * NOTE(review): fatal() is assumed not to return -- confirm against its
 * definition.
 */
static void cds_range(char *line, int *from, int *to)
{
    char *s = line;

    if (line == NULL) fatal ("NULL CDS specification.");

    /* <ctype.h> classifiers require an argument representable as
     * unsigned char (or EOF); a plain char may be negative, so convert
     * through unsigned char before every call. */
    if (!isdigit((unsigned char)*s))
        fatal("Non-numerical value in the CDS specification.");
    while (isdigit((unsigned char)*s)) s++;

    /* the separator is exactly two consecutive dots */
    if (*s!='.') fatal ("Illegal CDS specification.");
    s++;
    if (*s!='.') fatal ("Illegal CDS specification.");
    s++;

    if (!isdigit((unsigned char)*s))
        fatal ("Non-numerical value in the CDS specification.");
    while (isdigit((unsigned char)*s)) s++;

    /* only end-of-string or whitespace may follow the second number */
    if (*s && !isspace((unsigned char)*s))
        fatal ("Garbage at the end of the CDS numerical specification.");

    /* now extract the CDS elements */
    /* NOTE(review): %d does not report overflow; an over-long digit run
     * passes the syntax check above but is not rejected here. */
    if (sscanf(line, "%d..%d", from, to)!=2)
        fatal ("Error when reading the CDS endpoints.");

    return;
}
static void add_offset_exons(Exon *exons, int offset)
{
Exon *t;
if (!offset || !(exons)) return;
GH/Sim4/sim4.2002-03-03/sim4b1.c view on Meta::CPAN
s1 = seq1+f1+1; /* bc at this stage, the msp pos do not have added +1 */
s2 = seq2+f2+1;
q1 = seq1+t1+1;
q2 = seq2+t2+1;
while (s1<=q1 && s2<=q2) { dist += (*s1!=*s2); s1++; s2++; }
return dist;
}
/* ---------------------- print endpoints of exons --------------------*/
#ifdef AUXUTILS
static void find_introns(Exon *eleft, Intron **Ilist)
{
Exon *tmp_exon, *tmp_exon1;
Intron *new, *tail;
int GTAG_score, CTAC_score;
*Ilist = tail = NULL;
if (!eleft) fatal("sim4b1.c: Something wrong in the exon list.\n");
( run in 0.238 second using v1.01-cache-2.11-cpan-beeb90c9504 )