GBrowse
view release on metacpan or search on metacpan
contrib/GeneFinder/genefinder/gfcode.c view on Meta::CPAN
typedef struct GfTableStruct {
char siteType[MAXSTRLEN]; /* type of site: e.g. atg, intron5, intron3, polya */
char refSeqs[MAXSTRLEN]; /* reference sequences used to compute score table;
current possible values are "all", "genes", "introns" or "spliced" */
char freqType[MAXSTRLEN]; /* "within" or "between"; specifies how
frequencies are calculated (with respect to ntuple classes); when
classDef is "unique" or "overlap", "within" is
automatically assumed */
char classDef[MAXSTRLEN];
/* "unique","overlap",or "defined":
specifies how ntuple classes are defined. For ordinary (non-overlapping)
tuples, freqType is "within", and classDef is "unique" (for unique class).
For overlapping tuples, choose "overlap" and "within". For codon tables
(using classes corresponding to the amino acids), choose "defined", and
"within" for relative codon frequencies, "between" for amino acid frequencies
*/
int numClasses; /* the number of ntuple classes */
int classes[MAXNUMCLASSES][MAXCLASSSIZE]; /* the numerical codes for
the classes */
int startOff, endOff;
/*starting,ending offset (relative to site position)*/
int jump; /* jump: usually 1; 3 for codon tables */
int numSymbs, maxSymb; /* number of consecutive positions used;
no. of symbols */
int numCols, numRows, modNum;
/*
numCols = (endOff-startOff-numSymbs+1)/jump + 1
numRows = maxSymb^numSymbs
modNum = maxSymb^(numSymbs - 1)
*/
int forcedPos[MAXNUMFORCED]; /* positions in table (relative to the site
position) which are forced to their actual frequencies (i.e. no small
sample bias correction) */
int numForced; /* no. of forced positions */
int *convTable; /* table to convert chars to nums */
float **nucFreqs, **logNucFreqs;
/* by frequencies I mean observed probabilities */
int **nucCounts; /* observed counts (used to compute frequencies) */
int *numTuples; /* array of number of tuples in each column */
} GfTable;
/* keeps relevant tables in a single structure */
typedef struct GfTableVecStruct {
GfTable *codonTable,*intron5Table,*intron3Table,*intronTable,*atgTable,*stopTable;
} GfTableVec;
/*****************************************************/
static void lettsToNums (Sequence *sequence, GfTable *table) ;
static void countsToFreqs (GfTable *table, int selfSample) ;
static void cleanNucCounts (GfTable *table) ;
static void makeNucFreqs (GfTable *table, int selfSample) ;
static float tPower (float x) ;
static float lRatio (float nCounts, float nTotal, int selfSample) ;
static void makeClassNucFreqs (GfTable *table, int selfSample) ;
static void logDiffs (GfTable *table1, GfTable *table2) ;
static GfTable *readTableFile (char *tableFile, int initial, int selfSample) ;
static GfTable *initTable (void) ;
static void expandTable (GfTable *table) ;
static int **allocIntMat (GfTable *table) ;
static float **allocFloatMat (GfTable *table) ;
static int *makeConvTable (void) ;
static void aceFeatures (Sequence *seq, GfTableVec *tVec, float *fp) ;
static int aceSites (Sequence *sequence, GfTable *table, float cutoff, int type) ;
static void aceMaxSegs (double *cumVec, int start, int end) ;
/*****************************************************/
static void lettsToNums (Sequence *sequence, GfTable *table)
{
int modNum, i, aNum, pos;
if (sequence->currNumSymbs == table->numSymbs) return;
if (!sequence->currNumSymbs)
sequence->nums = (int *)messalloc(sequence->length * sizeof(int));
/* NOTE: should really test that maxSymb hasn't changed, either */
modNum = table->modNum;
aNum = table->convTable[(int)sequence->letters[0]];
for (i = 1; i < table->numSymbs; i++)
aNum = table->maxSymb * aNum + table->convTable[(int)sequence->letters[i]];
sequence->nums[0] = aNum;
for (pos = 1; i < sequence->length; pos++, i++) {
aNum = (table->maxSymb * (aNum % modNum)) + table->convTable[(int)sequence->letters[i]];
sequence->nums[pos] = aNum;
}
for (; pos < sequence->length; pos++)
sequence->nums[pos] = 0;
sequence->currNumSymbs = table->numSymbs;
}
/*****************************************************/
static void countsToFreqs (GfTable *table, int selfSample)
{
cleanNucCounts(table);
if (!strcmp(table->classDef,"defined"))
makeClassNucFreqs(table,selfSample);
else makeNucFreqs(table,selfSample);
}
static void cleanNucCounts (GfTable *table)
{
int i,j,k,iMod,dMod,maxSymb;
maxSymb = table->maxSymb;
dMod = pow((float)maxSymb,(float)(table->numSymbs - table->jump)) + .1;
for (j = 0; j < table->numCols; j++) {
table->numTuples[j] = 0;
for (i = 0; i < table->numRows; i++) {
if ( !strcmp(table->siteType, "codon")
&& (i/dMod == 106 || i/dMod == 108 || i/dMod == 116)
)
table->nucCounts[i][j] = 0;
for (k = 0, iMod = i; k < table->numSymbs; k++, iMod /= maxSymb)
if (!(iMod % maxSymb)) {
table->nucCounts[i][j] = 0;
break;
}
contrib/GeneFinder/genefinder/gfcode.c view on Meta::CPAN
else
{
if (!(fp = filopen (tableFile, "", "r")))
{ messout ("Sorry, I failed to open %s",tableFile) ;
return 0 ;
}
}
#else
if (!(fp = fopen (tableFile, "r")))
{
fprintf(stderr, "Sorry, I failed to open %s\n",tableFile) ;
return 0 ;
}
#endif
table = initTable();
iclass = 0;
while (EOF != fscanf(fp,"%s",string)) {
begin:
if (!strcmp(string, "//"))
do { c = fgetc(fp); } while (c != '\n' && c != EOF);
else if(!strcmp(string,"siteType:")) fscanf(fp,"%s",table->siteType);
else if(!strcmp(string,"refSeqs:")) fscanf(fp,"%s",table->refSeqs);
else if(!strcmp(string,"freqType:")) fscanf(fp,"%s",table->freqType);
else if(!strcmp(string,"classDef:")) fscanf(fp,"%s",table->classDef);
else if(!strcmp(string,"startOff:")) fscanf(fp,"%d",&table->startOff);
else if(!strcmp(string,"endOff:")) fscanf(fp,"%d",&table->endOff);
else if(!strcmp(string,"numSymbs:")) fscanf(fp,"%d",&table->numSymbs);
else if(!strcmp(string,"maxSymb:")) fscanf(fp,"%d",&table->maxSymb);
else if(!strcmp(string,"jump:")) fscanf(fp,"%d",&table->jump);
/* diffs is no longer necessary */
else if(!strcmp(string,"numForced:")) fscanf(fp,"%d",&table->numForced);
else if(!strcmp(string,"forcedPos:"))
for (i = 0; i < table->numForced; i++)
fscanf(fp,"%d",&table->forcedPos[i]);
else if(!strcmp(string,"class:")) {
i = -1;
do {
i++;
fscanf(fp,"%s",string2);
} while(1 == sscanf(string2,"%d",&table->classes[iclass][i]));
strcpy(string,string2);
table->classes[iclass][i] = 0;
iclass++;
goto begin;
}
else if(!strcmp(string,"*")) break;
else {
#ifdef ACEDB
messcrash ("ERROR in tableFile %s: unknown field %s",
tableFile, string);
#else
fprintf(stderr, "ERROR in tableFile %s: \"%s\" unknown field\n",
tableFile, string);
exit(1);
#endif
}
};
table->classes[iclass][0] = 0;
table->numClasses = iclass;
expandTable(table);
if (!initial) {
for (i = 0; i < table->numRows; i++)
for (j = 0; j < table->numCols; j++) {
d = fscanf(fp, "%d", &table->nucCounts[i][j]);
if (!d || d == EOF) {
#ifdef ACEDB
messcrash ("scoreTable %s incomplete", tableFile);
#else
fprintf(stderr, "scoreTable %s incomplete", tableFile);
exit(1);
#endif
}
}
/* Following no longer applies.
if (!strcmp(table->siteType,"codon")
|| !strcmp(table->siteType,"intron")
|| !strcmp(table->siteType,"exon"))
smallSample = 0;
else smallSample = 1;
*/
countsToFreqs(table,selfSample);
}
filclose(fp);
return table;
}
/*initialize table structure */
static GfTable *initTable (void)
{
GfTable *table;
table = (GfTable *)messalloc(sizeof(GfTable));
table->jump = 1; /*default jump size: for ordinary tables */
table->numForced = 0; /*default is no positions forced */
strcpy(table->refSeqs,"all"); /*default reference sequences*/
strcpy(table->freqType,"within");
strcpy(table->classDef,"unique"); /*default frequency calculations:
single class, frequencies are relative to it */
return table;
}
/* fill in additional (derivative) values in the table structure */
static void expandTable (GfTable *table)
{
int modNum, i;
table->convTable = makeConvTable();
table->numCols = (table->endOff - table->startOff - table->numSymbs + 1)/table->jump + 1;
modNum = 1;
for (i = 1; i < table->numSymbs; i++) modNum *= table->maxSymb;
table->modNum = modNum;
table->numRows = modNum * table->maxSymb;
if (strcmp(table->classDef,"defined")){
/* this computes numClasses when classes are NOT defined */
if (!strcmp(table->classDef,"unique")) table->numClasses = 1;
else for (i = 0, table->numClasses = 1;
i < table->numSymbs - table->jump;
i++, table->numClasses *= table->maxSymb
);
}
table->nucCounts = allocIntMat(table);
table->nucFreqs = allocFloatMat(table);
table->logNucFreqs = allocFloatMat(table);
table->numTuples = (int *)messalloc(table->numCols * sizeof(int));
}
static int **allocIntMat (GfTable *table)
{
int **mtable;
int i, j;
mtable = (int **)messalloc(table->numRows * sizeof(int *));
for (i = 0; i < table->numRows; i++) {
mtable[i] = (int *)messalloc(table->numCols * sizeof(int));
for (j = 0; j < table->numCols; j++) mtable[i][j] = 0;
}
return(mtable);
}
static float **allocFloatMat (GfTable *table)
{
float **stable;
int i, j;
stable = (float **)messalloc(table->numRows * sizeof(float *));
for (i = 0; i < table->numRows; i++) {
stable[i] = (float *)messalloc(table->numCols * sizeof(float));
for (j = 0; j < table->numCols; j++) stable[i][j] = 0;
}
return(stable);
}
static int *makeConvTable (void)
{
int *convTable;
int i;
convTable = (int *)messalloc(256 * sizeof(int));
for (i = 0; i < 256; i++) convTable[i] = 0;
convTable['A'] = convTable['a'] = 1;
convTable['C'] = convTable['c'] = 2;
convTable['G'] = convTable['g'] = 3;
( run in 1.894 second using v1.01-cache-2.11-cpan-39bf76dae61 )