formula results from the CPAN

Genome-Model-Tools-Music
#determine which test method to use
method = as.character(commandArgs()[4]);

if (method == "glm") {

    ### This program is for Generalized Linear Model (GLM) analysis
    ### Y = X + covar1 + covar2 ......
    ### Usually (not limited),
    ### Y is clinical trait (quantitative or binary)
    ### X is variant/gene/mutation etc.
    ### covar means covariate
    ### by Qunyuan Zhang (qunyuan@wustl.edu), 02/16/2012 updated 

    ### input options
    model.file = as.character(commandArgs()[5]);
    y.file = as.character(commandArgs()[6]);
    x.file = as.character(commandArgs()[7]);
    out.file = as.character(commandArgs()[8]);
    x.names="*";

    ### to run it on command line, type
    # R --no-save < glm.R model.file y.file x.file * out.csv
    # or (if you want to define x variables)
    # R --no-save < glm.R model.csv y.file x.file x1|x2|x3 out.csv
    # or (if x.file has been merged into y.file)
    # R --no-save < glm.R model.csv y.file * x1|x2|x3 out.csv
    # or (if you have defined x variable in model.file)
    # R --no-save < glm.R model.csv y.file * * out.csv

    ### about model.file 
    # column "type":  Q=quantitative trait, B=binary trait
    # column "y":     trait name
    # colum  "x":     variant/gene name; if x=NA or blank, it will determined by x.file and x.names
    # column "cvar":  covariate(s)
    # tab delimited 

    ### about y.file 
    # trait data file, column 1 must be sample id 
    # tab delimited 

    ### about x.file
    # usually mutation/variant/geene data file 
    # the first column must be sample id (the same as in y.file, ordered or not)
    # tab delimited  
    # x.file="*" if x.file already merged into y.file 

    ### about x.names
    # x.names="*" will use all column names in x.file as x variable names
    # or you can define it in the format x.names="gene1|gene2|gene3"
    # self-defined x.names have to be found in column names of y.file and/or x.file  

    #################### myglm fuction ##############
    myglm=function(z,trait,variant,covar=NA,ytype) {
        if (nchar(covar)==0 | is.na(covar) | is.null(covar)) { 
            model=formula(paste(trait,"~",variant)) 
        } else {
            model=formula(paste(trait,"~",variant,"+",covar))
        }
        if (ytype=="B") fit=glm(formula=model,data=z,family=binomial(link = "logit"))
        if (ytype=="Q") fit=glm(formula=model,data=z,family=gaussian(link = "identity"))
        fit
    }
    #################################################


    ### data input #####
    read.table(model.file,colClasses="character",na.strings = c("","NA"),sep="\t",header=T)->md
    read.table(y.file,na.strings = c("","NA"),sep="\t",header=T)->y
    if (x.names!="*") x.names=strsplit(x.names,split="[|]")[[1]]

    if (x.file!="*")
    {
        read.table(x.file,na.strings = c("","NA"),sep="\t",header=T)->x
        xid=colnames(x)[1]
        xs=colnames(x)[-1]
        if (x.names!="*") {x=x[,c(xid,x.names)];xs=colnames(x)[-1]}
        x.names=xs
        yid=colnames(y)[1]
        ysid = ! (colnames(y) %in% xs)
        y=y[,ysid]
        if (sum(ysid)==1) {y=data.frame(id=y);colnames(y)[1]=yid}
        #y=merge(y,x,by.x = xid, by.y = yid)
        y=merge(x,y,by.x = xid, by.y = yid)
    }

    ######### analysis ##########
    tt=NULL
    for (i in c(1:nrow(md)))
    {
        ytype=md[i,1];yi=md[i,2];xs=md[i,3];covi=md[i,4];memo=md[i,5]
        if (!is.na(xs) & nchar(xs)>0) xs=strsplit(xs,split="[|]")[[1]]
        if (is.na(xs)[1]|nchar(xs)[1]==0) xs=x.names 
        if (length(covi)==0) covi=NA
        for (xi in xs)
        {
            print(yi); print(xi); print(covi); print("******")
            if (ytype=="Q") try(anova(myglm(y,yi,xi,covi,ytype),test="F"))->fit
            if (ytype=="B") try(anova(myglm(y,yi,xi,covi,ytype),test="Chisq"))->fit
            if (class(fit)[1]!="try-error")
            {
                fit=as.matrix(fit)
                if (xi %in% rownames(fit)) tt=rbind(tt, cbind(yi,ytype,xi,as.data.frame(t(fit[xi,])),covi,memo))
            }
        }
    }
    #"yi","ytype","xi","Df","Deviance","Resid. Df","Resid. Dev","F","Pr(>F)","covi","memo"
    if (ytype=="Q") colnames(tt) = c("y","y_type","x","degrees_freedom","deviance","residual_degrees_freedom","residual_deviance","F_statistic","p-value","covariants","memo");
    if (ytype=="B") colnames(tt) = c("y","y_type","x","degrees_freedom","deviance","residual_degrees_freedom","residual_deviance","p-value","covariants","memo");
    write.table(tt,file=out.file,quote=F,sep="\t",row.names=F);

} else {

    # else, we process numerical or categorical clinical data correlation

    clinical_data = as.character(commandArgs()[5]);
    mutation_matrix = as.character(commandArgs()[6]);
    output_file = as.character(commandArgs()[7]);

    # FUNCTION finds the correlation between two variables
    cor2=function(ty,tx,method)
( run in 2.057 seconds using v1.01-cache-2.11-cpan-9581c071862 )