BioX-Workflow

 view release on metacpan or  search on metacpan

example/example1/config.yml  view on Meta::CPAN

---
global:
    - indir: data/raw
    - outdir: data/processed
    - file_rule: (.*).csv$
rules:
    - backup:
        local:
            - wait: 0
        process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
    - grep_VARA:
        process: |
            echo "Working on {$self->{indir}}/{$sample.csv}"
            grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
    - grep_VARB:
        process: |
            grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv

example/example1/workflow.sh  view on Meta::CPAN

#
# Global Variables:
#	indir: data/raw
#	outdir: data/processed
#	file_rule: (.csv)$
#

#
#

# Starting backup
#



#
# Variables 
# Indir: data/raw
# Outdir: data/processed/backup
#

cp data/raw/SAMPLE1.csv data/processed/backup/SAMPLE1.csv

cp data/raw/SAMPLE2.csv data/processed/backup/SAMPLE2.csv


wait

#
# Ending backup
#


#
#

# Starting grep_VARA
#



#
# Variables 
# Indir: data/processed/backup
# Outdir: data/processed/grep_VARA
#

echo "Working on data/processed/backup/SAMPLE1csv"
grep -i "VARA" data/processed/backup/SAMPLE1.csv >> data/processed/grep_VARA/SAMPLE1.grep_VARA.csv


echo "Working on data/processed/backup/SAMPLE2csv"
grep -i "VARA" data/processed/backup/SAMPLE2.csv >> data/processed/grep_VARA/SAMPLE2.grep_VARA.csv



wait

#
# Ending grep_VARA
#


example/example3/drake.full.yml  view on Meta::CPAN

---
plugins:
    - Drake
global:
    - indir: data/raw
    - outdir: data/processed
    - file_rule: (.csv)$
rules:
    - backup:
        local:
            - INPUT: "{$self->indir}/{$sample}.csv"
            - OUTPUT: "{$self->outdir}/{$sample}.csv"
            - thing: "other thing"
        process: |
            cp $INPUT $OUTPUT
    - grep_VARA:
        local:
            - OUTPUT: "{$self->outdir}/{$sample}.grep_VARA.csv"
        process: |

example/example3/drake.log  view on Meta::CPAN

2015-06-21 14:02:47,543 INFO Running 3 steps with concurrence of 1...
2015-06-21 14:02:47,568 INFO 
2015-06-21 14:02:47,570 INFO --- 0. Running (timestamped): /home/guests/jir2004/workflow/output/backup/test1.csv <- /home/guests/jir2004/workflow/test1.csv
2015-06-21 14:02:47,592 INFO --- 0: /home/guests/jir2004/workflow/output/backup/test1.csv <- /home/guests/jir2004/workflow/test1.csv -> done in 0.02s
2015-06-21 14:02:47,597 INFO 
2015-06-21 14:02:47,598 INFO --- 1. Running (timestamped): /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test1.csv
2015-06-21 14:02:47,612 INFO --- 1: /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test1.csv -> done in 0.01s
2015-06-21 14:02:47,614 INFO 
2015-06-21 14:02:47,615 INFO --- 2. Running (timestamped): /home/guests/jir2004/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv
2015-06-21 14:02:47,626 INFO --- 2: /home/guests/jir2004/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv -> done in 0.01s
2015-06-21 14:02:47,628 INFO Done (3 steps run).
2015-06-21 14:16:00,147 INFO Running 3 steps with concurrence of 1...
2015-06-21 14:16:00,177 INFO 
2015-06-21 14:16:00,181 INFO --- 1. Running (timestamped): /home/guests/jir2004/workflow/output/backup/test2.csv <- /home/guests/jir2004/workflow/test2.csv
2015-06-21 14:16:00,216 INFO --- 1: /home/guests/jir2004/workflow/output/backup/test2.csv <- /home/guests/jir2004/workflow/test2.csv -> done in 0.03s
2015-06-21 14:16:00,221 INFO 
2015-06-21 14:16:00,222 INFO --- 3. Running (timestamped): /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test2.csv
2015-06-21 14:16:00,244 INFO --- 3: /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test2.csv -> done in 0.02s
2015-06-21 14:16:00,246 INFO 
2015-06-21 14:16:00,246 INFO --- 5. Running (timestamped): /home/guests/jir2004/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv
2015-06-21 14:16:00,258 INFO --- 5: /home/guests/jir2004/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv -> done in 0.01s
2015-06-21 14:16:00,259 INFO Done (3 steps run).
2015-06-21 14:16:15,961 INFO Running 6 steps with concurrence of 1...
2015-06-21 14:16:15,992 INFO 
2015-06-21 14:16:15,993 INFO --- 0. Running (timestamped): /home/guests/jir2004/workflow/output/backup/test1.csv <- /home/guests/jir2004/workflow/test1.csv
2015-06-21 14:16:16,016 INFO --- 0: /home/guests/jir2004/workflow/output/backup/test1.csv <- /home/guests/jir2004/workflow/test1.csv -> done in 0.02s
2015-06-21 14:16:16,021 INFO 
2015-06-21 14:16:16,022 INFO --- 1. Running (timestamped): /home/guests/jir2004/workflow/output/backup/test2.csv <- /home/guests/jir2004/workflow/test2.csv
2015-06-21 14:16:16,035 INFO --- 1: /home/guests/jir2004/workflow/output/backup/test2.csv <- /home/guests/jir2004/workflow/test2.csv -> done in 0.01s
2015-06-21 14:16:16,037 INFO 
2015-06-21 14:16:16,037 INFO --- 2. Running (timestamped): /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test1.csv
2015-06-21 14:16:16,051 INFO --- 2: /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test1.csv -> done in 0.01s
2015-06-21 14:16:16,053 INFO 
2015-06-21 14:16:16,053 INFO --- 3. Running (timestamped): /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test2.csv
2015-06-21 14:16:16,066 INFO --- 3: /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test2.csv -> done in 0.01s
2015-06-21 14:16:16,068 INFO 
2015-06-21 14:16:16,069 INFO --- 4. Running (timestamped): /home/guests/jir2004/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv
2015-06-21 14:16:16,080 INFO --- 4: /home/guests/jir2004/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv -> done in 0.01s
2015-06-21 14:16:16,082 INFO 
2015-06-21 14:16:16,082 INFO --- 5. Running (timestamped): /home/guests/jir2004/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv
2015-06-21 14:16:16,160 INFO --- 5: /home/guests/jir2004/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv -> done in 0.08s
2015-06-21 14:16:16,163 INFO Done (6 steps run).

example/example3/drake.min.yml  view on Meta::CPAN

---
plugins:
    - Drake
global:
    - indir: data/raw
    - outdir: data/processed
    - file_rule: (.csv)$
    - min: 1
rules:
    - backup:
        local:
            - INPUT: "{$self->indir}/{$sample}.csv"
            - OUTPUT: "{$self->outdir}/{$sample}.csv"
            - thing: "other thing"
        process: |
            cp $INPUT $OUTPUT
    - grep_VARA:
        local:
            - OUTPUT: "{$self->outdir}/{$sample}.grep_VARA.csv"
        process: |

example/example3/drake.yml  view on Meta::CPAN

---
plugins:
    - Drake
global:
    - indir: data/raw
    - outdir: data/processed
    - file_rule: (.csv)$
rules:
    - backup:
        local:
            - INPUT: "{$self->indir}/{$sample}.csv"
            - OUTPUT: "{$self->outdir}/{$sample}.csv"
            - thing: "other thing"
        process: |
            cp $INPUT $OUTPUT
    - grep_VARA:
        local:
            - OUTPUT: "{$self->outdir}/{$sample}.grep_VARA.csv"
        process: |

example/example3/workflow.full.drake  view on Meta::CPAN

;

;
; Samples: test1, test2
;
;
; Starting Workflow
;

;
; Starting backup
;



;
; Variables 
; Indir: /home/guests/jir2004/workflow
; Outdir: /home/guests/jir2004/workflow/output/backup
; Local Variables:
;	INPUT: {$self->indir}/{$sample}.csv
;	OUTPUT: {$self->outdir}/{$sample}.csv
;	thing: other thing
;

/home/guests/jir2004/workflow/output/backup/test1.csv <- /home/guests/jir2004/workflow/test1.csv
	cp $INPUT $OUTPUT

/home/guests/jir2004/workflow/output/backup/test2.csv <- /home/guests/jir2004/workflow/test2.csv
	cp $INPUT $OUTPUT


;
; Ending backup
;


;
; Starting grep_VARA
;



;
; Variables 
; Indir: /home/guests/jir2004/workflow/output/backup
; Outdir: /home/guests/jir2004/workflow/output/grep_vara
; Local Variables:
;	OUTPUT: {$self->outdir}/{$sample}.grep_VARA.csv
;	INPUT: {$self->indir}/{$sample}.csv
;

/home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test1.csv
	echo "Working on /home/guests/jir2004/workflow/output/backup/test1csv"
	grep -i "VARA" /home/guests/jir2004/workflow/output/backup/test1.csv >> /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv \
	|| touch /home/guests/jir2004/workflow/output/grep_vara/test1.grep_VARA.csv

/home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/test2.csv
	echo "Working on /home/guests/jir2004/workflow/output/backup/test2csv"
	grep -i "VARA" /home/guests/jir2004/workflow/output/backup/test2.csv >> /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv \
	|| touch /home/guests/jir2004/workflow/output/grep_vara/test2.grep_VARA.csv


;
; Ending grep_VARA
;


;
; Starting grep_VARB

example/example3/workflow.min.drake  view on Meta::CPAN

;

;
; Samples: test1, test2
;
;
; Starting Workflow
;

;
; Starting backup
;



;
; Variables 
; Indir: /home/guests/jir2004/workflow
; Outdir: /home/guests/jir2004/workflow/output/backup
; Local Variables:
;	INPUT: {$self->indir}/{$sample}.csv
;	OUTPUT: {$self->outdir}/{$sample}.csv
;	thing: other thing
;

/home/guests/jir2004/workflow/output/backup/$[SAMPLE].csv <- /home/guests/jir2004/workflow/$[SAMPLE].csv
	cp $INPUT $OUTPUT


;
; Ending backup
;


;
; Starting grep_VARA
;



;
; Variables 
; Indir: /home/guests/jir2004/workflow/output/backup
; Outdir: /home/guests/jir2004/workflow/output/grep_vara
; Local Variables:
;	OUTPUT: {$self->outdir}/{$sample}.grep_VARA.csv
;	INPUT: {$self->indir}/{$sample}.csv
;

/home/guests/jir2004/workflow/output/grep_vara/$[SAMPLE].grep_VARA.csv <- /home/guests/jir2004/workflow/output/backup/$[SAMPLE].csv
	echo "Working on /home/guests/jir2004/workflow/output/backup/$SAMPLEcsv"
	grep -i "VARA" /home/guests/jir2004/workflow/output/backup/$SAMPLE.csv >> /home/guests/jir2004/workflow/output/grep_vara/$SAMPLE.grep_VARA.csv \
	|| touch /home/guests/jir2004/workflow/output/grep_vara/$SAMPLE.grep_VARA.csv


;
; Ending grep_VARA
;


;
; Starting grep_VARB

lib/BioX/Workflow/Example002.pod  view on Meta::CPAN


Create the /home/user/workflow/workflow.yml

 yaml
     ---
     global:
         - indir: /home/user/workflow/workflow
         - outdir: /home/user/workflow/workflow/output
         - file_rule: (.*).csv$
     rules:
         - backup:
             process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
         - grep_VARA:
             process: |
                 echo "Working on {$self->{indir}}/{$sample}.csv"
                 grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
         - grep_VARB:
             process: |
                 grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv


lib/BioX/Workflow/Example002.pod  view on Meta::CPAN

     biox-workflow.pl --workflow workflow.yml > workflow.sh



=head2 Look at the directory structure

    /home/user/workflow/
        test1.csv
        test2.csv
        /output
            /backup
            /grep_vara
            /grep_varb


=head2 Run the workflow

Assuming you saved your output to workflow.sh, running ./workflow.sh will give you the following.

 yaml
     /home/user/workflow/
         test1.csv
         test2.csv
         /output
             /backup
                 test1.csv
                 test2.csv
             /grep_vara
                 test1.grep_VARA.csv
                 test2.grep_VARA.csv
             /grep_varb
                 test1.grep_VARA.grep_VARB.csv
                 test2.grep_VARA.grep_VARB.csv


lib/BioX/Workflow/Example002.pod  view on Meta::CPAN

     #   --workflow      workflow.yml
     #


If --verbose is enabled, and it is by default, you'll see some variables printed out for your benefit

 bash
     #
     # Variables
     # Indir: /home/user/workflow
     # Outdir: /home/user/workflow/output/backup
     # Samples: test1    test2
     #


Here is our first rule, named backup. As you can see, our $self->outdir is automatically named 'backup', relative to the globally defined outdir.

```bash
    #
    # Starting backup
    #

    cp /home/user/workflow/test1.csv /home/user/workflow/output/backup/test1.csv
    cp /home/user/workflow/test2.csv /home/user/workflow/output/backup/test2.csv
    
    wait
    
    #
    # Ending backup
    #

```

Notice the 'wait' command. If running your outputted workflow through any of the HPC::Runner scripts, the wait signals to wait until all previous processes have ended before beginning the next one.

Basically, wait builds a linear dependency tree.

For instance, if running this as

lib/BioX/Workflow/Example003.pod  view on Meta::CPAN


 yaml
     ---
     global:
         - indir: /home/user/workflow/workflow/input
         - outdir: /home/user/workflow/workflow/output
         - file_rule: (.*)
         - find_by_dir: 1
         - by_sample_outdir: 1
     rules:
         - backup:
             process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
         - grep_VARA:
             process: |
                 echo "Working on {$self->{indir}}/{$sample}.csv"
                 grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
         - grep_VARB:
             process: |
                 grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv


lib/BioX/Workflow/Example003.pod  view on Meta::CPAN



=head2 Look at the directory structure

 bash
     /home/user/workflow/input
         test1/test1.csv
         test2/test2.csv
         /output
             /test1
                 /backup
                 /grep_vara
                 /grep_varb
             /test2
                 /backup
                 /grep_vara
                 /grep_varb



=head2 Run the workflow

Assuming you saved your output to workflow.sh, running ./workflow.sh will give you the following.

 yaml
     /home/user/workflow/input
         test1/test1.csv
         test2/test2.csv
         /output
             /test1
                 /backup
                     test1.csv
                 /grep_vara
                     test1.grep_VARA.csv
                 /grep_varb
                     test1.grep_VARA.grep_VARB.csv
             /test2
                 /backup
                     test2.csv
                 /grep_vara
                     test2.grep_VARA.csv
                 /grep_varb
                     test2.grep_VARA.grep_VARB.csv

lib/BioX/Workflow/SpecialVariables.pod  view on Meta::CPAN

code. These are special variables that are also used in Drake. Please see L<BioX::Workflow::Plugin::Drake|https://metacpan.org/pod/BioX::Workflow::Plugin::Drake>
for more details.

 yaml
     ---
     global:
         - ROOT: /home/user/workflow
         - indir: "{$self->ROOT}"
         - outdir: "{$self->indir}/output"
     rules:
         - backup:
             local:
                 - INPUT: "{$self->indir}/{$sample}.in"
                 - OUTPUT: "{$self->outdir}/{$sample}.out"

lib/BioX/Workflow/Usage.pod  view on Meta::CPAN

variables contained in your rules.

    ---
    global:
        - indir: /home/user/example-workflow
        - outdir: /home/user/example-workflow/gemini-wrapper
        - file_rule: (.vcf)$|(.vcf.gz)$
        - some_variable: "{$self->indir}/file_to_keep_handy"
        - ext: txt
    rules:
        - backup:
            local:
                - ext: "backup"
            process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.{$self->ext}.csv
        - rule2:
            process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.{$self->ext}.csv

=head2 Rules

Rules are processed in the order they appear.

Before any rules are processed, the samples are found first. These are grepped using File::Basename, the indir, and the file_rule variable. The
default is to get rid of everything after the final '.'.

lib/BioX/Workflow/Usage.pod  view on Meta::CPAN

By default your process is evaluated as

    foreach my $sample (@{$self->samples}){
        #Get the value from the process key.
    }

If instead you would like to use the infiles, or some other random process that has nothing to do with your samples, you can override the process
template. Make sure to use the previously defined $OUT. For more information see the L<Text::Template> man page.

    rules:
        - backup:
            outdir: "{$self->ROOT}/datafiles"
            override_process: 1
            process: |
                $OUT .= wget {$self->some_globally_defined_parameter}
                {
                foreach my $infile (@{$self->infiles}){
                    $OUT .= "dostuff $infile";
                }
                }

lib/BioX/Workflow/Usage.pod  view on Meta::CPAN

code. These are special variables that are also used in Drake. Please see L<BioX::Workflow::Plugin::Drake|https://metacpan.org/pod/BioX::Workflow::Plugin::Drake>
for more details.

 yaml
     ---
     global:
         - ROOT: /home/user/workflow
         - indir: "{$self->ROOT}"
         - outdir: "{$self->indir}/output"
     rules:
         - backup:
             local:
                 - INPUT: "{$self->indir}/{$sample}.in"
                 - OUTPUT: "{$self->outdir}/{$sample}.out"

=encoding utf8


=head1 Example001

Here is a very simple example that searches a directory for *.csv files and creates an outdir /home/user/workflow/output if one doesn't exist.

lib/BioX/Workflow/Usage.pod  view on Meta::CPAN


Create the /home/user/workflow/workflow.yml

 yaml
     ---
     global:
         - indir: /home/user/workflow/workflow
         - outdir: /home/user/workflow/workflow/output
         - file_rule: (.*).csv$
     rules:
         - backup:
             process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
         - grep_VARA:
             process: |
                 echo "Working on {$self->{indir}}/{$sample}.csv"
                 grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
         - grep_VARB:
             process: |
                 grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv


lib/BioX/Workflow/Usage.pod  view on Meta::CPAN

     biox-workflow.pl --workflow workflow.yml > workflow.sh



=head2 Look at the directory structure

    /home/user/workflow/
        test1.csv
        test2.csv
        /output
            /backup
            /grep_vara
            /grep_varb


=head2 Run the workflow

Assuming you saved your output to workflow.sh, running ./workflow.sh will give you the following.

 yaml
     /home/user/workflow/
         test1.csv
         test2.csv
         /output
             /backup
                 test1.csv
                 test2.csv
             /grep_vara
                 test1.grep_VARA.csv
                 test2.grep_VARA.csv
             /grep_varb
                 test1.grep_VARA.grep_VARB.csv
                 test2.grep_VARA.grep_VARB.csv


lib/BioX/Workflow/Usage.pod  view on Meta::CPAN

     #   --workflow      workflow.yml
     #


If --verbose is enabled, and it is by default, you'll see some variables printed out for your benefit

 bash
     #
     # Variables
     # Indir: /home/user/workflow
     # Outdir: /home/user/workflow/output/backup
     # Samples: test1    test2
     #


Here is our first rule, named backup. As you can see, our $self->outdir is automatically named 'backup', relative to the globally defined outdir.

```bash
    #
    # Starting backup
    #

    cp /home/user/workflow/test1.csv /home/user/workflow/output/backup/test1.csv
    cp /home/user/workflow/test2.csv /home/user/workflow/output/backup/test2.csv
    
    wait
    
    #
    # Ending backup
    #

```

Notice the 'wait' command. If running your outputted workflow through any of the HPC::Runner scripts, the wait signals to wait until all previous processes have ended before beginning the next one.

Basically, wait builds a linear dependency tree.

For instance, if running this as

lib/BioX/Workflow/Usage.pod  view on Meta::CPAN


 yaml
     ---
     global:
         - indir: /home/user/workflow/workflow/input
         - outdir: /home/user/workflow/workflow/output
         - file_rule: (.*)
         - find_by_dir: 1
         - by_sample_outdir: 1
     rules:
         - backup:
             process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
         - grep_VARA:
             process: |
                 echo "Working on {$self->{indir}}/{$sample}.csv"
                 grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
         - grep_VARB:
             process: |
                 grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv


lib/BioX/Workflow/Usage.pod  view on Meta::CPAN



=head2 Look at the directory structure

 bash
     /home/user/workflow/input
         test1/test1.csv
         test2/test2.csv
         /output
             /test1
                 /backup
                 /grep_vara
                 /grep_varb
             /test2
                 /backup
                 /grep_vara
                 /grep_varb



=head2 Run the workflow

Assuming you saved your output to workflow.sh, running ./workflow.sh will give you the following.

 yaml
     /home/user/workflow/input
         test1/test1.csv
         test2/test2.csv
         /output
             /test1
                 /backup
                     test1.csv
                 /grep_vara
                     test1.grep_VARA.csv
                 /grep_varb
                     test1.grep_VARA.grep_VARB.csv
             /test2
                 /backup
                     test2.csv
                 /grep_vara
                     test2.grep_VARA.csv
                 /grep_varb
                     test2.grep_VARA.grep_VARB.csv


=head1 Acknowledgements

Before version 0.03

lib/BioX/Workflow/Workflow.pod  view on Meta::CPAN

variables contained in your rules.

    ---
    global:
        - indir: /home/user/example-workflow
        - outdir: /home/user/example-workflow/gemini-wrapper
        - file_rule: (.vcf)$|(.vcf.gz)$
        - some_variable: "{$self->indir}/file_to_keep_handy"
        - ext: txt
    rules:
        - backup:
            local:
                - ext: "backup"
            process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.{$self->ext}.csv
        - rule2:
            process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.{$self->ext}.csv

=head2 Rules

Rules are processed in the order they appear.

Before any rules are processed, the samples are found first. These are grepped using File::Basename, the indir, and the file_rule variable. The
default is to get rid of everything after the final '.'.

lib/BioX/Workflow/Workflow.pod  view on Meta::CPAN

By default your process is evaluated as

    foreach my $sample (@{$self->samples}){
        #Get the value from the process key.
    }

If instead you would like to use the infiles, or some other random process that has nothing to do with your samples, you can override the process
template. Make sure to use the previously defined $OUT. For more information see the L<Text::Template> man page.

    rules:
        - backup:
            outdir: "{$self->ROOT}/datafiles"
            override_process: 1
            process: |
                $OUT .= wget {$self->some_globally_defined_parameter}
                {
                foreach my $infile (@{$self->infiles}){
                    $OUT .= "dostuff $infile";
                }
                }

t/example/test001.sh  view on Meta::CPAN

#	override_process: 0
#	rule_based: 1
#	verbose: 1
#	create_outdir: 1
#	file_rule: (.*).csv$
#

#
#

# Starting backup
#



#
# Variables 
# Indir: $Bin/example/data/raw/test001
# Outdir: $Bin/example/data/processed/test001/backup
#

cp $Bin/example/data/raw/test001/sample1.csv $Bin/example/data/processed/test001/backup/sample1.csv

cp $Bin/example/data/raw/test001/sample2.csv $Bin/example/data/processed/test001/backup/sample2.csv

cp $Bin/example/data/raw/test001/sample3.csv $Bin/example/data/processed/test001/backup/sample3.csv

cp $Bin/example/data/raw/test001/sample4.csv $Bin/example/data/processed/test001/backup/sample4.csv

cp $Bin/example/data/raw/test001/sample5.csv $Bin/example/data/processed/test001/backup/sample5.csv


wait

#
# Ending backup
#


#
#

# Starting grep_VARA
#



#
# Variables 
# Indir: $Bin/example/data/processed/test001/backup
# Outdir: $Bin/example/data/processed/test001/grep_VARA
#

echo "Working on $Bin/example/data/processed/test001/backup/sample1.csv"
grep -i "VARA" $Bin/example/data/processed/test001/backup/sample1.csv >> $Bin/example/data/processed/test001/grep_VARA/sample1.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test001/backup/sample2.csv"
grep -i "VARA" $Bin/example/data/processed/test001/backup/sample2.csv >> $Bin/example/data/processed/test001/grep_VARA/sample2.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test001/backup/sample3.csv"
grep -i "VARA" $Bin/example/data/processed/test001/backup/sample3.csv >> $Bin/example/data/processed/test001/grep_VARA/sample3.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test001/backup/sample4.csv"
grep -i "VARA" $Bin/example/data/processed/test001/backup/sample4.csv >> $Bin/example/data/processed/test001/grep_VARA/sample4.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test001/backup/sample5.csv"
grep -i "VARA" $Bin/example/data/processed/test001/backup/sample5.csv >> $Bin/example/data/processed/test001/grep_VARA/sample5.grep_VARA.csv



wait

#
# Ending grep_VARA
#


t/example/test001.yml  view on Meta::CPAN

---
global:
    - indir: t/example/data/raw/test001
    - outdir: t/example/data/processed/test001
    - file_rule: (.*).csv$
rules:
    - backup:
        process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
    - grep_VARA:
        process: |
            echo "Working on {$self->{indir}}/{$sample}.csv"
            grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
    - grep_VARB:
        process: |
            grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv

t/example/test002.sh  view on Meta::CPAN

#	verbose: 1
#	create_outdir: 1
#	file_rule: (sample.*)$
#	by_sample_outdir: 1
#	find_by_dir: 1
#

#
#

# Starting backup
#



#
# Variables 
# Indir: $Bin/example/data/raw/test002
# Outdir: $Bin/example/data/processed/test002/backup
#

cp $Bin/example/data/raw/test002/sample1/sample1.csv $Bin/example/data/processed/test002/sample1/backup/sample1.csv

cp $Bin/example/data/raw/test002/sample2/sample2.csv $Bin/example/data/processed/test002/sample2/backup/sample2.csv

cp $Bin/example/data/raw/test002/sample3/sample3.csv $Bin/example/data/processed/test002/sample3/backup/sample3.csv

cp $Bin/example/data/raw/test002/sample4/sample4.csv $Bin/example/data/processed/test002/sample4/backup/sample4.csv

cp $Bin/example/data/raw/test002/sample5/sample5.csv $Bin/example/data/processed/test002/sample5/backup/sample5.csv


wait

#
# Ending backup
#


#
#

# Starting grep_VARA
#



#
# Variables 
# Indir: $Bin/example/data/processed/test002/backup
# Outdir: $Bin/example/data/processed/test002/grep_VARA
#

echo "Working on $Bin/example/data/processed/test002/sample1/backup/sample1.csv"
grep -i "VARA" $Bin/example/data/processed/test002/sample1/backup/sample1.csv >> $Bin/example/data/processed/test002/sample1/grep_VARA/sample1.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test002/sample2/backup/sample2.csv"
grep -i "VARA" $Bin/example/data/processed/test002/sample2/backup/sample2.csv >> $Bin/example/data/processed/test002/sample2/grep_VARA/sample2.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test002/sample3/backup/sample3.csv"
grep -i "VARA" $Bin/example/data/processed/test002/sample3/backup/sample3.csv >> $Bin/example/data/processed/test002/sample3/grep_VARA/sample3.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test002/sample4/backup/sample4.csv"
grep -i "VARA" $Bin/example/data/processed/test002/sample4/backup/sample4.csv >> $Bin/example/data/processed/test002/sample4/grep_VARA/sample4.grep_VARA.csv


echo "Working on $Bin/example/data/processed/test002/sample5/backup/sample5.csv"
grep -i "VARA" $Bin/example/data/processed/test002/sample5/backup/sample5.csv >> $Bin/example/data/processed/test002/sample5/grep_VARA/sample5.grep_VARA.csv



wait

#
# Ending grep_VARA
#


t/example/test002.yml  view on Meta::CPAN

---
global:
    - indir: t/example/data/raw/test002
    - outdir: t/example/data/processed/test002
    - file_rule: (sample.*)$
    - by_sample_outdir: 1
    - find_by_dir: 1
rules:
    - backup:
        process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
    - grep_VARA:
        process: |
            echo "Working on {$self->{indir}}/{$sample}.csv"
            grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
    - grep_VARB:
        process: |
            grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv

t/example/test003.sh  view on Meta::CPAN

#	ROOT: t/example/data/processed/test003
#	file_rule: (sample.*)$
#	by_sample_outdir: 1
#	find_by_dir: 1
#	LOCAL_VAR: This should be overwritten
#

#
#

# Starting backup
#



#
# Variables 
# Indir: $Bin/example/data/raw/test003
# Outdir: $Bin/example/data/processed/test003/backup
# Local Variables:
#	LOCAL_VAR: my_local_var
#	analysis_dir: {$self->ROOT}/analysis
#	outdir: $Bin/example/data/processed/test003/backup
#	indir: $Bin/example/data/raw/test003
#

echo "my_local_var" && \
echo $Bin/example/data/processed/test003/analysis && \
cp $Bin/example/data/raw/test003/sample1/sample1.csv $Bin/example/data/processed/test003/sample1/backup/sample1.csv


echo "my_local_var" && \
echo $Bin/example/data/processed/test003/analysis && \
cp $Bin/example/data/raw/test003/sample2/sample2.csv $Bin/example/data/processed/test003/sample2/backup/sample2.csv


echo "my_local_var" && \
echo $Bin/example/data/processed/test003/analysis && \
cp $Bin/example/data/raw/test003/sample3/sample3.csv $Bin/example/data/processed/test003/sample3/backup/sample3.csv


echo "my_local_var" && \
echo $Bin/example/data/processed/test003/analysis && \
cp $Bin/example/data/raw/test003/sample4/sample4.csv $Bin/example/data/processed/test003/sample4/backup/sample4.csv


echo "my_local_var" && \
echo $Bin/example/data/processed/test003/analysis && \
cp $Bin/example/data/raw/test003/sample5/sample5.csv $Bin/example/data/processed/test003/sample5/backup/sample5.csv



wait

#
# Ending backup
#


#
#

# Starting grep_VARA
#



#
# Variables 
# Indir: $Bin/example/data/processed/test003/backup
# Outdir: $Bin/example/data/processed/test003/grep_VARA
# Local Variables:
#	LOCAL_VAR: my_local_new_var
#	outdir: $Bin/example/data/processed/test003/grep_VARA
#	indir: $Bin/example/data/processed/test003/backup
#

echo "my_local_new_var" && \
echo "Working on $Bin/example/data/processed/test003/sample1/backup/sample1.csv"
grep -i "VARA" $Bin/example/data/processed/test003/sample1/backup/sample1.csv >> $Bin/example/data/processed/test003/sample1/grep_VARA/sample1.grep_VARA.csv


echo "my_local_new_var" && \
echo "Working on $Bin/example/data/processed/test003/sample2/backup/sample2.csv"
grep -i "VARA" $Bin/example/data/processed/test003/sample2/backup/sample2.csv >> $Bin/example/data/processed/test003/sample2/grep_VARA/sample2.grep_VARA.csv


echo "my_local_new_var" && \
echo "Working on $Bin/example/data/processed/test003/sample3/backup/sample3.csv"
grep -i "VARA" $Bin/example/data/processed/test003/sample3/backup/sample3.csv >> $Bin/example/data/processed/test003/sample3/grep_VARA/sample3.grep_VARA.csv


echo "my_local_new_var" && \
echo "Working on $Bin/example/data/processed/test003/sample4/backup/sample4.csv"
grep -i "VARA" $Bin/example/data/processed/test003/sample4/backup/sample4.csv >> $Bin/example/data/processed/test003/sample4/grep_VARA/sample4.grep_VARA.csv


echo "my_local_new_var" && \
echo "Working on $Bin/example/data/processed/test003/sample5/backup/sample5.csv"
grep -i "VARA" $Bin/example/data/processed/test003/sample5/backup/sample5.csv >> $Bin/example/data/processed/test003/sample5/grep_VARA/sample5.grep_VARA.csv



wait

#
# Ending grep_VARA
#


t/example/test003.yml  view on Meta::CPAN

---
# Workflow fixture for test003. This part defines the global variables and
# the `backup` rule.
global:
    - indir: t/example/data/raw/test003
    - outdir: t/example/data/processed/test003
    - ROOT: t/example/data/processed/test003
    - file_rule: (sample.*)$
    # NOTE(review): with the two flags below set, the expected output in
    # t/example/test003.sh uses per-sample paths such as
    # .../raw/test003/sample1/sample1.csv and .../test003/sample1/backup/.
    # Presumably find_by_dir drives the input layout and by_sample_outdir the
    # output layout -- confirm against the BioX::Workflow documentation.
    - by_sample_outdir: 1
    - find_by_dir: 1
    # Global default for LOCAL_VAR; the rules below set their own value in
    # their `local:` lists.
    - LOCAL_VAR: "This should be overwritten"
rules:
    - backup:
        local:
            - LOCAL_VAR: "my_local_var"
            # Built from the global ROOT; t/example/test003.sh shows it
            # rendered as <ROOT>/analysis.
            - analysis_dir: "{$self->ROOT}/analysis"
        process: |
            echo "{$self->LOCAL_VAR}" && \
            echo {$self->analysis_dir} && \
            cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
    - grep_VARA:
        local:
            - LOCAL_VAR: "my_local_new_var"

t/example/test004.sh  view on Meta::CPAN

#	override_process: 0
#	rule_based: 1
#	verbose: 1
#	create_outdir: 1
#	file_rule: (.csv)$
#

#
#

# Starting backup
#



#
# Variables 
# Indir: $Bin/example/data/raw/test004
# Outdir: $Bin/example/data/processed/test004/backup
#

cp $Bin/example/data/raw/test004/.csv.csv $Bin/example/data/processed/test004/backup/.csv.csv


wait

#
# Ending backup
#


#
#

# Starting grep_VARA
#



#
# Variables 
# Indir: $Bin/example/data/processed/test004/backup
# Outdir: $Bin/example/data/processed/test004/grep_VARA
#

echo "Working on $Bin/example/data/processed/test004/backup/.csv.csv"
grep -i "VARA" $Bin/example/data/processed/test004/backup/.csv.csv >> $Bin/example/data/processed/test004/grep_VARA/.csv.grep_VARA.csv



wait

#
# Ending grep_VARA
#


t/example/test004.yml  view on Meta::CPAN

---
# Workflow fixture for test004. Defines three rules: backup, grep_VARA and
# grep_VARB.
global:
    - indir: t/example/data/raw/test004
    - outdir: t/example/data/processed/test004
    # NOTE(review): the capture group here is `.csv` itself, and the expected
    # output in t/example/test004.sh renders {$sample} as ".csv" (giving
    # paths such as ".csv.csv"). Presumably deliberate for this fixture --
    # confirm before changing the pattern to something like (.*).csv$.
    - file_rule: (.csv)$
rules:
    - backup:
        process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
    - grep_VARA:
        process: |
            echo "Working on {$self->{indir}}/{$sample}.csv"
            grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
    - grep_VARB:
        process: |
            grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv

t/example/test005.sh  view on Meta::CPAN

#	override_process: 0
#	rule_based: 1
#	verbose: 1
#	create_outdir: 1
#	file_rule: (.*).csv
#

#
#

# Starting backup
#



#
# Variables 
# Indir: $Bin/example/data/raw/test005
# Outdir: $Bin/example/data/processed/test005/backup
#

cp $Bin/example/data/raw/test005/${SAMPLE}.csv $Bin/example/data/processed/test005/backup/${SAMPLE}.csv


wait

#
# Ending backup
#


#
#

# Starting grep_VARA
#



#
# Variables 
# Indir: $Bin/example/data/processed/test005/backup
# Outdir: $Bin/example/data/processed/test005/grep_VARA
#

echo "Working on $Bin/example/data/processed/test005/backup/${SAMPLE}.csv"
grep -i "VARA" $Bin/example/data/processed/test005/backup/${SAMPLE}.csv >> $Bin/example/data/processed/test005/grep_VARA/${SAMPLE}.grep_VARA.csv



wait

#
# Ending grep_VARA
#


t/example/test005.yml  view on Meta::CPAN

---
# Workflow fixture for test005. Defines three rules: backup, grep_VARA and
# grep_VARB, plus the `min` global.
global:
    - indir: t/example/data/raw/test005
    - outdir: t/example/data/processed/test005
    - file_rule: (.*).csv
    # NOTE(review): with min set, the expected output in
    # t/example/test005.sh contains a literal ${SAMPLE} placeholder where the
    # sample name would go. Presumably that is what this flag switches on --
    # confirm against the BioX::Workflow documentation.
    - min: 1
rules:
    - backup:
        process: cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv
    - grep_VARA:
        process: |
            echo "Working on {$self->{indir}}/{$sample}.csv"
            grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
    - grep_VARB:
        process: |
            grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv

t/lib/TestsFor/BioX/Workflow/Test001.pm  view on Meta::CPAN


    # Write the test001 workflow config to $Bin/example/test001.yml.
    # TODO: to keep or not to keep?
    #
    # Three-argument open keeps the interpolated path from being parsed for a
    # mode, and the check stops the test with the OS error instead of
    # printing to an unopened handle.
    open( my $fh, '>', "$Bin/example/test001.yml" )
        or die "Cannot open $Bin/example/test001.yml for writing: $!";
    print $fh <<EOF;
---
global:
    - indir: t/example/data/raw/test001
    - outdir: t/example/data/processed/test001
    - file_rule: (.*).csv\$
rules:
    - backup:
        process: cp {\$self->indir}/{\$sample}.csv {\$self->outdir}/{\$sample}.csv
    - grep_VARA:
        process: |
            echo "Working on {\$self->{indir}}/{\$sample}.csv"
            grep -i "VARA" {\$self->indir}/{\$sample}.csv >> {\$self->outdir}/{\$sample}.grep_VARA.csv
    - grep_VARB:
        process: |
            grep -i "VARB" {\$self->indir}/{\$sample}.grep_VARA.csv >> {\$self->outdir}/{\$sample}.grep_VARA.grep_VARB.csv
EOF

t/lib/TestsFor/BioX/Workflow/Test001.pm  view on Meta::CPAN

    # Expected process text for the grep_VARA rule. The heredoc is
    # single-quoted, so the template placeholders need no backslash escapes.
    my $cmd2 = <<'EOF';
echo "Working on {$self->{indir}}/{$sample}.csv"
grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv
EOF

    # Expected process text for the grep_VARB rule.
    my $cmd3 = <<'EOF';
grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv
EOF

    # One single-key hash per rule, in the order they are compared against
    # $process_got below.
    my $process_exp = [
        { backup    => { process => 'cp {$self->indir}/{$sample}.csv {$self->outdir}/{$sample}.csv' } },
        { grep_VARA => { process => $cmd2 } },
        { grep_VARB => { process => $cmd3 } },
    ];

    for ( my $i = 0; $i < @{$process_got}; $i++ ) {
        is_deeply( $process_got->[$i], $process_exp->[$i],

t/lib/TestsFor/BioX/Workflow/Test001.pm  view on Meta::CPAN


    #use Text::Diff;
    #my $diff = diff \$got,   \$expected;

    #diag("Diff is ".$diff);
    #return;

    # Compare the generated workflow text, then check the directories that
    # should now exist on disk. Every assertion carries a description so a
    # failure names the path that is missing.
    is( $got, $expected, "Got expected output!" );
    ok( -d "$Bin/example/data/processed/test001",
        "Output dir for test001 exists" );

    my @processes = qw(backup grep_VARA grep_VARB);

    # One directory per rule is expected under the test001 output dir.
    foreach my $process (@processes) {
        ok( -d "$Bin/example/data/processed/test001/$process",
            "Process $process dir exists" );
    }
}

1;

t/lib/TestsFor/BioX/Workflow/Test002.pm  view on Meta::CPAN

        $obj->write_workflow_meta('end');
    };
    #use Text::Diff;
    #my $diff = diff \$got,   \$expected;

    #diag("Diff is ".$diff);
    #return;
    # Compare the generated workflow text, then check the directories that
    # should now exist on disk.
    is($got, $expected, "Got expected output!" );
    ok(-d "$Bin/example/data/processed/test002", "Output dir for test002 exists");

    my @processes = qw(backup grep_VARA grep_VARB);

    # Each sample is expected to have its own directory for every rule.
    foreach my $sample (@{$obj->samples}){
        foreach my $process (@processes){
            ok(-d "$Bin/example/data/processed/test002/$sample/$process", "Sample $sample Process $process dir exists");
        }
    }
}

1;

t/lib/TestsFor/BioX/Workflow/Test003.pm  view on Meta::CPAN

    };
    #use Text::Diff;
    #my $diff = diff \$got,   \$expected;

    #diag("Diff is ".$diff);
    #return;

    # Compare the generated workflow text, then check that the output
    # directory exists; the description names the path on failure.
    is($got, $expected, "Got expected output!" );
    ok(-d "$Bin/example/data/processed/test003", "Output dir for test003 exists");

    #my @processes = qw(backup grep_VARA grep_VARB);

    #foreach my $sample (@{$obj->samples}){
        #foreach my $process (@processes){
            #ok(-d "$Bin/example/data/processed/test003/$sample/$process", "Sample $sample Process $process dir exists");
        #}
    #}
}

1;



( run in 2.120 seconds using v1.01-cache-2.11-cpan-49f99fa48dc )