view release on metacpan or search on metacpan
.github/workflows/docker.yml view on Meta::CPAN
lfs: true
- name: Checkout LFS objects
run: git lfs checkout
- name: Get release tag
  id: tag
  # Map the pushed ref to a Docker tag name: "master" becomes "latest",
  # dotted refs (x.y.z) become "release-<ref>", anything else is used as is.
  # The ref name is handed to the script through an environment variable and
  # quoted, instead of being expanded into the script text, so that a ref
  # containing shell metacharacters cannot inject commands.
  env:
    REF_NAME: ${{ github.ref_name }}
  run: |
    case "$REF_NAME" in
      master)
        echo "name=latest" >> "$GITHUB_OUTPUT" ;;
      *.*.*)
        echo "name=release-$REF_NAME" >> "$GITHUB_OUTPUT" ;;
      *)
        echo "name=$REF_NAME" >> "$GITHUB_OUTPUT" ;;
    esac
- name: Login to Dockerhub
  # Feed the token to docker on stdin rather than with -p, so the secret
  # never appears in the docker process arguments. Credentials reach the
  # script through environment variables instead of being expanded into
  # the script text.
  env:
    DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
    DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
  run: >
    printf '%s' "$DOCKERHUB_TOKEN" |
    docker login
    -u "$DOCKERHUB_USERNAME"
    --password-stdin
- name: Build image
run: >
lib/App/Sandy/Command/Expression/Add.pm view on Meta::CPAN
-H, --man full documentation
-v, --verbose print log messages
-s, --source expression-matrix source detail for database
=head1 DESCRIPTION
Add an expression-matrix to the database. A valid expression-matrix is a
file with two columns. The first column is for the seqid and the second
column is for the raw count. The counts will be treated as weights.
=head2 INPUT
A two-column, whitespace-separated file, where the first column is the
transcript id or the gene id, and the second column is the raw count.
===> my_custom_expression_matrix.txt
#feature count
ENST00000000233.9 2463
ENST00000000412.7 2494
ENST00000000442.10 275
ENST00000001008.5 5112
lib/App/Sandy/Command/Genome.pm view on Meta::CPAN
[default:"none"]
=head1 DESCRIPTION
This subcommand simulates genome sequencing reads taking into account the
quality-profile and the genome-variation patterns, along with: raffle
seed; coverage (depth); fragment mean and standard deviation; single-end
(long and short fragments) and paired-end sequencing type; bam, sam,
fastq.gz and fastq output formats and more.
=head2 INPUT
I<sandy genome> expects as argument a fasta file with chromosome sequences.
For example, L<the GENCODE human genome|https://www.gencodegenes.org/human/>
GRCh38.p13 fasta file.
=head2 OUTPUT
The output file generated will depend on the I<output-format> (fastq, bam),
on the I<join-paired-ends> option (mate read pairs into a single file) and
on the I<sequencing-type> (single-end, paired-end). A file with the simulated
coverage (${prefix}_coverage.tsv) for each chromosome (read counts) also
accompanies the output file.
=head1 OPTIONS
=over 8
lib/App/Sandy/Command/Quality/Add.pm view on Meta::CPAN
=head1 DESCRIPTION
Add a new quality-profile to the database. The profile will be generated
from the quality strings, which encode the phred-score in ASCII characters
from 0x21 to 0x7e (lowest and highest qualities). So the valid characters
are:
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
=head2 INPUT
The user must pass a file in fastq format or a file containing only the
ASCII-encoded phred-scores, as in this example:
===> my_qualities.txt
BECGF@F@DEBIDBE@DCC?HFH?BBB?H@FEEIFDCCECCCIGDIDI?@?CCC?AE?EC?F?@FB;<9<>9:599=>7:57614,30,440&"!***)#
@DCGIDBDECIHIG@FII?G?GCAD@BFECDCEF?H?GIHE?@GEECBCIHCABAFHDFAHBEBEB:5575678=75>657673-14,.113#"()#&)$
F?B@@DFAHIDD?EBFADICBFABCBBAHFCGF@@@?DEIAIEAFCEADC?B@IB?BIEABIBG@C<:;96<968:>::;778,+0203-3,#&'$$#&!
HAAAB@AGAEHC@CHE?EGI?@GFDFFAABDEBHBCDEAA?@IHEBCD@A@HDGFBA?@GHEGIE?5>;>8=75;5<6:<:76,.23-3141#("$"'%"
CDHC@ADAF?ED?GFFCFBDEE?BDACCEE??DA@?F@ABI@BHGIGFGBBDDBCHHEAIACC@GH<5577:><88;95>9:7///++24.2)"(#%&%$
lib/App/Sandy/Command/Transcriptome.pm view on Meta::CPAN
-f, --expression-matrix an expression-matrix entry from database
=head1 DESCRIPTION
This subcommand simulates transcriptome sequencing reads taking into account
the quality-profile and the expression-matrix weights, along with: raffle
seed; number of reads; fragment mean and standard deviation; single-end
(long and short fragments) and paired-end sequencing type; bam, sam,
fastq.gz and fastq output formats and more.
=head2 INPUT
I<sandy transcriptome> expects as argument a fasta file with transcript sequences.
For example, L<the GENCODE human genome|https://www.gencodegenes.org/human/>
transcript sequences and protein-coding transcript sequences.
=head2 OUTPUT
The output file generated will depend on the I<output-format> (fastq, bam),
on the I<join-paired-ends> option (mate read pairs into a single file) and
on the I<sequencing-type> (single-end, paired-end). One file with the simulated
abundance (${prefix}_abundance_transcripts.tsv) per transcript and one file with
the simulated abundance (${prefix}_abundance_genes.tsv) per gene (if the fasta
file used has the relationship between gene and its transcripts at the header)
will accompany the output file.
=head1 OPTIONS
lib/App/Sandy/Command/Variation/Add.pm view on Meta::CPAN
vcf columns SAMPLES from which the genotype
will be extracted
=head1 DESCRIPTION
Add genomic-variation to the database. A genomic-variation may be
represented by a genomic position (seqid, position), a reference
sequence at that position, an alternate sequence and a genotype
(homozygous or heterozygous).
=head2 INPUT
The input file may be a vcf or a custom genomic-variation file.
For vcf files, the user can point out the sample-name present in
vcf header and then its column will be used to extract the
genotype. If the user does not pass the option I<--sample-name>,
then the first sample will be used.
===> my_variations.vcf
##fileformat=VCFv4.3
...
lib/App/Sandy/Role/Digest.pm view on Meta::CPAN
requires qw/default_opt opt_spec rm_opt/;
our $VERSION = '0.25'; # VERSION
# Accepted values for the enumerated command-line options. Each constant
# yields an array reference holding the strings that option may take.
use constant {
    COUNT_LOOPS_BY_OPT  => [qw(coverage number-of-reads)],
    STRAND_BIAS_OPT     => [qw(random plus minus)],
    SEQID_WEIGHT_OPT    => [qw(length same count)],
    SEQUENCING_TYPE_OPT => [qw(single-end paired-end)],
    OUTPUT_FORMAT_OPT   => [qw(fastq fastq.gz sam bam)],
};
override 'opt_spec' => sub {
my $self = shift;
my @rm_opt = $self->rm_opt;
my %all_opt = (
'seed' => 'seed|s=i',
'prefix' => 'prefix|p=s',
'id' => 'id|I=s',
lib/App/Sandy/Role/Digest.pm view on Meta::CPAN
my ($self, $opts) = @_;
my $progname = $self->progname;
my %default_opt = $self->default_opt;
$self->fill_opts($opts, \%default_opt);
# Possible alternatives
my %STRAND_BIAS = map { $_ => 1 } @{ &STRAND_BIAS_OPT };
my %SEQID_WEIGHT = map { $_ => 1 } @{ &SEQID_WEIGHT_OPT };
my %SEQUENCING_TYPE = map { $_ => 1 } @{ &SEQUENCING_TYPE_OPT };
my %COUNT_LOOPS_BY = map { $_ => 1 } @{ &COUNT_LOOPS_BY_OPT };
my %OUTPUT_FORMAT = map { $_ => 1 } @{ &OUTPUT_FORMAT_OPT };
my %QUALITY_PROFILE = %{ $self->_quality_profile_report };
my %EXPRESSION_MATRIX = %{ $self->_expression_matrix_report };
my %STRUCTURAL_VARIATION = %{ $self->_genomic_variation_report };
# prefix
if ($opts->{prefix} =~ /([\/\\])/) {
die "Invalid character in 'prefix' option: $opts->{prefix} => '$1'\n";
}
# jobs > 0
lib/App/Sandy/Role/Digest.pm view on Meta::CPAN
if ($default_opt{'count-loops-by'} eq 'number-of-reads') {
if (not defined $opts->{'number-of-reads'}) {
die "The provider must define the 'number-of-reads' if count-loop-by = number-of-reads";
}
}
if (defined $opts->{'number-of-reads'} && $opts->{'number-of-reads'} <= 0) {
die "Option 'number-of-reads' requires a value greater than zero, not $opts->{'number-of-reads'}\n";
}
if (not exists $OUTPUT_FORMAT{$opts->{'output-format'}}) {
my $opt = join ', ' => keys %OUTPUT_FORMAT;
die "Option 'output-format' requires one of these arguments: $opt not $opts->{'output-format'}\n";
}
if ($opts->{'compression-level'} !~ /^[1-9]$/) {
die "Option 'compression-level' requires an integer between 1-9, not $opts->{'compression-level'}\n";
}
# seqid-weight (SEQID_WEIGHT_OPT)
if (not exists $SEQID_WEIGHT{$opts->{'seqid-weight'}}) {
my $opt = join ', ' => keys %SEQID_WEIGHT;
PUSH_MULTICALL_FLAGS|5.018000||Viu
PUSHn|5.006000|5.003007|
PUSHp|5.003007|5.003007|
PUSHs|5.003007|5.003007|
push_scope|5.003007|5.003007|u
PUSHSTACK|5.005000||Viu
PUSHSTACKi|5.005000||Viu
PUSHSTACK_INIT_HWM|5.027002||Viu
PUSHTARG|5.003007||Viu
PUSHu|5.004000|5.003007|p
PUTBACK|5.003007|5.003007|
putc|5.003007||Viu
put_charclass_bitmap_innards|5.021004||Viu
put_charclass_bitmap_innards_common|5.023008||Viu
put_charclass_bitmap_innards_invlist|5.023008||Viu
put_code_point|5.021004||Viu
putc_unlocked|5.003007||Viu
putenv|5.005000||Viu
put_range|5.019009||Viu
putw|5.003007||Viu
pv_display|5.006000|5.003007|p
{
dSP;
SV* sv = newSVpv(p, 0);
PUSHMARK(sp);
eval_sv(sv, G_SCALAR);
SvREFCNT_dec(sv);
SPAGAIN;
sv = POPs;
PUTBACK;
D_PPP_CROAK_IF_ERROR(croak_on_error);
return sv;
}
#endif
#endif
#if ! defined(vload_module) && defined(start_subparse)
t/lib/TestsFor/App/Sandy/Simulator.pm view on Meta::CPAN
SEQUENCING_TYPE => 'paired-end',
SEQUENCING_SYSTEM => 'poisson',
SEED => time,
JOBS => 2,
FORMAT => 'fastq',
SEQ_SYS => 'poisson',
QUALITY_SIZE => 10,
GENOME => '.data.fa',
GENOME_SIZE => 2280,
PREFIX => 'ponga',
OUTPUT_SINGLE_END => 'ponga_R1_001.fastq',
OUTPUT_PAIRED_END => ['ponga_R1_001.fastq', 'ponga_R2_001.fastq'],
OUTPUT_COUNTS => 'ponga_coverage.tsv'
};
sub startup : Tests(startup) {
my $test = shift;
$test->SUPER::startup;
my $class = ref $test;
$class->mk_classdata('default_attr');
$class->mk_classdata('default_sg_single_end');
$class->mk_classdata('default_sg_paired_end');
t/lib/TestsFor/App/Sandy/Simulator.pm view on Meta::CPAN
my %default_attr = %{ $test->default_attr };
while (my ($attr, $value) = each %default_attr) {
can_ok $sg, $attr;
is $sg->$attr, $value, "The value for $attr shold be correct";
}
}
sub run_simulation : Tests(9) {
my $test = shift;
my $output_single_end = OUTPUT_SINGLE_END;
my $output_paired_end = OUTPUT_PAIRED_END;
my $output_counts = OUTPUT_COUNTS;
my $fastq_count = sub {
my $file = shift;
my $entries = 0;
my %chr_acm;
open my $fh, "<" => $file;
my $mark = 4;
my $acm = 0;
while (<$fh>) {
chomp;
RNGContext *
App::Sandy::RNG::new (unsigned long seed)
PREINIT:
RNGContext *ctx;
CODE:
/* Constructor: allocate the context struct with Newx, create a GSL
 * generator of type gsl_rng_ranlxd2, seed it with the caller's value and
 * return the context. */
Newx (ctx, 1, RNGContext);
/* NOTE(review): the result of gsl_rng_alloc is not checked for NULL
 * before it is seeded -- confirm the GSL error handler makes that safe. */
ctx->rng = gsl_rng_alloc (gsl_rng_ranlxd2);
gsl_rng_set (ctx->rng, seed);
RETVAL = ctx;
OUTPUT:
RETVAL
void
DESTROY (RNGContext *ctx)
    CODE:
        /* Destructor: release the GSL generator and then the context.
         *
         * The generator was created with gsl_rng_alloc(), so it has to be
         * released with its documented counterpart gsl_rng_free(), which
         * frees all memory associated with the generator. Perl's Safefree()
         * is only the right call for the context struct itself, which was
         * obtained with Newx(). */
        if (ctx != NULL) {
            if (ctx->rng != NULL)
                gsl_rng_free (ctx->rng);
            Safefree (ctx);
        }
void
set (RNGContext *ctx, unsigned long seed)
CODE:
/* Re-seed the wrapped generator in place by forwarding seed to
 * gsl_rng_set(); nothing is returned to the caller. */
gsl_rng_set (ctx->rng, seed);
unsigned long
max (RNGContext *ctx)
CODE:
/* Return the value gsl_rng_max() reports for the wrapped generator. */
RETVAL = gsl_rng_max (ctx->rng);
OUTPUT:
RETVAL
unsigned long
min (RNGContext *ctx)
CODE:
/* Return the value gsl_rng_min() reports for the wrapped generator. */
RETVAL = gsl_rng_min (ctx->rng);
OUTPUT:
RETVAL
size_t
size (RNGContext *ctx)
CODE:
/* Return the value gsl_rng_size() reports for the wrapped generator. */
RETVAL = gsl_rng_size (ctx->rng);
OUTPUT:
RETVAL
char *
name (RNGContext *ctx)
CODE:
/* Return the string gsl_rng_name() gives for the wrapped generator.
 * The (char *) cast presumably drops a const qualifier so the value
 * fits the declared return type -- TODO confirm. */
RETVAL = (char *) gsl_rng_name (ctx->rng);
OUTPUT:
RETVAL
unsigned long
get (RNGContext *ctx)
CODE:
/* Draw the next raw value from the wrapped generator via gsl_rng_get(). */
RETVAL = gsl_rng_get (ctx->rng);
OUTPUT:
RETVAL
double
uniform (RNGContext *ctx)
CODE:
/* Draw the next double from the wrapped generator via gsl_rng_uniform(). */
RETVAL = gsl_rng_uniform (ctx->rng);
OUTPUT:
RETVAL
double
uniform_pos (RNGContext *ctx)
CODE:
/* Draw the next double from the wrapped generator via
 * gsl_rng_uniform_pos(). */
RETVAL = gsl_rng_uniform_pos (ctx->rng);
OUTPUT:
RETVAL
double
ran_gaussian (RNGContext *ctx, double sigma)
CODE:
/* Return one deviate from gsl_ran_gaussian(), using the wrapped generator
 * and the caller-supplied sigma. */
RETVAL = gsl_ran_gaussian (ctx->rng, sigma);
OUTPUT:
RETVAL
double
ran_gaussian_ratio_method (RNGContext *ctx, double sigma)
CODE:
/* Return one deviate from gsl_ran_gaussian_ratio_method(), using the
 * wrapped generator and the caller-supplied sigma. */
RETVAL = gsl_ran_gaussian_ratio_method (ctx->rng, sigma);
OUTPUT:
RETVAL
double
ran_gaussian_pdf (double x, double sigma)
CODE:
/* Forward x and sigma to gsl_ran_gaussian_pdf() and return its result.
 * No RNGContext is involved: this call does not touch generator state. */
RETVAL = gsl_ran_gaussian_pdf (x, sigma);
OUTPUT:
RETVAL
double
ran_ugaussian (RNGContext *ctx)
CODE:
/* Return one deviate from gsl_ran_ugaussian(), using the wrapped
 * generator; no sigma argument is taken. */
RETVAL = gsl_ran_ugaussian (ctx->rng);
OUTPUT:
RETVAL
double
ran_ugaussian_ratio_method (RNGContext *ctx)
CODE:
/* Return one deviate from gsl_ran_ugaussian_ratio_method(), using the
 * wrapped generator; no sigma argument is taken. */
RETVAL = gsl_ran_ugaussian_ratio_method (ctx->rng);
OUTPUT:
RETVAL
double
ran_ugaussian_pdf (double x)
CODE:
/* Forward x to gsl_ran_ugaussian_pdf() and return its result.
 * No RNGContext is involved: this call does not touch generator state. */
RETVAL = gsl_ran_ugaussian_pdf (x);
OUTPUT:
RETVAL
unsigned long
get_n (RNGContext *ctx, unsigned long n)
INIT:
/* n is unsigned, so this guard can only fire for n == 0; it prevents a
 * modulo-by-zero in the CODE section below. */
if (n <= 0)
croak ("n must be greater than zero");
CODE:
/* Reduce one raw gsl_rng_get() draw modulo n, giving a value in
 * [0, n - 1].
 * NOTE(review): a plain modulo is only exactly uniform when the number of
 * distinct raw values is a multiple of n -- confirm the bias is acceptable
 * for the n values used by callers. */
RETVAL = gsl_rng_get (ctx->rng) % n;
OUTPUT:
RETVAL
double
get_norm (RNGContext *ctx, double mean, double stdd)
CODE:
/* Return mean plus an integer offset: one gsl_ran_gaussian() deviate
 * (sigma = stdd) is shifted by 0.5 and converted to int before being
 * added, so the result differs from mean by a whole number.
 * NOTE(review): the (int) conversion truncates toward zero, so negative
 * deviates are not rounded to nearest the way positive ones are (for
 * example -0.7 + 0.5 becomes 0, not -1) -- confirm this is intended. */
RETVAL = mean + (int) (gsl_ran_gaussian (ctx->rng, stdd) + 0.5);
OUTPUT:
RETVAL
TYPEMAP
RNGContext* T_PTROBJ_RNG_CONTEXT
INPUT
T_PTROBJ_RNG_CONTEXT
$var = XS_STATE(RNGContext *, $arg);
OUTPUT
T_PTROBJ_RNG_CONTEXT
XS_STRUCT2OBJ($arg, "App::Sandy::RNG", $var);