Pheno-Ranker

 view release on metacpan or  search on metacpan

utils/bff_pxf_simulator/bff-pxf-simulator  view on Meta::CPAN

#use Data::Printer;
use Data::Dumper;
use Path::Tiny;
use List::Util 1.50 qw(head shuffle);
use JSON::XS;
use Data::Fake qw(Core Company Dates Names);
use FindBin    qw($Bin);
use lib $Bin;
use Ontologies
  qw($hpo_array $omim_array $rxnorm_array $ncit_procedures_array $ncit_exposures_array $ethnicity_array);

# Constructor: blesses the caller-supplied settings hashref into $class.
#
#   $class - package name to bless into
#   $self  - optional hashref of settings; run() reads the keys number,
#            format, output, random_seed and ext_ontologies from it
#
# Returns the blessed reference (the very same reference that was passed
# in, when one was given).
sub new {
    my ( $class, $self ) = @_;

    # bless() dies with "Can't bless non-reference value" on undef, so a
    # bare new() call gets an empty settings hash instead of crashing.
    $self = {} unless defined $self;

    return bless $self, $class;
}

# Generates the requested number of synthetic records and writes them to
# disk as a JSON array.
#
# Settings read from $self:
#   number         - how many records to generate
#   format         - 'bff' or 'pxf'; selects the generator sub
#   output         - path handed to write_json()
#   random_seed    - optional; passed to srand() when defined
#   ext_ontologies - optional; when true it is passed to validate_json()
#
# Side effect: stores the result of validate_json() (or undef) in
# $self->{ontologies_data}.
#
# Dies when format is missing or not one of the supported generators.
# Returns whatever write_json() returns.
sub run {
    my $self        = shift;
    my $number      = $self->{number};
    my $format      = $self->{format};
    my $output      = $self->{output};
    my $random_seed = $self->{random_seed};
    my %func        = (
        bff => \&bff_generator,
        pxf => \&pxf_generator
    );

    # Fail early with a readable message instead of the cryptic
    # "Can't use an undefined value as a subroutine reference"
    my $generator = defined $format ? $func{$format} : undef;
    if ( !$generator ) {
        my $shown = defined $format ? $format : 'undef';
        die "Unsupported format <$shown>; expected one of: "
          . join( ', ', sort keys %func ) . "\n";
    }

    # Set seed if defined
    srand($random_seed) if defined $random_seed;    # user can set it to 0

    # Load external ontologies file if present
    $self->{ontologies_data} =
      $self->{ext_ontologies}
      ? validate_json( $self->{ext_ontologies} )
      : undef;                                      # setter

    #########
    # START #
    #########

    # Start from an empty array so that zero requested records are still
    # serialized as a JSON array rather than an undefined value
    my $json_data = [];
    for my $i ( 1 .. $number ) {
        push @$json_data, $generator->( $i, $self );
    }

    #######
    # END #
    #######

    # Serialize the data and write
    return write_json( { filepath => $output, data => $json_data } );
}

# Serializes a data structure as pretty-printed, key-sorted JSON and writes
# it to a file.
#
#   $arg - hashref with:
#            filepath => destination path
#            data     => structure to serialize
#
# Returns 1. Errors raised by JSON::XS or Path::Tiny propagate to the caller.
sub write_json {
    my $arg       = shift;
    my $file      = $arg->{filepath};
    my $json_data = $arg->{data};

    # Note that canonical does NOT match the order of nsort from
    # Sort::Naturally
    my $json = JSON::XS->new->utf8->canonical->pretty->encode($json_data);

    # ->utf8 makes encode() return UTF-8 octets, so they must be written
    # verbatim. spew_utf8 would encode them a second time and corrupt
    # every non-ASCII character.
    path($file)->spew_raw($json);
    return 1;
}

# Builds one synthetic Phenopacket (PXF) record.
#
#   $id   - value appended to the phenopacket and subject identifiers
#   $self - generator object, handed through to run_functions()
#
# Returns the value produced by invoking the generator that fake_hash()
# builds from the record specification.
sub pxf_generator {
    my ( $id, $self ) = @_;

    # The helper and generator calls below are kept in the same order as
    # they always ran, so a fixed random seed yields the same records.
    my $generated  = run_functions($self);
    my $subject_id = 'IndividualId_' . $id;
    my $diseases   = $generated->{diseases};

    my %subject = (
        id  => $subject_id,
        age => {
            iso8601duration => fake_template( "P%dY", fake_int_mod( 1, 99 ) )
        },
        sex => fake_pick_mod( [ 'MALE', 'FEMALE' ] )
    );

    my %packet = (
        id                 => "Phenopacket_$id",
        subject            => \%subject,
        diseases           => $diseases,
        phenotypicFeatures => $generated->{phenotypicFeatures},
        interpretations    => pxf_interpretations( $diseases, $subject_id ),
        medicalActions     => merge_medical_actions($generated)
    );

    return fake_hash( \%packet )->();
}

# Derives one mock interpretation per disease entry.
#
#   $diseases   - arrayref of disease hashes, each read through its 'term'
#                 key; a false value is treated as an empty list
#   $subject_id - identifier stored as subjectOrBiosampleId
#
# Returns an arrayref of interpretation hashes. Identifiers are 1-based and
# progressStatus alternates SOLVED / UNSOLVED by position.
sub pxf_interpretations {
    my ( $diseases, $subject_id ) = @_;
    $diseases = [] unless $diseases;
    my @progress = qw(SOLVED UNSOLVED);

    my @records = map {
        my $ordinal = $_ + 1;

        my %gene = (
            valueId => 'HGNC:' . ( 1000 + $ordinal ),
            symbol  => 'GENE' . $ordinal
        );
        my %genomic = (
            subjectOrBiosampleId => $subject_id,
            interpretationStatus => 'CAUSATIVE',
            geneDescriptor       => \%gene
        );
        my %diagnosis = (
            disease                => $diseases->[$_]{term},
            genomicInterpretations => [ \%genomic ]
        );

        +{
            id             => "Interpretation_$ordinal",
            progressStatus => $progress[ $_ % @progress ],
            diagnosis      => \%diagnosis
        };
    } 0 .. $#{$diseases};

    return \@records;
}



( run in 1.157 second using v1.01-cache-2.11-cpan-524268b4103 )