Convert-Pheno

 view release on metacpan or  search on metacpan

lib/Convert/Pheno/IO/CSVHandler.pm  view on Meta::CPAN


    # Returning hashref
    return \%hash;
}

# Build the sorted list of column headers found in one or more row hashes.
#
# Arguments: $data - an array reference of hash references (rows), or a
#                    single hash reference, which is treated as one row.
# Returns:   an array reference with every distinct key seen in any row,
#            sorted with the default (string) sort. Keys whose value in a
#            given row is a reference are not collected from that row.
sub get_headers {
    my $data = shift;

    # Accept a lone row by wrapping it, so the loop below has one shape.
    my $rows = ref $data eq 'ARRAY' ? $data : [$data];

    my %seen;
    for my $row ( @{$rows} ) {

        # Keep only keys holding plain (non-reference) values. Reference
        # values are left out because pxf2csv once produced a header that
        # looked like HASH(foobarbaz); the root cause is still to be
        # investigated.
        my @plain_keys = grep { not ref( $row->{$_} ) } keys %{$row};

        # Register the keys; only their existence matters, not the value.
        @seen{@plain_keys} = ();
    }

    # Sorted so the header order is stable between runs.
    return [ sort keys %seen ];
}

# Precompute a lookup hash for every 'useHeaderAsTermLabel' list.
#
# For each top-level entry of $hash whose value is a hash reference that
# contains the key 'useHeaderAsTermLabel', the result of
# array_ref_to_hash() on that value is stored next to it under
# 'useHeaderAsTermLabel_hash'.
#
# Arguments: $hash - hash reference; it is modified in place.
# Returns:   1
# Dies:      via array_ref_to_hash() when a 'useHeaderAsTermLabel' value is
#            not an array reference.
sub remap_useHeaderAsTermLabel {
    my $hash = shift;

    # Iterate over the keys only. A bare %$hash in list context yields keys
    # AND values, so every hash-ref value was being stringified, used as a
    # key, and autovivified into a spurious 'HASH(0x...)' entry.
    for my $key ( keys %$hash ) {
        my $entry = $hash->{$key};

        # Only hash references can carry the setting; skipping anything else
        # also avoids a strict-refs failure on plain scalar values.
        next unless ref $entry eq 'HASH';
        next unless exists $entry->{useHeaderAsTermLabel};

        $entry->{useHeaderAsTermLabel_hash} =
          array_ref_to_hash( $entry->{useHeaderAsTermLabel} );
    }
    return 1;
}

# Turn a list of values into a lookup hash.
#
# Arguments: $array_ref - array reference of values to use as keys.
# Returns:   hash reference mapping each element to 1.
# Dies:      when $array_ref is not an array reference.
sub array_ref_to_hash {
    my $array_ref = shift;

    # Guard: anything but a plain array reference is a caller error.
    ref($array_ref) eq 'ARRAY'
      or die "Expected an array reference at <useHeaderAsTermLabel>";

    # Fill all keys at once with a hash slice; duplicates collapse naturally.
    my %lookup;
    @lookup{ @{$array_ref} } = (1) x @{$array_ref};

    return \%lookup;
}

sub convert_table_aoh_to_hoh {
    my ( $data, $table, $self ) = @_;

    my %table_cursor =
      map { $_ => $data->{$_} } qw(CONCEPT PERSON VISIT_OCCURRENCE);
    my %table_id = (
        CONCEPT          => 'concept_id',
        PERSON           => 'person_id',
        VISIT_OCCURRENCE => 'visit_occurrence_id'
    );
    my $array_ref = $table_cursor{$table};
    my $id        = $table_id{$table};

    ###########
    # CONCEPT #
    ###########

    # $VAR1 = [
    #          {
    #            'concept_class_id' => '4-char billing code',
    #            'concept_code' => 'K92.2',
    #            'concept_id' => 35208414,
    #            'concept_name' => 'Gastrointestinal hemorrhage, unspecified',
    #            'domain_id' => 'Condition',
    #            'invalid_reason' => undef,
    #            'standard_concept' => undef,
    #            'valid_end_date' => '2099-12-31',
    #            'valid_start_date' => '2007-01-01',
    #            'vocabulary_id' => 'ICD10CM'
    #          },
    #
    # and we convert it to hash to allow for quick searches by 'concept_id'
    #
    # $VAR1 = {
    #          '1107830' => {
    #                         'concept_class_id' => 'Ingredient',
    #                         'concept_code' => 28889,
    #                         'concept_id' => 1107830,
    #                         'concept_name' => 'Loratadine',
    #                         'domain_id' => 'Drug',
    #                         'invalid_reason' => undef,
    #                         'standard_concept' => 'S',
    #                         'valid_end_date' => '2099-12-31',
    #                         'valid_start_date' => '1970-01-01',
    #                         'vocabulary_id' => 'RxNorm'
    #                         },
    #
    # NB: We store all columns yet we'll use 4:
    # 'concept_id', 'concept_code', 'concept_name', 'vocabulary_id'

    ####################
    # VISIT_OCCURRENCE #
    ####################

    # Going from
    #$VAR1 = [
    #        {
    #          'admitting_source_concept_id' => 0,
    #          'visit_occurrence_id' => 85,
    #          ...
    #        }
    #      ];

    # To
    #$VAR1 = {
    #        '85' => {
    #                  'admitting_source_concept_id' => 0,
    #                  'visit_occurrence_id' => 85,
    #                  ...
    #                }
    #      };



( run in 0.650 second using v1.01-cache-2.11-cpan-39bf76dae61 )