Convert-Pheno
view release on metacpan or search on metacpan
lib/Convert/Pheno/Mapping/BFF/Individuals/Tabular.pm view on Meta::CPAN
sub source_value {
my ( $arg, $field ) = @_;
return exists $arg->{record}
? $arg->{record}->value($field)
: $arg->{participant}{$field};
}
sub raw_value {
my ( $arg, $field ) = @_;
return exists $arg->{record}
? $arg->{record}->raw_value($field)
: $arg->{participant}{$field};
}
sub field_note {
my ( $arg, $field ) = @_;
return exists $arg->{record}
? $arg->{record}->field_note($field)
: undef;
}
sub field_meta {
my ( $arg, $field ) = @_;
return exists $arg->{record}
? $arg->{record}->field_meta($field)
: undef;
}
sub source_columns_snapshot {
my ($arg) = @_;
return exists $arg->{record}
? $arg->{record}->columns_snapshot
: { %{ $arg->{participant} } };
}
sub remap_mapping_hash_term {
my ( $mapping_file_data, $term ) = @_;
my %hash_out = map {
$_ => exists $mapping_file_data->{$term}{$_}
? $mapping_file_data->{$term}{$_}
: undef
} (
qw/fields useHeaderAsTermLabel useHeaderAsTermLabel_hash fieldTermLabels valueTermLabels targetFields fieldRules terminology unit age drugDose drugUnit duration durationUnit dateOfProcedure bodySite ageOfOnset ageAtProcedure familyHistory vis...
);
$hash_out{ontology} =
exists $mapping_file_data->{$term}{ontology}
? $mapping_file_data->{$term}{ontology}
: $mapping_file_data->{project}{ontology};
$hash_out{routeOfAdministration} =
$mapping_file_data->{$term}{routeOfAdministration}
if $term eq 'treatments';
return \%hash_out;
}
sub resolve_field_query {
my ( $term_mapping_cursor, $field ) = @_;
return
exists $term_mapping_cursor->{terminology}{$field}
? $term_mapping_cursor->{terminology}{$field}
: exists $term_mapping_cursor->{fieldTermLabels}{$field}
? $term_mapping_cursor->{fieldTermLabels}{$field}
: $field;
}
sub resolve_value_query {
my ( $term_mapping_cursor, $value ) = @_;
return
exists $term_mapping_cursor->{terminology}{$value}
? $term_mapping_cursor->{terminology}{$value}
: exists $term_mapping_cursor->{valueTermLabels}{$value}
? $term_mapping_cursor->{valueTermLabels}{$value}
: $value;
}
sub resolve_term_query {
my ( $term_mapping_cursor, $field, $participant_field ) = @_;
return defined $term_mapping_cursor->{useHeaderAsTermLabel_hash}{$field}
? resolve_field_query( $term_mapping_cursor, $field )
: resolve_value_query( $term_mapping_cursor, $participant_field );
}
sub get_required_terms {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
return ( $data_mapping_file->{sex}{fields},
$data_mapping_file->{id}{targetFields}{primaryKey} );
}
sub propagate_fields {
my ( $id_field, $arg ) = @_;
my $participant = $arg->{participant};
my $self = $arg->{self};
my $data_mapping_file = $arg->{data_mapping_file};
my @propagate_fields =
@{ $data_mapping_file->{project}{baselineFieldsToPropagate} };
for my $field (@propagate_fields) {
$self->{baselineFieldsToPropagate}{ $participant->{$id_field} }{$field}
= $participant->{$field}
if defined $participant->{$field};
$participant->{$field} =
$self->{baselineFieldsToPropagate}{ $participant->{$id_field} }
{$field};
}
return 1;
}
sub map_diseases {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'diseases' );
$arg->{term_mapping_cursor} = $term_mapping_cursor;
for my $field ( @{ $term_mapping_cursor->{fields} } ) {
next unless defined raw_value( $arg, $field );
my $disease;
$disease->{ageOfOnset} =
exists $term_mapping_cursor->{ageOfOnset}{$field}
? map_age_range(
source_value( $arg, $term_mapping_cursor->{ageOfOnset}{$field} ) )
: $DEFAULT->{age};
my $disease_query =
resolve_term_query( $term_mapping_cursor, $field,
source_value( $arg, $field ) );
next unless defined $disease_query;
$disease->{diseaseCode} = map_ontology_term(
{
query => $disease_query,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
if ( exists $term_mapping_cursor->{familyHistory}{$field}
&& defined
raw_value( $arg, $term_mapping_cursor->{familyHistory}{$field} ) )
{
$disease->{familyHistory} = convert2boolean(
source_value( $arg, $term_mapping_cursor->{familyHistory}{$field} )
);
}
_add_visit( $disease, $arg );
$disease->{severity} = $DEFAULT->{ontology_term};
$disease->{stage} = $DEFAULT->{ontology_term};
push @{ $individual->{diseases} }, $disease;
}
return 1;
}
sub map_ethnicity {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $ethnicity_field = $data_mapping_file->{ethnicity}{fields};
if ( defined raw_value( $arg, $ethnicity_field ) ) {
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'ethnicity' );
$arg->{term_mapping_cursor} = $term_mapping_cursor;
my $ethnicity_query =
resolve_term_query( $term_mapping_cursor, $ethnicity_field,
source_value( $arg, $ethnicity_field ) );
$individual->{ethnicity} = map_ontology_term(
{
query => $ethnicity_query,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
}
return 1;
}
sub map_exposures {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'exposures' );
$arg->{term_mapping_cursor} = $term_mapping_cursor;
for my $field ( @{ $term_mapping_cursor->{fields} } ) {
next unless defined raw_value( $arg, $field );
next
if ( source_value( $arg, $field ) eq 'No'
|| source_value( $arg, $field ) eq 'False' );
my $exposure;
my $subkey_ageAtExposure =
( exists $term_mapping_cursor->{fieldRules}{$field}
&& defined $term_mapping_cursor->{fieldRules}{$field} )
? $term_mapping_cursor->{fieldRules}{$field}{ageAtExposure}
: undef;
$exposure->{ageAtExposure} =
defined $subkey_ageAtExposure
? map_age_range( source_value( $arg, $subkey_ageAtExposure ) )
: $DEFAULT->{age};
for my $item (qw/date duration/) {
$exposure->{$item} =
exists $term_mapping_cursor->{targetFields}{$item}
? source_value( $arg, $term_mapping_cursor->{targetFields}{$item} )
: $DEFAULT->{$item};
}
# Exposure codes come from the field/header concept (for example
# smoking -> Smoking), while fieldRules below map the recorded value
# (for example Never smoked -> Never Smoker).
my $exposure_query =
resolve_field_query( $term_mapping_cursor, $field );
$exposure->{exposureCode} = map_ontology_term(
{
query => $exposure_query,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
$exposure->{_info} = $field;
my $subkey =
( lc( $data_mapping_file->{project}{source} ) eq 'redcap'
&& exists $term_mapping_cursor->{fieldRules}{$field} )
? $field
: undef;
my $unit_query = defined $subkey
? $term_mapping_cursor->{fieldRules}{$field}{ source_value( $arg, $subkey ) }
: source_value( $arg, $field );
my $unit = map_ontology_term(
{
query => $unit_query,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
$exposure->{unit} = $unit;
$exposure->{value} =
looks_like_number( source_value( $arg, $field ) )
? source_value( $arg, $field )
: -1;
_add_visit( $exposure, $arg );
push @{ $individual->{exposures} }, $exposure
if defined $exposure->{exposureCode};
}
return 1;
}
sub map_info {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $source = $arg->{source};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'info' );
for my $field ( @{ $term_mapping_cursor->{fields} } ) {
next unless defined raw_value( $arg, $field );
$individual->{info}{$field} = source_value( $arg, $field );
my $meta = field_meta( $arg, $field );
if ( defined $meta && exists $meta->{'Field Label'} ) {
$individual->{info}{objects}{ $field . '_obj' } = {
value => dotify_and_coerce_number( source_value( $arg, $field ) ),
map { $_ => $meta->{$_} } @redcap_field_types
};
}
}
if ( exists $term_mapping_cursor->{targetFields}{age} ) {
my $age_range = map_age_range(
source_value( $arg, $term_mapping_cursor->{targetFields}{age} ) );
$individual->{info}{ageRange} = $age_range->{ageRange};
}
unless ( $self->{test} ) {
$individual->{info}{convertPheno} = $self->{convertPheno};
}
$individual->{info}{project}{$_} = $data_mapping_file->{project}{$_}
for (qw/id source ontology version description/);
my $output = $source eq 'redcap' ? 'REDCap' : 'CSV';
my $tmp_str = $output . '_columns';
$individual->{info}{$tmp_str} = source_columns_snapshot($arg);
return 1;
}
sub map_interventionsOrProcedures {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $source = $arg->{source};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file,
'interventionsOrProcedures' );
$arg->{term_mapping_cursor} = $term_mapping_cursor;
for my $field ( @{ $term_mapping_cursor->{fields} } ) {
next unless defined raw_value( $arg, $field );
my $intervention;
$intervention->{ageAtProcedure} =
exists $term_mapping_cursor->{ageAtProcedure}{$field}
? map_age_range(
source_value( $arg, $term_mapping_cursor->{ageAtProcedure}{$field} ) )
: $DEFAULT->{age};
$intervention->{bodySite} =
exists $term_mapping_cursor->{bodySite}{$field}
? map_ontology_term(
{
query => $term_mapping_cursor->{bodySite}{$field},
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
)
: $DEFAULT->{ontology_term};
$intervention->{dateOfProcedure} =
exists $term_mapping_cursor->{dateOfProcedure}{$field}
? convert_date_to_iso8601(
source_value( $arg, $term_mapping_cursor->{dateOfProcedure}{$field} ) )
: $DEFAULT->{date};
$intervention->{_info} = $field;
my $subkey =
exists $term_mapping_cursor->{fieldRules}{$field} ? $field : undef;
my $intervention_query =
defined $subkey
? $term_mapping_cursor->{fieldRules}{$subkey}{ source_value( $arg, $field ) }
: resolve_term_query(
$term_mapping_cursor, $field, source_value( $arg, $field ) );
$intervention->{procedureCode} = map_ontology_term(
{
query => $intervention_query,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
_add_visit( $intervention, $arg );
push @{ $individual->{interventionsOrProcedures} }, $intervention
if defined $intervention->{procedureCode};
}
return 1;
}
sub map_measures {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $source = $arg->{source};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'measures' );
$arg->{term_mapping_cursor} = $term_mapping_cursor;
for my $field ( @{ $term_mapping_cursor->{fields} } ) {
next unless defined raw_value( $arg, $field );
my $measure;
$measure->{assayCode} = map_ontology_term(
{
query =>
resolve_field_query( $term_mapping_cursor, $field ),
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self,
}
);
$measure->{date} = $DEFAULT->{date};
my ( $tmp_unit, $unit_cursor );
my $measure_value = raw_value( $arg, $field );
if ( lc($source) eq 'redcap' ) {
$tmp_unit = field_note( $arg, $field );
if ( $measure_value =~ m/ \- / ) {
my ( $tmp_val, $tmp_scale ) = split / \- /,
$measure_value;
$measure_value = $tmp_val;
$tmp_unit = $tmp_scale;
}
}
else {
$unit_cursor = $term_mapping_cursor->{unit}{$field};
$tmp_unit =
exists $unit_cursor->{label} ? $unit_cursor->{label} : undef;
}
my $unit = map_ontology_term(
{
query =>
resolve_value_query( $term_mapping_cursor, $tmp_unit ),
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
my $reference_range =
lc($source) eq 'csv' && exists $unit_cursor->{referenceRange}
? map_reference_range_csv( $unit, $unit_cursor->{referenceRange} )
: map_reference_range(
{
unit => $unit,
redcap_dict => $arg->{redcap_dict},
field => $field,
source => $source
}
);
$measure->{measurementValue} = {
quantity => {
unit => $unit,
value => dotify_and_coerce_number($measure_value),
referenceRange => $reference_range
}
};
if ( lc($source) eq 'redcap' ) {
my $meta = field_meta( $arg, $field ) || {};
$measure->{notes} = join ' /// ', $field,
( map { qq/$_=$meta->{$_}/ } @redcap_field_types );
}
$measure->{procedure} = {
procedureCode => map_ontology_term(
{
query => exists $unit_cursor->{procedureCodeLabel}
? $unit_cursor->{procedureCodeLabel}
: $field eq 'calprotectin' ? 'Feces'
: $field =~ m/^nancy/ ? 'Histologic'
: 'Blood Test Result',
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
)
};
_add_visit( $measure, $arg );
push @{ $individual->{measures} }, $measure
if defined $measure->{assayCode};
}
return 1;
}
sub map_pedigrees {
return 1;
}
sub map_phenotypicFeatures {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $source = $arg->{source};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'phenotypicFeatures' );
$arg->{term_mapping_cursor} = $term_mapping_cursor;
for my $field ( @{ $term_mapping_cursor->{fields} } ) {
my $phenotypicFeature;
next
unless ( defined raw_value( $arg, $field )
&& raw_value( $arg, $field ) ne '' );
$phenotypicFeature->{excluded_ori} =
dotify_and_coerce_number( raw_value( $arg, $field ) );
my $is_boolean = 0;
if ( looks_like_number( raw_value( $arg, $field ) ) ) {
$phenotypicFeature->{excluded} =
raw_value( $arg, $field ) ? JSON::XS::false : JSON::XS::true;
$is_boolean++;
}
else {
$phenotypicFeature->{excluded} = JSON::XS::false;
}
my $subkey =
exists $term_mapping_cursor->{fieldRules}{$field} ? $field : undef;
my $participant_field = $is_boolean ? $field : source_value( $arg, $field );
my $phenotypicFeature_query =
defined $subkey
? $term_mapping_cursor->{fieldRules}{$subkey}{$participant_field}
: resolve_term_query(
$term_mapping_cursor, $field, source_value( $arg, $field ) );
$phenotypicFeature->{featureType} = map_ontology_term(
{
query => $phenotypicFeature_query,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
$field =~ s/___\w+$// if $field =~ m/___\w+$/;
if ( lc($source) eq 'redcap' ) {
my $meta = field_meta( $arg, $field ) || {};
$phenotypicFeature->{notes} = join ' /// ',
( $field, map { qq/$_=$meta->{$_}/ } @redcap_field_types );
}
_add_visit( $phenotypicFeature, $arg );
push @{ $individual->{phenotypicFeatures} }, $phenotypicFeature
if defined $phenotypicFeature->{featureType};
}
return 1;
}
sub map_sex {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $project_ontology = $arg->{project_ontology};
my $sex_field = $data_mapping_file->{sex}{fields};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'sex' );
my $sex_query =
resolve_term_query(
$term_mapping_cursor, $sex_field, source_value( $arg, $sex_field ) );
$individual->{sex} = map_ontology_term(
{
query => $sex_query,
column => 'label',
ontology => $project_ontology,
self => $self
}
);
return 1;
}
sub map_treatments {
my $arg = shift;
my $data_mapping_file = $arg->{data_mapping_file};
my $participant = $arg->{participant};
my $self = $arg->{self};
my $individual = $arg->{individual};
my $term_mapping_cursor =
remap_mapping_hash_term( $data_mapping_file, 'treatments' );
$arg->{term_mapping_cursor} = $term_mapping_cursor;
for my $field ( @{ $term_mapping_cursor->{fields} } ) {
next unless defined raw_value( $arg, $field );
my $treatment;
my $treatment_name =
resolve_term_query(
$term_mapping_cursor, $field, source_value( $arg, $field ) );
$treatment->{ageAtOnset} = $DEFAULT->{age};
$treatment->{doseIntervals} = [];
my $dose_interval;
my $duration =
exists $term_mapping_cursor->{duration}{$field}
? $term_mapping_cursor->{duration}{$field}
: undef;
my $drug_dose =
exists $term_mapping_cursor->{drugDose}{$field}
? $term_mapping_cursor->{drugDose}{$field}
: undef;
my $duration_unit =
exists $term_mapping_cursor->{durationUnit}{$field}
? map_ontology_term(
{
query => $term_mapping_cursor->{durationUnit}{$field},
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
)
: $DEFAULT->{ontology_term};
if ( defined $duration ) {
my $duration_value =
defined source_value( $arg, $duration )
? dotify_and_coerce_number( source_value( $arg, $duration ) )
: -1;
my $drug_dose_value =
defined $drug_dose && defined source_value( $arg, $drug_dose )
? dotify_and_coerce_number( source_value( $arg, $drug_dose ) )
: -1;
$treatment->{cumulativeDose} = {
unit => $duration_unit,
value => $duration_value
};
my $drug_unit =
exists $term_mapping_cursor->{drugUnit}{$field}
? map_ontology_term(
{
query => $term_mapping_cursor->{drugUnit}{$field},
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
)
: $DEFAULT->{ontology_term};
$dose_interval->{interval} = $DEFAULT->{interval};
# Duration and amount are modeled separately. Keep the duration in
# cumulativeDose, and emit the actual administered amount from the
# dedicated dose field when the mapping provides one.
$dose_interval->{quantity}{value} = $drug_dose_value;
$dose_interval->{quantity}{unit} = $drug_unit;
$dose_interval->{quantity}{referenceRange} =
$DEFAULT->{referenceRange};
$dose_interval->{scheduleFrequency} = $DEFAULT->{ontology_term};
push @{ $treatment->{doseIntervals} }, $dose_interval;
}
my $route =
exists $term_mapping_cursor->{routeOfAdministration}
{ source_value( $arg, $field ) }
? $term_mapping_cursor->{routeOfAdministration}
{ source_value( $arg, $field ) }
: 'oral';
my $route_query = ucfirst($route) . ' Route of Administration';
$treatment->{_info} = {
field => $field,
value => source_value( $arg, $field ),
drug_name => $treatment_name,
route => $route
};
$treatment->{routeOfAdministration} = map_ontology_term(
{
query => $route_query,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
$treatment->{treatmentCode} = map_ontology_term(
{
query => $treatment_name,
column => 'label',
ontology => $term_mapping_cursor->{ontology},
self => $self
}
);
_add_visit( $treatment, $arg );
push @{ $individual->{treatments} }, $treatment
if defined $treatment->{treatmentCode};
}
return 1;
}
sub _add_visit {
my ( $item, $p ) = @_;
my $cursor = $p->{term_mapping_cursor}
or return;
my $vf = $cursor->{visitId}
or return;
my $visit_val = $p->{participant}{$vf};
$item->{_visit}{id} = dotify_and_coerce_number($visit_val);
my $pid = $p->{participant_id} // q{};
my $composite = join '.', grep { length } $pid, $visit_val;
my $self = $p->{self};
$item->{_visit}{composite} = $composite;
# Tabular imports synthesize visit ids from source labels. A cached
# surrogate integer is enough for referential integrity and much cheaper
# than reversible BigInt encoding.
$item->{_visit}{occurrence_id} = allocate_surrogate_integer(
$self,
'bff_visit_occurrence_id',
$composite
);
}
1;
( run in 1.834 second using v1.01-cache-2.11-cpan-39bf76dae61 )