Perl500503Syntax-OrDie
view release on metacpan or search on metacpan
t/corpus/JSON-LINQ/lib/JSON/LINQ.pm view on Meta::CPAN
$key = '' unless defined $key;
push @{$lookup{$key}}, $value;
});
return { %lookup };
}
# DefaultIfEmpty - yield the source elements, or one default value when the
# source turns out to be empty.
#
#   $default_value - element to yield for an empty source (optional;
#                    omitting it is the same as passing undef).
#
# Returns a new query object of the invocant's class, built from a closure
# over the invocant's iterator.  The closure returns undef once the source
# is exhausted, so an undef default cannot be told apart from
# end-of-sequence by whoever consumes the result.
sub DefaultIfEmpty {
    my $self     = shift;
    my $fallback = @_ ? $_[0] : undef;

    my $class  = ref($self);
    my $source = $self->iterator;

    # True until either a real element or the fallback has been handed out.
    my $fallback_pending = 1;

    return $class->new(sub {
        my $next = $source->();
        if (defined $next) {
            $fallback_pending = 0;
            return $next;
        }
        return undef unless $fallback_pending;
        $fallback_pending = 0;
        return $fallback;
    });
}
# ToJSON - write sequence as a JSON array file
# Each element is encoded with _json_encode.  Elements are separated by
# ",\n" and wrapped in "[\n" ... "\n]\n", so the file holds one JSON array.
#
#   $file - output file name, handed to _open_fh('>', ...).
#
# Returns 1 on success.  Dies if the handle cannot be closed cleanly:
# buffered write errors (full disk, I/O error) only surface at close time
# and would otherwise be reported as success.
sub ToJSON {
    my($self, $file) = @_;
    my $fhn = _open_fh('>', $file, 1);
    # $fhn is dereferenced as a glob below; 'no strict refs' permits that
    # when it is a symbolic name (presumably the case on old perls).
    { no strict 'refs'; print {*{$fhn}} "[\n" }
    my $first = 1;
    $self->ForEach(sub {
        my $record = shift;
        no strict 'refs';
        # The separator goes before every element except the first.
        print {*{$fhn}} ",\n" unless $first;
        $first = 0;
        print {*{$fhn}} _json_encode($record);
    });
    { no strict 'refs'; print {*{$fhn}} "\n]\n" }
    {
        no strict 'refs';
        close($fhn) or die "Cannot close '$file': $!";
    }
    return 1;
}
# ToJSONL - write sequence as a JSONL (JSON Lines) file
# Each element is encoded with _json_encode and written as one line,
# as soon as ForEach hands it to the callback.
#
#   $file - output file name, handed to _open_fh('>', ...).
#
# Returns 1 on success.  Dies if the handle cannot be closed cleanly:
# buffered write errors (full disk, I/O error) only surface at close time
# and would otherwise be reported as success.
sub ToJSONL {
    my($self, $file) = @_;
    my $fhn = _open_fh('>', $file, 1);
    $self->ForEach(sub {
        my $record = shift;
        # $fhn is dereferenced as a glob; 'no strict refs' permits that
        # when it is a symbolic name (presumably the case on old perls).
        no strict 'refs';
        print {*{$fhn}} _json_encode($record), "\n";
    });
    {
        no strict 'refs';
        close($fhn) or die "Cannot close '$file': $!";
    }
    return 1;
}
# ToLTSV - write sequence as an LTSV (Labeled Tab-Separated Values) file.
# Each element must be a HASH reference.
# Every TAB, CR, or LF character in a value is replaced by one space to
# keep the file structurally valid; undefined values are written as empty
# strings.  Labels are written as-is.  This method is provided so a
# JSON::LINQ pipeline can emit LTSV output without requiring LTSV::LINQ.
#
# Options (key => value pairs after $filename):
#   label_order => \@labels   emit only these labels in this order;
#                             labels not present in the record are skipped.
#   headers     => \@labels   alias for label_order (label_order wins when
#                             both are given).
#
# Without label_order/headers, all keys are emitted alphabetically.
#
# Returns 1 on success.  Dies if the handle cannot be closed cleanly:
# buffered write errors (full disk, I/O error) only surface at close time
# and would otherwise be reported as success.
sub ToLTSV {
    my($self, $file, %opt) = @_;
    # Resolve label_order / headers alias
    my $label_order = $opt{label_order} || $opt{headers} || undef;
    my $fhn = _open_fh('>', $file, 1);
    $self->ForEach(sub {
        my $record = shift;
        # LTSV: tab is the field separator; newline terminates the record.
        my @keys = $label_order
            ? grep { exists $record->{$_} } @$label_order
            : sort keys %$record;
        my $line = join("\t", map {
            my $v = defined($record->{$_}) ? $record->{$_} : '';
            # Sanitize the value so it cannot break the line structure.
            $v =~ s/[\t\n\r]/ /g;
            "$_:$v"
        } @keys);
        # $fhn is dereferenced as a glob; 'no strict refs' permits that
        # when it is a symbolic name (presumably the case on old perls).
        no strict 'refs';
        print {*{$fhn}} $line, "\n";
    });
    {
        no strict 'refs';
        close($fhn) or die "Cannot close '$file': $!";
    }
    return 1;
}
###############################################################################
# CSV Output
###############################################################################
# ToCSV - write the sequence as a CSV file.
t/corpus/JSON-LINQ/lib/JSON/LINQ.pm view on Meta::CPAN
->Join($depts,
sub { $_[0]{dept_id} },
sub { $_[0]{id} },
sub { { name => $_[0]{name}, dept => $_[1]{name} } })
->ToArray();
# JOIN an LTSV file (main) with a JSON file (sub-table)
my $prices = JSON::LINQ->FromJSON("prices.json");
my @priced = JSON::LINQ->FromLTSV("orders.ltsv")
->Join($prices,
sub { $_[0]{sku} },
sub { $_[0]{sku} },
sub { { order_id => $_[0]{id},
amount => $_[0]{qty} * $_[1]{price} } })
->ToArray();
# Boolean values
my $rec = { active => JSON::LINQ::true, count => 0 };
JSON::LINQ->From([$rec])->ToJSON("output.json");
# ToJSON encodes as: {"active":true,"count":0}
=head1 TABLE OF CONTENTS
=over 4
=item * L</DESCRIPTION>
=item * L</INCLUDED DOCUMENTATION> -- eg/ samples and doc/ cheat sheets
=item * L</METHODS> -- Complete method reference (67 methods)
=item * L</EXAMPLES> -- Practical examples
=item * L</FEATURES> -- Lazy evaluation, method chaining, DSL
=item * L</ARCHITECTURE> -- Iterator design, execution flow
=item * L</COMPATIBILITY> -- Perl 5.005+ support, pure Perl
=item * L</DIAGNOSTICS> -- Error messages
=item * L</LIMITATIONS AND KNOWN ISSUES>
=item * L</BUGS>
=item * L</SEE ALSO>
=back
=head1 DESCRIPTION
JSON::LINQ provides a LINQ-style query interface for JSON, JSONL
(JSON Lines), and LTSV (Labeled Tab-Separated Values) files. It is
the JSON counterpart of L<LTSV::LINQ>, sharing the same LINQ API and
adding JSON-specific I/O methods.
Key features:
=over 4
=item * B<Lazy evaluation> - O(1) memory for JSONL and LTSV streaming;
JSON arrays are loaded once then iterated lazily
=item * B<Method chaining> - Fluent, readable query composition
=item * B<DSL syntax> - Simple key-value filtering
=item * B<67 LINQ methods> - including JSON I/O (FromJSON, FromJSONL,
FromJSONString, ToJSON, ToJSONL), LTSV I/O (FromLTSV, ToLTSV),
CSV I/O (FromCSV, ToCSV), and all 60 methods from L<LTSV::LINQ>
=item * B<Pure Perl> - No XS dependencies
=item * B<Perl 5.005_03+> - Works on ancient and modern Perl
=item * B<Built-in JSON parser> - No CPAN JSON module required
=back
=head2 Supported Data Sources
=over 4
=item * B<FromJSON($file)> - JSON file containing a top-level array or object
=item * B<FromJSONL($file)> - JSONL file (one JSON value per line)
=item * B<FromJSONString($json)> - JSON string (array or object)
=item * B<FromLTSV($file)> - LTSV file (Labeled Tab-Separated Values)
=item * B<FromCSV($file)> - CSV file (Comma-Separated Values; also TSV via sep option)
=item * B<From(\@array)> - In-memory Perl array
=item * B<Range($start, $count)> - Integer sequence
=item * B<Empty()> - Empty sequence
=item * B<Repeat($element, $count)> - Repeated element
=back
=head2 What is JSONL?
JSONL (JSON Lines, also known as ndjson - newline-delimited JSON) is a
text format where each line is a valid JSON value (typically an object).
It is particularly suited for log files and streaming data because:
=over 4
=item * One record per line enables streaming with O(1) memory usage
=item * Compatible with standard Unix tools (grep, sed, awk)
=item * Easily appendable without rewriting the whole file
=item * Each line is independently parseable
=back
B<Format example:>
{"time":"2026-04-20T10:00:00","host":"192.0.2.1","status":200,"url":"/"}
{"time":"2026-04-20T10:00:01","host":"192.0.2.2","status":404,"url":"/missing"}
C<FromJSONL> reads these files lazily (one line at a time), matching the
memory efficiency of C<LTSV::LINQ>'s C<FromLTSV>.
=head2 What is LINQ?
LINQ (Language Integrated Query) is the Microsoft .NET query API.
This module brings the same LINQ interface to JSON data in Perl.
See L<LTSV::LINQ> for a detailed description of the LINQ design philosophy.
=head1 INCLUDED DOCUMENTATION
The C<eg/> directory contains sample programs:
eg/01_json_query.pl FromJSON/Where/Select/OrderByDescending/Distinct/ToLookup
eg/02_jsonl_query.pl FromJSONL streaming, GroupBy, aggregation, ToJSONL
eg/03_grouping.pl GroupBy, ToLookup, GroupJoin, SelectMany, Join
eg/04_sorting.pl OrderBy/ThenBy multi-key sort, OrderByNum vs OrderByStr
eg/05_json_ltsv_join.pl JOIN main JSON x sub-table LTSV
eg/06_ltsv_json_join.pl JOIN main LTSV x sub-table JSON
eg/07_csv_query.pl FromCSV/Where/Select/GroupBy/OrderByNum/ToCSV
eg/08_csv_json_join.pl JOIN main CSV x sub-table JSON, CSV to JSON conversion
The C<doc/> directory contains JSON::LINQ cheat sheets in 21 languages:
doc/json_linq_cheatsheet.EN.txt English
doc/json_linq_cheatsheet.JA.txt Japanese
doc/json_linq_cheatsheet.ZH.txt Chinese (Simplified)
doc/json_linq_cheatsheet.TW.txt Chinese (Traditional)
doc/json_linq_cheatsheet.KO.txt Korean
doc/json_linq_cheatsheet.FR.txt French
doc/json_linq_cheatsheet.ID.txt Indonesian
doc/json_linq_cheatsheet.VI.txt Vietnamese
doc/json_linq_cheatsheet.TH.txt Thai
doc/json_linq_cheatsheet.HI.txt Hindi
doc/json_linq_cheatsheet.BN.txt Bengali
doc/json_linq_cheatsheet.TR.txt Turkish
doc/json_linq_cheatsheet.MY.txt Burmese
doc/json_linq_cheatsheet.TL.txt Filipino
doc/json_linq_cheatsheet.KM.txt Khmer
doc/json_linq_cheatsheet.MN.txt Mongolian
doc/json_linq_cheatsheet.NE.txt Nepali
doc/json_linq_cheatsheet.SI.txt Sinhala
doc/json_linq_cheatsheet.UR.txt Urdu
doc/json_linq_cheatsheet.UZ.txt Uzbek
doc/json_linq_cheatsheet.BM.txt Malay
=head1 METHODS
=head2 Complete Method Reference
This module implements 67 LINQ methods organized into 15 categories.
In addition, C<true> and C<false> boolean accessor functions are provided.
=over 4
=item * B<Data Sources (9)>: From, FromJSON, FromJSONL, FromJSONString, FromLTSV, FromCSV, Range, Empty, Repeat
=item * B<Filtering (1)>: Where (with DSL)
=item * B<Projection (2)>: Select, SelectMany
=item * B<Concatenation (2)>: Concat, Zip
=item * B<Partitioning (4)>: Take, Skip, TakeWhile, SkipWhile
=item * B<Ordering (13)>: OrderBy, OrderByDescending, OrderByStr, OrderByStrDescending, OrderByNum, OrderByNumDescending, Reverse, ThenBy, ThenByDescending, ThenByStr, ThenByStrDescending, ThenByNum, ThenByNumDescending
=item * B<Grouping (1)>: GroupBy
=item * B<Set Operations (4)>: Distinct, Union, Intersect, Except
=item * B<Join (2)>: Join, GroupJoin
=item * B<Quantifiers (3)>: All, Any, Contains
=item * B<Comparison (1)>: SequenceEqual
=item * B<Element Access (8)>: First, FirstOrDefault, Last, LastOrDefault, Single, SingleOrDefault, ElementAt, ElementAtOrDefault
=item * B<Aggregation (7)>: Count, Sum, Min, Max, Average, AverageOrDefault, Aggregate
=item * B<Conversion (9)>: ToArray, ToList, ToDictionary, ToLookup, ToJSON, ToJSONL, ToLTSV, ToCSV, DefaultIfEmpty
=item * B<Utility (1)>: ForEach
=back
=head2 JSON-Specific Data Source Methods
=over 4
=item B<FromJSON($filename)>
Read a JSON file containing a top-level array of values. Each element of
the array becomes one item in the sequence.
my $q = JSON::LINQ->FromJSON("users.json");
If the file contains a single JSON object (not an array), it is treated
as a one-element sequence.
B<File format:>
[
{"name": "Alice", "age": 30},
{"name": "Bob", "age": 25}
]
The entire file is read into memory and parsed once. For large files,
consider JSONL format with C<FromJSONL> for streaming access.
B<Concurrent use (e.g. Join/GroupJoin):> On Perl 5.006 and later,
each call to C<FromJSON> uses a distinct numbered filehandle slot, so
multiple iterators may be open simultaneously without interference.
On Perl 5.005_03, a unique numbered package glob is used per call
(JSON::LINQ::FH::H1, JSON::LINQ::FH::H2, ...) to achieve the same safety.
=item B<FromJSONL($filename)>
Read a JSONL (JSON Lines) file. Each non-empty line is parsed as a
separate JSON value. Empty lines and lines starting with C<#> are skipped.
my $q = JSON::LINQ->FromJSONL("events.jsonl");
B<File format:>
{"event":"login","user":"alice","ts":1713600000}
{"event":"purchase","user":"alice","ts":1713600060,"amount":29.99}
{"event":"logout","user":"alice","ts":1713600120}
C<FromJSONL> reads lazily (one line at a time), providing O(1) memory
usage for arbitrarily large files.
Invalid JSON lines produce a warning and are skipped rather than
aborting the entire sequence.
B<Concurrent use (e.g. Join/GroupJoin):> On Perl 5.006 and later,
each call to C<FromJSONL> uses a distinct numbered filehandle slot, so
multiple iterators may be open simultaneously without interference.
On Perl 5.005_03, a unique numbered package glob is used per call
(JSON::LINQ::FH::H1, JSON::LINQ::FH::H2, ...) to achieve the same safety.
=item B<FromJSONString($json)>
Create a query from a JSON string. Accepts a JSON array (each element
becomes one sequence item) or a JSON object (single-element sequence).
my $q = JSON::LINQ->FromJSONString('[{"id":1},{"id":2}]');
my $q = JSON::LINQ->FromJSONString('{"id":1,"name":"Alice"}');
=back
=head2 LTSV Interoperability
To make it easy to JOIN JSON data with LTSV master/lookup tables (or vice
versa) without requiring L<LTSV::LINQ> to be installed, JSON::LINQ ships
with built-in LTSV I/O methods. The LTSV format is described at
L<https://ltsv.org/>.
=over 4
=item B<FromLTSV($filename)>
Read an LTSV (Labeled Tab-Separated Values) file. Each line is split on
TAB, and each field is split on the first colon to produce a label/value
pair. The result is a sequence of hash references.
my $q = JSON::LINQ->FromLTSV("departments.ltsv");
B<File format:>
t/corpus/JSON-LINQ/lib/JSON/LINQ.pm view on Meta::CPAN
=item B<ToCSV($filename)>
=item B<ToCSV($filename, sep =E<gt> $char)>
=item B<ToCSV($filename, headers =E<gt> \@cols)>
=item B<ToCSV($filename, label_order =E<gt> \@cols)>
=item B<ToCSV($filename, no_header =E<gt> 1)>
Write the sequence as a CSV file.
B<Options:>
=over 4
=item C<sep> - Field separator character (default: C<','>).
=item C<headers> - Array reference of column names that controls which keys
are written and in what order. Also serves as the header row.
=item C<label_order> - Alias for C<headers>.
=item C<no_header> - If true, suppress the header row entirely.
=back
$query->ToCSV("output.csv");
$query->ToCSV("output.tsv", sep => "\t");
$query->ToCSV("output.csv", headers => [qw(name age city)]);
When C<headers>/C<label_order> is not supplied and elements are HASH
references, column names are taken from the first record's keys in
alphabetical order.
=back
=head2 JSON-Specific Conversion Methods
=over 4
=item B<ToJSON($filename)>
Write the sequence as a JSON file containing a JSON array. Each element
is encoded as JSON. The output is a valid JSON array.
$query->ToJSON("output.json");
B<Output format:>
[
{"age":30,"name":"Alice"},
{"age":25,"name":"Bob"}
]
Hash keys are sorted alphabetically for deterministic output.
=item B<ToJSONL($filename)>
Write the sequence as a JSONL file. Each element is written as one line
of JSON. This is the streaming counterpart of C<ToJSON>.
$query->ToJSONL("output.jsonl");
B<Output format:>
{"age":30,"name":"Alice"}
{"age":25,"name":"Bob"}
=back
=head2 Boolean Values
JSON::LINQ provides boolean singleton objects compatible with JSON encoding:
JSON::LINQ::true # stringifies as "true", numifies as 1
JSON::LINQ::false # stringifies as "false", numifies as 0
Use these when creating data structures that will be serialised to JSON:
my $rec = { active => JSON::LINQ::true, count => 0 };
# ToJSON encodes as: {"active":true,"count":0}
When C<FromJSON> or C<FromJSONL> decodes a JSON C<true> or C<false>,
the result is a C<JSON::LINQ::Boolean> object that behaves as 1 or 0
in numeric and boolean context.
=head2 All Other Methods
All other LINQ methods share their interface with L<LTSV::LINQ> and behave
identically (LTSV::LINQ itself does not need to be installed). Please refer
to L<LTSV::LINQ> for complete documentation of:
Where, Select, SelectMany, Concat, Zip, Take, Skip, TakeWhile,
SkipWhile, OrderBy, OrderByDescending, OrderByStr, OrderByStrDescending,
OrderByNum, OrderByNumDescending, Reverse, ThenBy, ThenByDescending,
ThenByStr, ThenByStrDescending, ThenByNum, ThenByNumDescending, GroupBy,
Distinct, Union, Intersect, Except, Join, GroupJoin, All, Any, Contains,
SequenceEqual, First, FirstOrDefault, Last, LastOrDefault, Single,
SingleOrDefault, ElementAt, ElementAtOrDefault, Count, Sum, Min, Max,
Average, AverageOrDefault, Aggregate, ToArray, ToList, ToDictionary,
ToLookup, DefaultIfEmpty, ForEach.
=head1 EXAMPLES
=head2 Basic JSON File Query
use JSON::LINQ;
# users.json: [{"name":"Alice","age":30}, {"name":"Bob","age":25}, ...]
my @adults = JSON::LINQ->FromJSON("users.json")
->Where(sub { $_[0]{age} >= 18 })
->OrderBy(sub { $_[0]{name} })
->ToArray();
=head2 JSONL Streaming
# events.jsonl: one JSON object per line
my $error_count = JSON::LINQ->FromJSONL("events.jsonl")
->Count(sub { $_[0]{level} eq 'ERROR' });
JSON::LINQ->FromJSONL("events.jsonl")
t/corpus/JSON-LINQ/lib/JSON/LINQ.pm view on Meta::CPAN
JSON::LINQ->FromCSV("data.csv")
->Select(sub {
my $r = shift;
return { %$r, processed => JSON::LINQ::true };
})
->ToJSON("data.json");
=head2 In-Memory Array Query
my @data = (
{name => 'Alice', score => 95},
{name => 'Bob', score => 72},
{name => 'Carol', score => 88},
);
my @top = JSON::LINQ->From(\@data)
->Where(sub { $_[0]{score} >= 80 })
->OrderByDescending(sub { $_[0]{score} })
->ToArray();
=head1 FEATURES
=head2 Lazy Evaluation
C<FromJSONL> reads one line at a time. Combined with C<Where> and C<Take>,
only the needed records are ever in memory simultaneously.
C<FromJSON> reads the whole file once but then iterates the array lazily.
=head2 Built-in JSON Parser
JSON::LINQ contains its own JSON encoder/decoder (derived from mb::JSON 0.06).
No CPAN JSON module is required. The parser handles:
=over 4
=item * UTF-8 multibyte strings (output as-is, not \uXXXX-escaped)
=item * C<\uXXXX> escape sequences on input (converted to UTF-8)
=item * All JSON types: object, array, string, number, true, false, null
=item * Nested structures of arbitrary depth
=back
=head1 ARCHITECTURE
=head2 Relationship to LTSV::LINQ
JSON::LINQ and LTSV::LINQ are parallel modules sharing the same LINQ API.
LTSV::LINQ - LINQ for LTSV (Labeled Tab-Separated Values) files
JSON::LINQ - LINQ for JSON and JSONL files
On top of that shared API, JSON::LINQ provides the following I/O methods:
FromJSON($file) - read JSON array file
FromJSONL($file) - read JSONL file (streaming)
FromJSONString($json) - read JSON string
FromLTSV($file) - read LTSV file (streaming)
FromCSV($file) - read CSV file (streaming, RFC 4180)
ToJSON($file) - write JSON array file
ToJSONL($file) - write JSONL file
ToLTSV($file) - write LTSV file (streaming)
ToCSV($file) - write CSV file
C<FromLTSV>, C<ToLTSV>, C<FromCSV>, and C<ToCSV> are provided so a
JSON::LINQ pipeline can JOIN against (or emit into) LTSV and CSV files
without requiring LTSV::LINQ or CSV::LINQ to be installed.
The internal iterator architecture is identical: each operator returns a
new query object wrapping a closure.
=head2 Memory Characteristics
FromJSONL - O(1) per record: one line at a time
FromJSON - O(n): entire file loaded once, then lazy iteration
FromLTSV - O(1) per record: one line at a time
FromCSV - O(1) per record: one line at a time
ToJSON - O(1) per record: elements are written one at a time inside [ ... ]
ToJSONL - O(1) per record: streaming write
ToLTSV - O(1) per record: streaming write
ToCSV - O(n): entire sequence collected before writing header
=head1 COMPATIBILITY
=head2 Perl Version Support
Compatible with B<Perl 5.00503 and later>. See L<LTSV::LINQ> for the
full compatibility rationale (Universal Consensus 1998 / Perl 5.005_03).
=head2 Pure Perl Implementation
No XS dependencies. No CPAN module dependencies. Works on any Perl
installation with only the standard core.
=head2 JSON Limitations
The built-in parser has the same limitations as mb::JSON 0.06:
=over 4
=item * Surrogate pairs (C<\uD800>-C<\uDFFF>) are not supported
=item * Circular references in encoding cause infinite recursion
=item * Non-ARRAY/HASH references are stringified
=back
=head2 Iterator Protocol and JSON null
The internal iterator protocol uses C<undef> to signal end-of-sequence.
As a consequence, an C<undef> value (i.e. a decoded JSON C<null>) cannot
appear as a I<top-level element> of a sequence: it would be
indistinguishable from EOF and the sequence would be silently truncated
at that point.
This affects C<Select> in particular: a selector that returns C<undef>
for some elements will terminate the sequence early.
# JSON: [{"v":1},{"v":null},{"v":3}]
JSON::LINQ->FromJSON("data.json")
->Select(sub { $_[0]{v} })
->ToArray;
# returns (1) - sequence stops at the undef from the second record
C<Where> is unaffected when filtering hash records (the hashref itself
is the element, not its C<v> field), but a C<Select> that projects a
nullable field will be truncated at the first C<null>. Workarounds:
=over 4
=item * Project to a sentinel value: C<< Select(sub { defined $_[0]{v} ? $_[0]{v} : '' }) >>
=item * Wrap each element in a hashref so the element itself is never undef.
=back
C<DefaultIfEmpty(undef)> is similarly affected: a default of C<undef>
is silently lost. Use a non-undef sentinel (C<0>, C<''>, C<{}>) instead.
( run in 0.968 second using v1.01-cache-2.11-cpan-140bd7fdf52 )