Perl500503Syntax-OrDie

 view release on metacpan or  search on metacpan

t/corpus/JSON-LINQ/lib/JSON/LINQ.pm  view on Meta::CPAN

        $key = '' unless defined $key;
        push @{$lookup{$key}}, $value;
    });

    return { %lookup };
}

# DefaultIfEmpty - yield the source elements, or one default if there are none.
#
# Arguments:
#   $default_value - (optional) element produced when the source sequence
#                    turns out to be empty; undef when omitted.
#
# Returns a new query object of the same class as the invocant, wrapping
# an iterator closure.  An undef result from the source iterator is
# treated as end-of-sequence, so a default of undef cannot be told apart
# from end-of-sequence by whoever consumes the returned query.
sub DefaultIfEmpty {
    my $self = shift;

    # No argument supplied means the fallback element is undef.
    my $fallback = @_ ? $_[0] : undef;

    my $source           = $self->iterator;
    my $saw_item         = 0;
    my $fallback_emitted = 0;

    return ref($self)->new(sub {
        my $next = $source->();

        unless (defined $next) {
            # Source exhausted: hand out the fallback exactly once, and
            # only when the source never produced an element.
            return undef if $saw_item || $fallback_emitted;
            $fallback_emitted = 1;
            return $fallback;
        }

        $saw_item = 1;
        return $next;
    });
}

# ToJSON - write sequence as a JSON array file
#
# Arguments:
#   $file - output destination, handed to _open_fh with mode '>'.
#
# Output layout: "[\n", then the elements encoded by _json_encode and
# separated by ",\n", then "\n]\n".  Elements are written as ForEach
# delivers them; nothing is accumulated in this method.
#
# Returns 1 on success.  Dies if the output handle cannot be closed,
# because buffered write errors (e.g. a full disk) only surface at close
# time and would otherwise be reported as success.
sub ToJSON {
    my($self, $file) = @_;

    # $fhn is dereferenced as a glob below, which needs "no strict 'refs'"
    # (presumably a symbolic glob name for Perl 5.005_03 compatibility --
    # confirm against _open_fh).
    my $fhn = _open_fh('>', $file, 1);

    { no strict 'refs'; print {*{$fhn}} "[\n" }
    my $first = 1;
    $self->ForEach(sub {
        my $record = shift;
        no strict 'refs';
        # Separator goes before every element except the first, so the
        # array never ends with a trailing comma.
        print {*{$fhn}} ",\n" unless $first;
        $first = 0;
        print {*{$fhn}} _json_encode($record);
    });
    { no strict 'refs'; print {*{$fhn}} "\n]\n" }

    {
        no strict 'refs';
        close($fhn) or die "Cannot close '$file': $!";
    }
    return 1;
}

# ToJSONL - write sequence as a JSONL (JSON Lines) file
#
# Arguments:
#   $file - output destination, handed to _open_fh with mode '>'.
#
# Each element is encoded by _json_encode and written as one line,
# as ForEach delivers it; nothing is accumulated in this method.
#
# Returns 1 on success.  Dies if the output handle cannot be closed,
# because buffered write errors (e.g. a full disk) only surface at close
# time and would otherwise be reported as success.
sub ToJSONL {
    my($self, $file) = @_;

    # $fhn is dereferenced as a glob below, which needs "no strict 'refs'"
    # (presumably a symbolic glob name for Perl 5.005_03 compatibility --
    # confirm against _open_fh).
    my $fhn = _open_fh('>', $file, 1);

    $self->ForEach(sub {
        my $record = shift;
        no strict 'refs';
        print {*{$fhn}} _json_encode($record), "\n";
    });

    {
        no strict 'refs';
        close($fhn) or die "Cannot close '$file': $!";
    }
    return 1;
}

# ToLTSV - write sequence as an LTSV (Labeled Tab-Separated Values) file.
# Each element must be a HASH reference.
# Tab/CR/LF characters in values are each replaced by a space to keep the
# file structurally valid; undefined values are written as empty strings.
# Labels are written as-is.  This method is provided so a JSON::LINQ
# pipeline can emit LTSV output without requiring LTSV::LINQ.
#
# Arguments:
#   $file - output destination, handed to _open_fh with mode '>'.
#
# Options (key => value pairs after $file):
#   label_order => \@labels   emit only these labels in this order;
#                             labels not present in the record are skipped.
#   headers     => \@labels   alias for label_order (label_order wins when
#                             both are given).
#
# Without label_order/headers, all keys are emitted alphabetically.
#
# Returns 1 on success.  Dies if the output handle cannot be closed,
# because buffered write errors (e.g. a full disk) only surface at close
# time and would otherwise be reported as success.
sub ToLTSV {
    my($self, $file, %opt) = @_;

    # Resolve label_order / headers alias
    my $label_order = $opt{label_order} || $opt{headers} || undef;

    # $fhn is dereferenced as a glob below, which needs "no strict 'refs'"
    # (presumably a symbolic glob name for Perl 5.005_03 compatibility --
    # confirm against _open_fh).
    my $fhn = _open_fh('>', $file, 1);

    $self->ForEach(sub {
        my $record = shift;
        # LTSV spec: tab is the field separator; newline terminates the record.
        # Sanitize values to prevent structural corruption of the output file.
        my @keys = $label_order
            ? grep { exists $record->{$_} } @$label_order
            : sort keys %$record;
        my $line = join("\t", map {
            my $v = defined($record->{$_}) ? $record->{$_} : '';
            $v =~ s/[\t\n\r]/ /g;
            "$_:$v"
        } @keys);
        # Relax strict refs only from here on, so the record and
        # label_order dereferences above keep whatever strictness the
        # enclosing file has.
        no strict 'refs';
        print {*{$fhn}} $line, "\n";
    });

    {
        no strict 'refs';
        close($fhn) or die "Cannot close '$file': $!";
    }
    return 1;
}

###############################################################################
# CSV Output
###############################################################################

# ToCSV - write the sequence as a CSV file.

t/corpus/JSON-LINQ/lib/JSON/LINQ.pm  view on Meta::CPAN

      ->Join($depts,
          sub { $_[0]{dept_id} },
          sub { $_[0]{id}      },
          sub { { name => $_[0]{name}, dept => $_[1]{name} } })
      ->ToArray();

  # JOIN an LTSV file (main) with a JSON file (sub-table)
  my $prices = JSON::LINQ->FromJSON("prices.json");
  my @priced = JSON::LINQ->FromLTSV("orders.ltsv")
      ->Join($prices,
          sub { $_[0]{sku} },
          sub { $_[0]{sku} },
          sub { { order_id => $_[0]{id},
                  amount   => $_[0]{qty} * $_[1]{price} } })
      ->ToArray();

  # Boolean values
  my $rec = { active => JSON::LINQ::true, count => 0 };
  JSON::LINQ->From([$rec])->ToJSON("output.json");
  # ToJSON encodes as: {"active":true,"count":0}

=head1 TABLE OF CONTENTS

=over 4

=item * L</DESCRIPTION>

=item * L</INCLUDED DOCUMENTATION> -- eg/ samples and doc/ cheat sheets

=item * L</METHODS> -- Complete method reference (67 methods)

=item * L</EXAMPLES> -- Practical examples

=item * L</FEATURES> -- Lazy evaluation, method chaining, DSL

=item * L</ARCHITECTURE> -- Iterator design, execution flow

=item * L</COMPATIBILITY> -- Perl 5.005+ support, pure Perl

=item * L</DIAGNOSTICS> -- Error messages

=item * L</LIMITATIONS AND KNOWN ISSUES>

=item * L</BUGS>

=item * L</SEE ALSO>

=back

=head1 DESCRIPTION

JSON::LINQ provides a LINQ-style query interface for JSON, JSONL
(JSON Lines), and LTSV (Labeled Tab-Separated Values) files. It is
the JSON counterpart of L<LTSV::LINQ>, sharing the same LINQ API and
adding JSON-specific I/O methods.

Key features:

=over 4

=item * B<Lazy evaluation> - O(1) memory for JSONL and LTSV streaming;
JSON arrays are loaded once then iterated lazily

=item * B<Method chaining> - Fluent, readable query composition

=item * B<DSL syntax> - Simple key-value filtering

=item * B<67 LINQ methods> - including JSON I/O (FromJSON, FromJSONL,
FromJSONString, ToJSON, ToJSONL), LTSV I/O (FromLTSV, ToLTSV),
CSV I/O (FromCSV, ToCSV), and all 60 methods from L<LTSV::LINQ>

=item * B<Pure Perl> - No XS dependencies

=item * B<Perl 5.005_03+> - Works on ancient and modern Perl

=item * B<Built-in JSON parser> - No CPAN JSON module required

=back

=head2 Supported Data Sources

=over 4

=item * B<FromJSON($file)> - JSON file containing a top-level array or object

=item * B<FromJSONL($file)> - JSONL file (one JSON value per line)

=item * B<FromJSONString($json)> - JSON string (array or object)

=item * B<FromLTSV($file)> - LTSV file (Labeled Tab-Separated Values)

=item * B<FromCSV($file)> - CSV file (Comma-Separated Values; also TSV via sep option)

=item * B<From(\@array)> - In-memory Perl array

=item * B<Range($start, $count)> - Integer sequence

=item * B<Empty()> - Empty sequence

=item * B<Repeat($element, $count)> - Repeated element

=back

=head2 What is JSONL?

JSONL (JSON Lines, also known as ndjson - newline-delimited JSON) is a
text format where each line is a valid JSON value (typically an object).
It is particularly suited for log files and streaming data because:

=over 4

=item * One record per line enables streaming with O(1) memory usage

=item * Compatible with standard Unix tools (grep, sed, awk)

=item * Easily appendable without rewriting the whole file

=item * Each line is independently parseable

=back

B<Format example:>

  {"time":"2026-04-20T10:00:00","host":"192.0.2.1","status":200,"url":"/"}
  {"time":"2026-04-20T10:00:01","host":"192.0.2.2","status":404,"url":"/missing"}

C<FromJSONL> reads these files lazily (one line at a time), matching the
memory efficiency of C<LTSV::LINQ>'s C<FromLTSV>.

=head2 What is LINQ?

LINQ (Language Integrated Query) is the Microsoft .NET query API.
This module brings the same LINQ interface to JSON data in Perl.
See L<LTSV::LINQ> for a detailed description of the LINQ design philosophy.

=head1 INCLUDED DOCUMENTATION

The C<eg/> directory contains sample programs:

  eg/01_json_query.pl       FromJSON/Where/Select/OrderByDescending/Distinct/ToLookup
  eg/02_jsonl_query.pl      FromJSONL streaming, GroupBy, aggregation, ToJSONL
  eg/03_grouping.pl         GroupBy, ToLookup, GroupJoin, SelectMany, Join
  eg/04_sorting.pl          OrderBy/ThenBy multi-key sort, OrderByNum vs OrderByStr
  eg/05_json_ltsv_join.pl   JOIN main JSON x sub-table LTSV
  eg/06_ltsv_json_join.pl   JOIN main LTSV x sub-table JSON
  eg/07_csv_query.pl        FromCSV/Where/Select/GroupBy/OrderByNum/ToCSV
  eg/08_csv_json_join.pl    JOIN main CSV x sub-table JSON, CSV to JSON conversion

The C<doc/> directory contains JSON::LINQ cheat sheets in 21 languages:

  doc/json_linq_cheatsheet.EN.txt   English
  doc/json_linq_cheatsheet.JA.txt   Japanese
  doc/json_linq_cheatsheet.ZH.txt   Chinese (Simplified)
  doc/json_linq_cheatsheet.TW.txt   Chinese (Traditional)
  doc/json_linq_cheatsheet.KO.txt   Korean
  doc/json_linq_cheatsheet.FR.txt   French
  doc/json_linq_cheatsheet.ID.txt   Indonesian
  doc/json_linq_cheatsheet.VI.txt   Vietnamese
  doc/json_linq_cheatsheet.TH.txt   Thai
  doc/json_linq_cheatsheet.HI.txt   Hindi
  doc/json_linq_cheatsheet.BN.txt   Bengali
  doc/json_linq_cheatsheet.TR.txt   Turkish
  doc/json_linq_cheatsheet.MY.txt   Burmese
  doc/json_linq_cheatsheet.TL.txt   Filipino
  doc/json_linq_cheatsheet.KM.txt   Khmer
  doc/json_linq_cheatsheet.MN.txt   Mongolian
  doc/json_linq_cheatsheet.NE.txt   Nepali
  doc/json_linq_cheatsheet.SI.txt   Sinhala
  doc/json_linq_cheatsheet.UR.txt   Urdu
  doc/json_linq_cheatsheet.UZ.txt   Uzbek
  doc/json_linq_cheatsheet.BM.txt   Malay

=head1 METHODS

=head2 Complete Method Reference

This module implements 67 LINQ methods organized into 15 categories.
In addition, C<true> and C<false> boolean accessor functions are provided.

=over 4

=item * B<Data Sources (9)>: From, FromJSON, FromJSONL, FromJSONString, FromLTSV, FromCSV, Range, Empty, Repeat

=item * B<Filtering (1)>: Where (with DSL)

=item * B<Projection (2)>: Select, SelectMany

=item * B<Concatenation (2)>: Concat, Zip

=item * B<Partitioning (4)>: Take, Skip, TakeWhile, SkipWhile

=item * B<Ordering (13)>: OrderBy, OrderByDescending, OrderByStr, OrderByStrDescending, OrderByNum, OrderByNumDescending, Reverse, ThenBy, ThenByDescending, ThenByStr, ThenByStrDescending, ThenByNum, ThenByNumDescending

=item * B<Grouping (1)>: GroupBy

=item * B<Set Operations (4)>: Distinct, Union, Intersect, Except

=item * B<Join (2)>: Join, GroupJoin

=item * B<Quantifiers (3)>: All, Any, Contains

=item * B<Comparison (1)>: SequenceEqual

=item * B<Element Access (8)>: First, FirstOrDefault, Last, LastOrDefault, Single, SingleOrDefault, ElementAt, ElementAtOrDefault

=item * B<Aggregation (7)>: Count, Sum, Min, Max, Average, AverageOrDefault, Aggregate

=item * B<Conversion (9)>: ToArray, ToList, ToDictionary, ToLookup, ToJSON, ToJSONL, ToLTSV, ToCSV, DefaultIfEmpty

=item * B<Utility (1)>: ForEach

=back

=head2 JSON-Specific Data Source Methods

=over 4

=item B<FromJSON($filename)>

Read a JSON file containing a top-level array of values. Each element of
the array becomes one item in the sequence.

  my $q = JSON::LINQ->FromJSON("users.json");

If the file contains a single JSON object (not an array), it is treated
as a one-element sequence.

B<File format:>

  [
    {"name": "Alice", "age": 30},
    {"name": "Bob",   "age": 25}
  ]

The entire file is read into memory and parsed once. For large files,
consider JSONL format with C<FromJSONL> for streaming access.

B<Concurrent use (e.g. Join/GroupJoin):> On Perl 5.006 and later,
each call to C<FromJSON> uses a distinct numbered filehandle slot, so
multiple iterators may be open simultaneously without interference.
On Perl 5.005_03, a unique numbered package glob is used per call
(JSON::LINQ::FH::H1, JSON::LINQ::FH::H2, ...) to achieve the same safety.

=item B<FromJSONL($filename)>

Read a JSONL (JSON Lines) file. Each non-empty line is parsed as a
separate JSON value. Empty lines and lines starting with C<#> are skipped.

  my $q = JSON::LINQ->FromJSONL("events.jsonl");

B<File format:>

  {"event":"login","user":"alice","ts":1713600000}
  {"event":"purchase","user":"alice","ts":1713600060,"amount":29.99}
  {"event":"logout","user":"alice","ts":1713600120}

C<FromJSONL> reads lazily (one line at a time), providing O(1) memory
usage for arbitrarily large files.

Invalid JSON lines produce a warning and are skipped rather than
aborting the entire sequence.

B<Concurrent use (e.g. Join/GroupJoin):> On Perl 5.006 and later,
each call to C<FromJSONL> uses a distinct numbered filehandle slot, so
multiple iterators may be open simultaneously without interference.
On Perl 5.005_03, a unique numbered package glob is used per call
(JSON::LINQ::FH::H1, JSON::LINQ::FH::H2, ...) to achieve the same safety.

=item B<FromJSONString($json)>

Create a query from a JSON string. Accepts a JSON array (each element
becomes one sequence item) or a JSON object (single-element sequence).

  my $q = JSON::LINQ->FromJSONString('[{"id":1},{"id":2}]');
  my $q = JSON::LINQ->FromJSONString('{"id":1,"name":"Alice"}');

=back

=head2 LTSV Interoperability

To make it easy to JOIN JSON data with LTSV master/lookup tables (or vice
versa) without requiring L<LTSV::LINQ> to be installed, JSON::LINQ ships
with built-in LTSV I/O methods. The LTSV format is described at
L<https://ltsv.org/>.

=over 4

=item B<FromLTSV($filename)>

Read an LTSV (Labeled Tab-Separated Values) file. Each line is split on
TAB, and each field is split on the first colon to produce a label/value
pair. The result is a sequence of hash references.

  my $q = JSON::LINQ->FromLTSV("departments.ltsv");

B<File format:>

t/corpus/JSON-LINQ/lib/JSON/LINQ.pm  view on Meta::CPAN

=item B<ToCSV($filename)>

=item B<ToCSV($filename, sep =E<gt> $char)>

=item B<ToCSV($filename, headers =E<gt> \@cols)>

=item B<ToCSV($filename, label_order =E<gt> \@cols)>

=item B<ToCSV($filename, no_header =E<gt> 1)>

Write the sequence as a CSV file.

B<Options:>

=over 4

=item C<sep> - Field separator character (default: C<','>).

=item C<headers> - Array reference of column names that controls which keys
are written and in what order. Also serves as the header row.

=item C<label_order> - Alias for C<headers>.

=item C<no_header> - If true, suppress the header row entirely.

=back

  $query->ToCSV("output.csv");
  $query->ToCSV("output.tsv", sep => "\t");
  $query->ToCSV("output.csv", headers => [qw(name age city)]);

When C<headers>/C<label_order> is not supplied and elements are HASH
references, column names are taken from the first record's keys in
alphabetical order.

=back

=head2 JSON-Specific Conversion Methods

=over 4

=item B<ToJSON($filename)>

Write the sequence as a JSON file containing a JSON array. Each element
is encoded as JSON. The output is a valid JSON array.

  $query->ToJSON("output.json");

B<Output format:>

  [
  {"age":30,"name":"Alice"},
  {"age":25,"name":"Bob"}
  ]

Hash keys are sorted alphabetically for deterministic output.

=item B<ToJSONL($filename)>

Write the sequence as a JSONL file. Each element is written as one line
of JSON. This is the streaming counterpart of C<ToJSON>.

  $query->ToJSONL("output.jsonl");

B<Output format:>

  {"age":30,"name":"Alice"}
  {"age":25,"name":"Bob"}

=back

=head2 Boolean Values

JSON::LINQ provides boolean singleton objects compatible with JSON encoding:

  JSON::LINQ::true   # stringifies as "true",  numifies as 1
  JSON::LINQ::false  # stringifies as "false", numifies as 0

Use these when creating data structures that will be serialised to JSON:

  my $rec = { active => JSON::LINQ::true, count => 0 };
  # ToJSON encodes as: {"active":true,"count":0}

When C<FromJSON> or C<FromJSONL> decode a JSON C<true> or C<false>,
the result is a C<JSON::LINQ::Boolean> object that behaves as 1 or 0
in numeric and boolean context.

=head2 All Other Methods

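All other LINQ methods share their interface with L<LTSV::LINQ> and behave
identically. They are implemented within JSON::LINQ itself, so L<LTSV::LINQ>
does not need to be installed. Please refer to L<LTSV::LINQ> for complete
documentation of: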

Where, Select, SelectMany, Concat, Zip, Take, Skip, TakeWhile,
SkipWhile, OrderBy, OrderByDescending, OrderByStr, OrderByStrDescending,
OrderByNum, OrderByNumDescending, Reverse, ThenBy, ThenByDescending,
ThenByStr, ThenByStrDescending, ThenByNum, ThenByNumDescending, GroupBy,
Distinct, Union, Intersect, Except, Join, GroupJoin, All, Any, Contains,
SequenceEqual, First, FirstOrDefault, Last, LastOrDefault, Single,
SingleOrDefault, ElementAt, ElementAtOrDefault, Count, Sum, Min, Max,
Average, AverageOrDefault, Aggregate, ToArray, ToList, ToDictionary,
ToLookup, DefaultIfEmpty, ForEach.

=head1 EXAMPLES

=head2 Basic JSON File Query

  use JSON::LINQ;

  # users.json: [{"name":"Alice","age":30}, {"name":"Bob","age":25}, ...]
  my @adults = JSON::LINQ->FromJSON("users.json")
      ->Where(sub { $_[0]{age} >= 18 })
      ->OrderBy(sub { $_[0]{name} })
      ->ToArray();

=head2 JSONL Streaming

  # events.jsonl: one JSON object per line
  my $error_count = JSON::LINQ->FromJSONL("events.jsonl")
      ->Count(sub { $_[0]{level} eq 'ERROR' });

  JSON::LINQ->FromJSONL("events.jsonl")

t/corpus/JSON-LINQ/lib/JSON/LINQ.pm  view on Meta::CPAN


  JSON::LINQ->FromCSV("data.csv")
      ->Select(sub {
          my $r = shift;
          return { %$r, processed => JSON::LINQ::true };
      })
      ->ToJSON("data.json");

=head2 In-Memory Array Query

  my @data = (
      {name => 'Alice', score => 95},
      {name => 'Bob',   score => 72},
      {name => 'Carol', score => 88},
  );

  my @top = JSON::LINQ->From(\@data)
      ->Where(sub { $_[0]{score} >= 80 })
      ->OrderByDescending(sub { $_[0]{score} })
      ->ToArray();

=head1 FEATURES

=head2 Lazy Evaluation

C<FromJSONL> reads one line at a time. Combined with C<Where> and C<Take>,
only the needed records are ever in memory simultaneously.

C<FromJSON> reads the whole file once but then iterates the array lazily.

=head2 Built-in JSON Parser

JSON::LINQ contains its own JSON encoder/decoder (derived from mb::JSON 0.06).
No CPAN JSON module is required. The parser handles:

=over 4

=item * UTF-8 multibyte strings (output as-is, not \uXXXX-escaped)

=item * C<\uXXXX> escape sequences on input (converted to UTF-8)

=item * All JSON types: object, array, string, number, true, false, null

=item * Nested structures of arbitrary depth

=back

=head1 ARCHITECTURE

=head2 Relationship to LTSV::LINQ

JSON::LINQ and LTSV::LINQ are parallel modules sharing the same LINQ API.

  LTSV::LINQ  - LINQ for LTSV (Labeled Tab-Separated Values) files
  JSON::LINQ  - LINQ for JSON and JSONL files

Both share the same LINQ API. JSON::LINQ adds the following I/O methods
on top of LTSV::LINQ's interface:

  FromJSON($file)         - read JSON array file
  FromJSONL($file)        - read JSONL file (streaming)
  FromJSONString($json)   - read JSON string
  FromLTSV($file)         - read LTSV file (streaming)
  FromCSV($file)          - read CSV file (streaming, RFC 4180)
  ToJSON($file)           - write JSON array file
  ToJSONL($file)          - write JSONL file
  ToLTSV($file)           - write LTSV file (streaming)
  ToCSV($file)            - write CSV file

C<FromLTSV>, C<ToLTSV>, C<FromCSV>, and C<ToCSV> are provided so a
JSON::LINQ pipeline can JOIN against (or emit into) LTSV and CSV files
without requiring LTSV::LINQ or CSV::LINQ to be installed.

The internal iterator architecture is identical: each operator returns a
new query object wrapping a closure.

=head2 Memory Characteristics

  FromJSONL  - O(1) per record: one line at a time
  FromJSON   - O(n): entire file loaded once, then lazy iteration
  FromLTSV   - O(1) per record: one line at a time
  FromCSV    - O(1) per record: one line at a time
  ToJSON     - O(1) per record: array elements are written as they are produced
  ToJSONL    - O(1) per record: streaming write
  ToLTSV     - O(1) per record: streaming write
  ToCSV      - O(n): entire sequence collected before writing header

=head1 COMPATIBILITY

=head2 Perl Version Support

Compatible with B<Perl 5.00503 and later>. See L<LTSV::LINQ> for the
full compatibility rationale (Universal Consensus 1998 / Perl 5.005_03).

=head2 Pure Perl Implementation

No XS dependencies. No CPAN module dependencies. Works on any Perl
installation with only the standard core.

=head2 JSON Limitations

The built-in parser has the same limitations as mb::JSON 0.06:

=over 4

=item * Surrogate pairs (C<\uD800>-C<\uDFFF>) are not supported

=item * Circular references in encoding cause infinite recursion

=item * Non-ARRAY/HASH references are stringified

=back

=head2 Iterator Protocol and JSON null

The internal iterator protocol uses C<undef> to signal end-of-sequence.
As a consequence, an C<undef> value (i.e. a decoded JSON C<null>) cannot
appear as a I<top-level element> of a sequence: it would be
indistinguishable from EOF and the sequence would be silently truncated
at that point.

This affects C<Select> in particular: a selector that returns C<undef>
for some elements will terminate the sequence early.

  # JSON: [{"v":1},{"v":null},{"v":3}]
  JSON::LINQ->FromJSON("data.json")
            ->Select(sub { $_[0]{v} })
            ->ToArray;
  # returns (1) - sequence stops at the undef from the second record

C<Where> is unaffected when filtering hash records (the hashref itself
is the element, not its C<v> field), but a C<Select> that projects a
nullable field will be truncated at the first C<null>. Workarounds:

=over 4

=item * Project to a sentinel value: C<< Select(sub { defined $_[0]{v} ? $_[0]{v} : '' }) >>

=item * Wrap each element in a hashref so the element itself is never undef.

=back

C<DefaultIfEmpty(undef)> is similarly affected: a default of C<undef>
is silently lost. Use a non-undef sentinel (C<0>, C<''>, C<{}>) instead.



( run in 0.968 second using v1.01-cache-2.11-cpan-140bd7fdf52 )