Apophis
view release on metacpan or search on metacpan
MANIFEST
PLAN.md
ppport.h
README
t/00-load.t
t/01-identify.t
t/02-namespace.t
t/03-store-fetch.t
t/04-sharding.t
t/05-exists-remove.t
t/06-verify.t
t/07-streaming.t
t/08-bulk.t
t/09-metadata.t
t/10-edge-cases.t
t/11-custom-ops.t
t/manifest.t
t/pod-coverage.t
t/pod.t
META.yml Module YAML meta-data (added by MakeMaker)
META.json Module JSON meta-data (added by MakeMaker)
+-----------+--------------+
|
+-----------+--------------+
| Apophis.xs |
| ALL logic in C/XS: |
| - new() constructor |
| - identify / identify_ |
| file (streaming) |
| - store / fetch / |
| exists / remove |
| - path_for / verify |
| - store_many / |
| find_missing |
| - metadata (JSON in C) |
+-----------+--------------+
|
+-----------+--------------+
| Horus (C headers) |
| horus_uuid_v5() |
| horus_sha1_*() streaming |
| horus_parse_uuid() |
lib/
Apophis.pm # XSLoader + POD only
Apophis.xs # ALL logic: OO interface, file I/O, hashing, storage
t/
00-load.t
01-identify.t # determinism, format, different content
02-namespace.t # namespace isolation
03-store-fetch.t # round-trip, dedup, atomic write
04-sharding.t # path_for correctness
05-exists-remove.t # existence check, removal + .meta cleanup
06-verify.t # integrity verification
07-streaming.t # identify_file matches identify for same content
08-bulk.t # store_many, find_missing
09-metadata.t # metadata write/read
10-edge-cases.t # empty content, binary with nulls, unicode
```
## XS Internal C Functions
### `apophis_parse_ns_uuid(ns_hex) -> 16 bytes`
Uses `horus_parse_uuid()` to convert a 36-char namespace UUID string back to 16 raw bytes.
### `$obj->exists($id, %opts)`
Returns true if the sharded path for `$id` exists.
### `$obj->remove($id, %opts)`
Unlinks content + `.meta` sidecar.
### `$obj->path_for($id, %opts)`
Returns the 2-level sharded filesystem path.
### `$obj->verify($id, %opts)`
Re-identifies the stored content and compares the resulting UUID with `$id`. Returns true if they match.
### `$obj->store_many(\@refs, %opts)`
Maps store over array. Returns list of UUIDs.
### `$obj->find_missing(\@ids, %opts)`
Returns list of IDs not in store.
### `$obj->namespace()`
Returns the namespace UUID string.
- path_for(), apophis_mkdir_p(), apophis_store_file()
- store(), fetch(), exists(), remove()
- Tests: t/03-store-fetch.t, t/04-sharding.t, t/05-exists-remove.t
### Phase 4: Streaming
- apophis_identify_stream() with PerlIO_read in 64KB chunks
- identify_file()
- Tests: t/07-streaming.t
### Phase 5: Integrity + Bulk
- verify(), store_many(), find_missing()
- Tests: t/06-verify.t, t/08-bulk.t
### Phase 6: Metadata
- apophis_meta_write(), apophis_meta_read()
- Integration into store/remove
- Tests: t/09-metadata.t
### Phase 7: Polish
- Edge case tests, POD, MANIFEST
- Tests: t/10-edge-cases.t
lib/Apophis.pm view on Meta::CPAN
my $id = $ca->store(\$content);
# Retrieve
my $data = $ca->fetch($id);
# Check / remove
if ($ca->exists($id)) { ... }
$ca->remove($id);
# Integrity verification
my $ok = $ca->verify($id); # re-hash and compare
# Sharded path
my $path = $ca->path_for($id);
# /var/store/a3/bb/a3bb189e-8bf9-5f18-b3f6-1b2f5f5c1e3a
# Bulk operations
my @ids = $ca->store_many(\@content_refs);
my @missing = $ca->find_missing(\@ids);
# Metadata
lib/Apophis.pm view on Meta::CPAN
Removes the content and its metadata sidecar (if any) from the store.
=head2 path_for
my $path = $ca->path_for($id);
Returns the 2-level sharded filesystem path for the given UUID:
a3bb189e-8bf9-... → /store/a3/bb/a3bb189e-8bf9-...
=head2 verify
my $ok = $ca->verify($id);
Re-reads the stored content, re-identifies it, and compares the UUID.
Returns true if the content is intact.
=head2 store_many
my @ids = $ca->store_many(\@content_refs);
Stores multiple content items. Returns a list of UUIDs.
lib/Apophis.xs view on Meta::CPAN
/* ================================================================== */
/* Custom Ops - bypass method dispatch for hot-path operations */
/* ================================================================== */
/*
 * Forward declarations of the custom-op implementations ("pp" functions).
 * Each one takes only the interpreter context (pTHX) and returns the OP to
 * execute next.  They are declared up front so they can be referenced
 * (e.g. when registering or constructing ops) independently of where their
 * bodies appear in this file.
 */
static OP *pp_apophis_identify(pTHX);
static OP *pp_apophis_store(pTHX);
static OP *pp_apophis_exists(pTHX);
static OP *pp_apophis_fetch(pTHX);
static OP *pp_apophis_verify(pTHX);
static OP *pp_apophis_remove(pTHX);
/*
 * XOP structs for debug names (5.14+ only).
 * One descriptor per custom op above; a descriptor carries the op's name,
 * description and class and is tied to its pp function with
 * Perl_custom_op_register().  The guard compiles them out on older perls.
 * NOTE(review): the guard tests PERL_VERSION only (not PERL_REVISION) --
 * confirm that is acceptable for the perls this module targets.
 */
#if PERL_VERSION >= 14
static XOP apophis_xop_identify;
static XOP apophis_xop_store;
static XOP apophis_xop_exists;
static XOP apophis_xop_fetch;
static XOP apophis_xop_verify;
static XOP apophis_xop_remove;
#endif
/*
* pp_apophis_identify - Custom op: content → UUID v5 string
*
* Stack input: self_sv, content_ref_sv
* Stack output: uuid_string_sv
*
* Fuses: namespace extraction + SHA-1 + v5 stamp + format
lib/Apophis.xs view on Meta::CPAN
SvCUR_set(content, (STRLEN)nread);
*SvEND(content) = '\0';
PUSHs(sv_2mortal(newRV_noinc(content)));
}
PUTBACK;
return NORMAL;
}
/*
* pp_apophis_verify - Custom op: fused re-read + re-hash + compare
*
* Stack input: self_sv, id_sv
* Stack output: bool_sv
*
* Fuses: path computation + open + streaming SHA-1 + format + memcmp.
*/
static OP *
pp_apophis_verify(pTHX) {
dSP;
SV *id_sv = POPs;
SV *self_sv = POPs;
HV *hv;
const unsigned char *ns;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
PerlIO *fh;
unsigned char uuid[16];
char recomputed[HORUS_FMT_STR_LEN + 1];
if (!sv_isobject(self_sv))
croak("Apophis: pp_verify: not an object");
hv = (HV *)SvRV(self_sv);
ns = apophis_get_ns(aTHX_ hv);
store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
id_str = SvPV(id_sv, id_len);
apophis_build_path(path, sizeof(path),
store_dir, store_dir_len, id_str, id_len);
EXTEND(SP, 1);
lib/Apophis.xs view on Meta::CPAN
XopENTRY_set(&apophis_xop_exists, xop_name, "apophis_exists");
XopENTRY_set(&apophis_xop_exists, xop_desc, "Apophis fused existence check (path + stat)");
XopENTRY_set(&apophis_xop_exists, xop_class, OA_BASEOP);
Perl_custom_op_register(aTHX_ pp_apophis_exists, &apophis_xop_exists);
XopENTRY_set(&apophis_xop_fetch, xop_name, "apophis_fetch");
XopENTRY_set(&apophis_xop_fetch, xop_desc, "Apophis fused fetch (path + stat + read)");
XopENTRY_set(&apophis_xop_fetch, xop_class, OA_BASEOP);
Perl_custom_op_register(aTHX_ pp_apophis_fetch, &apophis_xop_fetch);
XopENTRY_set(&apophis_xop_verify, xop_name, "apophis_verify");
XopENTRY_set(&apophis_xop_verify, xop_desc, "Apophis fused verify (read + re-hash + compare)");
XopENTRY_set(&apophis_xop_verify, xop_class, OA_BASEOP);
Perl_custom_op_register(aTHX_ pp_apophis_verify, &apophis_xop_verify);
XopENTRY_set(&apophis_xop_remove, xop_name, "apophis_remove");
XopENTRY_set(&apophis_xop_remove, xop_desc, "Apophis fused remove (path + unlink + meta cleanup)");
XopENTRY_set(&apophis_xop_remove, xop_class, OA_BASEOP);
Perl_custom_op_register(aTHX_ pp_apophis_remove, &apophis_xop_remove);
#endif
# ------------------------------------------------------------------ #
# new(class, %args) -> blessed object #
# ------------------------------------------------------------------ #
lib/Apophis.xs view on Meta::CPAN
/* Also remove metadata sidecar if it exists */
apophis_build_meta_path(meta_path, sizeof(meta_path),
path, path_len);
unlink(meta_path); /* ignore error â may not exist */
RETVAL = removed ? TRUE : FALSE;
OUTPUT:
RETVAL
# ------------------------------------------------------------------ #
# verify($id, %opts) -> bool #
# ------------------------------------------------------------------ #
bool
verify(self, id, ...)
SV *self
SV *id
PREINIT:
HV *hv;
HV *opts = NULL;
const unsigned char *ns;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
PerlIO *fh;
unsigned char uuid[16];
char recomputed[HORUS_FMT_STR_LEN + 1];
CODE:
if (!sv_isobject(self))
croak("Apophis::verify: not an object");
hv = (HV *)SvRV(self);
ns = apophis_get_ns(aTHX_ hv);
if (items > 2) {
int i;
if ((items - 2) % 2 != 0)
croak("Apophis::verify: odd number of optional arguments");
opts = newHV();
sv_2mortal((SV *)opts);
for (i = 2; i < items; i += 2) {
STRLEN klen;
const char *k = SvPV(ST(i), klen);
hv_store(opts, k, klen, SvREFCNT_inc(ST(i+1)), 0);
}
}
store_dir = apophis_get_store_dir(aTHX_ hv, opts, &store_dir_len);
lib/Apophis.xs view on Meta::CPAN
croak("Apophis::op_fetch: read error on '%s'", path);
}
SvCUR_set(content, (STRLEN)nread);
*SvEND(content) = '\0';
RETVAL = newRV_noinc(content);
}
OUTPUT:
RETVAL
# op_verify($self, $id) -> bool
# Fused read + streaming SHA-1 + compare — single call.
bool
op_verify(self, id)
SV *self
SV *id
PREINIT:
HV *hv;
const unsigned char *ns;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
PerlIO *fh;
unsigned char uuid[16];
char recomputed[HORUS_FMT_STR_LEN + 1];
CODE:
if (!sv_isobject(self))
croak("Apophis::op_verify: not an object");
hv = (HV *)SvRV(self);
ns = apophis_get_ns(aTHX_ hv);
store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
id_str = SvPV(id, id_len);
apophis_build_path(path, sizeof(path),
store_dir, store_dir_len, id_str, id_len);
fh = PerlIO_open(path, "rb");
if (!fh) {
lib/Apophis.xs view on Meta::CPAN
RETVAL = newSVpvf("CUSTOM_OP@apophis_store[%p]", (void *)op->op_ppaddr);
FreeOp(op);
} else if (strEQ(type, "exists")) {
OP *op = apophis_make_custom_op(aTHX_ pp_apophis_exists);
RETVAL = newSVpvf("CUSTOM_OP@apophis_exists[%p]", (void *)op->op_ppaddr);
FreeOp(op);
} else if (strEQ(type, "fetch")) {
OP *op = apophis_make_custom_op(aTHX_ pp_apophis_fetch);
RETVAL = newSVpvf("CUSTOM_OP@apophis_fetch[%p]", (void *)op->op_ppaddr);
FreeOp(op);
} else if (strEQ(type, "verify")) {
OP *op = apophis_make_custom_op(aTHX_ pp_apophis_verify);
RETVAL = newSVpvf("CUSTOM_OP@apophis_verify[%p]", (void *)op->op_ppaddr);
FreeOp(op);
} else if (strEQ(type, "remove")) {
OP *op = apophis_make_custom_op(aTHX_ pp_apophis_remove);
RETVAL = newSVpvf("CUSTOM_OP@apophis_remove[%p]", (void *)op->op_ppaddr);
FreeOp(op);
} else {
croak("Apophis::_make_op: unknown type '%s'", type);
}
OUTPUT:
RETVAL
The default namespace is C<DPPP_>.
=back
The good thing is that most of the above can be checked by running
F<ppport.h> on your source code. See the next section for
details.
=head1 EXAMPLES
To verify whether F<ppport.h> is needed for your module, whether you
should make any changes to your code, and whether any special defines
should be used, F<ppport.h> can be run as a Perl script to check your
source code. Simply say:
perl ppport.h
The result will usually be a list of patches suggesting changes
that should at least be acceptable, if not necessarily the most
efficient solution, or a fix for all possible problems.
VTBL_substr|5.005003||Viu
VTBL_sv|5.005003||Viu
VTBL_taint|5.005003||Viu
VTBL_uvar|5.005003||Viu
VTBL_vec|5.005003||Viu
vTHX|5.006000||Viu
VT_NATIVE|5.021004||Viu
vtohl|5.003007||Viu
vtohs|5.003007||Viu
VUTIL_REPLACE_CORE|5.019008||Viu
vverify|5.009003|5.009003|
VVERIFY|5.019008||Viu
vwarn|5.006000|5.006000|
vwarner|5.006000|5.006000|p
wait4pid|5.003007||Viu
wait|5.005000||Viu
want_vtbl_bm|5.015000||Viu
want_vtbl_fm|5.015000||Viu
warn|5.006000|5.003007|v
WARN_ALL|5.006000|5.003007|p
WARN_ALLstring|5.006000||Viu
t/06-verify.t view on Meta::CPAN
use strict;
use warnings;
use Test::More tests => 3;
use File::Temp qw(tempdir);
use Apophis;

# Exercises Apophis->verify against intact, corrupted and missing content.
# The store lives in a temporary directory that is cleaned up automatically.
my $dir = tempdir(CLEANUP => 1);
my $ca = Apophis->new(namespace => 'test-verify', store_dir => $dir);

# Store and verify
my $content = 'verify this content';
my $id = $ca->store(\$content);
ok($ca->verify($id), 'verify returns true for intact content');

# Corrupt the file and verify fails.
# Every step of the overwrite is checked: buffered write errors only surface
# at close, and an unnoticed failure would leave the file in an unknown state,
# letting the assertion below pass or fail for the wrong reason.
my $path = $ca->path_for($id);
open my $fh, '>', $path or die "Cannot write $path: $!";
print {$fh} 'corrupted data' or die "Cannot write $path: $!";
close $fh or die "Cannot close $path: $!";
ok(!$ca->verify($id), 'verify returns false for corrupted content');

# Verify nonexistent returns false
ok(!$ca->verify('00000000-0000-5000-8000-000000000000'),
    'verify returns false for nonexistent ID');
t/10-edge-cases.t view on Meta::CPAN
# Unicode content (UTF-8 bytes)
my $unicode = "caf\xc3\xa9 \xe2\x98\x83"; # cafe + snowman in UTF-8
my $id3 = $ca->store(\$unicode);
my $fetched3 = $ca->fetch($id3);
is($$fetched3, $unicode, 'UTF-8 bytes round-trip correctly');
# Large content
my $large = 'A' x 500_000;
my $id4 = $ca->store(\$large);
ok($ca->exists($id4), 'large content (500KB) stored');
ok($ca->verify($id4), 'large content verifies');
# Constructor requires namespace
eval { Apophis->new() };
like($@, qr/namespace/, 'new without namespace croaks');
# identify requires scalar ref
eval { $ca->identify('not a ref') };
like($@, qr/scalar reference/, 'identify without ref croaks');
t/11-custom-ops.t view on Meta::CPAN
my $op_st = Apophis::_make_op('store');
like($op_st, qr/^CUSTOM_OP\@apophis_store/, 'store op created');
my $op_ex = Apophis::_make_op('exists');
like($op_ex, qr/^CUSTOM_OP\@apophis_exists/, 'exists op created');
my $op_fe = Apophis::_make_op('fetch');
like($op_fe, qr/^CUSTOM_OP\@apophis_fetch/, 'fetch op created');
my $op_ve = Apophis::_make_op('verify');
like($op_ve, qr/^CUSTOM_OP\@apophis_verify/, 'verify op created');
my $op_rm = Apophis::_make_op('remove');
like($op_rm, qr/^CUSTOM_OP\@apophis_remove/, 'remove op created');
# --- op_identify matches identify ---
my $content = 'custom op test content';
my $id_method = $ca->identify(\$content);
my $id_op = $ca->op_identify(\$content);
is($id_op, $id_method, 'op_identify matches identify');
t/11-custom-ops.t view on Meta::CPAN
# op_fetch returns undef for nonexistent
my $op_fetched_missing = $ca->op_fetch('00000000-0000-5000-8000-000000000000');
ok(!defined $op_fetched_missing, 'op_fetch returns undef for nonexistent');
# op_fetch reads op_store content
my $rt_content = 'round-trip via ops';
my $rt_id = $ca->op_store(\$rt_content);
my $rt_fetched = $ca->op_fetch($rt_id);
is($$rt_fetched, $rt_content, 'op_fetch reads op_store content');
# --- op_verify ---
ok($ca->op_verify($fetch_id), 'op_verify returns true for intact content');
ok($ca->op_verify($rt_id), 'op_verify returns true for op_store content');
ok(!$ca->op_verify('00000000-0000-5000-8000-000000000000'),
'op_verify returns false for nonexistent');
# op_verify matches standard verify
my $std_verify = $ca->verify($fetch_id);
my $op_verify = $ca->op_verify($fetch_id);
is(!!$op_verify, !!$std_verify, 'op_verify matches standard verify');
# --- op_remove ---
my $rm_content = 'content to remove via op';
my $rm_id = $ca->op_store(\$rm_content);
ok($ca->op_exists($rm_id), 'item exists before op_remove');
ok($ca->op_remove($rm_id), 'op_remove returns true for existing item');
ok(!$ca->op_exists($rm_id), 'op_exists false after op_remove');
ok(!$ca->op_remove($rm_id), 'op_remove returns false for already removed');
( run in 1.021 second using v1.01-cache-2.11-cpan-e1769b4cff6 )