Apophis
view release on metacpan or search on metacpan
lib/Apophis.xs view on Meta::CPAN
/*
* Apophis.xs - Content-addressable storage with deterministic UUID v5
*
* Named after Apophis, the Egyptian serpent of chaos â here tamed
* to bring order to content through deterministic hashing.
*
* 100% XS: all logic in C, Perl layer is just XSLoader.
* Uses Horus library for RFC 9562 UUID v5 (SHA-1 namespace) generation.
*/
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "../ppport.h"
#include <sys/stat.h>
#include <errno.h>
#ifndef _WIN32
# include <unistd.h>
#else
# include <io.h>
# include <direct.h>
#endif
/*
* Windows compatibility:
* perl.h on threaded Windows redefines mkdir, stat, unlink etc. as macros
* requiring the interpreter context (my_perl). Our static C helper functions
* don't have pTHX, so we undo those overrides and use the real libc calls.
*/
#ifdef WIN32
# undef mkdir
# undef stat
# undef unlink
# undef rename
# undef open
# undef close
# undef read
# undef write
/* Windows mkdir takes only one arg */
# define apophis_mkdir(p, m) _mkdir(p)
#else
# define apophis_mkdir(p, m) mkdir(p, m)
#endif
/* Portable stat type â Stat_t handles struct w32_stat on Windows */
#define apophis_stat_t Stat_t
/* Horus UUID library - pure C, no Perl deps */
#define HORUS_FATAL(msg) croak("%s", (msg))
#include "horus_core.h"
/* ------------------------------------------------------------------ */
/* Constants */
/* ------------------------------------------------------------------ */
#define APOPHIS_STREAM_BUF 65536 /* 64KB read chunks for streaming */
#define APOPHIS_PATH_MAX 4096
/* ------------------------------------------------------------------ */
/* Internal: namespace UUID generation */
/* ------------------------------------------------------------------ */
/* Derive a namespace UUID from a human-readable string via v5(DNS, name) */
static void
apophis_derive_namespace(unsigned char *ns_out,
const char *name, STRLEN name_len)
{
horus_uuid_v5(ns_out, HORUS_NS_DNS,
(const unsigned char *)name, (size_t)name_len);
}
/* Format 16-byte UUID binary to 36-char string SV */
static SV *
apophis_uuid_to_sv(pTHX_ const unsigned char *uuid)
{
char buf[HORUS_FMT_STR_LEN + 1];
horus_format_uuid(buf, uuid, HORUS_FMT_STR);
return newSVpvn(buf, HORUS_FMT_STR_LEN);
}
/* ------------------------------------------------------------------ */
/* Internal: content identification */
/* ------------------------------------------------------------------ */
/* Identify in-memory content: v5(namespace, content) */
static void
apophis_identify_content(unsigned char *uuid_out,
const unsigned char *ns_bytes,
const char *content, STRLEN content_len)
{
horus_uuid_v5(uuid_out, ns_bytes,
(const unsigned char *)content, (size_t)content_len);
}
/* Identify via streaming SHA-1 â O(1) memory */
static void
apophis_identify_stream(pTHX_ unsigned char *uuid_out,
const unsigned char *ns_bytes,
PerlIO *fh)
{
horus_sha1_ctx ctx;
unsigned char buf[APOPHIS_STREAM_BUF];
unsigned char digest[20];
SSize_t nread;
horus_sha1_init(&ctx);
horus_sha1_update(&ctx, ns_bytes, 16); /* namespace first per RFC */
while ((nread = PerlIO_read(fh, buf, sizeof(buf))) > 0) {
horus_sha1_update(&ctx, (const unsigned char *)buf, (size_t)nread);
}
horus_sha1_final(digest, &ctx);
memcpy(uuid_out, digest, 16);
horus_stamp_version_variant(uuid_out, 5);
}
/* ------------------------------------------------------------------ */
/* Internal: path computation */
/* ------------------------------------------------------------------ */
/* Build 2-level sharded path: store_dir/a3/bb/a3bb189e-...-1e3a
* Returns length written (excluding NUL). */
static int
apophis_build_path(char *out, size_t out_size,
const char *store_dir, STRLEN store_len,
const char *id, STRLEN id_len)
{
/* UUID is 36 chars: a3bb189e-8bf9-5f18-b3f6-1b2f5f5c1e3a
* Shard on first 2 and chars 3-4 (skipping no hyphens needed,
* first 4 hex chars are positions 0-3 of the UUID string) */
if (id_len < 5)
croak("Apophis: invalid UUID id");
return snprintf(out, out_size, "%.*s/%c%c/%c%c/%.*s",
(int)store_len, store_dir,
id[0], id[1], /* first shard level */
id[2], id[3], /* second shard level */
(int)id_len, id);
}
/* Build path for .meta sidecar */
static int
apophis_build_meta_path(char *out, size_t out_size,
const char *content_path, int content_path_len)
{
return snprintf(out, out_size, "%.*s.meta",
content_path_len, content_path);
}
/* ------------------------------------------------------------------ */
/* Internal: recursive mkdir */
/* ------------------------------------------------------------------ */
static void
lib/Apophis.xs view on Meta::CPAN
* Stack output: \$content or undef
*
* Fuses: path computation + stat + open + read into a single op.
*/
static OP *
pp_apophis_fetch(pTHX) {
dSP;
SV *id_sv = POPs;
SV *self_sv = POPs;
HV *hv;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
apophis_stat_t st;
if (!sv_isobject(self_sv))
croak("Apophis: pp_fetch: not an object");
hv = (HV *)SvRV(self_sv);
store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
id_str = SvPV(id_sv, id_len);
apophis_build_path(path, sizeof(path),
store_dir, store_dir_len, id_str, id_len);
EXTEND(SP, 1);
if (stat(path, &st) != 0) {
PUSHs(&PL_sv_undef);
} else {
PerlIO *fh = PerlIO_open(path, "rb");
if (!fh)
croak("Apophis: pp_fetch: cannot open '%s': %s",
path, strerror(errno));
SV *content = newSV((STRLEN)st.st_size + 1);
SvPOK_on(content);
SSize_t nread = PerlIO_read(fh, SvPVX(content), (Size_t)st.st_size);
PerlIO_close(fh);
if (nread < 0) {
SvREFCNT_dec(content);
croak("Apophis: pp_fetch: read error on '%s'", path);
}
SvCUR_set(content, (STRLEN)nread);
*SvEND(content) = '\0';
PUSHs(sv_2mortal(newRV_noinc(content)));
}
PUTBACK;
return NORMAL;
}
/*
* pp_apophis_verify - Custom op: fused re-read + re-hash + compare
*
* Stack input: self_sv, id_sv
* Stack output: bool_sv
*
* Fuses: path computation + open + streaming SHA-1 + format + memcmp.
*/
static OP *
pp_apophis_verify(pTHX) {
dSP;
SV *id_sv = POPs;
SV *self_sv = POPs;
HV *hv;
const unsigned char *ns;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
PerlIO *fh;
unsigned char uuid[16];
char recomputed[HORUS_FMT_STR_LEN + 1];
if (!sv_isobject(self_sv))
croak("Apophis: pp_verify: not an object");
hv = (HV *)SvRV(self_sv);
ns = apophis_get_ns(aTHX_ hv);
store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
id_str = SvPV(id_sv, id_len);
apophis_build_path(path, sizeof(path),
store_dir, store_dir_len, id_str, id_len);
EXTEND(SP, 1);
fh = PerlIO_open(path, "rb");
if (!fh) {
PUSHs(&PL_sv_no);
} else {
apophis_identify_stream(aTHX_ uuid, ns, fh);
PerlIO_close(fh);
horus_format_uuid(recomputed, uuid, HORUS_FMT_STR);
PUSHs((id_len == HORUS_FMT_STR_LEN &&
memcmp(id_str, recomputed, HORUS_FMT_STR_LEN) == 0)
? &PL_sv_yes : &PL_sv_no);
}
PUTBACK;
return NORMAL;
}
/*
* pp_apophis_remove - Custom op: fused path + unlink + meta cleanup
*
* Stack input: self_sv, id_sv
* Stack output: bool_sv
*
* Fuses: path computation + unlink + meta sidecar cleanup.
*/
static OP *
pp_apophis_remove(pTHX) {
dSP;
SV *id_sv = POPs;
SV *self_sv = POPs;
HV *hv;
const char *store_dir;
lib/Apophis.xs view on Meta::CPAN
store_dir, store_dir_len, id_str, id_len);
RETVAL = (stat(path, &st) == 0) ? TRUE : FALSE;
OUTPUT:
RETVAL
# op_fetch($self, $id) -> \$content or undef
# Fused path computation + stat + read â single call.
SV *
op_fetch(self, id)
SV *self
SV *id
PREINIT:
HV *hv;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
PerlIO *fh;
apophis_stat_t st;
SV *content;
SSize_t nread;
CODE:
if (!sv_isobject(self))
croak("Apophis::op_fetch: not an object");
hv = (HV *)SvRV(self);
store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
id_str = SvPV(id, id_len);
apophis_build_path(path, sizeof(path),
store_dir, store_dir_len, id_str, id_len);
if (stat(path, &st) != 0) {
RETVAL = &PL_sv_undef;
} else {
fh = PerlIO_open(path, "rb");
if (!fh)
croak("Apophis::op_fetch: cannot open '%s': %s",
path, strerror(errno));
content = newSV((STRLEN)st.st_size + 1);
SvPOK_on(content);
nread = PerlIO_read(fh, SvPVX(content), (Size_t)st.st_size);
PerlIO_close(fh);
if (nread < 0) {
SvREFCNT_dec(content);
croak("Apophis::op_fetch: read error on '%s'", path);
}
SvCUR_set(content, (STRLEN)nread);
*SvEND(content) = '\0';
RETVAL = newRV_noinc(content);
}
OUTPUT:
RETVAL
# op_verify($self, $id) -> bool
# Fused read + streaming SHA-1 + compare â single call.
bool
op_verify(self, id)
SV *self
SV *id
PREINIT:
HV *hv;
const unsigned char *ns;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
PerlIO *fh;
unsigned char uuid[16];
char recomputed[HORUS_FMT_STR_LEN + 1];
CODE:
if (!sv_isobject(self))
croak("Apophis::op_verify: not an object");
hv = (HV *)SvRV(self);
ns = apophis_get_ns(aTHX_ hv);
store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
id_str = SvPV(id, id_len);
apophis_build_path(path, sizeof(path),
store_dir, store_dir_len, id_str, id_len);
fh = PerlIO_open(path, "rb");
if (!fh) {
RETVAL = FALSE;
} else {
apophis_identify_stream(aTHX_ uuid, ns, fh);
PerlIO_close(fh);
horus_format_uuid(recomputed, uuid, HORUS_FMT_STR);
RETVAL = (id_len == HORUS_FMT_STR_LEN &&
memcmp(id_str, recomputed, HORUS_FMT_STR_LEN) == 0)
? TRUE : FALSE;
}
OUTPUT:
RETVAL
# op_remove($self, $id) -> bool
# Fused path + unlink + meta cleanup â single call.
bool
op_remove(self, id)
SV *self
SV *id
PREINIT:
HV *hv;
const char *store_dir;
STRLEN store_dir_len;
const char *id_str;
STRLEN id_len;
char path[APOPHIS_PATH_MAX];
int path_len;
char meta_path[APOPHIS_PATH_MAX];
int removed;
CODE:
( run in 0.521 second using v1.01-cache-2.11-cpan-140bd7fdf52 )