Apophis

 view release on metacpan or  search on metacpan

lib/Apophis.xs  view on Meta::CPAN

/*
 * Apophis.xs - Content-addressable storage with deterministic UUID v5
 *
 * Named after Apophis, the Egyptian serpent of chaos — here tamed
 * to bring order to content through deterministic hashing.
 *
 * 100% XS: all logic in C, Perl layer is just XSLoader.
 * Uses Horus library for RFC 9562 UUID v5 (SHA-1 namespace) generation.
 */

#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "../ppport.h"

#include <sys/stat.h>
#include <errno.h>
#ifndef _WIN32
#  include <unistd.h>
#else
#  include <io.h>
#  include <direct.h>
#endif

/*
 * Windows compatibility:
 * perl.h on threaded Windows redefines mkdir, stat, unlink etc. as macros
 * requiring the interpreter context (my_perl).  Our static C helper functions
 * don't have pTHX, so we undo those overrides and use the real libc calls.
 */
#ifdef WIN32
#  undef mkdir
#  undef stat
#  undef unlink
#  undef rename
#  undef open
#  undef close
#  undef read
#  undef write
   /* Windows mkdir takes only one arg */
#  define apophis_mkdir(p, m) _mkdir(p)
#else
#  define apophis_mkdir(p, m) mkdir(p, m)
#endif

/* Portable stat type — Stat_t handles struct w32_stat on Windows */
#define apophis_stat_t Stat_t

/* Horus UUID library - pure C, no Perl deps */
#define HORUS_FATAL(msg) croak("%s", (msg))
#include "horus_core.h"

/* ------------------------------------------------------------------ */
/* Constants                                                           */
/* ------------------------------------------------------------------ */

#define APOPHIS_STREAM_BUF  65536   /* 64KB read chunks for streaming */
#define APOPHIS_PATH_MAX    4096

/* ------------------------------------------------------------------ */
/* Internal: namespace UUID generation                                 */
/* ------------------------------------------------------------------ */

/* Derive a namespace UUID from a human-readable string via v5(DNS, name) */
static void
apophis_derive_namespace(unsigned char *ns_out,
                         const char *name, STRLEN name_len)
{
    horus_uuid_v5(ns_out, HORUS_NS_DNS,
                  (const unsigned char *)name, (size_t)name_len);
}

/* Format 16-byte UUID binary to 36-char string SV */
static SV *
apophis_uuid_to_sv(pTHX_ const unsigned char *uuid)
{
    char buf[HORUS_FMT_STR_LEN + 1];
    horus_format_uuid(buf, uuid, HORUS_FMT_STR);
    return newSVpvn(buf, HORUS_FMT_STR_LEN);
}

/* ------------------------------------------------------------------ */
/* Internal: content identification                                    */
/* ------------------------------------------------------------------ */

/* Identify in-memory content: v5(namespace, content) */
static void
apophis_identify_content(unsigned char *uuid_out,
                         const unsigned char *ns_bytes,
                         const char *content, STRLEN content_len)
{
    horus_uuid_v5(uuid_out, ns_bytes,
                  (const unsigned char *)content, (size_t)content_len);
}

/* Identify via streaming SHA-1 — O(1) memory */
static void
apophis_identify_stream(pTHX_ unsigned char *uuid_out,
                        const unsigned char *ns_bytes,
                        PerlIO *fh)
{
    horus_sha1_ctx ctx;
    unsigned char buf[APOPHIS_STREAM_BUF];
    unsigned char digest[20];
    SSize_t nread;

    horus_sha1_init(&ctx);
    horus_sha1_update(&ctx, ns_bytes, 16);  /* namespace first per RFC */

    while ((nread = PerlIO_read(fh, buf, sizeof(buf))) > 0) {
        horus_sha1_update(&ctx, (const unsigned char *)buf, (size_t)nread);
    }

    horus_sha1_final(digest, &ctx);
    memcpy(uuid_out, digest, 16);
    horus_stamp_version_variant(uuid_out, 5);
}

/* ------------------------------------------------------------------ */
/* Internal: path computation                                          */
/* ------------------------------------------------------------------ */

/* Build 2-level sharded path: store_dir/a3/bb/a3bb189e-...-1e3a
 * Returns length written (excluding NUL). */
static int
apophis_build_path(char *out, size_t out_size,
                   const char *store_dir, STRLEN store_len,
                   const char *id, STRLEN id_len)
{
    /* UUID is 36 chars: a3bb189e-8bf9-5f18-b3f6-1b2f5f5c1e3a
     * Shard on first 2 and chars 3-4 (skipping no hyphens needed,
     * first 4 hex chars are positions 0-3 of the UUID string) */
    if (id_len < 5)
        croak("Apophis: invalid UUID id");

    return snprintf(out, out_size, "%.*s/%c%c/%c%c/%.*s",
                    (int)store_len, store_dir,
                    id[0], id[1],    /* first shard level */
                    id[2], id[3],    /* second shard level */
                    (int)id_len, id);
}

/* Build path for .meta sidecar */
static int
apophis_build_meta_path(char *out, size_t out_size,
                        const char *content_path, int content_path_len)
{
    return snprintf(out, out_size, "%.*s.meta",
                    content_path_len, content_path);
}

/* ------------------------------------------------------------------ */
/* Internal: recursive mkdir                                           */
/* ------------------------------------------------------------------ */

static void

lib/Apophis.xs  view on Meta::CPAN

 * Stack output: \$content or undef
 *
 * Fuses: path computation + stat + open + read into a single op.
 */
static OP *
pp_apophis_fetch(pTHX) {
    dSP;
    SV *id_sv = POPs;
    SV *self_sv = POPs;
    HV *hv;
    const char *store_dir;
    STRLEN store_dir_len;
    const char *id_str;
    STRLEN id_len;
    char path[APOPHIS_PATH_MAX];
    apophis_stat_t st;

    if (!sv_isobject(self_sv))
        croak("Apophis: pp_fetch: not an object");
    hv = (HV *)SvRV(self_sv);

    store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
    id_str = SvPV(id_sv, id_len);

    apophis_build_path(path, sizeof(path),
                       store_dir, store_dir_len, id_str, id_len);

    EXTEND(SP, 1);
    if (stat(path, &st) != 0) {
        PUSHs(&PL_sv_undef);
    } else {
        PerlIO *fh = PerlIO_open(path, "rb");
        if (!fh)
            croak("Apophis: pp_fetch: cannot open '%s': %s",
                  path, strerror(errno));

        SV *content = newSV((STRLEN)st.st_size + 1);
        SvPOK_on(content);
        SSize_t nread = PerlIO_read(fh, SvPVX(content), (Size_t)st.st_size);
        PerlIO_close(fh);

        if (nread < 0) {
            SvREFCNT_dec(content);
            croak("Apophis: pp_fetch: read error on '%s'", path);
        }
        SvCUR_set(content, (STRLEN)nread);
        *SvEND(content) = '\0';

        PUSHs(sv_2mortal(newRV_noinc(content)));
    }
    PUTBACK;
    return NORMAL;
}

/*
 * pp_apophis_verify - Custom op: fused re-read + re-hash + compare
 *
 * Stack input:  self_sv, id_sv
 * Stack output: bool_sv
 *
 * Fuses: path computation + open + streaming SHA-1 + format + memcmp.
 */
static OP *
pp_apophis_verify(pTHX) {
    dSP;
    SV *id_sv = POPs;
    SV *self_sv = POPs;
    HV *hv;
    const unsigned char *ns;
    const char *store_dir;
    STRLEN store_dir_len;
    const char *id_str;
    STRLEN id_len;
    char path[APOPHIS_PATH_MAX];
    PerlIO *fh;
    unsigned char uuid[16];
    char recomputed[HORUS_FMT_STR_LEN + 1];

    if (!sv_isobject(self_sv))
        croak("Apophis: pp_verify: not an object");
    hv = (HV *)SvRV(self_sv);
    ns = apophis_get_ns(aTHX_ hv);

    store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
    id_str = SvPV(id_sv, id_len);

    apophis_build_path(path, sizeof(path),
                       store_dir, store_dir_len, id_str, id_len);

    EXTEND(SP, 1);
    fh = PerlIO_open(path, "rb");
    if (!fh) {
        PUSHs(&PL_sv_no);
    } else {
        apophis_identify_stream(aTHX_ uuid, ns, fh);
        PerlIO_close(fh);

        horus_format_uuid(recomputed, uuid, HORUS_FMT_STR);
        PUSHs((id_len == HORUS_FMT_STR_LEN &&
               memcmp(id_str, recomputed, HORUS_FMT_STR_LEN) == 0)
              ? &PL_sv_yes : &PL_sv_no);
    }
    PUTBACK;
    return NORMAL;
}

/*
 * pp_apophis_remove - Custom op: fused path + unlink + meta cleanup
 *
 * Stack input:  self_sv, id_sv
 * Stack output: bool_sv
 *
 * Fuses: path computation + unlink + meta sidecar cleanup.
 */
static OP *
pp_apophis_remove(pTHX) {
    dSP;
    SV *id_sv = POPs;
    SV *self_sv = POPs;
    HV *hv;
    const char *store_dir;

lib/Apophis.xs  view on Meta::CPAN

                           store_dir, store_dir_len, id_str, id_len);

        RETVAL = (stat(path, &st) == 0) ? TRUE : FALSE;
    OUTPUT:
        RETVAL

# op_fetch($self, $id) -> \$content or undef
# Fused path computation + stat + read — single call.

SV *
op_fetch(self, id)
        SV *self
        SV *id
    PREINIT:
        HV *hv;
        const char *store_dir;
        STRLEN store_dir_len;
        const char *id_str;
        STRLEN id_len;
        char path[APOPHIS_PATH_MAX];
        PerlIO *fh;
        apophis_stat_t st;
        SV *content;
        SSize_t nread;
    CODE:
        if (!sv_isobject(self))
            croak("Apophis::op_fetch: not an object");
        hv = (HV *)SvRV(self);

        store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
        id_str = SvPV(id, id_len);
        apophis_build_path(path, sizeof(path),
                           store_dir, store_dir_len, id_str, id_len);

        if (stat(path, &st) != 0) {
            RETVAL = &PL_sv_undef;
        } else {
            fh = PerlIO_open(path, "rb");
            if (!fh)
                croak("Apophis::op_fetch: cannot open '%s': %s",
                      path, strerror(errno));

            content = newSV((STRLEN)st.st_size + 1);
            SvPOK_on(content);
            nread = PerlIO_read(fh, SvPVX(content), (Size_t)st.st_size);
            PerlIO_close(fh);

            if (nread < 0) {
                SvREFCNT_dec(content);
                croak("Apophis::op_fetch: read error on '%s'", path);
            }
            SvCUR_set(content, (STRLEN)nread);
            *SvEND(content) = '\0';

            RETVAL = newRV_noinc(content);
        }
    OUTPUT:
        RETVAL

# op_verify($self, $id) -> bool
# Fused read + streaming SHA-1 + compare — single call.

bool
op_verify(self, id)
        SV *self
        SV *id
    PREINIT:
        HV *hv;
        const unsigned char *ns;
        const char *store_dir;
        STRLEN store_dir_len;
        const char *id_str;
        STRLEN id_len;
        char path[APOPHIS_PATH_MAX];
        PerlIO *fh;
        unsigned char uuid[16];
        char recomputed[HORUS_FMT_STR_LEN + 1];
    CODE:
        if (!sv_isobject(self))
            croak("Apophis::op_verify: not an object");
        hv = (HV *)SvRV(self);
        ns = apophis_get_ns(aTHX_ hv);

        store_dir = apophis_get_store_dir(aTHX_ hv, NULL, &store_dir_len);
        id_str = SvPV(id, id_len);
        apophis_build_path(path, sizeof(path),
                           store_dir, store_dir_len, id_str, id_len);

        fh = PerlIO_open(path, "rb");
        if (!fh) {
            RETVAL = FALSE;
        } else {
            apophis_identify_stream(aTHX_ uuid, ns, fh);
            PerlIO_close(fh);

            horus_format_uuid(recomputed, uuid, HORUS_FMT_STR);
            RETVAL = (id_len == HORUS_FMT_STR_LEN &&
                      memcmp(id_str, recomputed, HORUS_FMT_STR_LEN) == 0)
                     ? TRUE : FALSE;
        }
    OUTPUT:
        RETVAL

# op_remove($self, $id) -> bool
# Fused path + unlink + meta cleanup — single call.

bool
op_remove(self, id)
        SV *self
        SV *id
    PREINIT:
        HV *hv;
        const char *store_dir;
        STRLEN store_dir_len;
        const char *id_str;
        STRLEN id_len;
        char path[APOPHIS_PATH_MAX];
        int path_len;
        char meta_path[APOPHIS_PATH_MAX];
        int removed;
    CODE:



( run in 0.521 second using v1.01-cache-2.11-cpan-140bd7fdf52 )