Alien-Libjio
view release on metacpan or search on metacpan
libjio/libjio/journal.c view on Meta::CPAN
/*
* Internal journal
*/
#include <sys/types.h> /* [s]size_t */
#include <sys/stat.h> /* open() */
#include <fcntl.h> /* open() */
#include <unistd.h> /* f[data]sync(), close() */
#include <stdlib.h> /* malloc() and friends */
#include <limits.h> /* PATH_MAX */
#include <string.h> /* memcpy() */
#include <stdio.h> /* fprintf() */
#include <errno.h> /* errno */
#include <stdint.h> /* uintX_t */
#include <arpa/inet.h> /* htonl() and friends */
#include <netinet/in.h> /* htonl() and friends (on some platforms) */
#include "libjio.h"
#include "common.h"
#include "compat.h"
#include "journal.h"
#include "trans.h"
/*
* On-disk structures
*
* Each transaction will be stored on disk as a single file, composed of a
* header, operation information, and a trailer. The operation information is
* composed of repeated operation headers followed by their corresponding
* data, one for each operation. A special operation header containing all 0s
* marks the end of the operations.
*
* Visually, something like this:
*
* +--------+---------+----------+---------+----------+-----+-----+---------+
* | header | op1 hdr | op1 data | op2 hdr | op2 data | ... | eoo | trailer |
* +--------+---------+----------+---------+----------+-----+-----+---------+
*          \                                                     /
*           +------------------- operations --------------------+
*
* The details of each part can be seen on the following structures. All
* integers are stored in network byte order.
*/
/** Transaction file header.
 *
 * First record of every transaction file. journal_new() writes it with
 * ver set to 1, and fill_trans() rejects any file whose version is not 1.
 * The struct is packed because it is written to and read from disk as a raw
 * block of sizeof(struct on_disk_hdr) bytes, so no padding may be inserted
 * between the fields. */
struct on_disk_hdr {
/* on-disk format version */
uint16_t ver;
/* transaction flags, as given to journal_new() */
uint16_t flags;
/* id of the transaction stored in this file */
uint32_t trans_id;
} __attribute__((packed));
/** Transaction file operation header.
 *
 * journal_add_op() writes one of these immediately followed by len bytes of
 * operation data. A header with both len and offset set to 0 marks the end
 * of the operations (see fill_trans()). Packed, as it is written to disk
 * as-is. */
struct on_disk_ophdr {
/* length in bytes of the data that follows this header */
uint32_t len;
/* offset of the operation -- presumably within the journaled file; confirm
 * against the callers of journal_add_op() */
uint64_t offset;
} __attribute__((packed));
/** Transaction file trailer.
 *
 * Last record of a transaction file. fill_trans() compares numops against
 * the number of operations it actually parsed. corrupt_journal_file()
 * appends a trailer with numops 0 and checksum 0xffffffff to invalidate a
 * file. Packed, as it is written to disk as-is. */
struct on_disk_trailer {
/* number of operations stored in the transaction file */
uint32_t numops;
/* checksum -- presumably covering everything written before the trailer;
 * TODO confirm against the code that writes the trailer */
uint32_t checksum;
} __attribute__((packed));
/* Convert structs to/from host to network (disk) endian */
/** Convert a transaction file header, in place, from host byte order to
 * network (on-disk) byte order. */
static void hdr_hton(struct on_disk_hdr *hdr)
{
	const uint16_t host_ver = hdr->ver;
	const uint16_t host_flags = hdr->flags;
	const uint32_t host_id = hdr->trans_id;

	hdr->trans_id = htonl(host_id);
	hdr->flags = htons(host_flags);
	hdr->ver = htons(host_ver);
}
/** Convert a transaction file header, in place, from network (on-disk) byte
 * order to host byte order. */
static void hdr_ntoh(struct on_disk_hdr *hdr)
{
	const uint16_t disk_ver = hdr->ver;
	const uint16_t disk_flags = hdr->flags;
	const uint32_t disk_id = hdr->trans_id;

	hdr->trans_id = ntohl(disk_id);
	hdr->flags = ntohs(disk_flags);
	hdr->ver = ntohs(disk_ver);
}
/** Convert an operation header, in place, from host byte order to network
 * (on-disk) byte order. */
static void ophdr_hton(struct on_disk_ophdr *ophdr)
{
	const uint32_t host_len = ophdr->len;
	const uint64_t host_offset = ophdr->offset;

	ophdr->offset = htonll(host_offset);
	ophdr->len = htonl(host_len);
}
/** Convert an operation header, in place, from network (on-disk) byte order
 * to host byte order. */
static void ophdr_ntoh(struct on_disk_ophdr *ophdr)
{
	const uint32_t disk_len = ophdr->len;
	const uint64_t disk_offset = ophdr->offset;

	ophdr->offset = ntohll(disk_offset);
	ophdr->len = ntohl(disk_len);
}
/** Convert a transaction file trailer, in place, from host byte order to
 * network (on-disk) byte order. */
static void trailer_hton(struct on_disk_trailer *trailer)
{
	const uint32_t host_numops = trailer->numops;
	const uint32_t host_csum = trailer->checksum;

	trailer->checksum = htonl(host_csum);
	trailer->numops = htonl(host_numops);
}
/** Convert a transaction file trailer, in place, from network (on-disk) byte
 * order to host byte order. */
static void trailer_ntoh(struct on_disk_trailer *trailer)
{
	const uint32_t disk_numops = trailer->numops;
	const uint32_t disk_csum = trailer->checksum;

	trailer->checksum = ntohl(disk_csum);
	trailer->numops = ntohl(disk_numops);
}
/*
* Helper functions
*/
/** Get a new transaction id.
 *
 * With the whole file behind fs->jfd locked, reads the current maximum id
 * through fs->jmap, and stores the incremented value back unless the
 * increment wrapped around to 0.
 *
 * @returns the new transaction id, or 0 if the id counter overflowed (the
 * stored counter is left untouched in that case) */
static unsigned int get_tid(struct jfs *fs)
{
	unsigned int last, next;

	/* lock the whole file while the counter is read and updated */
	plockf(fs->jfd, F_LOCKW, 0, 0);

	/* current maximum id */
	last = *(fs->jmap);
	fiu_do_on("jio/get_tid/overflow", last = -1);

	/* callers treat 0 as failure, so a wrapped-around value is returned
	 * but never stored */
	next = last + 1;
	if (next != 0)
		*(fs->jmap) = next;

	plockf(fs->jfd, F_UNLOCK, 0, 0);
	return next;
}
/** Free a transaction id */
static void free_tid(struct jfs *fs, unsigned int tid)
{
unsigned int curid, i;
libjio/libjio/journal.c view on Meta::CPAN
/** Corrupt a journal file. Used as a last resource to prevent an applied
* transaction file laying around */
static int corrupt_journal_file(struct journal_op *jop)
{
off_t pos;
struct on_disk_trailer trailer;
/* We set the number of operations to 0, and the checksum to
* 0xffffffff, so there is no chance it's considered valid after a new
* transaction overwrites this one */
trailer.numops = 0;
trailer.checksum = 0xffffffff;
pos = lseek(jop->fd, 0, SEEK_END);
if (pos == (off_t) -1)
return -1;
if (pwrite(jop->fd, (void *) &trailer, sizeof(trailer), pos)
!= sizeof(trailer))
return -1;
if (fdatasync(jop->fd) != 0)
return -1;
return 0;
}
/** Mark the journal as broken. To do so, we just create a file named "broken"
 * inside the journal directory. Used internally to mark severe journal errors
 * that should prevent further journal use to avoid potential corruption, like
 * failures to remove transaction files. The mark is removed by jfsck().
 *
 * Note the return convention differs from most of this file.
 *
 * @returns 1 if the mark file was created (or already existed and could be
 * opened), 0 if it could not be created */
static int mark_broken(struct jfs *fs)
{
	char broken_path[PATH_MAX];
	int fd;

	snprintf(broken_path, PATH_MAX, "%s/broken", fs->jdir);
	fd = creat(broken_path, 0600);
	if (fd < 0) {
		/* nothing to close; returning here also keeps the errno set
		 * by creat() instead of replacing it with close()'s EBADF */
		return 0;
	}

	close(fd);
	return 1;
}
/** Check if the journal is broken, i.e. whether a file named "broken" is
 * present in the journal directory.
 *
 * @returns 1 if the mark file exists, 0 otherwise */
static int is_broken(struct jfs *fs)
{
	char mark_path[PATH_MAX];

	snprintf(mark_path, sizeof(mark_path), "%s/broken", fs->jdir);

	if (access(mark_path, F_OK) != 0)
		return 0;

	return 1;
}
/*
* Journal functions
*/
/** Create a new transaction in the journal.
 *
 * Refuses to work on a journal marked as broken. Otherwise it obtains a new
 * transaction id, creates and locks the matching transaction file, writes
 * the on-disk header to it and starts the running checksum with that header.
 *
 * Once an id has been obtained, every failure path hands it back through
 * free_tid() before returning.
 *
 * @param fs the journal to create the transaction in
 * @param flags transaction flags, stored in the on-disk header
 * @returns a pointer to an opaque jop_t (that is freed using journal_free),
 * or NULL if there was an error */
struct journal_op *journal_new(struct jfs *fs, unsigned int flags)
{
	int fd, id;
	ssize_t rv;
	char *name = NULL;
	struct journal_op *jop = NULL;
	struct on_disk_hdr hdr;
	struct iovec iov[1];

	if (is_broken(fs))
		goto error;

	jop = malloc(sizeof(*jop));
	if (jop == NULL)
		goto error;

	name = malloc(PATH_MAX);
	if (name == NULL)
		goto error;

	id = get_tid(fs);
	if (id == 0)
		goto error;

	/* open the transaction file */
	get_jtfile(fs, id, name);
	fd = open(name, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0) {
		/* the id was already handed out by get_tid(); release it just
		 * like the unlink_error path does, there is only no file to
		 * unlink or close yet */
		free_tid(fs, id);
		goto error;
	}

	if (plockf(fd, F_LOCKW, 0, 0) != 0)
		goto unlink_error;

	jop->id = id;
	jop->fd = fd;
	jop->numops = 0;
	jop->name = name;
	jop->csum = 0;
	jop->fs = fs;

	fiu_exit_on("jio/commit/created_tf");

	/* save the header */
	hdr.ver = 1;
	hdr.trans_id = id;
	hdr.flags = flags;
	hdr_hton(&hdr);

	iov[0].iov_base = (void *) &hdr;
	iov[0].iov_len = sizeof(hdr);
	rv = swritev(fd, iov, 1);
	if (rv != sizeof(hdr))
		goto unlink_error;

	/* the checksum covers the header exactly as it was written to disk */
	jop->csum = checksum_buf(jop->csum, (unsigned char *) &hdr,
			sizeof(hdr));

	fiu_exit_on("jio/commit/tf_header");

	return jop;

unlink_error:
	unlink(name);
	free_tid(fs, id);
	close(fd);

error:
	/* both pointers are either NULL or own their allocation here */
	free(name);
	free(jop);

	return NULL;
}
/** Save a single operation in the journal file */
int journal_add_op(struct journal_op *jop, unsigned char *buf, size_t len,
off_t offset)
{
ssize_t rv;
struct on_disk_ophdr ophdr;
struct iovec iov[2];
ophdr.len = len;
ophdr.offset = offset;
ophdr_hton(&ophdr);
iov[0].iov_base = (void *) &ophdr;
iov[0].iov_len = sizeof(ophdr);
jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr,
sizeof(ophdr));
iov[1].iov_base = (void *) buf;
iov[1].iov_len = len;
jop->csum = checksum_buf(jop->csum, buf, len);
fiu_exit_on("jio/commit/tf_pre_addop");
rv = swritev(jop->fd, iov, 2);
if (rv != sizeof(ophdr) + len)
goto error;
fiu_exit_on("jio/commit/tf_addop");
jop->numops++;
return 0;
libjio/libjio/journal.c view on Meta::CPAN
/* we do not want to leave a possibly complete transaction
* file around when the transaction was not committed and the
* unlink failed, so we attempt to truncate it, and if that
* fails we corrupt it as a last resort. */
if (ftruncate(jop->fd, 0) != 0) {
if (corrupt_journal_file(jop) != 0) {
mark_broken(jop->fs);
goto exit;
}
}
}
if (fsync_dir(jop->fs->jdirfd) != 0) {
mark_broken(jop->fs);
goto exit;
}
fiu_exit_on("jio/commit/pre_ok_free_tid");
free_tid(jop->fs, jop->id);
rv = 0;
exit:
close(jop->fd);
free(jop->name);
free(jop);
return rv;
}
/** Fill a transaction structure from a mmapped transaction file. Useful for
* checking purposes.
* @returns 0 on success, -1 if the file was broken, -2 if the checksums didn't
* match
*/
int fill_trans(unsigned char *map, off_t len, struct jtrans *ts)
{
int rv;
unsigned char *p;
struct operation *op, *tmp;
struct on_disk_hdr hdr;
struct on_disk_ophdr ophdr;
struct on_disk_trailer trailer;
rv = -1;
if (len < sizeof(hdr) + sizeof(ophdr) + sizeof(trailer))
return -1;
p = map;
memcpy(&hdr, p, sizeof(hdr));
p += sizeof(hdr);
hdr_ntoh(&hdr);
if (hdr.ver != 1)
return -1;
ts->id = hdr.trans_id;
ts->flags = hdr.flags;
ts->numops_r = 0;
ts->numops_w = 0;
ts->len_w = 0;
for (;;) {
if (p + sizeof(ophdr) > map + len)
goto error;
memcpy(&ophdr, p, sizeof(ophdr));
p += sizeof(ophdr);
ophdr_ntoh(&ophdr);
if (ophdr.len == 0 && ophdr.offset == 0) {
/* This header marks the end of the operations */
break;
}
if (p + ophdr.len > map + len)
goto error;
op = malloc(sizeof(struct operation));
if (op == NULL)
goto error;
op->len = ophdr.len;
op->offset = ophdr.offset;
op->direction = D_WRITE;
op->buf = (void *) p;
p += op->len;
op->pdata = NULL;
if (ts->op == NULL) {
ts->op = op;
op->prev = NULL;
op->next = NULL;
} else {
for (tmp = ts->op; tmp->next != NULL; tmp = tmp->next)
;
tmp->next = op;
op->prev = tmp;
op->next = NULL;
}
ts->numops_w++;
ts->len_w += op->len;
}
if (p + sizeof(trailer) > map + len)
goto error;
memcpy(&trailer, p, sizeof(trailer));
p += sizeof(trailer);
trailer_ntoh(&trailer);
if (trailer.numops != ts->numops_w)
goto error;
( run in 1.185 second using v1.01-cache-2.11-cpan-524268b4103 )