Math-String-Charset-Wordlist
view release on metacpan or search on metacpan
Wordlist.xs view on Meta::CPAN
(C) 2003-2004 by Tels <http://bloodgate.com/perl/>
Provide routines that let us get the offsets and records from a file
containing a list of words (one word on each line)
*/
/* Bookkeeping for one indexed wordlist file. */
struct Offsets
  {
  long *record_offsets;   /* file offsets, one entry per line of the file */
  long max_offsets;       /* number of entries currently stored */
  long cur_size;          /* number of slots allocated in record_offsets */
  unsigned int eof;       /* set to 1 when we saw the EOF; _file() sets it,
                             nothing in the visible code reads it */
  FILE *file;             /* the wordlist file */
  };
/* if buffer below grows bigger than 8192 bytes, adapt test in testsuite! */
/* Size in bytes of the block read from the file at once, and of the scratch
   buffer a single record is read into. Parenthesized so the macro expands
   correctly inside larger expressions (e.g. x / READ_BUFFER_SIZE). */
#define READ_BUFFER_SIZE (8 * 1024)
/* Number of offset slots (longs) allocated, and added on each growth step. */
#define BUFFER_SIZE 8192
MODULE = Math::String::Charset::Wordlist PACKAGE = Math::String::Charset::Wordlist
PROTOTYPES: ENABLE
#############################################################################
# 2003-04-26 0.01 Tels
# * first try
# 2003-04-27 0.02 Tels
# * _offset(): return undef on negative indices
# * removed unused global variables (esp. 8K buffer)
# * _file(): read block-wise
##############################################################################
# _file(n) - open the wordlist file named by n and index it by recording the
# byte offset at which every line starts. The file stays open inside the
# returned structure (it is closed by _free()).
# Returns a string scalar holding the struct Offsets on success, undef if the
# file cannot be opened.
void
_file(n)
    SV* n
  INIT:
    int c;
    STRLEN len;
    const char* name;
    FILE* fh;
    struct Offsets* offset;
    unsigned char read_buffer[READ_BUFFER_SIZE];
    long idx, base, ofs;
    size_t got;
  PPCODE:
    /* SvPV_nolen() stringifies when needed; SvPVX() is only valid on a PV */
    name = SvPV_nolen(n);
    /* binary mode: the offsets are counted from the bytes fread() delivers
       and later handed to fseek(), so no line-end translation may happen */
    fh = fopen( name, "rb");
    if (fh == NULL)
      {
      printf ("Cannot open file %s\n", name);
      ST(0) = &PL_sv_undef;
      XSRETURN(1);
      }
    len = sizeof (struct Offsets);
    /* the return value must be mortal, otherwise it is never released */
    ST(0) = sv_2mortal(newSV(len));       /* room for one struct Offsets */
    SvPOK_on(ST(0));
    SvCUR_set(ST(0), len);                /* and set real length */
    SvPVX(ST(0))[len] = '\0';             /* newSV(len) allocates len + 1 */
    offset = (struct Offsets*) SvPVX(ST(0));
    offset->file = fh;
    offset->eof = 1;
    offset->max_offsets = 0;
    New( 42, offset->record_offsets, BUFFER_SIZE, long);
    offset->cur_size = BUFFER_SIZE;
    /* ofs is the start of the current line relative to the current block
       (negative if the line began in an earlier block), base is the file
       offset of the current block, so ofs + base is always the absolute
       start of the current line */
    ofs = 0;
    base = 0;
    idx = 0;
    c = 0x0a;           /* an empty file has no unterminated last line */
    got = fread(
      read_buffer, sizeof(unsigned char), READ_BUFFER_SIZE, fh);
    while (got != 0)
      {
      c = read_buffer[idx]; idx++;
      if (c == 0x0a)
        {
        /* line end: store where this line started, next line starts here */
        offset->record_offsets[offset->max_offsets++] = ofs + base;
        ofs = idx;
        /* keep at least one free slot, so the append above (and the one
           after the loop) never needs a check of its own */
        if (offset->max_offsets >= offset->cur_size)
          {
          Renew( offset->record_offsets, offset->cur_size + BUFFER_SIZE, long);
          offset->cur_size += BUFFER_SIZE;
          }
        }
      if ((size_t) idx == got)
        {
        /* block used up: fetch the next one and rebase the positions */
        got = fread(
          read_buffer, sizeof(unsigned char), READ_BUFFER_SIZE, fh);
        base += idx; ofs -= idx; idx = 0;
        }
      }
    if (c != 0x0a)
      {
      /* the last character in the file was not a line end, so the final
         line has not been recorded yet */
      offset->record_offsets[offset->max_offsets++] = ofs + base;
      }
    XSRETURN(1);
void
_free(ptr)
    SV* ptr
  INIT:
    /* _free(ptr) - close the wordlist file and release the offset table
       held by the struct Offsets stored in the string buffer of ptr.
       Does nothing when ptr is not a string large enough to hold one. */
    struct Offsets* offset;
  CODE:
    offset = NULL;
    if (SvPOK(ptr) && SvCUR(ptr) >= sizeof (struct Offsets))
      {
      offset = (struct Offsets*) SvPVX(ptr);    /* get ptr to storage */
      }
    if (offset != NULL)
      {
      if (offset->file != NULL)
        {
        fclose(offset->file);
        offset->file = NULL;
        }
      Safefree(offset->record_offsets);
      /* reset the structure so that a second _free() on the same scalar
         neither closes nor frees anything twice.
         NOTE(review): scalars copied from ptr earlier that own a separate
         buffer still contain the old pointers - confirm callers keep only
         one copy of the handle. */
      offset->record_offsets = NULL;
      offset->max_offsets = 0;
      offset->cur_size = 0;
      }
##############################################################################
# _records(ptr) - return the number of records (recorded line offsets) stored
# in the struct Offsets held by ptr, or undef if ptr is not a string large
# enough to hold that structure.
void
_records(ptr)
    SV* ptr
  INIT:
    struct Offsets* offset;
  PPCODE:
    /* SvPVX() may only be dereferenced as a struct Offsets when the scalar
       really carries a string buffer of at least that size */
    if (!SvPOK(ptr) || SvCUR(ptr) < sizeof (struct Offsets))
      {
      ST(0) = &PL_sv_undef;
      XSRETURN(1);
      }
    offset = (struct Offsets*) SvPVX(ptr);      /* get ptr to storage */
    ST(0) = sv_2mortal( newSVnv( offset->max_offsets ));
    XSRETURN(1);
##############################################################################
# _offset(ptr,n) - return the file offset stored for record n (counting from
# 0) in the struct Offsets held by ptr. Returns undef when n is negative,
# when fewer than n+1 offsets are stored, or when ptr is not a string large
# enough to hold the structure. Nothing is read from the file here; only
# offsets already stored in the structure are looked up.
void
_offset(ptr,n)
    SV* ptr
    SV* n
  INIT:
    IV N;
    struct Offsets* offset;
  PPCODE:
    /* ptr and n were fetched from the stack already, so ST(0) can be
       preset to the "no such record" answer */
    ST(0) = &PL_sv_undef;
    if (SvPOK(ptr) && SvCUR(ptr) >= sizeof (struct Offsets))
      {
      offset = (struct Offsets*) SvPVX(ptr);    /* get ptr to storage */
      /* SvIV() yields a defined integer for any input, whereas converting
         an out-of-range floating point value to long is undefined */
      N = SvIV(n);
      /* offset exists? */
      if (offset->record_offsets != NULL
          && N >= 0 && N < (IV) offset->max_offsets)
        {
        ST(0) = sv_2mortal( newSVnv( offset->record_offsets[N] ));
        }
      }
    XSRETURN(1);
##############################################################################
# _record(ptr,n) - return record number n (counting from 0): the line that
# starts at the n-th stored offset, with a trailing LF and then a trailing CR
# removed. At most READ_BUFFER_SIZE - 1 bytes of the line are read.
# Returns undef when ptr does not hold a struct Offsets, when n is negative
# or not smaller than the number of stored offsets, or when seeking or
# reading in the file fails.
void
_record(ptr,n)
    SV* ptr
    SV* n
  INIT:
    IV N;
    long ofs;
    STRLEN len;
    char* buf;
    struct Offsets* offset;
  PPCODE:
    N = SvIV(n);
    offset = NULL;
    if (SvPOK(ptr) && SvCUR(ptr) >= sizeof (struct Offsets))
      {
      offset = (struct Offsets*) SvPVX(ptr);    /* get ptr to storage */
      }
    if (offset == NULL)
      {
      printf ("Offset is empty!");
      ST(0) = &PL_sv_undef;
      XSRETURN(1);
      }
    if (N < 0 || N >= (IV) offset->max_offsets
        || offset->record_offsets == NULL || offset->file == NULL)
      {
      /* offset (and thus record) does not exist, or nothing to read from */
      ST(0) = &PL_sv_undef;
      XSRETURN(1);
      }
    /* keep the offset as long: that is how it is stored and what fseek()
       takes, a narrower type would truncate it */
    ofs = offset->record_offsets[N];
    /* seek to the position */
    if (fseek (offset->file, ofs, SEEK_SET) != 0)
      {
      ST(0) = &PL_sv_undef;
      XSRETURN(1);
      }
    ST(0) = sv_2mortal(newSV(READ_BUFFER_SIZE));  /* alloc scratch buffer */
    buf = SvPVX(ST(0));                           /* get ptr to storage */
    /* read in the record; on failure fgets() leaves the buffer contents
       unspecified, so it must not be measured with strlen() */
    if (fgets(buf, READ_BUFFER_SIZE, offset->file) == NULL)
      {
      ST(0) = &PL_sv_undef;
      XSRETURN(1);
      }
    len = strlen(buf);
    if (len > 0 && buf[len-1] == 0x0a)
      {
      len--;                            /* kill the 0x0a character at end */
      buf[len] = 0;
      }
    if (len > 0 && buf[len-1] == 0x0d)
      {
      len--;                            /* kill the 0x0d character at end */
      buf[len] = 0;
      }
    SvPOK_on(ST(0));
    SvCUR_set(ST(0), len);              /* and set real length */
    XSRETURN(1);
( run in 0.937 second using v1.01-cache-2.11-cpan-71847e10f99 )