Text-VisualWidth
view release on metacpan or search on metacpan
VisualWidth.xs view on Meta::CPAN
(*pos)++; (*byte)++;
if( **pos >= 0xa1 && **pos <= 0xfe ) { (*pos)++; (*byte)++; }
if( **pos >= 0xa1 && **pos <= 0xfe ){
(*pos)++;
(*byte)++;
return 2;
}
return 1;
}else if( **pos >= 0xa1 && **pos <= 0xfe ){
(*pos)++;
(*byte)++;
if( **pos >= 0xa1 && **pos <= 0xfe ){
(*pos)++;
(*byte)++;
return 2;
}
return 1;
}
(*pos)++;
(*byte)++;
return 1;
}
/*
 * Sum the per-character widths reported by count_single_char_eucjp()
 * over the string buffer of `str`, stopping at the first NUL byte.
 *
 * str: Perl scalar whose string buffer is scanned.
 *
 * Returns a new integer SV holding the accumulated width.
 */
SV* get_visualwidth_eucjp( SV* str ){
    const unsigned char* cursor = (const unsigned char*)SvPV_nolen(str);
    unsigned int total_width = 0;
    int consumed = 0;   /* filled in by the counter; not used here */

    /* The counter advances `cursor` itself on every call. */
    for( ; *cursor != 0; ){
        total_width += count_single_char_eucjp( &cursor, &consumed );
    }
    return newSViv(total_width);
}
/*
 * Build a new string SV from the leading bytes of `str` whose accumulated
 * width, as reported by count_single_char_eucjp(), does not exceed the
 * integer value of `length_sv`.
 *
 * str:       Perl scalar whose string buffer is scanned.
 * length_sv: Perl scalar read with SvIV() and stored as an unsigned limit.
 *
 * Returns a new SV containing the kept prefix (possibly empty).
 */
SV* trim_visualwidth_eucjp( SV* str, SV* length_sv ){
    unsigned int limit = SvIV(length_sv);
    unsigned char* start = (unsigned char *)SvPV_nolen(str);
    unsigned char* cursor = start;
    unsigned int kept_bytes = 0;
    unsigned int kept_width = 0;
    int step_bytes = 0;
    int step_width = 0;

    for( ;; ){
        step_width = count_single_char_eucjp( (const unsigned char **)&cursor, &step_bytes );

        /* Stop when the counter reports no bytes, or when adding this
         * character would push the width past the limit. */
        if( !step_bytes || ( step_width + kept_width ) > limit ){
            break;
        }
        kept_width += step_width;
        kept_bytes += step_bytes;
    }
    return newSVpvn((const char *)start, kept_bytes);
}
/*
 * Consume one character from a NUL-terminated UTF-8 byte string and report
 * how many display columns it is counted as.
 *
 * pos:  in/out cursor into the string; advanced past the consumed bytes.
 * byte: out; set to the number of bytes consumed (0 at end of string).
 *
 * Returns the width assigned to the consumed sequence:
 *   0  end of string (nothing consumed), or the 3-byte sequence EF BB BF (BOM)
 *   1  a well-formed 2-byte sequence
 *   1  a 3-byte sequence in EF BD A1..EF BD BF or EF BE 80..EF BE 9F
 *      (U+FF61..U+FF9F, the halfwidth forms)
 *   2  any other well-formed 3-byte sequence, and any well-formed 4-byte sequence
 *   1  anything else, consumed as a single byte (ASCII or a malformed lead byte)
 *
 * Look-ahead bytes are only inspected after the preceding byte matched, and a
 * NUL never matches a continuation test, so reads stay inside the terminated
 * buffer.
 */
int count_single_char_utf8( const unsigned char** pos, int* byte ){
    const unsigned char* p;
    int consumed;
    int width;

    *byte = 0;
    p = *pos;
    if( p[0] == 0 ){
        return 0;
    }

    if( p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf ){
        /* BOM */
        consumed = 3;
        width = 0;
    } else if( ( p[0] & 0xe0 ) == 0xc0 && ( p[1] & 0xc0 ) == 0x80 ){
        /* 2-byte sequence */
        consumed = 2;
        width = 1;
    } else if( ( p[0] & 0xf0 ) == 0xe0 && ( p[1] & 0xc0 ) == 0x80 && ( p[2] & 0xc0 ) == 0x80 ){
        /* 3-byte sequence: halfwidth range is narrow, everything else wide */
        int is_halfwidth = p[0] == 0xef
            && ( ( p[1] == 0xbd && p[2] >= 0xa1 && p[2] <= 0xbf )
              || ( p[1] == 0xbe && p[2] >= 0x80 && p[2] <= 0x9f ) );
        consumed = 3;
        width = is_halfwidth ? 1 : 2;
    } else if( ( p[0] & 0xf8 ) == 0xf0 && ( p[1] & 0xc0 ) == 0x80
            && ( p[2] & 0xc0 ) == 0x80 && ( p[3] & 0xc0 ) == 0x80 ){
        /* 4-byte sequence */
        consumed = 4;
        width = 2;
    } else {
        /* single byte (also the fallback for malformed sequences) */
        consumed = 1;
        width = 1;
    }

    *pos = p + consumed;
    *byte = consumed;
    return width;
}
/*
 * Sum the per-character widths reported by count_single_char_utf8()
 * over the string buffer of `str`, stopping at the first NUL byte.
 *
 * str: Perl scalar whose string buffer is scanned.
 *
 * Returns a new integer SV holding the accumulated width.
 */
SV* get_visualwidth_utf8( SV* str ){
    const unsigned char* cursor = (const unsigned char*)SvPV_nolen(str);
    unsigned int total_width = 0;
    int consumed = 0;   /* filled in by the counter; not used here */

    /* The counter advances `cursor` itself on every call. */
    for( ; *cursor != 0; ){
        total_width += count_single_char_utf8( &cursor, &consumed );
    }
    return newSViv(total_width);
}
SV* trim_visualwidth_utf8( SV* str, SV* length_sv ){
unsigned int length = SvIV(length_sv);
int byte = 0;
unsigned int byte_length = 0;
unsigned int view_length = 0;
int view_char = 0;
int continue_flg = 1;
unsigned char* default_pos = (unsigned char *)SvPV_nolen(str);
unsigned char* pos = default_pos;
unsigned char** posstr = &pos;
while( continue_flg ){
view_char = count_single_char_utf8( (const unsigned char **)posstr, &byte );
if( byte && ( view_char + view_length ) <= length ){
view_length += view_char;
byte_length += byte;
}else{
continue_flg = 0;
}
( run in 0.606 second using v1.01-cache-2.11-cpan-39bf76dae61 )