Note: This project is an experimental library. It is not designed for production use, and there may be bugs, limitations, or incomplete features. Use at your own discretion, and feel free to collaborate.
Mojibake is a low-level Unicode library written in C99.
Initialize the library. Calling this function is optional
bool mjb_initialize(void);
Initialize the library with custom values. Calling this function is optional
bool mjb_initialize_v2(mjb_alloc_fn alloc_fn, mjb_realloc_fn realloc_fn, mjb_free_fn free_fn);
Shut down the library. Calling this function is optional
void mjb_shutdown(void);
Allocate and zero memory
void *mjb_alloc(size_t size);
Reallocate memory
void *mjb_realloc(void *ptr, size_t new_size);
Free memory
void mjb_free(void *ptr);
Output the current library version (MJB_VERSION)
char *mjb_version(void);
Output the current library version number (MJB_VERSION_NUMBER)
unsigned int mjb_version_number(void);
Output the current supported Unicode version (MJB_UNICODE_VERSION)
char *mjb_unicode_version(void);
Return true if the plane is valid
bool mjb_plane_is_valid(mjb_plane plane);
Return the name of a plane, NULL if the plane specified is not valid
const char *mjb_plane_name(mjb_plane plane, bool abbreviation);
Return the string encoding (the most probable)
mjb_encoding mjb_string_encoding(const char *buffer, size_t size);
Return true if the string is encoded in UTF-8
bool mjb_string_is_utf8(const char *buffer, size_t size);
Return the next codepoint in the string
mjb_codepoint mjb_string_next_codepoint(const char *buffer, size_t size, size_t *next);
Return true if the string is encoded in ASCII
bool mjb_string_is_ascii(const char *buffer, size_t size);
Encode a codepoint to a string
unsigned int mjb_codepoint_encode(mjb_codepoint codepoint, char *buffer, size_t size, mjb_encoding encoding);
Return true if the codepoint is valid
bool mjb_codepoint_is_valid(mjb_codepoint codepoint);
Return the codepoint character
bool mjb_codepoint_character(mjb_character *character, mjb_codepoint codepoint);
Return the Hangul syllable name
bool mjb_hangul_syllable_name(mjb_codepoint codepoint, char *buffer, size_t size);
Hangul syllable decomposition
bool mjb_hangul_syllable_decomposition(mjb_codepoint codepoint, mjb_codepoint *codepoints);
Return true if the codepoint is a Hangul syllable
bool mjb_codepoint_is_hangul_syllable(mjb_codepoint codepoint);
Return true if the codepoint is a CJK ideograph
bool mjb_codepoint_is_cjk_ideograph(mjb_codepoint codepoint);
Return true if the codepoint has the category
bool mjb_codepoint_category_is(mjb_codepoint codepoint, mjb_category category);
Return true if the codepoint has the block
bool mjb_codepoint_block_is(mjb_codepoint codepoint, mjb_block block);
Return true if the codepoint is graphic
bool mjb_codepoint_is_graphic(mjb_codepoint codepoint);
Return true if the codepoint is combining
bool mjb_codepoint_is_combining(mjb_codepoint codepoint);
Return true if the category is combining
bool mjb_category_is_combining(mjb_category category);
Return the lowercase codepoint of the given codepoint
mjb_codepoint mjb_codepoint_to_lowercase(mjb_codepoint codepoint);
Return the uppercase codepoint of the given codepoint
mjb_codepoint mjb_codepoint_to_uppercase(mjb_codepoint codepoint);
Return the titlecase codepoint of the given codepoint
mjb_codepoint mjb_codepoint_to_titlecase(mjb_codepoint codepoint);
Normalize a string
char *mjb_normalize(char *buffer, size_t size, size_t *output_size, mjb_encoding encoding, mjb_normalization form);
Sort
void mjb_sort(mjb_character arr[], size_t size);