GDevelop Core
Core library for developing platforms and tools compatible with GDevelop.
utf8proc.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 
/* Include guard: the matching #endif is the last directive of the header. */
52 #ifndef UTF8PROC_H
53 #define UTF8PROC_H
54 
/*
 * Version of the bundled utf8proc API, split into three integer macros
 * (major.minor.patch = 1.3.0).
 */
70 #define UTF8PROC_VERSION_MAJOR 1
72 #define UTF8PROC_VERSION_MINOR 3
74 #define UTF8PROC_VERSION_PATCH 0
77 #include <stdlib.h>
78 #include <sys/types.h>
/*
 * Portable aliases for the fixed-width integer, size and boolean types used
 * by every declaration in this header.
 *
 * - With MSVC (_MSC_VER) the aliases are spelled out with built-in types
 *   instead of relying on <stdbool.h>/<inttypes.h>; the size types follow
 *   the pointer width selected by _WIN64.
 * - Everywhere else the aliases simply forward to the C99 / POSIX types.
 */
#ifdef _MSC_VER
typedef signed char utf8proc_int8_t;
typedef unsigned char utf8proc_uint8_t;
typedef short utf8proc_int16_t;
typedef unsigned short utf8proc_uint16_t;
typedef int utf8proc_int32_t;
typedef unsigned int utf8proc_uint32_t;
# ifdef _WIN64
typedef __int64 utf8proc_ssize_t;
typedef unsigned __int64 utf8proc_size_t;
# else
typedef int utf8proc_ssize_t;
typedef unsigned int utf8proc_size_t;
# endif
# ifndef __cplusplus
typedef unsigned char utf8proc_bool;
/*
 * Emulate C99 bool constants, but only when nothing else has provided them.
 * An unconditional `enum {false, true};` does not compile once `true` and
 * `false` are already macros (e.g. <stdbool.h> was included first), so the
 * constants are defined as macros behind the standard feature-test macro.
 * Identical later redefinitions by <stdbool.h> remain legal.
 */
#  ifndef __bool_true_false_are_defined
#   define false 0
#   define true 1
#   define __bool_true_false_are_defined 1
#  endif
# else
typedef bool utf8proc_bool;
# endif
#else
# include <stdbool.h>
# include <inttypes.h>
typedef int8_t utf8proc_int8_t;
typedef uint8_t utf8proc_uint8_t;
typedef int16_t utf8proc_int16_t;
typedef uint16_t utf8proc_uint16_t;
typedef int32_t utf8proc_int32_t;
typedef uint32_t utf8proc_uint32_t;
typedef size_t utf8proc_size_t;
typedef ssize_t utf8proc_ssize_t; /* ssize_t comes from <sys/types.h> */
typedef bool utf8proc_bool;
#endif
112 #include <limits.h>
113 
/*
 * GDevelop-specific change: the export/visibility selection is disabled (kept
 * below, commented out, for reference) and UTF8PROC_DLLEXPORT is mapped
 * unconditionally onto GD_CORE_API.
 *
 * NOTE(review): GD_CORE_API is not defined anywhere in this header, so it has
 * to be visible before this file is included -- presumably through a GDevelop
 * header or a compiler definition; confirm against the build setup.
 */
114 //PATCH FOR GDevelop
115 
116 /*#ifdef _WIN32
117 # ifdef UTF8PROC_EXPORTS
118 # define UTF8PROC_DLLEXPORT __declspec(dllexport)
119 # else
120 # define UTF8PROC_DLLEXPORT __declspec(dllimport)
121 # endif
122 #elif __GNUC__ >= 4
123 # define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
124 #else
125 # define UTF8PROC_DLLEXPORT
126 #endif*/
127 
128 #define UTF8PROC_DLLEXPORT GD_CORE_API
129 
130 //END OF PATCH FOR GDevelop
131 
/* Give every declaration below C linkage when compiled as C++; the block is
 * closed again just before the include guard's #endif. */
132 #ifdef __cplusplus
133 extern "C" {
134 #endif
135 
/*
 * Fallback for platforms whose headers do not provide SSIZE_MAX: half of
 * SIZE_MAX. Because of the (size_t) cast the fallback value has the unsigned
 * type size_t, not a signed type.
 * NOTE(review): the fallback only expands correctly where SIZE_MAX is
 * visible; on the _MSC_VER path this header includes neither <stdint.h> nor
 * <inttypes.h>, so it presumably relies on <limits.h> there -- confirm.
 */
136 #ifndef SSIZE_MAX
137 #define SSIZE_MAX ((size_t)SIZE_MAX/2)
138 #endif
139 
/*
 * Option flags (the enum is named utf8proc_option_t in the prototypes below).
 * Every enumerator shown here is a distinct single bit, so values can be
 * OR-ed together.
 *
 * NOTE(review): this listing is incomplete. The cross-reference at the end of
 * the page also names UTF8PROC_NULLTERM, UTF8PROC_COMPOSE, UTF8PROC_DECOMPOSE,
 * UTF8PROC_REJECTNA, UTF8PROC_NLF2LF, UTF8PROC_STRIPCC, UTF8PROC_CHARBOUND
 * and UTF8PROC_STRIPMARK as members of this enum, and the closing
 * "} utf8proc_option_t;" is not shown either. Take their values from the real
 * header, not from this excerpt.
 */
143 typedef enum {
147  UTF8PROC_STABLE = (1<<1),
149  UTF8PROC_COMPAT = (1<<2),
155  UTF8PROC_IGNORE = (1<<5),
163  UTF8PROC_NLF2LS = (1<<7),
169  UTF8PROC_NLF2PS = (1<<8),
184  UTF8PROC_CASEFOLD = (1<<10),
197  UTF8PROC_LUMP = (1<<12),
206 
/*
 * Error codes. All are negative so they can share a return channel with
 * non-negative results of type utf8proc_ssize_t.
 *
 * The values are parenthesized so that each macro always expands to a single
 * primary expression, whatever tokens surround it at the point of use.
 *
 * Meanings below follow the macro names and the upstream utf8proc
 * documentation; confirm the exact wording against utf8proc_errmsg().
 */
/* Memory could not be allocated. */
#define UTF8PROC_ERROR_NOMEM (-1)
/* The input is too long to be processed. */
#define UTF8PROC_ERROR_OVERFLOW (-2)
/* The input is not valid UTF-8. */
#define UTF8PROC_ERROR_INVALIDUTF8 (-3)
/* An unassigned codepoint was found while UTF8PROC_REJECTNA was set. */
#define UTF8PROC_ERROR_NOTASSIGNED (-4)
/* An invalid combination of options was passed. */
#define UTF8PROC_ERROR_INVALIDOPTS (-5)
223 /* @name Types */
224 
/* 16-bit signed type used for the property value fields of
 * utf8proc_property_struct (e.g. combining_class). */
226 typedef utf8proc_int16_t utf8proc_propval_t;
227 
/*
 * Per-codepoint property record; utf8proc_get_property() returns a pointer
 * to a const instance of it.
 *
 * NOTE(review): this listing is incomplete. The cross-reference at the end of
 * the page also names the members `category`, `bidi_class` and `decomp_type`
 * (all utf8proc_propval_t), and the closing "} utf8proc_property_t;" is not
 * shown. Do not derive the struct layout from this excerpt.
 */
229 typedef struct utf8proc_property_struct {
235  utf8proc_propval_t combining_class;
/* Pointers to read-only arrays of codepoints; presumably a null pointer
 * means "no mapping" -- confirm against utf8proc.c. */
246  const utf8proc_int32_t *decomp_mapping;
247  const utf8proc_int32_t *casefold_mapping;
/* Single-codepoint case mappings. */
248  utf8proc_int32_t uppercase_mapping;
249  utf8proc_int32_t lowercase_mapping;
250  utf8proc_int32_t titlecase_mapping;
/* Presumably indices into the composition tables -- confirm. */
251  utf8proc_int32_t comb1st_index;
252  utf8proc_int32_t comb2nd_index;
/* One-bit boolean flags. */
253  unsigned bidi_mirrored:1;
254  unsigned comp_exclusion:1;
261  unsigned ignorable:1;
262  unsigned control_boundary:1;
/* 4-bit field (0..15); presumably holds a utf8proc_boundclass_t value. */
267  unsigned boundclass:4;
/* 2-bit field (0..3). */
269  unsigned charwidth:2;
271 
/*
 * NOTE(review): only the opening line of each of the next four enums is
 * shown; the enumerators and closing lines were elided from this listing.
 * The enum names and enumerator lists below are taken from the
 * cross-reference at the end of the page.
 */
/* utf8proc_category_t: UTF8PROC_CATEGORY_* (CN ... CO). */
273 typedef enum {
305 
/* utf8proc_bidi_class_t: UTF8PROC_BIDI_CLASS_* (L ... PDI). */
307 typedef enum {
332 
/* utf8proc_decomp_type_t: UTF8PROC_DECOMP_TYPE_* (FONT ... COMPAT). */
334 typedef enum {
352 
/* utf8proc_boundclass_t: UTF8PROC_BOUNDCLASS_* (START ... SPACINGMARK). */
354 typedef enum {
369 
/*
 * Exported API. Every declaration carries UTF8PROC_DLLEXPORT; the matching
 * definitions are in utf8proc.c. Descriptions marked "presumably" follow the
 * identifier names and upstream utf8proc documentation and are not verifiable
 * from this header alone -- confirm against utf8proc.c.
 */

/* Read-only 256-entry table of signed bytes. Presumably indexed by the first
 * byte of a UTF-8 sequence to classify it / give its length. */
374 UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
375 
/* Returns a string; presumably the library version ("major.minor.patch"). */
381 UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
382 
/* Returns a string for `errcode`; presumably a human-readable message for one
 * of the UTF8PROC_ERROR_* codes. */
387 UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
388 
/*
 * Presumably decodes one codepoint from the UTF-8 data at `str` (at most
 * `strlen` bytes) into `*codepoint_ref` and returns the number of bytes
 * consumed, or a negative UTF8PROC_ERROR_* code.
 * NOTE(review): the parameter name `strlen` is also the name of the <string.h>
 * function; harmless in a prototype, but worth renaming (e.g. `len`).
 */
399 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
400 
/* Returns a boolean for `codepoint`; presumably true when it is a valid
 * Unicode codepoint. */
407 UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
408 
/* Presumably writes the UTF-8 encoding of `codepoint` to `dst` and returns the
 * number of bytes written. `dst` is written through (non-const); no buffer
 * size is passed, so the caller must supply enough room -- confirm how much. */
418 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
419 
/* Returns a pointer to a const property record for `codepoint`; the caller
 * must not modify it. Ownership is presumably static -- confirm. */
432 UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint);
433 
/* Presumably decomposes a single codepoint into `dst` (capacity `bufsize`
 * codepoints) according to `options`; `last_boundclass` is an in/out int,
 * presumably grapheme-boundary state carried between calls. */
461 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
462  utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
463  utf8proc_option_t options, int *last_boundclass
464 );
465 
/* String-level counterpart: reads UTF-8 from `str`/`strlen` and fills
 * `buffer` (capacity `bufsize`) with 32-bit codepoints. Presumably returns the
 * number of codepoints required/written or a negative error code. */
482 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
483  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
484  utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
485 );
486 
/* Operates in place on `buffer` (non-const) holding `length` codepoints.
 * Presumably re-encodes them to UTF-8 inside the same buffer and returns the
 * resulting byte length or a negative error code. */
512 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
513 
/* Returns a boolean for a pair of codepoints; presumably true when a grapheme
 * cluster break is allowed between them. */
518 UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
519 
520 
/* Map a codepoint to a codepoint; presumably the lower-case form of `c`, or
 * `c` itself when there is none. */
526 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
527 
/* Upper-case counterpart of utf8proc_tolower(). */
533 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
534 
/* Returns an int for `codepoint`; presumably its display width in columns. */
543 UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint);
544 
/* Returns the utf8proc_category_t value for `codepoint`. */
549 UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint);
550 
/* String form of the category; presumably the two-letter Unicode general
 * category abbreviation. */
555 UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint);
556 
/* Takes UTF-8 input (`str`/`strlen`) and stores a result pointer in `*dstptr`.
 * Presumably the result is newly allocated and must be released by the
 * caller; the return value is presumably its length or a negative error
 * code. */
575 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
576  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
577 );
578 
/* Convenience wrappers taking only `str` (no length, so presumably
 * NUL-terminated) and returning a non-const buffer. Presumably they return the
 * NFD / NFC / NFKD / NFKC normalization in newly allocated memory owned by the
 * caller -- confirm ownership and failure value in utf8proc.c. */
588 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str);
590 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
592 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
594 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
/* Close the extern "C" block opened near the top of the header (C++ only). */
597 #ifdef __cplusplus
598 }
599 #endif
600 
/* End of the UTF8PROC_H include guard. */
601 #endif
utf8proc_property_struct
Definition: utf8proc.h:229
unsigned boundclass
Definition: utf8proc.h:267
unsigned charwidth
Definition: utf8proc.h:269
unsigned ignorable
Definition: utf8proc.h:261
utf8proc_propval_t category
Definition: utf8proc.h:234
utf8proc_propval_t bidi_class
Definition: utf8proc.h:240
utf8proc_propval_t decomp_type
Definition: utf8proc.h:245
UTF8PROC_DLLEXPORT utf8proc_uint8_t * utf8proc_NFD(const utf8proc_uint8_t *str)
Definition: utf8proc.c:615
UTF8PROC_DLLEXPORT const char * utf8proc_category_string(utf8proc_int32_t codepoint)
Definition: utf8proc.c:289
utf8proc_option_t
Definition: utf8proc.h:143
@ UTF8PROC_NULLTERM
Definition: utf8proc.h:145
@ UTF8PROC_LUMP
Definition: utf8proc.h:197
@ UTF8PROC_REJECTNA
Definition: utf8proc.h:157
@ UTF8PROC_NLF2LF
Definition: utf8proc.h:171
@ UTF8PROC_STRIPCC
Definition: utf8proc.h:179
@ UTF8PROC_NLF2LS
Definition: utf8proc.h:163
@ UTF8PROC_COMPOSE
Definition: utf8proc.h:151
@ UTF8PROC_CASEFOLD
Definition: utf8proc.h:184
@ UTF8PROC_STABLE
Definition: utf8proc.h:147
@ UTF8PROC_DECOMPOSE
Definition: utf8proc.h:153
@ UTF8PROC_IGNORE
Definition: utf8proc.h:155
@ UTF8PROC_NLF2PS
Definition: utf8proc.h:169
@ UTF8PROC_CHARBOUND
Definition: utf8proc.h:189
@ UTF8PROC_COMPAT
Definition: utf8proc.h:149
@ UTF8PROC_STRIPMARK
Definition: utf8proc.h:204
UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256]
Definition: utf8proc.c:47
UTF8PROC_DLLEXPORT utf8proc_uint8_t * utf8proc_NFC(const utf8proc_uint8_t *str)
Definition: utf8proc.c:622
UTF8PROC_DLLEXPORT utf8proc_uint8_t * utf8proc_NFKD(const utf8proc_uint8_t *str)
Definition: utf8proc.c:629
UTF8PROC_DLLEXPORT const char * utf8proc_errmsg(utf8proc_ssize_t errcode)
Definition: utf8proc.c:94
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst)
Definition: utf8proc.c:164
UTF8PROC_DLLEXPORT const char * utf8proc_version(void)
Definition: utf8proc.c:90
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options)
Definition: utf8proc.c:402
utf8proc_int16_t utf8proc_propval_t
Definition: utf8proc.h:226
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref)
Definition: utf8proc.c:112
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
Definition: utf8proc.c:267
struct utf8proc_property_struct utf8proc_property_t
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options)
Definition: utf8proc.c:464
utf8proc_decomp_type_t
Definition: utf8proc.h:334
@ UTF8PROC_DECOMP_TYPE_NOBREAK
Definition: utf8proc.h:336
@ UTF8PROC_DECOMP_TYPE_SUB
Definition: utf8proc.h:343
@ UTF8PROC_DECOMP_TYPE_INITIAL
Definition: utf8proc.h:337
@ UTF8PROC_DECOMP_TYPE_WIDE
Definition: utf8proc.h:345
@ UTF8PROC_DECOMP_TYPE_SMALL
Definition: utf8proc.h:347
@ UTF8PROC_DECOMP_TYPE_FONT
Definition: utf8proc.h:335
@ UTF8PROC_DECOMP_TYPE_CIRCLE
Definition: utf8proc.h:341
@ UTF8PROC_DECOMP_TYPE_ISOLATED
Definition: utf8proc.h:340
@ UTF8PROC_DECOMP_TYPE_NARROW
Definition: utf8proc.h:346
@ UTF8PROC_DECOMP_TYPE_SUPER
Definition: utf8proc.h:342
@ UTF8PROC_DECOMP_TYPE_FRACTION
Definition: utf8proc.h:349
@ UTF8PROC_DECOMP_TYPE_FINAL
Definition: utf8proc.h:339
@ UTF8PROC_DECOMP_TYPE_VERTICAL
Definition: utf8proc.h:344
@ UTF8PROC_DECOMP_TYPE_COMPAT
Definition: utf8proc.h:350
@ UTF8PROC_DECOMP_TYPE_SQUARE
Definition: utf8proc.h:348
@ UTF8PROC_DECOMP_TYPE_MEDIAL
Definition: utf8proc.h:338
utf8proc_boundclass_t
Definition: utf8proc.h:354
@ UTF8PROC_BOUNDCLASS_V
Definition: utf8proc.h:362
@ UTF8PROC_BOUNDCLASS_OTHER
Definition: utf8proc.h:356
@ UTF8PROC_BOUNDCLASS_START
Definition: utf8proc.h:355
@ UTF8PROC_BOUNDCLASS_EXTEND
Definition: utf8proc.h:360
@ UTF8PROC_BOUNDCLASS_CONTROL
Definition: utf8proc.h:359
@ UTF8PROC_BOUNDCLASS_SPACINGMARK
Definition: utf8proc.h:367
@ UTF8PROC_BOUNDCLASS_L
Definition: utf8proc.h:361
@ UTF8PROC_BOUNDCLASS_T
Definition: utf8proc.h:363
@ UTF8PROC_BOUNDCLASS_LV
Definition: utf8proc.h:364
@ UTF8PROC_BOUNDCLASS_LF
Definition: utf8proc.h:358
@ UTF8PROC_BOUNDCLASS_CR
Definition: utf8proc.h:357
@ UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR
Definition: utf8proc.h:366
@ UTF8PROC_BOUNDCLASS_LVT
Definition: utf8proc.h:365
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint)
Definition: utf8proc.c:160
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options)
Definition: utf8proc.c:586
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
Definition: utf8proc.c:273
UTF8PROC_DLLEXPORT utf8proc_uint8_t * utf8proc_NFKC(const utf8proc_uint8_t *str)
Definition: utf8proc.c:636
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass)
Definition: utf8proc.c:298
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2)
Definition: utf8proc.c:262
UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint)
Definition: utf8proc.c:281
utf8proc_category_t
Definition: utf8proc.h:273
@ UTF8PROC_CATEGORY_SO
Definition: utf8proc.h:296
@ UTF8PROC_CATEGORY_ME
Definition: utf8proc.h:282
@ UTF8PROC_CATEGORY_SK
Definition: utf8proc.h:295
@ UTF8PROC_CATEGORY_PF
Definition: utf8proc.h:291
@ UTF8PROC_CATEGORY_MN
Definition: utf8proc.h:280
@ UTF8PROC_CATEGORY_CN
Definition: utf8proc.h:274
@ UTF8PROC_CATEGORY_SC
Definition: utf8proc.h:294
@ UTF8PROC_CATEGORY_ND
Definition: utf8proc.h:283
@ UTF8PROC_CATEGORY_LT
Definition: utf8proc.h:277
@ UTF8PROC_CATEGORY_PC
Definition: utf8proc.h:286
@ UTF8PROC_CATEGORY_NO
Definition: utf8proc.h:285
@ UTF8PROC_CATEGORY_MC
Definition: utf8proc.h:281
@ UTF8PROC_CATEGORY_ZS
Definition: utf8proc.h:297
@ UTF8PROC_CATEGORY_CF
Definition: utf8proc.h:301
@ UTF8PROC_CATEGORY_LU
Definition: utf8proc.h:275
@ UTF8PROC_CATEGORY_ZL
Definition: utf8proc.h:298
@ UTF8PROC_CATEGORY_NL
Definition: utf8proc.h:284
@ UTF8PROC_CATEGORY_SM
Definition: utf8proc.h:293
@ UTF8PROC_CATEGORY_ZP
Definition: utf8proc.h:299
@ UTF8PROC_CATEGORY_PI
Definition: utf8proc.h:290
@ UTF8PROC_CATEGORY_PO
Definition: utf8proc.h:292
@ UTF8PROC_CATEGORY_PS
Definition: utf8proc.h:288
@ UTF8PROC_CATEGORY_CS
Definition: utf8proc.h:302
@ UTF8PROC_CATEGORY_PD
Definition: utf8proc.h:287
@ UTF8PROC_CATEGORY_LM
Definition: utf8proc.h:278
@ UTF8PROC_CATEGORY_PE
Definition: utf8proc.h:289
@ UTF8PROC_CATEGORY_LL
Definition: utf8proc.h:276
@ UTF8PROC_CATEGORY_LO
Definition: utf8proc.h:279
@ UTF8PROC_CATEGORY_CO
Definition: utf8proc.h:303
@ UTF8PROC_CATEGORY_CC
Definition: utf8proc.h:300
UTF8PROC_DLLEXPORT const utf8proc_property_t * utf8proc_get_property(utf8proc_int32_t codepoint)
Definition: utf8proc.c:231
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint)
Definition: utf8proc.c:285
utf8proc_bidi_class_t
Definition: utf8proc.h:307
@ UTF8PROC_BIDI_CLASS_ES
Definition: utf8proc.h:317
@ UTF8PROC_BIDI_CLASS_RLE
Definition: utf8proc.h:313
@ UTF8PROC_BIDI_CLASS_L
Definition: utf8proc.h:308
@ UTF8PROC_BIDI_CLASS_AN
Definition: utf8proc.h:319
@ UTF8PROC_BIDI_CLASS_CS
Definition: utf8proc.h:320
@ UTF8PROC_BIDI_CLASS_B
Definition: utf8proc.h:323
@ UTF8PROC_BIDI_CLASS_WS
Definition: utf8proc.h:325
@ UTF8PROC_BIDI_CLASS_EN
Definition: utf8proc.h:316
@ UTF8PROC_BIDI_CLASS_LRI
Definition: utf8proc.h:327
@ UTF8PROC_BIDI_CLASS_ON
Definition: utf8proc.h:326
@ UTF8PROC_BIDI_CLASS_FSI
Definition: utf8proc.h:329
@ UTF8PROC_BIDI_CLASS_PDI
Definition: utf8proc.h:330
@ UTF8PROC_BIDI_CLASS_RLO
Definition: utf8proc.h:314
@ UTF8PROC_BIDI_CLASS_LRO
Definition: utf8proc.h:310
@ UTF8PROC_BIDI_CLASS_ET
Definition: utf8proc.h:318
@ UTF8PROC_BIDI_CLASS_NSM
Definition: utf8proc.h:321
@ UTF8PROC_BIDI_CLASS_LRE
Definition: utf8proc.h:309
@ UTF8PROC_BIDI_CLASS_RLI
Definition: utf8proc.h:328
@ UTF8PROC_BIDI_CLASS_S
Definition: utf8proc.h:324
@ UTF8PROC_BIDI_CLASS_R
Definition: utf8proc.h:311
@ UTF8PROC_BIDI_CLASS_BN
Definition: utf8proc.h:322
@ UTF8PROC_BIDI_CLASS_AL
Definition: utf8proc.h:312
@ UTF8PROC_BIDI_CLASS_PDF
Definition: utf8proc.h:315