1 /******************************************************************************
2 
3     Bindings to GLIB unicode manipulation functions.
4 
5     Documentation:
6 
7         http://www.gtk.org/api/2.6/glib/glib-Unicode-Manipulation.html
8 
9     Note: Requires linking against -lglib-2.0
10 
11     Copyright:
12         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
13         All rights reserved.
14 
15     License:
16         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
17         Alternatively, this file may be distributed under the terms of the Tango
18         3-Clause BSD License (see LICENSE_BSD.txt for details).
19 
20         Bear in mind this module provides bindings to an external library that
21         has its own license, which might be more restrictive. Please check the
22         external library license to see which conditions apply for linking.
23 
24  /*****************************************************************************/
25 
26 module ocean.text.utf.c.glib_unicode;
27 
import ocean.meta.types.Qualifiers;

import core.stdc.config : c_long;
29 
/******************************************************************************

    Character classification, as returned by g_unichar_type().

    Only the first member has an explicit value; every following member takes
    the previous value plus one. The member order therefore defines the
    numeric values and must stay in step with the C enum of the same name.

******************************************************************************/

enum GUnicodeType
{
    // Control, format, unassigned, private-use and surrogate code points
    G_UNICODE_CONTROL = 0,
    G_UNICODE_FORMAT,
    G_UNICODE_UNASSIGNED,
    G_UNICODE_PRIVATE_USE,
    G_UNICODE_SURROGATE,

    // Letters
    G_UNICODE_LOWERCASE_LETTER,
    G_UNICODE_MODIFIER_LETTER,
    G_UNICODE_OTHER_LETTER,
    G_UNICODE_TITLECASE_LETTER,
    G_UNICODE_UPPERCASE_LETTER,

    // Marks
    G_UNICODE_COMBINING_MARK,
    G_UNICODE_ENCLOSING_MARK,
    G_UNICODE_NON_SPACING_MARK,

    // Numbers
    G_UNICODE_DECIMAL_NUMBER,
    G_UNICODE_LETTER_NUMBER,
    G_UNICODE_OTHER_NUMBER,

    // Punctuation
    G_UNICODE_CONNECT_PUNCTUATION,
    G_UNICODE_DASH_PUNCTUATION,
    G_UNICODE_CLOSE_PUNCTUATION,
    G_UNICODE_FINAL_PUNCTUATION,
    G_UNICODE_INITIAL_PUNCTUATION,
    G_UNICODE_OTHER_PUNCTUATION,
    G_UNICODE_OPEN_PUNCTUATION,

    // Symbols
    G_UNICODE_CURRENCY_SYMBOL,
    G_UNICODE_MODIFIER_SYMBOL,
    G_UNICODE_MATH_SYMBOL,
    G_UNICODE_OTHER_SYMBOL,

    // Separators
    G_UNICODE_LINE_SEPARATOR,
    G_UNICODE_PARAGRAPH_SEPARATOR,
    G_UNICODE_SPACE_SEPARATOR
}
63 
/******************************************************************************

    Break classification, as returned by g_unichar_break_type().

    No member has an explicit value, so the values are the positions in this
    list, counted from 0. The member order must stay in step with the C enum
    of the same name.

******************************************************************************/

enum GUnicodeBreakType
{
    G_UNICODE_BREAK_MANDATORY,
    G_UNICODE_BREAK_CARRIAGE_RETURN,
    G_UNICODE_BREAK_LINE_FEED,
    G_UNICODE_BREAK_COMBINING_MARK,
    G_UNICODE_BREAK_SURROGATE,
    G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
    G_UNICODE_BREAK_INSEPARABLE,
    G_UNICODE_BREAK_NON_BREAKING_GLUE,
    G_UNICODE_BREAK_CONTINGENT,
    G_UNICODE_BREAK_SPACE,
    G_UNICODE_BREAK_AFTER,
    G_UNICODE_BREAK_BEFORE,
    G_UNICODE_BREAK_BEFORE_AND_AFTER,
    G_UNICODE_BREAK_HYPHEN,
    G_UNICODE_BREAK_NON_STARTER,
    G_UNICODE_BREAK_OPEN_PUNCTUATION,
    G_UNICODE_BREAK_CLOSE_PUNCTUATION,
    G_UNICODE_BREAK_QUOTATION,
    G_UNICODE_BREAK_EXCLAMATION,
    G_UNICODE_BREAK_IDEOGRAPHIC,
    G_UNICODE_BREAK_NUMERIC,
    G_UNICODE_BREAK_INFIX_SEPARATOR,
    G_UNICODE_BREAK_SYMBOL,
    G_UNICODE_BREAK_ALPHABETIC,
    G_UNICODE_BREAK_PREFIX,
    G_UNICODE_BREAK_POSTFIX,
    G_UNICODE_BREAK_COMPLEX_CONTEXT,
    G_UNICODE_BREAK_AMBIGUOUS,
    G_UNICODE_BREAK_UNKNOWN,
    G_UNICODE_BREAK_NEXT_LINE,
    G_UNICODE_BREAK_WORD_JOINER
}
98 
/******************************************************************************

    Normalization mode selector, taken by g_utf8_normalize().

    Each mode is available under two names that share one value: a
    G_NORMALIZE_DEFAULT / G_NORMALIZE_ALL style name and an NFD / NFC /
    NFKD / NFKC style name.

******************************************************************************/

enum GNormalizeMode
{
    G_NORMALIZE_DEFAULT         = 0,
    G_NORMALIZE_NFD             = G_NORMALIZE_DEFAULT,

    G_NORMALIZE_DEFAULT_COMPOSE = 1,
    G_NORMALIZE_NFC             = G_NORMALIZE_DEFAULT_COMPOSE,

    G_NORMALIZE_ALL             = 2,
    G_NORMALIZE_NFKD            = G_NORMALIZE_ALL,

    G_NORMALIZE_ALL_COMPOSE     = 3,
    G_NORMALIZE_NFKC            = G_NORMALIZE_ALL_COMPOSE
}
110 
/******************************************************************************

    Sentinel dchar values, equal to -1 and -2 converted to dchar (i.e. the
    two highest 32-bit values).

    NOTE(review): these look like the error results of
    g_utf8_get_char_validated() -- confirm against the GLib documentation.

******************************************************************************/

enum GUtf8Validation : dchar
{
    Invalid    = cast(dchar) 0xFFFF_FFFF,   // bit pattern of -1
    Incomplete = cast(dchar) 0xFFFF_FFFE    // bit pattern of -2
}
116 
117 extern (C) static:
118 
/******************************************************************************

    Error record handed back through the `GError** error` parameter of the
    transcoding functions declared in this module.

    The field order and types define the memory layout shared with the C
    library and must not be changed.

******************************************************************************/

struct GError
{
    /// Identifier of the error domain
    uint domain;

    /// Error code
    int code;

    /// C string describing the error
    char* message;
}
125 
126 
// ---------------------------------------------------------------------------
// Per-code-point queries. Every function takes a single dchar.
//
// NOTE(review): GLib declares the predicates below as returning gboolean
// (a C int); they are bound here as D bool, which is narrower. Confirm this
// is intentional for the targeted ABI.
// ---------------------------------------------------------------------------

// Validity check
bool g_unichar_validate (dchar c);

// Classification predicates
bool g_unichar_isalnum   (dchar c);
bool g_unichar_isalpha   (dchar c);
bool g_unichar_iscntrl   (dchar c);
bool g_unichar_isdigit   (dchar c);
bool g_unichar_isgraph   (dchar c);
bool g_unichar_islower   (dchar c);
bool g_unichar_isprint   (dchar c);
bool g_unichar_ispunct   (dchar c);
bool g_unichar_isspace   (dchar c);
bool g_unichar_isupper   (dchar c);
bool g_unichar_isxdigit  (dchar c);
bool g_unichar_istitle   (dchar c);
bool g_unichar_isdefined (dchar c);
bool g_unichar_iswide    (dchar c);

// Case mapping
dchar g_unichar_toupper (dchar c);
dchar g_unichar_tolower (dchar c);
dchar g_unichar_totitle (dchar c);

// Numeric value of a decimal / hexadecimal digit
int g_unichar_digit_value  (dchar c);
int g_unichar_xdigit_value (dchar c);

// Classification as enum value
GUnicodeType      g_unichar_type       (dchar c);
GUnicodeBreakType g_unichar_break_type (dchar c);
149 
// ---------------------------------------------------------------------------
// Canonical ordering / decomposition, mirroring and UTF-8 decoding.
//
// The string arguments of the decoding functions are declared const, like
// g_utf8_strlen() and g_utf8_validate() in this module, so that const and
// immutable D strings can be passed without a cast.
// ---------------------------------------------------------------------------

// Takes a mutable buffer of `len` code points.
// NOTE(review): presumably reorders the buffer in place -- confirm against
// the GLib documentation.
void        g_unicode_canonical_ordering        (dchar* str, size_t len);

// `result_len` is a pointer to a size_t.
// NOTE(review): presumably receives the length of the returned array; the
// ownership of that array is defined by GLib -- check its documentation.
dchar*      g_unicode_canonical_decomposition   (dchar c, size_t* result_len);

// `mirrored_ch` is a pointer to a dchar.
// NOTE(review): presumably receives the mirrored character -- confirm.
bool        g_unichar_get_mirror_char           (dchar c, dchar* mirrored_ch);

// g_utf8_next_char is not bound here; a declaration for it was left
// commented out. NOTE(review): in GLib it is a preprocessor macro, which
// would leave no symbol to link against -- confirm.

dchar       g_utf8_get_char             (const(char)* p);
dchar       g_utf8_get_char_validated   (const(char)* p, ptrdiff_t max_len);
156 
// ---------------------------------------------------------------------------
// UTF-8 string navigation, searching, copying and validation.
//
// - String arguments that are only read are declared const(char)*, so that
//   const and immutable D strings can be passed without a cast. Mutable
//   char* arguments are still accepted.
// - Character offsets and counts use c_long: the C prototypes use `long`,
//   whose width depends on the platform, whereas D `long` is always 64 bits.
//   On 64-bit Posix targets c_long is an alias of long, so nothing changes
//   for existing callers there.
// - The `end` parameter of g_utf8_validate() stays `char**`, because
//   changing it would break callers that pass the address of a `char*`.
// ---------------------------------------------------------------------------

char*       g_utf8_offset_to_pointer    (const(char)* str, c_long offset);
c_long      g_utf8_pointer_to_offset    (const(char)* str, const(char)* pos);
char*       g_utf8_prev_char            (const(char)* p);
char*       g_utf8_find_next_char       (const(char)* p,   const(char)* end);
char*       g_utf8_find_prev_char       (const(char)* str, const(char)* p);
c_long      g_utf8_strlen               (const(char)* p,   ptrdiff_t max);
char*       g_utf8_strncpy              (char* dest, const(char)* src, size_t n);
char*       g_utf8_strchr               (const(char)* p,   ptrdiff_t len, dchar c);
char*       g_utf8_strrchr              (const(char)* p,   ptrdiff_t len, dchar c);
char*       g_utf8_strreverse           (const(char)* str, ptrdiff_t len);
bool        g_utf8_validate             (const(char)* str, ptrdiff_t max_len, char** end);
168 
// ---------------------------------------------------------------------------
// Case conversion, normalization and collation of UTF-8 strings.
//
// The input strings are declared const(char)*, like g_utf8_strlen() and
// g_utf8_validate() in this module, so that const and immutable D strings
// can be passed without a cast. Mutable char* arguments are still accepted.
//
// NOTE(review): per the GLib documentation the char* results are newly
// allocated strings owned by the caller -- confirm before relying on it.
// ---------------------------------------------------------------------------

char*       g_utf8_strup        (const(char)* str, ptrdiff_t len);
char*       g_utf8_strdown      (const(char)* str, ptrdiff_t len);
char*       g_utf8_casefold     (const(char)* str, ptrdiff_t len);
char*       g_utf8_normalize    (const(char)* str, ptrdiff_t len, GNormalizeMode mode);

char*       g_utf8_collate_key  (const(char)* str, ptrdiff_t len);
int         g_utf8_collate      (const(char)* str1, const(char)* str2);
176 
// ---------------------------------------------------------------------------
// Transcoding between UTF-8 (char), UTF-16 (wchar) and UCS-4 (dchar).
//
// - The source strings are declared const, so that const and immutable D
//   strings can be passed without a cast. Mutable pointers are still
//   accepted.
// - Lengths and item counters use c_long: the C prototypes use `long`, whose
//   width depends on the platform, whereas D `long` is always 64 bits. On
//   64-bit Posix targets c_long is an alias of long, so nothing changes for
//   existing callers there.
//
// NOTE(review): `items_read`, `items_written` and `error` look like
// out-parameters, and per the GLib documentation the returned buffers are
// newly allocated -- confirm before relying on either.
// ---------------------------------------------------------------------------

wchar*      g_utf8_to_utf16     (const(char)* str,  c_long len, c_long* items_read, c_long* items_written, GError** error);
dchar*      g_utf8_to_ucs4      (const(char)* str,  c_long len, c_long* items_read, c_long* items_written, GError** error);
dchar*      g_utf8_to_ucs4_fast (const(char)* str,  c_long len, c_long* items_written);
dchar*      g_utf16_to_ucs4     (const(wchar)* str, c_long len, c_long* items_read, c_long* items_written, GError** error);
char*       g_utf16_to_utf8     (const(wchar)* str, c_long len, c_long* items_read, c_long* items_written, GError** error);
wchar*      g_ucs4_to_utf16     (const(dchar)* str, c_long len, c_long* items_read, c_long* items_written, GError** error);
char*       g_ucs4_to_utf8      (const(dchar)* str, c_long len, c_long* items_read, c_long* items_written, GError** error);

// Takes a code point and a mutable char buffer, returns an int.
// NOTE(review): presumably writes the UTF-8 encoding of `c` into `outbuf`;
// the required buffer size and the meaning of the result are defined by
// GLib -- check its documentation.
int         g_unichar_to_utf8   (dchar c, char* outbuf);
186 
187 
188 
// ---------------------------------------------------------------------------
// Overload sets named after the target encoding only. Each name is aliased
// to two transcoding functions; the compiler selects the one matching the
// pointer type of the source string argument.
// ---------------------------------------------------------------------------

// To UTF-8, from UCS-4 (dchar) or UTF-16 (wchar)
alias g_to_utf8 = g_ucs4_to_utf8;
alias g_to_utf8 = g_utf16_to_utf8;

// To UTF-16, from UCS-4 (dchar) or UTF-8 (char)
alias g_to_utf16 = g_ucs4_to_utf16;
alias g_to_utf16 = g_utf8_to_utf16;

// To UCS-4, from UTF-8 (char) or UTF-16 (wchar)
alias g_to_ucs4 = g_utf8_to_ucs4;
alias g_to_ucs4 = g_utf16_to_ucs4;