1 /******************************************************************************
2 
3     Unicode character case conversion based on GLIB
4 
5     Note: Requires linking against glib-2: "libglib-2.0.so" on Linux
6 
7     TODO: Conversion from UTF-8
8 
9     Copyright:
10         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
11         All rights reserved.
12 
13     License:
14         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
15         Alternatively, this file may be distributed under the terms of the Tango
16         3-Clause BSD License (see LICENSE_BSD.txt for details).
17 
18  ******************************************************************************/
19 
20 module ocean.text.utf.GlibUnicode;
21 
22 
import ocean.text.utf.c.glib_unicode: g_unichar_to_utf8,
                                      g_unichar_tolower,
                                      g_unichar_toupper,
                                      g_unichar_totitle,
                                      g_utf8_get_char_validated;
27 
28 public  import ocean.text.utf.c.glib_unicode: GUtf8Validation;
29 
30 /******************************************************************************
31 
32     GlibUnicode structure
33 
34  ******************************************************************************/
35 
struct GlibUnicode
{

    /**************************************************************************

        Converter function alias definition: a C function mapping one UTF-32
        code point to another (e.g. g_unichar_tolower).

     **************************************************************************/

    extern (C) alias dchar function ( dchar c ) Converter;

    /**************************************************************************

        Converts UTF-32 input to lower case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-32 as input); resized to input.length

     **************************************************************************/

    static void toLower ( dchar[] input, ref dchar[] output )
    {
        convert(input, output, &g_unichar_tolower);
    }

    /**************************************************************************

        Converts UTF-32 input to UTF-8 lower case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-8); previous content is discarded

     **************************************************************************/

    static void toLower ( dchar[] input, ref char[] output )
    {
        convert(input, output, &g_unichar_tolower);
    }

    /**************************************************************************

        Converts UTF-32 content in-place to lower case

        Params:
            content = content buffer

     **************************************************************************/

    static void toLower ( ref dchar[] content )
    {
        convert(content, &g_unichar_tolower);
    }

    /**************************************************************************

        Converts UTF-32 input to upper case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-32 as input); resized to input.length

     **************************************************************************/

    static void toUpper ( dchar[] input, ref dchar[] output )
    {
        convert(input, output, &g_unichar_toupper);
    }

    /**************************************************************************

        Converts UTF-32 input to UTF-8 upper case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-8); previous content is discarded

     **************************************************************************/

    static void toUpper ( dchar[] input, ref char[] output )
    {
        convert(input, output, &g_unichar_toupper);
    }

    /**************************************************************************

        Converts UTF-32 content in-place to upper case

        Params:
            content = content buffer

     **************************************************************************/

    static void toUpper ( ref dchar[] content )
    {
        convert(content, &g_unichar_toupper);
    }

    /**************************************************************************

        Converts UTF-32 input to title case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-32 as input); resized to input.length

     **************************************************************************/

    static void toTitle ( dchar[] input, ref dchar[] output )
    {
        convert(input, output, &g_unichar_totitle);
    }

    /**************************************************************************

        Converts UTF-32 input to UTF-8 title case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-8); previous content is discarded

     **************************************************************************/

    static void toTitle ( dchar[] input, ref char[] output )
    {
        convert(input, output, &g_unichar_totitle);
    }

    /**************************************************************************

        Converts UTF-32 content in-place to title case

        Params:
            content = content buffer

     **************************************************************************/

    static void toTitle ( ref dchar[] content )
    {
        convert(content, &g_unichar_totitle);
    }

    /**************************************************************************

        Converts UTF-32 input using convert_fn and encodes the result as UTF-8

        Params:
            input      = UTF-32 input string
            output     = result output (UTF-8); previous content is discarded
            convert_fn = convert function, applied to each input character

     **************************************************************************/

    static void convert ( dchar[] input, ref char[] output, scope Converter convert_fn )
    {
        // Scratch buffer receiving the UTF-8 encoding of one character.
        char[6] tmp;

        output.length = 0;

        foreach ( c; input )
        {
            // g_unichar_to_utf8() returns the number of bytes it wrote.
            int n = g_unichar_to_utf8(convert_fn(c), tmp.ptr);

            // Appending copies the bytes into output, so the slice of the
            // scratch buffer does not need to be duplicated first.
            output ~= tmp[0 .. n];
        }
    }

    /**************************************************************************

        Converts UTF-32 input using convert_fn

        Params:
            input      = UTF-32 input string
            output     = result output (UTF-32 as input); resized to
                         input.length
            convert_fn = convert function, applied to each input character

     **************************************************************************/

    static void convert ( dchar[] input, ref dchar[] output, scope Converter convert_fn )
    {
        output.length = input.length;

        foreach ( i, c; input )
        {
            output[i] = convert_fn(c);
        }
    }

    /**************************************************************************

        Converts UTF-32 content in-place using convert_fn

        Params:
            content    = content buffer
            convert_fn = convert function, applied to each character

     **************************************************************************/

    static void convert ( ref dchar[] content, scope Converter convert_fn )
    {
        foreach ( ref c; content )
        {
            c = convert_fn(c);
        }
    }

    /**************************************************************************

        Converts an input string of Char code units to UTF-8, encoding each
        element separately via the single-character toUtf8().

        Params:
            input      = input string (UTF-32 for Char = dchar)
            output     = result output (UTF-8); previous content is discarded

     **************************************************************************/

    static void toUtf8 ( Char ) ( Char[] input, ref char[] output )
    {
        output.length = 0;

        foreach ( c; input )
        {
            output ~= toUtf8(c);
        }
    }


    /**************************************************************************

        Converts a single character to UTF-8.

        A compile-time message is emitted when Char has the size of wchar,
        because only the Basic Multilingual Plane is supported then.

        Params:
            c = character to convert (UTF-32 for Char = dchar)

        Returns:
            newly allocated array holding the UTF-8 encoding of c

     **************************************************************************/

    static char[] toUtf8 ( Char ) ( Char c )
    {
        static if (Char.sizeof == wchar.sizeof)
            pragma (msg, typeof (this).stringof
                    ~ ".toUtf8: Only Basic Multilingual Plane supported with "
                    ~ "type '" ~ Char.stringof ~ "'; use 'dchar' "
                    ~ "for full Unicode support");

        // Scratch buffer receiving the UTF-8 encoding of c.
        char[6] tmp;

        int n = g_unichar_to_utf8(c, tmp.ptr);

        // tmp lives on the stack, so the result must be copied before it is
        // returned.
        return tmp[0 .. n].dup;
    }

    /**************************************************************************

        Converts an UTF-8 character to UTF-32. If the input character is not
        valid or incomplete, a GUtf8Validation code is returned instead of the
        character.

        A compile-time message is emitted when Char has the size of wchar,
        because only the Basic Multilingual Plane is supported then.
        NOTE(review): the result is cast to Char, so with a Char narrower than
        dchar the returned value, including a GUtf8Validation code, is
        presumably truncated; use 'dchar' to detect errors reliably.

        Params:
            c = UTF-8 character

        Returns:
            UTF-32 character or GUtf8Validation code

     **************************************************************************/

    static Char toUtf32 ( Char ) ( char[] c )
    {
        static if (Char.sizeof == wchar.sizeof)
            pragma (msg, typeof (this).stringof
                    ~ ".toUtf32: Only Basic Multilingual Plane supported with "
                    ~ "type '" ~ Char.stringof ~ "'; use 'dchar' "
                    ~ "for full Unicode support");

        return cast (Char) g_utf8_get_char_validated(c.ptr, c.length);
    }

}