/******************************************************************************

    Unicode character case conversion based on GLIB

    Note: Requires linking against glib-2: "libglib-2.0.so" on Linux

    TODO: Conversion from UTF-8

    Copyright:
        Copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
        Alternatively, this file may be distributed under the terms of the Tango
        3-Clause BSD License (see LICENSE_BSD.txt for details).

 ******************************************************************************/

module ocean.text.utf.GlibUnicode;


import ocean.text.utf.c.glib_unicode: g_unichar_to_utf8,
                                      g_unichar_tolower,
                                      g_unichar_toupper,
                                      g_unichar_totitle,
                                      g_utf8_get_char_validated;

public import ocean.text.utf.c.glib_unicode: GUtf8Validation;

/******************************************************************************

    GlibUnicode structure

    Namespace-like collection of static helpers which apply the GLib
    per-character case conversion functions to UTF-32 strings and convert
    single characters between UTF-32 and UTF-8.

 ******************************************************************************/

struct GlibUnicode
{

    /**************************************************************************

        Converter function alias definition

        Signature of a C function which maps one UTF-32 character to another,
        as passed to the convert() methods.

     **************************************************************************/

    extern (C) alias dchar function ( dchar c ) Converter;

    /**************************************************************************

        Converts UTF-32 input to lower case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-32 as input)

     **************************************************************************/

    static void toLower ( dchar[] input, ref dchar[] output )
    {
        convert(input, output, &g_unichar_tolower);
    }

    /**************************************************************************

        Converts UTF-32 input to UTF-8 lower case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-8)

     **************************************************************************/

    static void toLower ( dchar[] input, ref char[] output )
    {
        convert(input, output, &g_unichar_tolower);
    }

    /**************************************************************************

        Converts UTF-32 content in-place to lower case

        Params:
            content = content buffer

     **************************************************************************/

    static void toLower ( ref dchar[] content )
    {
        convert(content, &g_unichar_tolower);
    }

    /**************************************************************************

        Converts UTF-32 input to upper case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-32 as input)

     **************************************************************************/

    static void toUpper ( dchar[] input, ref dchar[] output )
    {
        convert(input, output, &g_unichar_toupper);
    }

    /**************************************************************************

        Converts UTF-32 input to UTF-8 upper case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-8)

     **************************************************************************/

    static void toUpper ( dchar[] input, ref char[] output )
    {
        convert(input, output, &g_unichar_toupper);
    }

    /**************************************************************************

        Converts UTF-32 content in-place to upper case

        Params:
            content = content buffer

     **************************************************************************/

    static void toUpper ( ref dchar[] content )
    {
        convert(content, &g_unichar_toupper);
    }

    /**************************************************************************

        Converts UTF-32 input to title case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-32 as input)

     **************************************************************************/

    static void toTitle ( dchar[] input, ref dchar[] output )
    {
        convert(input, output, &g_unichar_totitle);
    }

    /**************************************************************************

        Converts UTF-32 input to UTF-8 title case

        Params:
            input  = UTF-32 input string
            output = result output (UTF-8)

     **************************************************************************/

    static void toTitle ( dchar[] input, ref char[] output )
    {
        convert(input, output, &g_unichar_totitle);
    }

    /**************************************************************************

        Converts UTF-32 content in-place to title case

        Params:
            content = content buffer

     **************************************************************************/

    static void toTitle ( ref dchar[] content )
    {
        convert(content, &g_unichar_totitle);
    }

    /**************************************************************************

        Converts UTF-32 input using convert_fn and encodes the result as UTF-8

        output is reset to length 0 and then extended by the UTF-8 encoding of
        each converted character in turn.

        Params:
            input      = UTF-32 input string
            output     = result output (UTF-8)
            convert_fn = convert function

     **************************************************************************/

    static void convert ( dchar[] input, ref char[] output, scope Converter convert_fn )
    {
        // Scratch buffer for the encoding of one character; GLib documents
        // that g_unichar_to_utf8() needs at least 6 bytes of space.
        char[6] tmp;

        output.length = 0;

        foreach ( c; input )
        {
            // n is the number of bytes g_unichar_to_utf8() wrote to tmp.
            int n = g_unichar_to_utf8(convert_fn(c), tmp.ptr);

            // Appending copies the bytes into output, so no intermediate
            // duplicate of the slice is needed.
            output ~= tmp[0 .. n];
        }
    }

    /**************************************************************************

        Converts UTF-32 input using convert_fn

        output is resized to the length of input and element i of output is set
        to the converted element i of input.

        Params:
            input      = UTF-32 input string
            output     = result output (UTF-32 as input)
            convert_fn = convert function

     **************************************************************************/

    static void convert ( dchar[] input, ref dchar[] output, scope Converter convert_fn )
    {
        output.length = input.length;

        foreach ( i, c; input )
        {
            output[i] = convert_fn(c);
        }
    }

    /**************************************************************************

        Converts UTF-32 content in-place using convert_fn

        Params:
            content    = content buffer
            convert_fn = convert function

     **************************************************************************/

    static void convert ( ref dchar[] content, scope Converter convert_fn )
    {
        foreach ( ref c; content )
        {
            c = convert_fn(c);
        }
    }

    /**************************************************************************

        Converts an input string to UTF-8

        Each element of input is encoded individually via the single character
        toUtf8() overload and appended to output, which is reset to length 0
        first.

        Template_Params:
            Char = character type of input

        Params:
            input  = input string (UTF-32 if Char is dchar)
            output = result output (UTF-8)

     **************************************************************************/

    static void toUtf8 ( Char ) ( Char[] input, ref char[] output )
    {
        output.length = 0;

        foreach ( c; input )
        {
            output ~= toUtf8(c);
        }
    }


    /**************************************************************************

        Converts a single character to UTF-8

        When instantiated with a character type of the size of wchar, a
        compile-time message is printed pointing out that only the Basic
        Multilingual Plane is supported with that type.

        Template_Params:
            Char = character type of c

        Params:
            c = character (UTF-32 if Char is dchar)

        Returns:
            newly allocated array holding the UTF-8 encoding of c

     **************************************************************************/

    static char[] toUtf8 ( Char ) ( Char c )
    {
        static if (Char.sizeof == wchar.sizeof)
            pragma (msg, typeof (this).stringof
                         ~ ".toUtf8: Only Basic Multilingual Plane supported with "
                         ~ "type '" ~ Char.stringof ~ "'; use 'dchar' "
                         ~ "for full Unicode support");

        char[6] tmp;

        int n = g_unichar_to_utf8(c, tmp.ptr);

        // tmp lives on the stack, so the result must be copied before it is
        // returned.
        return tmp[0 .. n].dup;
    }

    /**************************************************************************

        Converts an UTF-8 character to UTF-32. If the input character is not
        valid or incomplete, a GUtf8Validation code is returned instead of the
        character.

        Note: the value obtained from GLib is cast to Char. If Char is narrower
        than dchar, characters outside its range and the GUtf8Validation codes
        are truncated by that cast; use dchar to receive them unmodified.

        Template_Params:
            Char = character type of the return value

        Params:
            c = UTF-8 character

        Returns:
            UTF-32 character or GUtf8Validation code

     **************************************************************************/

    static Char toUtf32 ( Char ) ( char[] c )
    {
        static if (Char.sizeof == wchar.sizeof)
            pragma (msg, typeof (this).stringof
                         ~ ".toUtf32: Only Basic Multilingual Plane supported with "
                         ~ "type '" ~ Char.stringof ~ "'; use 'dchar' "
                         ~ "for full Unicode support");

        // At most c.length bytes starting at c.ptr are examined.
        return cast (Char) g_utf8_get_char_validated(c.ptr, c.length);
    }

}