1 /*******************************************************************************
2 
3     An abstract class template representing an entity de/coder, over a specific
4     set of entities.
5 
6     The class has various abstract methods, which must be implemented, to decode
7     and encode strings.
8 
9     Copyright:
10         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
11         All rights reserved.
12 
13     License:
14         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
15         Alternatively, this file may be distributed under the terms of the Tango
16         3-Clause BSD License (see LICENSE_BSD.txt for details).
17 
18 *******************************************************************************/
19 
20 module ocean.text.entities.model.IEntityCodec;
21 
22 
23 
24 
25 import ocean.text.entities.model.IEntitySet;
26 
27 import Utf = ocean.text.convert.Utf;
28 
29 import ocean.meta.types.Qualifiers;
30 
31 /*******************************************************************************
32 
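    Abstract entity codec template class. Provides abstract methods to encode
    and decode the entities in a string and to check whether a string contains
    encoded / unencoded entities, plus static character conversion helpers.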
34 
35     Params:
36         E = entity set the codec deals with
37 
38 *******************************************************************************/
39 
public abstract class IEntityCodec ( E : IEntitySet )
{
    /***************************************************************************

        Abstract methods to encode any unencoded entities in a string.

        (Unfortunately template methods can't be abstract.)

        Params:
            text = string to encode
            encoded = buffer passed by reference for the encoded output

        Returns:
            the encoded string, as produced by the implementing class

    ***************************************************************************/

    public abstract char[]  encode ( const(char)[]  text, ref char[] encoded );
    public abstract wchar[] encode ( const(wchar)[] text, ref wchar[] encoded );
    public abstract dchar[] encode ( const(dchar)[] text, ref dchar[] encoded );


    /***************************************************************************

        Abstract methods to decode any encoded entities in a string.

        (Unfortunately template methods can't be abstract.)

        Params:
            text = string to decode
            decoded = buffer passed by reference for the decoded output

        Returns:
            the decoded string, as produced by the implementing class

    ***************************************************************************/

    public abstract char[] decode  ( const(char)[]  text, ref char[] decoded );
    public abstract wchar[] decode ( const(wchar)[] text, ref wchar[] decoded );
    public abstract dchar[] decode ( const(dchar)[] text, ref dchar[] decoded );


    /***************************************************************************

        Abstract methods to tell whether a string contains any unencoded
        entities.

        (Unfortunately template methods can't be abstract.)

        Params:
            text = string to check

        Returns:
            true if the string contains one or more unencoded entities

    ***************************************************************************/

    public abstract bool containsUnencoded ( const(char)[]  text );
    public abstract bool containsUnencoded ( const(wchar)[] text );
    public abstract bool containsUnencoded ( const(dchar)[] text );


    /***************************************************************************

        Abstract methods to tell whether a string contains any encoded entities.

        (Unfortunately template methods can't be abstract.)

        Params:
            text = string to check

        Returns:
            true if the string contains one or more encoded entities

    ***************************************************************************/

    public abstract bool containsEncoded ( const(char)[]  text );
    public abstract bool containsEncoded ( const(wchar)[] text );
    public abstract bool containsEncoded ( const(dchar)[] text );


    /***************************************************************************

        Internal entity set, instantiated by the constructor.

    ***************************************************************************/

    protected E entities;


    /***************************************************************************

        Constructor. Creates the internal entity set instance.

    ***************************************************************************/

    public this ( )
    {
        this.entities = new E();
    }


    /***************************************************************************

        Tells whether a string is fully encoded (ie contains no unencoded
        entities).

        Params:
            text = string to check

        Returns:
            true if there are no unencoded entities in the string

    ***************************************************************************/

    public bool encoded ( Char ) ( Char[] text )
    {
        // The string to check must be forwarded: unencoded() has no
        // parameterless form, so a call without it can never be instantiated.
        return !this.unencoded(text);
    }


    /***************************************************************************

        Tells whether a string is unencoded (ie contains one or more unencoded
        entities).

        Params:
            text = string to check

        Returns:
            true if there are unencoded entities in the string

    ***************************************************************************/

    public bool unencoded ( Char ) ( Char[] text )
    {
        return this.containsUnencoded(text);
    }


    /***************************************************************************

        Static template method to convert from a char to another type.

        Note that this method allocates: it has no output buffer parameter, so
        both the one character source string and the conversion buffer are
        newly created on each call.

        Params:
            Char = type to convert to
            c = character to convert

        Returns:
            converted character, as a string of type Char[]

    ***************************************************************************/

    protected static Char[] charTo ( Char ) ( char c )
    {
        // Heap allocated on purpose: for Char == char the array overload
        // returns its input string, which must not be a slice of this
        // function's stack frame.
        auto str = new char[1];
        str[0] = c;

        Char[] output;

        // Static method: there is no 'this' instance, so the overload is
        // addressed via the class type.
        return typeof(this).charTo!(Char)(str, output);
    }


    /***************************************************************************

        Static template method to convert from a char[] to another type.

        The output buffer is resized to the length of the input string before
        the conversion. If Char is char, no conversion is needed and the input
        string itself is returned (it is not copied into the output buffer).

        Params:
            Char = type to convert to
            text = string to convert
            output = buffer to write the output to

        Returns:
            converted string

    ***************************************************************************/

    protected static Char[] charTo ( Char ) ( char[] text, ref Char[] output )
    {
        output.length = text.length;
        assumeSafeAppend(output);

        static if ( is(Char == dchar) )
        {
            return Utf.toString32(text, output);
        }
        else static if ( is(Char == wchar) )
        {
            return Utf.toString16(text, output);
        }
        else static if ( is(Char == char) )
        {
            return text;
        }
        else
        {
            static assert(false, typeof(this).stringof ~ ".charTo - template parameter must be one of {char, wchar, dchar}");
        }
    }


    /***************************************************************************

        Static template method to convert from a dchar to another type.

        Params:
            Char = type to convert to
            c = character to convert
            output = buffer to write the output to

        Returns:
            converted character, as a string of type Char[]

    ***************************************************************************/

    protected static Char[] dcharTo ( Char ) ( dchar c, ref Char[] output )
    {
        // A stack buffer is safe here: every branch of the array overload
        // returns a slice of the output buffer, never of its input.
        dchar[1] str;
        str[0] = c;

        // Static method: there is no 'this' instance, so the overload is
        // addressed via the class type.
        return typeof(this).dcharTo!(Char)(str[], output);
    }

    /***************************************************************************

        Static template method to convert from a dchar[] to another type.

        The output buffer is resized to four times the length of the input
        string before the conversion.

        Params:
            Char = type to convert to
            text = string to convert
            output = buffer to write the output to

        Returns:
            converted string

    ***************************************************************************/

    protected static Char[] dcharTo ( Char ) ( dchar[] text, ref Char[] output )
    {
        output.length = text.length * 4; // Maximum one unicode character -> 4 bytes
        assumeSafeAppend(output);

        static if ( is(Char == dchar) )
        {
            output[0..text.length] = text[];

            return output[0..text.length];
        }
        else static if ( is(Char == wchar) )
        {
            return Utf.toString16(text, output);
        }
        else static if ( is(Char == char) )
        {
            return Utf.toString(text, output);
        }
        else
        {
            static assert(false, typeof(this).stringof ~ ".dcharTo - template parameter must be one of {char, wchar, dchar}");
        }
    }
}
273