/*******************************************************************************

    An abstract class encapsulating a set of entities for en/decoding. A typical
    example is the various html entities which are required to be encoded, for
    example:

        '&' should be encoded as "&amp;"

    The class should be implemented, and the entities() method made to return
    the list of entities to be handled.

    Copyright:
        Copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
        Alternatively, this file may be distributed under the terms of the Tango
        3-Clause BSD License (see LICENSE_BSD.txt for details).

*******************************************************************************/

module ocean.text.entities.model.IEntitySet;




import ocean.text.utf.UtfString : InvalidUnicode, utf_match;

import ocean.meta.types.Qualifiers;

/*******************************************************************************

    Abstract entity set class.

    Derived classes supply the entity list (entities()) and the encoding of a
    single entity (getEncodedEntity()); the lookup, 'in' and foreach support
    implemented here is built on top of that list.

*******************************************************************************/

public abstract class IEntitySet
{
    /***************************************************************************

        An entity. Simply a tuple of a name and a unicode value (eg "amp", '&').

    ***************************************************************************/

    public struct Entity
    {
        /// Name of the entity (eg "amp").
        istring name;

        /// Unicode character the name stands for (eg '&').
        dchar unicode;
    }


    /***************************************************************************

        Abstract method to return the list of entities.

        Returns:
            the entities handled by this set

    ***************************************************************************/

    public abstract const(Entity)[] entities ( );


    /***************************************************************************

        Abstract method to get the encoded form of an entity.

        Params:
            unicode = unicode character of the entity to encode
            output = buffer passed by reference (presumably receives the
                encoded text -- the exact contract is defined by the
                implementing class)

        Returns:
            a char[] (presumably the encoded form of the entity -- confirm
            against the implementing class)

    ***************************************************************************/

    abstract public char[] getEncodedEntity ( dchar unicode, ref char[] output );


    /***************************************************************************

        Gets the unicode character associated with the passed name.

        The entities are visited in list order (via opApply) and the first one
        whose name satisfies utf_match() wins.

        Params:
            Char = character type of name
            name = name to check

        Returns:
            unicode corresponding to name, or InvalidUnicode if name is not in
            the entity list

    ***************************************************************************/

    public dchar getUnicode ( Char ) ( Char[] name )
    {
        foreach ( check_name, unicode; this )
        {
            if ( utf_match(name, check_name) )
            {
                return unicode;
            }
        }

        return InvalidUnicode;
    }


    /***************************************************************************

        Gets the name associated with the passed unicode character.

        The entities are visited in list order (via opApply) and the first one
        with an equal unicode value wins.

        Params:
            unicode = unicode value to check

        Returns:
            name corresponding to unicode, or "" if unicode is not in the entity
            list

    ***************************************************************************/

    public istring getName ( dchar unicode )
    {
        foreach ( name, check_unicode; this )
        {
            if ( check_unicode == unicode )
            {
                return name;
            }
        }

        return "";
    }


    /***************************************************************************

        Support for the 'in' operator

        Aliased to the various opIn_r member functions, for backwards
        compatibility

    ***************************************************************************/

    public alias opBinaryRight ( istring op : "in" ) = opIn_r;


    /***************************************************************************

        Checks whether the passed name is in the list of entities.

        Params:
            name = name to check

        Returns:
            true if name is an entity

    ***************************************************************************/

    public bool opIn_r ( char[] name )
    {
        return this.hasEntityName(name);
    }


    /***************************************************************************

        Checks whether the passed name is in the list of entities.

        Params:
            name = name to check

        Returns:
            true if name is an entity

    ***************************************************************************/

    public bool opIn_r ( wchar[] name )
    {
        return this.hasEntityName(name);
    }


    /***************************************************************************

        Checks whether the passed name is in the list of entities.

        Params:
            name = name to check

        Returns:
            true if name is an entity

    ***************************************************************************/

    public bool opIn_r ( dchar[] name )
    {
        return this.hasEntityName(name);
    }


    /***************************************************************************

        Checks whether the passed unicode is in the list of entities.

        Params:
            unicode = unicode value to check

        Returns:
            true if unicode is an entity

    ***************************************************************************/

    public bool opIn_r ( dchar unicode )
    {
        foreach ( ref entity; this.entities )
        {
            if ( entity.unicode == unicode )
            {
                return true;
            }
        }

        return false;
    }


    /***************************************************************************

        Checks whether the passed unicode is in the list of entities.

        The value is widened to dchar and forwarded to the dchar overload.

        Params:
            unicode = unicode value to check

        Returns:
            true if unicode is an entity

    ***************************************************************************/

    public bool opIn_r ( wchar unicode )
    {
        return (cast(dchar)unicode) in this;
    }


    /***************************************************************************

        Checks whether the passed unicode is in the list of entities.

        The value is widened to dchar and forwarded to the dchar overload.

        Params:
            unicode = unicode value to check

        Returns:
            true if unicode is an entity

    ***************************************************************************/

    public bool opIn_r ( char unicode )
    {
        return (cast(dchar)unicode) in this;
    }


    /***************************************************************************

        foreach iterator over the list of entities.

        foreach arguments exposed:
            istring name = entity name
            dchar unicode = entity unicode value

        Params:
            dg = foreach body delegate, called once per entity in list order

        Returns:
            the value returned by the last call of dg (iteration stops at the
            first non-zero value), or 0 if the entity list is empty

    ***************************************************************************/

    public int opApply ( scope int delegate ( ref const(istring), ref const(dchar) ) dg )
    {
        int res;
        foreach ( ref entity; this.entities )
        {
            res = dg(entity.name, entity.unicode);
            if ( res )
            {
                break;
            }
        }

        return res;
    }


    /***************************************************************************

        Shared implementation of the name-based opIn_r overloads: scans the
        entity list for a name which satisfies utf_match().

        The public overloads are kept as separate non-template methods so that
        their signatures (and the opBinaryRight alias) stay unchanged.

        Params:
            Char = character type of name
            name = name to check

        Returns:
            true if name is an entity

    ***************************************************************************/

    private bool hasEntityName ( Char ) ( Char[] name )
    {
        foreach ( ref entity; this.entities )
        {
            if ( utf_match(name, entity.name) )
            {
                return true;
            }
        }

        return false;
    }
}