/*******************************************************************************

    An abstract class encapsulating a set of entities for en/decoding. A typical
    example is the various html entities which are required to be encoded, for
    example:

        '&' should be encoded as "&amp;"

    The class should be implemented, and the entities() method made to return
    the list of entities to be handled.

    Copyright:
        Copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
        Alternatively, this file may be distributed under the terms of the Tango
        3-Clause BSD License (see LICENSE_BSD.txt for details).

*******************************************************************************/

module ocean.text.entities.model.IEntitySet;




import ocean.text.utf.UtfString : InvalidUnicode, utf_match;

import ocean.transition;

/*******************************************************************************

    Abstract entity set class.

    Provides name <-> unicode lookups, `in` membership tests and foreach
    iteration on top of the entity list supplied by the implementing class via
    entities().

*******************************************************************************/

public abstract class IEntitySet
{
    /***************************************************************************

        An entity. Simply a tuple of a name and a unicode value (eg "amp", '&').

    ***************************************************************************/

    public struct Entity
    {
        istring name;
        dchar unicode;
    }


    /***************************************************************************

        Abstract method to return the list of entities.

        Returns:
            the list of entities handled by this set; every lookup and
            iteration method of this class walks this list in order

    ***************************************************************************/

    public abstract Const!(Entity)[] entities ( );


    /***************************************************************************

        Abstract method to get the encoded form of an entity.

        Params:
            unicode = unicode character of the entity to encode
            output = buffer passed by reference (presumably receives the
                encoded form -- confirm against the implementing classes)

        Returns:
            the encoded form of the entity (exact format is defined by the
            implementing class)

    ***************************************************************************/

    abstract public char[] getEncodedEntity ( dchar unicode, ref char[] output );


    /***************************************************************************

        Gets the unicode character associated with the passed name.

        If several entities match the name, the first one in the order of
        entities() wins.

        Params:
            Char = character type of name
            name = name to check

        Returns:
            unicode corresponding to name, or InvalidUnicode if name is not in
            the entity list

    ***************************************************************************/

    public dchar getUnicode ( Char ) ( Char[] name )
    {
        foreach ( check_name, unicode; this )
        {
            if ( utf_match(name, check_name) )
            {
                return unicode;
            }
        }

        return InvalidUnicode;
    }


    /***************************************************************************

        Gets the name associated with the passed unicode character.

        If several entities share the unicode value, the name of the first one
        in the order of entities() is returned.

        Params:
            unicode = unicode value to check

        Returns:
            name corresponding to unicode, or "" if unicode is not in the entity
            list

    ***************************************************************************/

    public istring getName ( dchar unicode )
    {
        foreach ( name, check_unicode; this )
        {
            if ( check_unicode == unicode )
            {
                return name;
            }
        }

        return "";
    }


    /***************************************************************************

        Checks whether the passed name is in the list of entities.

        Params:
            name = name to check

        Returns:
            true if name is an entity

    ***************************************************************************/

    public bool opIn_r ( char[] name )
    {
        return this.containsName(name);
    }


    /***************************************************************************

        Checks whether the passed name is in the list of entities.

        Params:
            name = name to check

        Returns:
            true if name is an entity

    ***************************************************************************/

    public bool opIn_r ( wchar[] name )
    {
        return this.containsName(name);
    }


    /***************************************************************************

        Checks whether the passed name is in the list of entities.

        Params:
            name = name to check

        Returns:
            true if name is an entity

    ***************************************************************************/

    public bool opIn_r ( dchar[] name )
    {
        return this.containsName(name);
    }


    /***************************************************************************

        Checks whether the passed unicode is in the list of entities.

        Params:
            unicode = unicode value to check

        Returns:
            true if unicode is an entity

    ***************************************************************************/

    public bool opIn_r ( dchar unicode )
    {
        foreach ( ref entity; this.entities )
        {
            if ( entity.unicode == unicode )
            {
                return true;
            }
        }

        return false;
    }


    /***************************************************************************

        Checks whether the passed unicode is in the list of entities.

        The value is converted to dchar with a plain cast and then looked up
        like a dchar.

        Params:
            unicode = unicode value to check

        Returns:
            true if unicode is an entity

    ***************************************************************************/

    public bool opIn_r ( wchar unicode )
    {
        return (cast(dchar)unicode) in this;
    }


    /***************************************************************************

        Checks whether the passed unicode is in the list of entities.

        The value is converted to dchar with a plain cast and then looked up
        like a dchar.

        Params:
            unicode = unicode value to check

        Returns:
            true if unicode is an entity

    ***************************************************************************/

    public bool opIn_r ( char unicode )
    {
        return (cast(dchar)unicode) in this;
    }


    /***************************************************************************

        foreach iterator over the list of entities.

        foreach arguments exposed:
            istring name = entity name
            dchar unicode = entity unicode value

        Iteration stops as soon as the delegate returns a non-zero value.

        Params:
            dg = foreach body delegate

        Returns:
            the value returned by the last delegate call, or 0 if the entity
            list is empty

    ***************************************************************************/

    public int opApply ( scope int delegate ( ref Const!(istring), ref Const!(dchar) ) dg )
    {
        int res;
        foreach ( ref entity; this.entities )
        {
            res = dg(entity.name, entity.unicode);
            if ( res )
            {
                break;
            }
        }

        return res;
    }


    /***************************************************************************

        Shared implementation of the name-based opIn_r overloads: scans the
        entity list for an entry whose name matches, as decided by utf_match.

        Params:
            Char = character type of name
            name = name to check

        Returns:
            true if an entity with a matching name exists

    ***************************************************************************/

    private bool containsName ( Char ) ( Char[] name )
    {
        foreach ( ref entity; this.entities )
        {
            if ( utf_match(name, entity.name) )
            {
                return true;
            }
        }

        return false;
    }
}