/*******************************************************************************

    Copyright:
        Copyright (C) 2008 Kris Bell.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.

    Version: Aug 2008: Initial release

    Authors: Kris

*******************************************************************************/

module ocean.text.xml.DocEntity;

import Util = ocean.text.Util;

/******************************************************************************

    Convert XML entity patterns to normal characters

    <pre>
    &amp;  => &
    &quot; => "
    etc.
    </pre>

    Only the five named entities amp, apos, gt, lt and quot are translated.
    Any other '&' (including one that is the very last character of the
    input) is copied through unchanged.

    Params:
        src = the text to decode
        dst = optional workspace; its length is raised to src.length when it
              is shorter than that

    Returns:
        src itself when it contains no '&', otherwise a slice of dst holding
        the decoded text

******************************************************************************/

T[] fromEntity (T) (T[] src, T[] dst = null)
{
    auto s = src.ptr;
    auto len = src.length;

    // take a peek first to see if there's anything to translate; delta
    // takes whatever type Util.indexOf returns, so nothing is narrowed
    auto delta = Util.indexOf (s, '&', len);

    if (delta < len)
    {
        // make some room if not enough provided (the decoded output is
        // never longer than the input)
        if (dst.length < src.length)
            dst.length = src.length;
        auto d = dst.ptr;

        // copy segments over, a chunk at a time
        do
        {
            d [0 .. delta] = s [0 .. delta];
            len -= delta;
            s += delta;
            d += delta;

            // translate entity; token is the number of source elements
            // consumed, 0 meaning "not recognised"
            auto token = 0;

            // s is a raw pointer, so s[1] is not bounds checked: only look
            // at it when something actually follows the '&'
            if (len > 1)
                switch (s[1])
                {
                    case 'a':
                        if (len > 4 && s[1..5] == "amp;")
                        {
                            *d++ = '&';
                            token = 5;
                        }
                        else if (len > 5 && s[1..6] == "apos;")
                        {
                            *d++ = '\'';
                            token = 6;
                        }
                        break;

                    case 'g':
                        if (len > 3 && s[1..4] == "gt;")
                        {
                            *d++ = '>';
                            token = 4;
                        }
                        break;

                    case 'l':
                        if (len > 3 && s[1..4] == "lt;")
                        {
                            *d++ = '<';
                            token = 4;
                        }
                        break;

                    case 'q':
                        if (len > 5 && s[1..6] == "quot;")
                        {
                            *d++ = '"';
                            token = 6;
                        }
                        break;

                    default:
                        break;
                }

            // unknown pattern: keep the '&' literally and move past it
            if (token is 0)
            {
                *d++ = '&';
                token = 1;
            }

            s += token;
            len -= token;
        }
        while ((delta = Util.indexOf (s, '&', len)) < len);

        // copy tail too
        d [0 .. len] = s [0 .. len];
        return dst [0 .. (d + len) - dst.ptr];
    }

    return src;
}


/******************************************************************************

    Convert XML entity patterns to normal characters
    ---
    &amp;  => &
    &quot; => "
    etc
    ---

    This variant does not require an interim workspace, and instead
    emits directly via the provided delegate

    Only the five named entities amp, apos, gt, lt and quot are translated.
    Any other '&' (including one that is the very last character of the
    input) is emitted unchanged.

    Params:
        src  = the text to decode
        emit = receives the output piecewise: slices of src for untouched
               text and one-character literals for translated entities.
               When src contains no '&' it is called once with src.

******************************************************************************/

void fromEntity (T) (T[] src, scope void delegate(T[]) emit)
{
    auto s = src.ptr;
    auto len = src.length;

    // take a peek first to see if there's anything to translate; delta
    // takes whatever type Util.indexOf returns, so nothing is narrowed
    auto delta = Util.indexOf (s, '&', len);

    if (delta < len)
    {
        // copy segments over, a chunk at a time
        do
        {
            emit (s [0 .. delta]);
            len -= delta;
            s += delta;

            // translate entity; token is the number of source elements
            // consumed, 0 meaning "not recognised"
            auto token = 0;

            // s is a raw pointer, so s[1] is not bounds checked: only look
            // at it when something actually follows the '&'
            if (len > 1)
                switch (s[1])
                {
                    case 'a':
                        if (len > 4 && s[1..5] == "amp;")
                        {
                            emit ("&");
                            token = 5;
                        }
                        else if (len > 5 && s[1..6] == "apos;")
                        {
                            emit ("'");
                            token = 6;
                        }
                        break;

                    case 'g':
                        if (len > 3 && s[1..4] == "gt;")
                        {
                            emit (">");
                            token = 4;
                        }
                        break;

                    case 'l':
                        if (len > 3 && s[1..4] == "lt;")
                        {
                            emit ("<");
                            token = 4;
                        }
                        break;

                    case 'q':
                        if (len > 5 && s[1..6] == "quot;")
                        {
                            emit ("\"");
                            token = 6;
                        }
                        break;

                    default:
                        break;
                }

            // unknown pattern: keep the '&' literally and move past it
            if (token is 0)
            {
                emit ("&");
                token = 1;
            }

            s += token;
            len -= token;
        }
        while ((delta = Util.indexOf (s, '&', len)) < len);

        // copy tail too
        emit (s [0 .. len]);
    }
    else
        emit (src);
}


/******************************************************************************

    Convert reserved chars to entities. For example: " => &quot;

    The reserved characters are " > < & and ', which become &quot; &gt;
    &lt; &amp; and &apos; respectively.

    Either a slice of the provided output buffer is returned, or the
    original content, depending on whether there were reserved chars
    present or not. The output buffer should be sufficiently large to
    accommodate the converted output, or it will be allocated from the
    heap instead

    Params:
        src = the text to encode
        dst = optional output buffer; its length is increased on demand

    Returns:
        src itself when it contains no reserved character, otherwise a
        slice of dst holding the encoded text

******************************************************************************/

T[] toEntity(T) (T[] src, T[] dst = null)
{
    T[] entity;
    auto s = src.ptr;       // scan position
    auto t = s;             // start of the pending, not yet copied run
    auto e = s + src.length;
    auto index = 0;         // number of elements written to dst so far

    while (s < e)
        switch (*s)
        {
            case '"':
                entity = "&quot;";
                goto common;

            case '>':
                entity = "&gt;";
                goto common;

            case '<':
                entity = "&lt;";
                goto common;

            case '&':
                entity = "&amp;";
                goto common;

            case '\'':
                entity = "&apos;";
                goto common;

            common:
                // flush the plain run preceding the reserved char, then
                // append its entity, growing dst first when needed
                auto len = s - t;
                if (dst.length <= index + len + entity.length)
                    dst.length = (dst.length + len + entity.length) + dst.length / 2;

                dst [index .. index + len] = t [0 .. len];
                index += len;

                dst [index .. index + entity.length] = entity;
                index += entity.length;
                t = ++s;
                break;

            default:
                ++s;
                break;
        }


    // did we change anything?
    if (index)
    {
        // copy tail too
        auto len = e - t;
        if (dst.length <= index + len)
            dst.length = index + len;

        dst [index .. index + len] = t [0 .. len];
        return dst [0 .. index + len];
    }

    return src;
}


/******************************************************************************

    Convert reserved chars to entities. For example: " => &quot;

    The reserved characters are " > < & and ', which become &quot; &gt;
    &lt; &amp; and &apos; respectively.

    This variant does not require an interim workspace, and instead
    emits directly via the provided delegate

    Params:
        src  = the text to encode
        emit = receives the output piecewise: slices of src for untouched
               text and entity literals for reserved characters. When src
               contains no reserved character it is called once with src.

******************************************************************************/

void toEntity(T) (T[] src, scope void delegate(T[]) emit)
{
    T[] entity;
    auto s = src.ptr;       // scan position
    auto t = s;             // start of the pending, not yet emitted run
    auto e = s + src.length;

    while (s < e)
        switch (*s)
        {
            case '"':
                entity = "&quot;";
                goto common;

            case '>':
                entity = "&gt;";
                goto common;

            case '<':
                entity = "&lt;";
                goto common;

            case '&':
                entity = "&amp;";
                goto common;

            case '\'':
                entity = "&apos;";
                goto common;

            common:
                // flush the plain run (if any) before the entity
                if (s - t > 0)
                    emit (t [0 .. s - t]);
                emit (entity);
                t = ++s;
                break;

            default:
                ++s;
                break;
        }

    // did we change anything? Copy tail also. entity is only non-empty
    // once at least one reserved char has been seen
    if (entity.length)
        emit (t [0 .. e - t]);
    else
        emit (src);
}