1 /**
2  * A UUID is a Universally Unique Identifier.
3  * It is a 128-bit number generated either randomly or according to some
4  * inscrutable algorithm, depending on the UUID version used.
5  *
6  * Here, we implement a data structure for holding and formatting UUIDs.
7  * To generate a UUID, use one of the other modules in the UUID package.
8  * You can also create a UUID by parsing a string containing a textual
9  * representation of a UUID, or by providing the constituent bytes.
10  *
11  * Copyright:
12  *     Copyright (c) 2009-2016 dunnhumby Germany GmbH.
13  *     All rights reserved.
14  *
15  * License:
16  *     Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
17  *     Alternatively, this file may be distributed under the terms of the Tango
18  *     3-Clause BSD License (see LICENSE_BSD.txt for details).
19  *
20  */
21 module ocean.util.uuid.Uuid;
22 
23 import ocean.core.TypeConvert: assumeUnique;
24 import ocean.meta.types.Qualifiers;
25 
26 import ocean.core.ExceptionDefinitions;
27 import Integer = ocean.text.convert.Integer_tango;
28 
/** Raw 128-bit storage for a UUID.
  *
  * Both members overlay the same 16 bytes: `ui` views them as four 32-bit
  * words and `ub` views them as sixteen individual bytes.
  */
private union UuidData
{
        /// The 16 bytes viewed as four 32-bit unsigned words.
        uint[4] ui;
        /// The same 16 bytes viewed one byte at a time.
        ubyte[16] ub;
}
34 
35 /** This struct represents a UUID. It offers static members for creating and
36  * parsing UUIDs.
37  *
38  * This struct treats a UUID as an opaque type. The specification has fields
39  * for time, version, client MAC address, and several other data points, but
40  * these are meaningless for most applications and means of generating a UUID.
41  *
42  * There are versions of UUID generation involving the system time and MAC
43  * address. These are not used for several reasons:
44  *      - One version contains identifying information, which is undesirable.
45  *      - Ensuring uniqueness between processes requires inter-process
46  *              communication. This would be unreasonably slow and complex.
47  *      - Obtaining the MAC address is a system-dependent operation and beyond
48  *              the scope of this module.
49  *      - Using Java and .NET as a guide, they only implement randomized creation
50  *              of UUIDs, not the MAC address/time based generation.
51  *
52  * When generating a random UUID, use a carefully seeded random number
53  * generator. A poorly chosen seed may produce undesirably consistent results.
54  */
55 struct Uuid
56 {
57         private UuidData _data;
58 
59         /** Copy the givent bytes into a UUID. If you supply more or fewer than
60           * 16 bytes, throws an IllegalArgumentException. */
61         public static Uuid opCall(ubyte[] data)
62         {
63                 if (data.length != 16)
64                 {
65                         throw new IllegalArgumentException("A UUID is 16 bytes long.");
66                 }
67                 Uuid u;
68                 u._data.ub[] = data[];
69                 return u;
70         }
71 
72         /** Attempt to parse the representation of a UUID given in value. If the
73           * value is not in the correct format, throw IllegalArgumentException.
74           * If the value is in the correct format, return a UUID representing the
75           * given value.
76           *
77           * The following is an example of a UUID in the expected format:
78           *     67e55044-10b1-426f-9247-bb680e5fe0c8
79           */
80         public static Uuid parse(char[] value)
81         {
82                 Uuid u;
83                 if (!tryParse(value, u))
84                 {
85                     auto msg = "'" ~ value ~ "' is not in the correct format for a UUID";
86                     throw new IllegalArgumentException(assumeUnique(msg));
87                 }
88                 return u;
89         }
90 
91         /** Attempt to parse the representation of a UUID given in value. If the
92           * value is not in the correct format, return false rather than throwing
93           * an exception. If the value is in the correct format, set uuid to
94           * represent the given value.
95           *
96           * The following is an example of a UUID in the expected format:
97           *     67e55044-10b1-426f-9247-bb680e5fe0c8
98           */
99         public static bool tryParse(char[] value, out Uuid uuid)
100         {
101                 if (value.length != 36 ||
102                         value[8] != '-' ||
103                         value[13] != '-' ||
104                         value[18] != '-' ||
105                         value[23] != '-')
106                 {
107                         return false;
108                 }
109                 int hyphens = 0;
110                 foreach (i, v; value)
111                 {
112                         if ('a' <= v && 'f' >= v) continue;
113                         if ('A' <= v && 'F' >= v) continue;
114                         if ('0' <= v && '9' >= v) continue;
115                         if (v == '-')
116                         {
117                                 hyphens++;
118                                 continue;
119                         }
120                         // illegal character
121                         return false;
122                 }
123                 if (hyphens != 4)
124                 {
125                         return false;
126                 }
127 
128                 with (uuid._data)
129                 {
130                         // This is verbose, but it's simple, and it gets around endian
131                         // issues if you try parsing an integer at a time.
132                         ub[0] = cast(ubyte) Integer.parse(value[0..2], 16);
133                         ub[1] = cast(ubyte) Integer.parse(value[2..4], 16);
134                         ub[2] = cast(ubyte) Integer.parse(value[4..6], 16);
135                         ub[3] = cast(ubyte) Integer.parse(value[6..8], 16);
136 
137                         ub[4] = cast(ubyte) Integer.parse(value[9..11], 16);
138                         ub[5] = cast(ubyte) Integer.parse(value[11..13], 16);
139 
140                         ub[6] = cast(ubyte) Integer.parse(value[14..16], 16);
141                         ub[7] = cast(ubyte) Integer.parse(value[16..18], 16);
142 
143                         ub[8] = cast(ubyte) Integer.parse(value[19..21], 16);
144                         ub[9] = cast(ubyte) Integer.parse(value[21..23], 16);
145 
146                         ub[10] = cast(ubyte) Integer.parse(value[24..26], 16);
147                         ub[11] = cast(ubyte) Integer.parse(value[26..28], 16);
148                         ub[12] = cast(ubyte) Integer.parse(value[28..30], 16);
149                         ub[13] = cast(ubyte) Integer.parse(value[30..32], 16);
150                         ub[14] = cast(ubyte) Integer.parse(value[32..34], 16);
151                         ub[15] = cast(ubyte) Integer.parse(value[34..36], 16);
152                 }
153 
154                 return true;
155         }
156 
157         /** Generate a UUID based on the given random number generator.
158           * The generator must have a method 'uint natural()' that returns
159           * a random number. The generated UUID conforms to version 4 of the
160           * specification. */
161         public static Uuid random(Random)(Random generator)
162         {
163                 Uuid u;
164                 with (u)
165                 {
166                         _data.ui[0] = generator.natural;
167                         _data.ui[1] = generator.natural;
168                         _data.ui[2] = generator.natural;
169                         _data.ui[3] = generator.natural;
170 
171                         // v4: 7th bytes' first half is 0b0100: 4 in hex
172                         _data.ub[6] &= 0b01001111;
173                         _data.ub[6] |= 0b01000000;
174 
175                         // v4: 9th byte's 1st half is 0b1000 to 0b1011: 8, 9, A, B in hex
176                         _data.ub[8] &= 0b10111111;
177                         _data.ub[8] |= 0b10000000;
178                 }
179                 return u;
180         }
181 
182         /* Generate a UUID based on the given namespace and name. This conforms to
183          * versions 3 and 5 of the standard -- version 3 if you use MD5, or version
184          * 5 if you use SHA1.
185          *
186          * You should pass 3 as the value for uuidVersion if you are using the
187          * MD5 hash, and 5 if you are using the SHA1 hash. To do otherwise is an
188          * Abomination Unto Nuggan.
189          *
190          * This method is exposed mainly for the convenience methods in
191          * ocean.util.uuid.*. You can use this method directly if you prefer.
192          */
193         public static Uuid byName(Digest)(Uuid namespace, char[] name, Digest digest,
194                                                                           ubyte uuidVersion)
195         {
196                 /* o  Compute the hash of the name space ID concatenated with the name.
197                    o  Set octets zero through 15 to octets zero through 15 of the hash.
198                    o  Set the four most significant bits (bits 12 through 15) of octet
199                           6 to the appropriate 4-bit version number from Section 4.1.3.
200                    o  Set the two most significant bits (bits 6 and 7) of octet 8 to
201                           zero and one, respectively.  */
202                 auto nameBytes = namespace.toBytes;
203                 nameBytes ~= cast(ubyte[])name;
204                 digest.update(nameBytes);
205                 nameBytes = digest.binaryDigest;
206                 nameBytes[6] = cast(ubyte) ((uuidVersion << 4) | (nameBytes[6] & 0b1111));
207                 nameBytes[8] |= 0b1000_0000;
208                 nameBytes[8] &= 0b1011_1111;
209                 return Uuid(nameBytes[0..16]);
210         }
211 
212         /** Return an empty UUID (with all bits set to 0). This doesn't conform
213           * to any particular version of the specification. It's equivalent to
214           * using an uninitialized UUID. This method is provided for clarity. */
215         public static Uuid empty()
216         {
217                 Uuid uuid;
218                 uuid._data.ui[] = 0;
219                 return uuid;
220         }
221 
222         /** Get a copy of this UUID's value as an array of unsigned bytes. */
223         public ubyte[] toBytes()
224         {
225                 return _data.ub.dup;
226         }
227 
228         /** Gets the version of this UUID.
229           * RFC 4122 defines five types of UUIDs:
230           *     -       Version 1 is based on the system's MAC address and the current time.
231           *     -       Version 2 uses the current user's userid and user domain in
232           *                     addition to the time and MAC address.
233           * -   Version 3 is namespace-based, as generated by the NamespaceGenV3
234           *                     module. It uses MD5 as a hash algorithm. RFC 4122 states that
235           *                     version 5 is preferred over version 3.
236           * -   Version 4 is generated randomly.
237           * -   Version 5 is like version 3, but uses SHA-1 rather than MD5. Use
238           *                     the NamespaceGenV5 module to create UUIDs like this.
239           *
240           * The following additional versions exist:
241           * -   Version 0 is reserved for backwards compatibility.
242           * -   Version 6 is a non-standard Microsoft extension.
243           * -   Version 7 is reserved for future use.
244           */
245         public ubyte format()
246         {
247                 return cast(ubyte) (_data.ub[6] >> 4);
248         }
249 
250         /** Get the canonical string representation of a UUID.
251           * The canonical representation is in hexidecimal, with hyphens inserted
252           * after the eighth, twelfth, sixteenth, and twentieth digits. For example:
253           *     67e55044-10b1-426f-9247-bb680e5fe0c8
254           * This is the format used by the parsing functions.
255           */
256         public char[] toString()
257         {
258                 // Look, only one allocation.
259                 char[] buf = new char[36];
260                 buf[8] = '-';
261                 buf[13] = '-';
262                 buf[18] = '-';
263                 buf[23] = '-';
264                 with (_data)
265                 {
266                         // See above with tryParse: this ignores endianness.
267                         // Technically, it's sufficient that the conversion to string
268                         // matches the conversion from string and from byte array. But
269                         // this is the simplest way to make sure of that. Plus you can
270                         // serialize and deserialize on machines with different endianness
271                         // without a bunch of strange conversions, and with consistent
272                         // string representations.
273                         Integer.format(buf[0..2], ub[0], "x2");
274                         Integer.format(buf[2..4], ub[1], "x2");
275                         Integer.format(buf[4..6], ub[2], "x2");
276                         Integer.format(buf[6..8], ub[3], "x2");
277                         Integer.format(buf[9..11], ub[4], "x2");
278                         Integer.format(buf[11..13], ub[5], "x2");
279                         Integer.format(buf[14..16], ub[6], "x2");
280                         Integer.format(buf[16..18], ub[7], "x2");
281                         Integer.format(buf[19..21], ub[8], "x2");
282                         Integer.format(buf[21..23], ub[9], "x2");
283                         Integer.format(buf[24..26], ub[10], "x2");
284                         Integer.format(buf[26..28], ub[11], "x2");
285                         Integer.format(buf[28..30], ub[12], "x2");
286                         Integer.format(buf[30..32], ub[13], "x2");
287                         Integer.format(buf[32..34], ub[14], "x2");
288                         Integer.format(buf[34..36], ub[15], "x2");
289                 }
290                 return buf;
291         }
292 
293         /** Determines if this UUID has the same value as another. */
294         public equals_t opEquals(Uuid other)
295         {
296                 return
297                         _data.ui[0] == other._data.ui[0] &&
298                         _data.ui[1] == other._data.ui[1] &&
299                         _data.ui[2] == other._data.ui[2] &&
300                         _data.ui[3] == other._data.ui[3];
301         }
302 
303         /** Get a hash code representing this UUID. */
304         public hash_t toHash()
305         {
306                 with (_data)
307                 {
308                         // 29 is just a convenient prime number
309                         return (((((ui[0] * 29) ^ ui[1]) * 29) ^ ui[2]) * 29) ^ ui[3];
310                 }
311         }
312 }
313 
314 
// Unit tests, compiled only when the TangoTest version is set.
// NOTE(review): `test` is not declared or imported in the visible part of this
// file -- confirm where it comes from before enabling this version.
version (TangoTest)
{
        import ocean.math.random.Kiss;

        // Random generation, the empty UUID, construction from bytes, equality,
        // version extraction and successful parsing.
        unittest
        {
                // Random UUIDs must carry the version 4 marker ('4' at text
                // offset 14) and the variant marker (8, 9, a or b at offset 19).
                for (int i = 0; i < 20; i++)
                {
                        auto uu = Uuid.random(&Kiss.instance).toString;
                        auto c = uu[19];
                        test (c == '9' || c == '8' || c == 'a' || c == 'b', uu);
                        auto d = uu[14];
                        test (d == '4', uu);
                }

                // The empty UUID prints as all zeroes.
                test (Uuid.empty.toString == "00000000-0000-0000-0000-000000000000", Uuid.empty.toString);

                ubyte[] bytes = [0x6b, 0xa7, 0xb8, 0x10, 0x9d, 0xad, 0x11, 0xd1,
                                          0x80, 0xb4, 0x00, 0xc0, 0x4f, 0xd4, 0x30, 0xc8];
                Uuid u = Uuid(bytes.dup);
                auto str = "64f2ad82-5182-4c6a-ade5-59728ca0567b";
                auto u2 = Uuid.parse(str);

                // Equality: the same bytes give equal UUIDs, different values
                // give unequal ones.
                test (Uuid(bytes) == u);
                test (u2 != u);

                // The parsed text has '4' as its version digit.
                test (u2.format == 4);

                // tryParse yields the same value as parse for valid input.
                Uuid u3;
                test (Uuid.tryParse(str, u3));
                test (u3 == u2);
        }

        // tryParse must return false for malformed input.
        unittest
        {
                Uuid fail;
                // contains 'r'
                test (!Uuid.tryParse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7", fail));
                // too short
                test (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba", fail));
                // hyphens matter: a space where the first hyphen belongs
                test (!Uuid.tryParse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7", fail));
                // hyphens matter (2): an extra hyphen makes the text too long
                test (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7", fail));
                // hyphens matter (3): correct length, but a fifth hyphen in a digit slot
                test (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7", fail));
        }

        // parse must throw IllegalArgumentException for the same malformed
        // inputs. Reaching assert (false) means no exception was thrown.
        unittest
        {
                // contains 'r'
                try
                {
                        Uuid.parse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7"); assert (false);
                }
                catch (IllegalArgumentException) {}

                // too short
                try
                {
                        Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba"); assert (false);
                }
                catch (IllegalArgumentException) {}

                // hyphens matter
                try
                {
                        Uuid.parse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7"); assert (false);
                }
                catch (IllegalArgumentException) {}

                // hyphens matter (2)
                try
                {
                        Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7"); assert (false);
                }
                catch (IllegalArgumentException) {}

                // hyphens matter (3)
                try
                {
                        Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7"); assert (false);
                }
                catch (IllegalArgumentException) {}
        }

        import ocean.util.digest.Sha1;

        // Name-based generation with SHA1 (version 5).
        unittest
        {
                auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41");
                auto name = "hello";
                // This was generated with the uuid utility on linux/amd64. It might have different results on
                // a ppc processor -- the spec says something about network byte order, but it's using an array
                // of bytes at that point, so converting to NBO is a noop...
                auto expected = Uuid.parse("2b1c6704-a43f-5d43-9abb-b13310b4458a");
                auto generated = Uuid.byName(namespace, name, new Sha1, 5);
                test (generated == expected, "\nexpected: " ~ expected.toString ~ "\nbut was:  " ~ generated.toString);
        }

        import ocean.util.digest.Md5;

        // Name-based generation with MD5 (version 3).
        unittest
        {
                auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41");
                auto name = "hello";
                auto expected = Uuid.parse("31a2b702-85a8-349a-9b0e-213b1bd753b8");
                auto generated = Uuid.byName(namespace, name, new Md5, 3);
                test (generated == expected, "\nexpected: " ~ expected.toString ~ "\nbut was:  " ~ generated.toString);
        }

        // Empty entry point for the test build.
        void main(){}
}
428 
/** A base interface for UUID generators.
  *
  * Writing your code against this interface lets it depend on a UUID
  * generator without committing to a particular random source, so you can
  * easily switch to a different one. Since the default uses KISS, if you find
  * yourself needing different random numbers, you could trivially switch
  * your code to use the Mersenne twister, or some other PRNG.
  *
  * You could also, if you wish, use this to switch to deterministic UUID
  * generation, if your needs require it.
  */
interface UuidGen
{
        /// Produce the next UUID from this generator.
        Uuid next();
}
443 
/** A UuidGen that produces random UUIDs according to version 4 of RFC 4122.
  *
  * The randomness comes from a generator conforming to Tango's standard
  * random interface, supplied at construction time.
  */
class RandomGen(TRandom) : UuidGen
{
        /// The random number generator every UUID is drawn from.
        TRandom random;

        /** Create a generator drawing its randomness from the given source.
          *
          * Params:
          *     random = the random number generator to use
          */
        this (TRandom random)
        {
                this.random = random;
        }

        /** Produce the next random UUID.
          *
          * Returns: a new UUID created by Uuid.random from the stored
          *     generator.
          */
        Uuid next()
        {
                auto fresh = Uuid.random(this.random);
                return fresh;
        }
}
460