1 /**
2  * A UUID is a Universally Unique Identifier.
3  * It is a 128-bit number generated either randomly or according to some
4  * inscrutable algorithm, depending on the UUID version used.
5  *
6  * Here, we implement a data structure for holding and formatting UUIDs.
7  * To generate a UUID, use one of the other modules in the UUID package.
8  * You can also create a UUID by parsing a string containing a textual
9  * representation of a UUID, or by providing the constituent bytes.
10  *
11  * Copyright:
12  *     Copyright (c) 2009-2016 dunnhumby Germany GmbH.
13  *     All rights reserved.
14  *
15  * License:
16  *     Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
17  *     Alternatively, this file may be distributed under the terms of the Tango
18  *     3-Clause BSD License (see LICENSE_BSD.txt for details).
19  *
20  */
21 module ocean.util.uuid.Uuid;
22 
23 import ocean.transition;
24 
25 import ocean.core.ExceptionDefinitions;
26 import Integer = ocean.text.convert.Integer_tango;
27 
28 private union UuidData
29 {
30         uint[4] ui;
31         ubyte[16] ub;
32 }
33 
34 /** This struct represents a UUID. It offers static members for creating and
35  * parsing UUIDs.
36  *
37  * This struct treats a UUID as an opaque type. The specification has fields
38  * for time, version, client MAC address, and several other data points, but
39  * these are meaningless for most applications and means of generating a UUID.
40  *
41  * There are versions of UUID generation involving the system time and MAC
42  * address. These are not used for several reasons:
43  *      - One version contains identifying information, which is undesirable.
44  *      - Ensuring uniqueness between processes requires inter-process
45  *              communication. This would be unreasonably slow and complex.
46  *      - Obtaining the MAC address is a system-dependent operation and beyond
47  *              the scope of this module.
48  *      - Using Java and .NET as a guide, they only implement randomized creation
49  *              of UUIDs, not the MAC address/time based generation.
50  *
51  * When generating a random UUID, use a carefully seeded random number
52  * generator. A poorly chosen seed may produce undesirably consistent results.
53  */
54 struct Uuid
55 {
56         private UuidData _data;
57 
58         /** Copy the givent bytes into a UUID. If you supply more or fewer than
59           * 16 bytes, throws an IllegalArgumentException. */
60         public static Uuid opCall(ubyte[] data)
61         {
62                 if (data.length != 16)
63                 {
64                         throw new IllegalArgumentException("A UUID is 16 bytes long.");
65                 }
66                 Uuid u;
67                 u._data.ub[] = data[];
68                 return u;
69         }
70 
71         /** Attempt to parse the representation of a UUID given in value. If the
72           * value is not in the correct format, throw IllegalArgumentException.
73           * If the value is in the correct format, return a UUID representing the
74           * given value.
75           *
76           * The following is an example of a UUID in the expected format:
77           *     67e55044-10b1-426f-9247-bb680e5fe0c8
78           */
79         public static Uuid parse(char[] value)
80         {
81                 Uuid u;
82                 if (!tryParse(value, u))
83                 {
84                     auto msg = "'" ~ value ~ "' is not in the correct format for a UUID";
85                     throw new IllegalArgumentException(assumeUnique(msg));
86                 }
87                 return u;
88         }
89 
90         /** Attempt to parse the representation of a UUID given in value. If the
91           * value is not in the correct format, return false rather than throwing
92           * an exception. If the value is in the correct format, set uuid to
93           * represent the given value.
94           *
95           * The following is an example of a UUID in the expected format:
96           *     67e55044-10b1-426f-9247-bb680e5fe0c8
97           */
98         public static bool tryParse(char[] value, out Uuid uuid)
99         {
100                 if (value.length != 36 ||
101                         value[8] != '-' ||
102                         value[13] != '-' ||
103                         value[18] != '-' ||
104                         value[23] != '-')
105                 {
106                         return false;
107                 }
108                 int hyphens = 0;
109                 foreach (i, v; value)
110                 {
111                         if ('a' <= v && 'f' >= v) continue;
112                         if ('A' <= v && 'F' >= v) continue;
113                         if ('0' <= v && '9' >= v) continue;
114                         if (v == '-')
115                         {
116                                 hyphens++;
117                                 continue;
118                         }
119                         // illegal character
120                         return false;
121                 }
122                 if (hyphens != 4)
123                 {
124                         return false;
125                 }
126 
127                 with (uuid._data)
128                 {
129                         // This is verbose, but it's simple, and it gets around endian
130                         // issues if you try parsing an integer at a time.
131                         ub[0] = cast(ubyte) Integer.parse(value[0..2], 16);
132                         ub[1] = cast(ubyte) Integer.parse(value[2..4], 16);
133                         ub[2] = cast(ubyte) Integer.parse(value[4..6], 16);
134                         ub[3] = cast(ubyte) Integer.parse(value[6..8], 16);
135 
136                         ub[4] = cast(ubyte) Integer.parse(value[9..11], 16);
137                         ub[5] = cast(ubyte) Integer.parse(value[11..13], 16);
138 
139                         ub[6] = cast(ubyte) Integer.parse(value[14..16], 16);
140                         ub[7] = cast(ubyte) Integer.parse(value[16..18], 16);
141 
142                         ub[8] = cast(ubyte) Integer.parse(value[19..21], 16);
143                         ub[9] = cast(ubyte) Integer.parse(value[21..23], 16);
144 
145                         ub[10] = cast(ubyte) Integer.parse(value[24..26], 16);
146                         ub[11] = cast(ubyte) Integer.parse(value[26..28], 16);
147                         ub[12] = cast(ubyte) Integer.parse(value[28..30], 16);
148                         ub[13] = cast(ubyte) Integer.parse(value[30..32], 16);
149                         ub[14] = cast(ubyte) Integer.parse(value[32..34], 16);
150                         ub[15] = cast(ubyte) Integer.parse(value[34..36], 16);
151                 }
152 
153                 return true;
154         }
155 
156         /** Generate a UUID based on the given random number generator.
157           * The generator must have a method 'uint natural()' that returns
158           * a random number. The generated UUID conforms to version 4 of the
159           * specification. */
160         public static Uuid random(Random)(Random generator)
161         {
162                 Uuid u;
163                 with (u)
164                 {
165                         _data.ui[0] = generator.natural;
166                         _data.ui[1] = generator.natural;
167                         _data.ui[2] = generator.natural;
168                         _data.ui[3] = generator.natural;
169 
170                         // v4: 7th bytes' first half is 0b0100: 4 in hex
171                         _data.ub[6] &= 0b01001111;
172                         _data.ub[6] |= 0b01000000;
173 
174                         // v4: 9th byte's 1st half is 0b1000 to 0b1011: 8, 9, A, B in hex
175                         _data.ub[8] &= 0b10111111;
176                         _data.ub[8] |= 0b10000000;
177                 }
178                 return u;
179         }
180 
181         /* Generate a UUID based on the given namespace and name. This conforms to
182          * versions 3 and 5 of the standard -- version 3 if you use MD5, or version
183          * 5 if you use SHA1.
184          *
185          * You should pass 3 as the value for uuidVersion if you are using the
186          * MD5 hash, and 5 if you are using the SHA1 hash. To do otherwise is an
187          * Abomination Unto Nuggan.
188          *
189          * This method is exposed mainly for the convenience methods in
190          * ocean.util.uuid.*. You can use this method directly if you prefer.
191          */
192         public static Uuid byName(Digest)(Uuid namespace, char[] name, Digest digest,
193                                                                           ubyte uuidVersion)
194         {
195                 /* o  Compute the hash of the name space ID concatenated with the name.
196                    o  Set octets zero through 15 to octets zero through 15 of the hash.
197                    o  Set the four most significant bits (bits 12 through 15) of octet
198                           6 to the appropriate 4-bit version number from Section 4.1.3.
199                    o  Set the two most significant bits (bits 6 and 7) of octet 8 to
200                           zero and one, respectively.  */
201                 auto nameBytes = namespace.toBytes;
202                 nameBytes ~= cast(ubyte[])name;
203                 digest.update(nameBytes);
204                 nameBytes = digest.binaryDigest;
205                 nameBytes[6] = cast(ubyte) ((uuidVersion << 4) | (nameBytes[6] & 0b1111));
206                 nameBytes[8] |= 0b1000_0000;
207                 nameBytes[8] &= 0b1011_1111;
208                 return Uuid(nameBytes[0..16]);
209         }
210 
211         /** Return an empty UUID (with all bits set to 0). This doesn't conform
212           * to any particular version of the specification. It's equivalent to
213           * using an uninitialized UUID. This method is provided for clarity. */
214         public static Uuid empty()
215         {
216                 Uuid uuid;
217                 uuid._data.ui[] = 0;
218                 return uuid;
219         }
220 
221         /** Get a copy of this UUID's value as an array of unsigned bytes. */
222         public ubyte[] toBytes()
223         {
224                 return _data.ub.dup;
225         }
226 
227         /** Gets the version of this UUID.
228           * RFC 4122 defines five types of UUIDs:
229           *     -       Version 1 is based on the system's MAC address and the current time.
230           *     -       Version 2 uses the current user's userid and user domain in
231           *                     addition to the time and MAC address.
232           * -   Version 3 is namespace-based, as generated by the NamespaceGenV3
233           *                     module. It uses MD5 as a hash algorithm. RFC 4122 states that
234           *                     version 5 is preferred over version 3.
235           * -   Version 4 is generated randomly.
236           * -   Version 5 is like version 3, but uses SHA-1 rather than MD5. Use
237           *                     the NamespaceGenV5 module to create UUIDs like this.
238           *
239           * The following additional versions exist:
240           * -   Version 0 is reserved for backwards compatibility.
241           * -   Version 6 is a non-standard Microsoft extension.
242           * -   Version 7 is reserved for future use.
243           */
244         public ubyte format()
245         {
246                 return cast(ubyte) (_data.ub[6] >> 4);
247         }
248 
249         /** Get the canonical string representation of a UUID.
250           * The canonical representation is in hexidecimal, with hyphens inserted
251           * after the eighth, twelfth, sixteenth, and twentieth digits. For example:
252           *     67e55044-10b1-426f-9247-bb680e5fe0c8
253           * This is the format used by the parsing functions.
254           */
255         public char[] toString()
256         {
257                 // Look, only one allocation.
258                 char[] buf = new char[36];
259                 buf[8] = '-';
260                 buf[13] = '-';
261                 buf[18] = '-';
262                 buf[23] = '-';
263                 with (_data)
264                 {
265                         // See above with tryParse: this ignores endianness.
266                         // Technically, it's sufficient that the conversion to string
267                         // matches the conversion from string and from byte array. But
268                         // this is the simplest way to make sure of that. Plus you can
269                         // serialize and deserialize on machines with different endianness
270                         // without a bunch of strange conversions, and with consistent
271                         // string representations.
272                         Integer.format(buf[0..2], ub[0], "x2");
273                         Integer.format(buf[2..4], ub[1], "x2");
274                         Integer.format(buf[4..6], ub[2], "x2");
275                         Integer.format(buf[6..8], ub[3], "x2");
276                         Integer.format(buf[9..11], ub[4], "x2");
277                         Integer.format(buf[11..13], ub[5], "x2");
278                         Integer.format(buf[14..16], ub[6], "x2");
279                         Integer.format(buf[16..18], ub[7], "x2");
280                         Integer.format(buf[19..21], ub[8], "x2");
281                         Integer.format(buf[21..23], ub[9], "x2");
282                         Integer.format(buf[24..26], ub[10], "x2");
283                         Integer.format(buf[26..28], ub[11], "x2");
284                         Integer.format(buf[28..30], ub[12], "x2");
285                         Integer.format(buf[30..32], ub[13], "x2");
286                         Integer.format(buf[32..34], ub[14], "x2");
287                         Integer.format(buf[34..36], ub[15], "x2");
288                 }
289                 return buf;
290         }
291 
292         /** Determines if this UUID has the same value as another. */
293         public equals_t opEquals(Uuid other)
294         {
295                 return
296                         _data.ui[0] == other._data.ui[0] &&
297                         _data.ui[1] == other._data.ui[1] &&
298                         _data.ui[2] == other._data.ui[2] &&
299                         _data.ui[3] == other._data.ui[3];
300         }
301 
302         /** Get a hash code representing this UUID. */
303         public hash_t toHash()
304         {
305                 with (_data)
306                 {
307                         // 29 is just a convenient prime number
308                         return (((((ui[0] * 29) ^ ui[1]) * 29) ^ ui[2]) * 29) ^ ui[3];
309                 }
310         }
311 }
312 
313 
314 version (TangoTest)
315 {
316         import ocean.math.random.Kiss;
317         unittest
318         {
319                 // Generate them in the correct format
320                 for (int i = 0; i < 20; i++)
321                 {
322                         auto uu = Uuid.random(&Kiss.instance).toString;
323                         auto c = uu[19];
324                         test (c == '9' || c == '8' || c == 'a' || c == 'b', uu);
325                         auto d = uu[14];
326                         test (d == '4', uu);
327                 }
328 
329                 // empty
330                 test (Uuid.empty.toString == "00000000-0000-0000-0000-000000000000", Uuid.empty.toString);
331 
332                 ubyte[] bytes = [0x6b, 0xa7, 0xb8, 0x10, 0x9d, 0xad, 0x11, 0xd1,
333                                           0x80, 0xb4, 0x00, 0xc0, 0x4f, 0xd4, 0x30, 0xc8];
334                 Uuid u = Uuid(bytes.dup);
335                 auto str = "64f2ad82-5182-4c6a-ade5-59728ca0567b";
336                 auto u2 = Uuid.parse(str);
337 
338                 // toString
339                 test (Uuid(bytes) == u);
340                 test (u2 != u);
341 
342                 test (u2.format == 4);
343 
344                 // tryParse
345                 Uuid u3;
346                 test (Uuid.tryParse(str, u3));
347                 test (u3 == u2);
348         }
349 
350         unittest
351         {
352                 Uuid fail;
353                 // contains 'r'
354                 test (!Uuid.tryParse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7", fail));
355                 // too short
356                 test (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba", fail));
357                 // hyphens matter
358                 test (!Uuid.tryParse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7", fail));
359                 // hyphens matter (2)
360                 test (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7", fail));
361                 // hyphens matter (3)
362                 test (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7", fail));
363         }
364 
365         unittest
366         {
367                 // contains 'r'
368                 try
369                 {
370                         Uuid.parse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7"); assert (false);
371                 }
372                 catch (IllegalArgumentException) {}
373 
374                 // too short
375                 try
376                 {
377                         Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba"); assert (false);
378                 }
379                 catch (IllegalArgumentException) {}
380 
381                 // hyphens matter
382                 try
383                 {
384                         Uuid.parse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7"); assert (false);
385                 }
386                 catch (IllegalArgumentException) {}
387 
388                 // hyphens matter (2)
389                 try
390                 {
391                         Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7"); assert (false);
392                 }
393                 catch (IllegalArgumentException) {}
394 
395                 // hyphens matter (3)
396                 try
397                 {
398                         Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7"); assert (false);
399                 }
400                 catch (IllegalArgumentException) {}
401         }
402 
403         import ocean.util.digest.Sha1;
404         unittest
405         {
406                 auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41");
407                 auto name = "hello";
408                 // This was generated with the uuid utility on linux/amd64. It might have different results on
409                 // a ppc processor -- the spec says something about network byte order, but it's using an array
410                 // of bytes at that point, so converting to NBO is a noop...
411                 auto expected = Uuid.parse("2b1c6704-a43f-5d43-9abb-b13310b4458a");
412                 auto generated = Uuid.byName(namespace, name, new Sha1, 5);
413                 test (generated == expected, "\nexpected: " ~ expected.toString ~ "\nbut was:  " ~ generated.toString);
414         }
415 
416         import ocean.util.digest.Md5;
417         unittest
418         {
419                 auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41");
420                 auto name = "hello";
421                 auto expected = Uuid.parse("31a2b702-85a8-349a-9b0e-213b1bd753b8");
422                 auto generated = Uuid.byName(namespace, name, new Md5, 3);
423                 test (generated == expected, "\nexpected: " ~ expected.toString ~ "\nbut was:  " ~ generated.toString);
424         }
425         void main(){}
426 }
427 
428 /** A base interface for any UUID generator for UUIDs. That is,
429   * this interface is specified so that you write your code dependent on a
430   * UUID generator that takes an arbitrary random source, and easily switch
431   * to a different random source. Since the default uses KISS, if you find
432   * yourself needing more secure random numbers, you could trivially switch
433   * your code to use the Mersenne twister, or some other PRNG.
434   *
435   * You could also, if you wish, use this to switch to deterministic UUID
436   * generation, if your needs require it.
437   */
438 interface UuidGen
439 {
440         Uuid next();
441 }
442 
443 /** Given a random number generator conforming to Tango's standard random
444   * interface, this will generate random UUIDs according to version 4 of
445   * RFC 4122. */
446 class RandomGen(TRandom) : UuidGen
447 {
448         TRandom random;
449         this (TRandom random)
450         {
451                 this.random = random;
452         }
453 
454         Uuid next()
455         {
456                 return Uuid.random(random);
457         }
458 }
459