1 /++
2 This module defines functions to parse units and quantities.
3 
4 Copyright: Copyright 2013-2018, Nicolas Sicard  
5 Authors: Nicolas Sicard  
6 License: $(LINK www.boost.org/LICENSE_1_0.txt, Boost License 1.0)  
7 Source: $(LINK https://github.com/biozic/quantities)  
8 +/
9 module quantities.parsing;
10 
11 import quantities.internal.dimensions;
12 import quantities.runtime;
13 import quantities.compiletime;
14 import std.conv : parse;
15 import std.exception : basicExceptionCtors, enforce;
16 import std.format : format;
17 import std.traits : isNumeric, isSomeString;
18 
/++
A registry of the unit symbols and prefix symbols that a parser can recognize.

Params:
    N = The numeric type used for the unit values and the prefix factors.
+/
struct SymbolList(N)
        if (isNumeric!N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    package
    {
        // Maps a unit symbol to the unit it stands for.
        QVariant!N[string] units;
        // Maps a prefix symbol to its multiplication factor.
        N[string] prefixes;
        // Length (as reported by string.length) of the longest registered prefix.
        size_t maxPrefixLength;
    }

    /++
    Registers unit under symbol, overwriting any unit already stored under
    that symbol.

    Params:
        symbol = The textual symbol of the unit.
        unit = A QVariant, or a quantity (stored through its qVariant property).

    Returns: This list (by value), so that calls can be chained.
    +/
    auto addUnit(Q)(string symbol, Q unit)
            if (isQVariantOrQuantity!Q)
    {
        // Normalize the argument to the value that is stored in the table.
        static if (isQVariant!Q)
            auto variant = unit;
        else static if (isQuantity!Q)
            auto variant = unit.qVariant;
        else
            static assert(false);

        units[symbol] = variant;
        return this;
    }

    /++
    Registers factor under the prefix symbol, overwriting any factor already
    stored under that symbol.

    Note that the template parameter N of this method shadows the N of the
    enclosing struct: the factor may be of any numeric type that converts
    implicitly to the numeric type of the list.

    Params:
        symbol = The textual symbol of the prefix.
        factor = The multiplication factor the prefix stands for.

    Returns: This list (by value), so that calls can be chained.
    +/
    auto addPrefix(N)(string symbol, N factor)
            if (isNumeric!N)
    {
        prefixes[symbol] = factor;
        // Track the longest prefix so that lookups know where to start.
        maxPrefixLength = symbol.length > maxPrefixLength ? symbol.length : maxPrefixLength;
        return this;
    }
}
57 
/++
Parses quantities written as text, using a configurable list of symbols.

Params:
    N = The numeric type of the parsed quantities.
    numberParser = a function that receives any kind of string by reference
        and returns the number it parsed from it.
+/
struct Parser(N, alias numberParser = (ref s) => parse!N(s))
        if (isNumeric!N)
{
    /// The unit and prefix symbols that this parser recognizes.
    SymbolList!N symbolList;

    /++
    Converts the text in str to a QVariant.

    The work is delegated to parseQuantityImpl, which is given this parser's
    numberParser and symbolList.
    +/
    QVariant!N parse(S)(S str)
            if (isSomeString!S)
    {
        auto quantity = parseQuantityImpl!(N, numberParser)(str, symbolList);
        return quantity;
    }
}
///
unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    import std.conv : parse;

    // A unit with the dimension "T", registered below under the symbol "Cy".
    auto century = unit!real("T");
    alias LectureLength = typeof(century);

    // The parser only knows the symbols registered here.
    auto symbols = SymbolList!real().addUnit("Cy", century).addPrefix("µ", 1e-6L);
    alias readNumber = (ref s) => parse!real(s);
    auto parser = Parser!(real, readNumber)(symbols);

    // "µCy" is the prefix "µ" applied to the unit "Cy".
    auto expected = 1e-6L * century;
    auto parsed = parser.parse("1 µCy");
    assert(expected == parsed);
}
98 
/++
Exception thrown when parsing encounters an unexpected token, an unknown unit
symbol or an unexpected end of input.

Its constructors are generated by the basicExceptionCtors mixin imported from
std.exception.
+/
class ParsingException : Exception
{
    mixin basicExceptionCtors;
}
104 
105 package(quantities):
106 
/++
Parses input as an optional leading number followed by an optional unit
expression.

The number is read with numberParser, which receives input by reference and is
expected to leave the unparsed remainder in it. When numberParser throws, the
value defaults to 1 and the whole input is parsed as a unit expression.

Params:
    N = The numeric type of the result.
    numberParser = a function that takes a string by reference and returns
        the parsed number.
    input = The text to parse.
    symbolList = The unit and prefix symbols that may appear in input.

Returns: The parsed number multiplied by the parsed unit; a dimensionless
    QVariant when nothing follows the number.

Throws: ParsingException (from QuantityParser) when the unit expression is
    malformed.
+/
QVariant!N parseQuantityImpl(N, alias numberParser, S)(S input, SymbolList!N symbolList)
        if (isSomeString!S)
{
    import std.range.primitives : empty;

    // numberParser works on a reference and may already have consumed part of
    // the text when it throws. Keep the original so that a failed attempt
    // cannot silently drop characters (e.g. a leading sign) from the input
    // handed to the unit parser.
    S untouched = input;

    N value;
    try
        value = numberParser(input);
    catch (Exception)
    {
        // No leading number: treat the input as a bare unit expression.
        input = untouched;
        value = 1;
    }

    // A number alone is a dimensionless quantity.
    if (input.empty)
        return QVariant!N(value, Dimensions.init);

    auto parser = QuantityParser!(N, S)(input, symbolList);
    return value * parser.parsedQuantity();
}
124 
// A parser that can parse a text for a unit or a quantity.
//
// The constructor splits the text into tokens (see lex); parsedQuantity() then
// consumes the tokens with a small recursive-descent parser:
//
//   compound := exponent-unit { ['*' | '/'] exponent-unit }
//   exponent-unit := unit [ ['^'] integer [ '/' integer ] ]
//   unit := '(' compound ')' | [prefix] symbol
//
// Every syntax error is reported with a ParsingException.
struct QuantityParser(N, S)
        if (isNumeric!N && isSomeString!S)
{
    import std.conv : to;
    import std.exception : enforce;
    import std.format : format;
    import std.range.primitives : empty, front, popFront;

    private
    {
        // The text being parsed; token offsets index into it.
        S input;
        // The units and prefixes that symbols are resolved against.
        SymbolList!N symbolList;
        // The tokens that remain to be consumed.
        Token[] tokens;
    }

    // Stores the arguments and tokenizes input right away.
    // Throws ParsingException if an integer token is malformed.
    this(S input, SymbolList!N symbolList)
    {
        this.input = input;
        this.symbolList = symbolList;
        lex(input);
    }

    // Parses the whole token list and returns the resulting quantity.
    QVariant!N parsedQuantity()
    {
        return parseCompoundUnit();
    }

    // Parses a sequence of units combined by multiplication or division.
    // Two units that follow each other without an operator are multiplied.
    // When inParens is true, parsing stops in front of the closing ')', which
    // is left for the caller to consume.
    QVariant!N parseCompoundUnit(bool inParens = false)
    {
        QVariant!N ret = parseExponentUnit();

        // Continue until the end of the input or the ')' closing this group
        while (!(tokens.empty || (inParens && tokens.front.type == Tok.rparen)))
        {
            immutable op = tokens.front.type;
            if (op == Tok.mul || op == Tok.div)
            {
                advance();
                // An operator must be followed by an operand
                check();
            }

            QVariant!N rhs = parseExponentUnit();
            if (op == Tok.div)
                ret /= rhs;
            else
                ret *= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by an exponent, written either as
    // '^' + integer, as a bare integer or as a superscript integer.
    QVariant!N parseExponentUnit()
    {
        QVariant!N ret = parseUnit();

        // If no exponent is found
        if (tokens.empty)
            return ret;

        // The next token should be '^', an integer or a superior integer
        auto next = tokens.front;
        if (next.type != Tok.exp && next.type != Tok.integer && next.type != Tok.supinteger)
            return ret;

        // Skip the '^' if present, and expect an integer
        if (next.type == Tok.exp)
            advance(Tok.integer);

        Rational r = parseRationalOrInteger();
        return ret ^^ r;
    }

    // Parses an exponent: an integer, optionally followed by '/' and a
    // second integer that is used as the denominator.
    // Throws ParsingException if the denominator is zero.
    Rational parseRationalOrInteger()
    {
        int num = parseInteger();
        int den = 1;
        if (tokens.length && tokens.front.type == Tok.div)
        {
            advance();
            den = parseInteger();
            // The denominator comes straight from the parsed text: report a
            // zero as a parsing error instead of handing it to Rational
            // (presumably Rational cannot represent it -- not visible here).
            enforce!ParsingException(den != 0,
                    "The denominator of an exponent cannot be zero");
        }
        return Rational(num, den);
    }

    // Consumes an integer or superscript integer token and returns its value.
    int parseInteger()
    {
        check(Tok.integer, Tok.supinteger);
        immutable n = tokens.front.integer;
        advance();
        return n;
    }

    // Parses either a parenthesized compound unit or a single (possibly
    // prefixed) unit symbol. With no token left, the result is a
    // dimensionless 1.
    QVariant!N parseUnit()
    {
        if (tokens.empty)
            return QVariant!N(1, Dimensions.init);

        if (tokens.front.type == Tok.lparen)
        {
            advance();
            auto ret = parseCompoundUnit(true);
            check(Tok.rparen);
            advance();
            return ret;
        }
        else
            return parsePrefixUnit();
    }

    // Consumes a symbol token and resolves it to a unit, first as a
    // standalone unit symbol, then as a prefix followed by a unit symbol.
    // Throws ParsingException if the symbol cannot be resolved.
    QVariant!N parsePrefixUnit()
    {
        check(Tok.symbol);
        auto str = input[tokens.front.begin .. tokens.front.end].to!string;
        advance();

        // Try a standalone unit symbol (no prefix)
        if (auto uptr = str in symbolList.units)
            return *uptr;

        // Try with prefixes, the longest prefix first
        string danglingPrefix;
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length < i)
                continue;

            string prefix = str[0 .. i];
            auto factor = prefix in symbolList.prefixes;
            if (!factor)
                continue;

            string unit = str[i .. $];
            if (!unit.length)
            {
                // The whole symbol is a prefix. Remember it for the error
                // message, but keep looking: a shorter prefix may still
                // leave a known unit.
                danglingPrefix = prefix;
                continue;
            }

            if (auto uptr = unit in symbolList.units)
                return *factor * *uptr;
        }

        enforce!ParsingException(!danglingPrefix.length,
                "Expecting a unit after the prefix " ~ danglingPrefix);
        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }

    // The kinds of tokens produced by lex.
    enum Tok
    {
        none,
        symbol,
        mul,
        div,
        exp,
        integer,
        supinteger,
        rparen,
        lparen
    }

    // A token: its kind, the half-open range [begin, end) it occupies in
    // input (in code units of S) and, for integer and supinteger tokens, the
    // parsed value.
    struct Token
    {
        Tok type;
        size_t begin;
        size_t end;
        int integer = int.max;
    }

    // Splits input into tokens and stores them in tokens.
    // Whitespace separates tokens and is otherwise ignored. Signs and digits
    // accumulate into integer tokens, superscript signs and digits into
    // supinteger tokens; operators and parentheses are single-character
    // tokens, and any other run of characters is a symbol token.
    // Throws ParsingException if the text of an integer token is not a valid
    // integer.
    void lex(S input) @safe
    {
        import std.array : appender;
        import std.conv : parse;
        import std.exception : enforce;
        import std.range.primitives : ElementEncodingType;
        import std.traits : Unqual;
        import std.utf : codeLength;

        // Token offsets are used to slice input, so they must be counted in
        // the code units of S itself (UTF-8, UTF-16 or UTF-32), not always
        // in UTF-8 code units.
        alias CodeUnit = Unqual!(ElementEncodingType!S);

        enum State
        {
            none,
            symbol,
            integer,
            supinteger
        }

        auto tokapp = appender(tokens);
        // i: start of the current token; j: current position (both offsets
        // in input)
        size_t i, j;
        State state = State.none;
        // Accumulates the ASCII form of the integer being lexed
        auto intapp = appender!string;

        void pushToken(Tok type)
        {
            tokapp.put(Token(type, i, j));
            i = j;
            state = State.none;
        }

        void pushInteger(Tok type)
        {
            immutable string text = intapp.data;
            string rest = text;
            int n;
            bool valid;
            try
            {
                n = parse!int(rest);
                // Leftover characters (e.g. "2-1") make the integer invalid
                valid = rest.empty;
            }
            catch (Exception)
                valid = false;

            enforce!ParsingException(valid, "Unexpected integer format: %s".format(text));

            tokapp.put(Token(type, i, j, n));
            i = j;
            state = State.none;
            intapp = appender!string;
        }

        // Emits the token being accumulated, if any
        void push()
        {
            if (state == State.symbol)
                pushToken(Tok.symbol);
            else if (state == State.integer)
                pushInteger(Tok.integer);
            else if (state == State.supinteger)
                pushInteger(Tok.supinteger);
        }

        foreach (dchar cur; input)
        {
            auto len = cur.codeLength!CodeUnit;
            switch (cur)
            {
            case ' ':
            case '\t':
            case '\u00A0':
            case '\u2000': .. case '\u200A':
            case '\u202F':
            case '\u205F':
                push();
                j += len;
                i = j;
                break;

            case '(':
                push();
                j += len;
                pushToken(Tok.lparen);
                break;
            case ')':
                push();
                j += len;
                pushToken(Tok.rparen);
                break;

            case '*': // Asterisk
            case '.': // Dot
            case '\u00B7': // Middle dot (·)
            case '\u00D7': // Multiplication sign (×)
            case '\u2219': // Bullet operator (∙)
            case '\u22C5': // Dot operator (⋅)
            case '\u2022': // Bullet (•)
            case '\u2715': // Multiplication X (✕)
                push();
                j += len;
                pushToken(Tok.mul);
                break;

            case '/': // Slash
            case '\u00F7': // Division sign (÷)
            case '\u2215': // Division slash (∕)
                push();
                j += len;
                pushToken(Tok.div);
                break;

            case '^':
                push();
                j += len;
                pushToken(Tok.exp);
                break;

            case '-': // Hyphen
            case '\u2212': // Minus sign (−)
            case '\u2012': // Figure dash (‒)
            case '\u2013': // En dash (–)
                intapp.put('-');
                goto PushIntChar;
            case '+': // Plus sign
                intapp.put('+');
                goto PushIntChar;
            case '0': .. case '9':
                intapp.put(cur);
            PushIntChar:
                if (state != State.integer)
                    push();
                state = State.integer;
                j += len;
                break;

            case '⁰':
                intapp.put('0');
                goto PushSupIntChar;
            case '¹':
                intapp.put('1');
                goto PushSupIntChar;
            case '²':
                intapp.put('2');
                goto PushSupIntChar;
            case '³':
                intapp.put('3');
                goto PushSupIntChar;
            case '⁴':
                intapp.put('4');
                goto PushSupIntChar;
            case '⁵':
                intapp.put('5');
                goto PushSupIntChar;
            case '⁶':
                intapp.put('6');
                goto PushSupIntChar;
            case '⁷':
                intapp.put('7');
                goto PushSupIntChar;
            case '⁸':
                intapp.put('8');
                goto PushSupIntChar;
            case '⁹':
                intapp.put('9');
                goto PushSupIntChar;
            case '⁻':
                intapp.put('-');
                goto PushSupIntChar;
            case '⁺':
                intapp.put('+');
            PushSupIntChar:
                if (state != State.supinteger)
                    push();
                state = State.supinteger;
                j += len;
                break;

            default:
                if (state == State.integer || state == State.supinteger)
                    push();
                state = State.symbol;
                j += len;
                break;
            }
        }
        push();
        tokens = tokapp.data;
    }

    // Drops the current token; if token types are given, additionally checks
    // that the new current token has one of them.
    // Throws ParsingException if there is no token to drop.
    void advance(Types...)(Types types)
    {
        enforce!ParsingException(!tokens.empty, "Unexpected end of input");
        tokens.popFront();

        static if (Types.length)
            check(types);
    }

    // Throws ParsingException if no token is left.
    void check()
    {
        enforce!ParsingException(tokens.length, "Unexpected end of input");
    }

    // Throws ParsingException unless the current token is of type tok.
    void check(Tok tok)
    {
        check();
        enforce!ParsingException(tokens[0].type == tok,
                format("Found '%s' while expecting %s", input[tokens[0].begin .. tokens[0].end],
                    tok));
    }

    // Throws ParsingException unless the current token is of type tok1 or tok2.
    void check(Tok tok1, Tok tok2)
    {
        check();
        enforce!ParsingException(tokens[0].type == tok1 || tokens[0].type == tok2,
                format("Found '%s' while expecting %s or %s",
                    input[tokens[0].begin .. tokens[0].end], tok1, tok2));
    }
}
517 
518 // Tests
519 
@("Generic parsing")
unittest
{
    import std.exception : assertThrown;

    // Base units and helper quantities used by the expectations below.
    auto meter = unit!double("L");
    auto kilogram = unit!double("M");
    auto second = unit!double("T");
    auto one = meter / meter;
    auto unknown = one;

    // A small SI-like symbol list: three units and two prefixes.
    auto symbols = SymbolList!double().addUnit("m", meter).addUnit("kg", kilogram)
        .addUnit("s", second).addPrefix("c", 0.01L).addPrefix("m", 0.001L);

    // Returns true if text parses to the expected quantity.
    bool parsesTo(S, Q)(S text, Q expected)
    {
        import std.conv : parse;

        alias readNumber = (ref s) => parse!double(s);
        auto parsed = parseQuantityImpl!(double, readNumber)(text, symbols);
        return parsed == expected;
    }

    // Whitespace, prefixes and exponents
    assert(parsesTo("1    m    ", meter));
    assert(parsesTo("1m", meter));
    assert(parsesTo("1 mm", 0.001 * meter));
    assert(parsesTo("1 m2", meter * meter));
    assert(parsesTo("1 m^-1", 1 / meter));
    assert(parsesTo("1 m-1", 1 / meter));
    assert(parsesTo("1 m^1/1", meter));
    assert(parsesTo("1 m^-1/1", 1 / meter));
    assert(parsesTo("1 m²", meter * meter));
    assert(parsesTo("1 m⁺²", meter * meter));
    assert(parsesTo("1 m⁻¹", 1 / meter));

    // Parentheses
    assert(parsesTo("1 (m)", meter));
    assert(parsesTo("1 (m^-1)", 1 / meter));
    assert(parsesTo("1 ((m)^-1)^-1", meter));
    assert(parsesTo("1 (s/(s/m))", meter));

    // Multiplication and division
    assert(parsesTo("1 m*m", meter * meter));
    assert(parsesTo("1 m m", meter * meter));
    assert(parsesTo("1 m.m", meter * meter));
    assert(parsesTo("1 m⋅m", meter * meter));
    assert(parsesTo("1 m×m", meter * meter));
    assert(parsesTo("1 m/m", meter / meter));
    assert(parsesTo("1 m÷m", meter / meter));
    assert(parsesTo("1 m.s", second * meter));
    assert(parsesTo("1 m s", second * meter));
    assert(parsesTo("1 m²s", meter * meter * second));
    assert(parsesTo("1 m*m/m", meter));

    // Dimensionless results
    assert(parsesTo("0.8 m⁰", 0.8 * one));
    assert(parsesTo("0.8", 0.8 * one));
    assert(parsesTo("0.8 ", 0.8 * one));

    // Malformed inputs must be rejected with a ParsingException
    foreach (malformed; [
            "1 c m", "1 c", "1 Qm", "1 m + m", "1 m/", "1 m^", "1 m^m",
            "1 m ) m", "1 m * m) m", "1 m^²", "1-⁺⁵"
        ])
        assertThrown!ParsingException(parsesTo(malformed, unknown));
}