1 /++
2 This module defines functions to parse units and quantities.
3 
The text input is parsed according to the grammar given at the end of this
section. A few examples first:
6 $(DL
7 $(DT Prefixes and unit symbols must be joined:)
8     $(DD "1 mm" = 1 millimeter)
9     $(DD "1 m m" = 1 square meter)
10 $(BR)
11 $(DT Standalone units are preferred over prefixed ones:)
12     $(DD "1 cd" = 1 candela, not 1 centiday)
13 $(BR)
14 $(DT Powers of units:)
15     $(DD "1 m^2")
16     $(DD "1 m^-1/2" $(I (rational exponent)))
17     $(DD "1 m²" $(I (superscript integer)))
18 $(BR)
$(DT Multiplication of two units:)
20     $(DD "1 N m" $(I (whitespace)))
21     $(DD "1 N . m")
22     $(DD "1 N ⋅ m" $(I (centered dot)))
23     $(DD "1 N * m")
24     $(DD "1 N × m" $(I (times sign)))
25 $(BR)
$(DT Division of two units:)
27     $(DD "1 mol / s")
28     $(DD "1 mol ÷ s")
29 $(BR)
30 $(DT Grouping of units with parentheses:)
31     $(DD "1 kg/(m.s^2)" = 1 kg m⁻¹ s⁻²)
32 )
33 
34 Grammar: (whitespace not significant)
35 $(DL
36 $(DT Quantity:)
37     $(DD Units)
38     $(DD Number Units)
39 $(BR)
40 $(DT Number:)
41     $(DD $(I Numeric value parsed by std.conv.parse!double))
42 $(BR)
43 $(DT Units:)
44     $(DD Unit)
45     $(DD Unit Units)
46     $(DD Unit Operator Units)
47 $(BR)
48 $(DT Operator:)
49     $(DD $(B *))
50     $(DD $(B .))
51     $(DD $(B ⋅))
52     $(DD $(B ×))
53     $(DD $(B /))
54     $(DD $(B ÷))
55 $(BR)
56 $(DT Unit:)
57     $(DD Base)
58     $(DD Base $(B ^) Integer)
59     $(DD Base $(B ^) Rational)
60     $(DD Base SupInteger)
61 $(BR)
62 $(DT Base:)
63     $(DD Symbol)
64     $(DD Prefix Symbol)
65     $(DD $(B $(LPAREN)) Units $(B $(RPAREN)))
66 $(BR)
67 $(DT Symbol:)
68     $(DD $(I The symbol of a valid unit))
69 $(BR)
70 $(DT Prefix:)
71     $(DD $(I The symbol of a valid prefix))
72 $(BR)
73 $(DT Rational:)
74     $(DD Integer $(B /) Integer)
75 $(BR)
76 $(DT Integer:)
77     $(DD $(I Integer value parsed by std.conv.parse!int))
78 $(BR)
79 $(DT SupInteger:)
80     $(DD $(I Superscript version of Integer))
81 )
82 
83 Copyright: Copyright 2013-2018, Nicolas Sicard
84 Authors: Nicolas Sicard
85 License: $(LINK www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
86 Source: $(LINK https://github.com/biozic/quantities)
87 +/
88 module quantities.runtime.parsing;
89 
90 import quantities.internal.dimensions;
91 import quantities.runtime.qvariant;
92 import quantities.compiletime.quantity;
93 
94 import std.array;
95 import std.algorithm;
96 import std.conv;
97 import std.exception;
98 import std.math;
99 import std.range;
100 import std.string;
101 import std.traits;
102 import std.typetuple;
103 import std.utf;
104 
/++
Registry of the unit symbols and prefix symbols that a parser can recognize.

Units are stored as `QVariant!N` values and prefixes as plain factors of type
`N`, both keyed by their textual symbol. The length (in UTF-8 code units) of
the longest registered prefix is tracked so that prefix lookup can try the
longest candidates first.

Params:
    N = The numeric type of the stored quantities and prefix factors.
+/
struct SymbolList(N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    private
    {
        // Unit symbol -> unit value.
        QVariant!N[string] units;
        // Prefix symbol -> multiplication factor.
        N[string] prefixes;
        // Length of the longest prefix symbol registered so far.
        size_t maxPrefixLength;
    }

    /++
    Adds (or replaces) a unit in the list.

    Params:
        symbol = The text that designates the unit.
        unit = A QVariant, or a Quantity (stored through its `qVariant`).

    Returns: A copy of this list, which allows calls to be chained.
    +/
    auto addUnit(Q)(string symbol, Q unit)
            if (isQVariantOrQuantity!Q)
    {
        static if (isQVariant!Q)
        {
            units[symbol] = unit;
        }
        else static if (isQuantity!Q)
        {
            units[symbol] = unit.qVariant;
        }
        else
        {
            static assert(false);
        }

        return this;
    }

    /++
    Adds (or replaces) a prefix in the list.

    Params:
        symbol = The text that designates the prefix.
        factor = The factor applied to a unit carrying this prefix; it must be
            implicitly convertible to `N`.

    Returns: A copy of this list, which allows calls to be chained.
    +/
    auto addPrefix(F)(string symbol, F factor)
            if (isNumeric!F)
    {
        prefixes[symbol] = factor;
        maxPrefixLength = max(maxPrefixLength, symbol.length);
        return this;
    }
}
142 
/++
A quantity parser bound to a numeric type, a number-parsing function and a
list of known symbols.

Params:
    N = The numeric type of the quantities.
    numberParser = a function that takes a reference to a string, consumes the
        leading number from it and returns the parsed number.
+/
struct Parser(N, alias numberParser)
        if (isNumeric!N)
{
    /// The unit and prefix symbols that this parser recognizes.
    SymbolList!N symbolList;

    /++
    Parses a QVariant from str.

    Params:
        str = The text to parse: an optional number followed by units.

    Returns: The parsed quantity as a `QVariant!N`.
    +/
    QVariant!N parse(S)(S str)
            if (isSomeString!S)
    {
        // Spell out every template argument instead of relying on deduction.
        return parseQuantityImpl!(N, numberParser, S)(str, symbolList);
    }
}
///
unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    import std.conv : parse;

    // The century is declared as a unit of its own, tagged "T".
    auto century = unit!real("T");
    alias LectureLength = typeof(century);

    // Register the unit symbol "Cy" and the prefix "µ" (factor 1e-6).
    auto symbolList = SymbolList!real().addUnit("Cy", century).addPrefix("µ", 1e-6L);
    // The number parser receives the text by reference and consumes the
    // leading number from it.
    alias numberParser = (ref string s) => std.conv.parse!real(s);
    auto parser = Parser!(real, numberParser)(symbolList);

    // "1 µCy" is resolved as the prefix "µ" followed by the unit "Cy".
    auto timing = 1e-6L * century;
    assert(timing == parser.parse("1 µCy"));
}
183 
// Disabled test: `version (none)` blocks are never compiled.
// NOTE(review): presumably kept as a reminder that parsing at compile time is
// a goal that does not work yet -- confirm before enabling or removing.
version (none) unittest  // Compile-time
{
    import std.conv : parse;

    enum euro = unit!int("@");
    alias Currency = typeof(euro);

    // Integer-valued symbol list: unit "€" and prefix "k" (factor 1000).
    enum symbolList = SymbolList!int().addUnit("€", euro).addPrefix("k", 1000);
    alias intParser = (ref string s) => std.conv.parse!int(s);
    enum parser = Parser!(int, intParser)(symbolList);

    // The whole parse is expected to be evaluated during compilation.
    enum cost = 2_500_000 * euro;
    static assert(cost == parser.parse("2500 k€"));
}
198 
/++
Exception thrown when parsing fails: an unexpected token or an unexpected end
of input is found, an integer exponent is malformed, or a symbol matches no
registered unit.
+/
class ParsingException : Exception
{
    // Generates the standard exception constructors (std.exception).
    mixin basicExceptionCtors;
}
204 
205 private:
206 
/+
Parses a quantity (an optional number followed by units) from `input`.

Params:
    N = The numeric type of the resulting QVariant.
    parseFun = Called with the text by reference; it is expected to consume the
        leading number and return its value.
    input = The text to parse.
    symbolList = The unit and prefix symbols to recognize.

Returns: The numeric value multiplied by the parsed compound unit. When the
    number parser throws, the value defaults to 1. When nothing is left after
    the number, the result is dimensionless.

Throws: ParsingException if the remaining text is not a valid unit expression.
+/
QVariant!N parseQuantityImpl(N, alias parseFun, S)(S input, SymbolList!N symbolList)
        if (isSomeString!S)
{
    auto rest = input[];

    // A text that does not start with a number is a bare unit: its implicit
    // numeric factor is 1.
    N magnitude;
    try
    {
        magnitude = parseFun(rest);
    }
    catch (Exception)
    {
        magnitude = 1;
    }

    // Nothing follows the number: the quantity has no dimension.
    if (rest.empty)
        return QVariant!N(magnitude, Dimensions.init);

    auto unitParser = QuantityParser!N(lex(rest), symbolList);
    return magnitude * unitParser.parseCompoundUnit();
}
226 
unittest  // Test parsing
{
    auto meter = unit!double("L");
    auto kilogram = unit!double("M");
    auto second = unit!double("T");
    // Dimensionless unit, also used as a placeholder where a parse must throw.
    auto one = meter / meter;
    auto unknown = one;

    // Note that "m" is registered both as a unit (meter) and as a prefix (milli).
    auto siSL = SymbolList!double().addUnit("m", meter).addUnit("kg", kilogram)
        .addUnit("s", second).addPrefix("c", 0.01L).addPrefix("m", 0.001L);

    // Parses `input` with the symbols above and compares it with `quantity`.
    bool checkParse(S, Q)(S input, Q quantity)
            if (isSomeString!S)
    {
        alias numberParser = std.conv.parse!(Q.valueType, S);
        return parseQuantityImpl!(double, numberParser)(input, siSL) == quantity;
    }

    // Whitespace, prefixes and exponents (integer, rational, superscript)
    assert(checkParse("1    m    ", meter));
    assert(checkParse("1m", meter));
    assert(checkParse("1 mm", 0.001 * meter));
    assert(checkParse("1 m^-1", 1 / meter));
    assert(checkParse("1 m^2/2", meter));
    assert(checkParse("1 m^-2/2", 1 / meter));
    assert(checkParse("1 m²", meter * meter));
    assert(checkParse("1 m⁺²", meter * meter));
    assert(checkParse("1 m⁻¹", 1 / meter));
    // Parentheses
    assert(checkParse("1 (m)", meter));
    assert(checkParse("1 (m^-1)", 1 / meter));
    assert(checkParse("1 ((m)^-1)^-1", meter));
    assert(checkParse("1 (s/(s/m))", meter));
    // Multiplication and division operators
    assert(checkParse("1 m*m", meter * meter));
    assert(checkParse("1 m m", meter * meter));
    assert(checkParse("1 m.m", meter * meter));
    assert(checkParse("1 m⋅m", meter * meter));
    assert(checkParse("1 m×m", meter * meter));
    assert(checkParse("1 m/m", meter / meter));
    assert(checkParse("1 m÷m", meter / meter));
    assert(checkParse("1 m.s", second * meter));
    assert(checkParse("1 m s", second * meter));
    assert(checkParse("1 m²s", meter * meter * second));
    assert(checkParse("1 m*m/m", meter));
    // Dimensionless results
    assert(checkParse("0.8 m⁰", 0.8 * one));
    assert(checkParse("0.8", 0.8 * one));
    assert(checkParse("0.8 ", 0.8 * one));

    // Invalid input must be reported with a ParsingException
    assertThrown!ParsingException(checkParse("1 c m", unknown));
    assertThrown!ParsingException(checkParse("1 c", unknown));
    assertThrown!ParsingException(checkParse("1 Qm", unknown));
    assertThrown!ParsingException(checkParse("1 m + m", unknown));
    assertThrown!ParsingException(checkParse("1 m/", unknown));
    assertThrown!ParsingException(checkParse("1 m^", unknown));
    assertThrown!ParsingException(checkParse("1 m^m", unknown));
    assertThrown!ParsingException(checkParse("1 m ) m", unknown));
    assertThrown!ParsingException(checkParse("1 m * m) m", unknown));
    assertThrown!ParsingException(checkParse("1 m^²", unknown));
    assertThrown!ParsingException(checkParse("1-⁺⁵", unknown));
}
285 
// A recursive-descent parser that turns a list of tokens into a unit or a
// quantity, resolving unit and prefix symbols through a SymbolList.
// Every parsing method consumes the tokens it recognizes and throws a
// ParsingException when the input does not match the grammar.
struct QuantityParser(N)
{
    private
    {
        // Tokens that remain to be consumed.
        Token[] tokens;
        // Known unit and prefix symbols.
        SymbolList!N symbolList;
    }

    // Parses a sequence of units combined by multiplication (explicit operator
    // or simple juxtaposition) or division, applied from left to right.
    // When inParens is true, parsing stops in front of a closing parenthesis,
    // which is left for the caller to consume.
    QVariant!N parseCompoundUnit(bool inParens = false)
    {
        QVariant!N ret = parseExponentUnit();

        while (!tokens.empty && !(inParens && tokens.front.type == Tok.rparen))
        {
            // Without an explicit operator, two adjacent units are multiplied.
            immutable op = tokens.front.type;
            if (op == Tok.mul || op == Tok.div)
            {
                tokens.advance();
                // An operator must be followed by an operand.
                tokens.check();
            }

            QVariant!N rhs = parseExponentUnit();
            if (op == Tok.div)
                ret /= rhs;
            else
                ret *= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by an exponent, written either as
    // '^' followed by an integer or a rational, or as a superscript integer.
    QVariant!N parseExponentUnit()
    {
        QVariant!N ret = parseUnit();

        if (tokens.empty)
            return ret;

        immutable next = tokens.front.type;
        if (next != Tok.exp && next != Tok.supinteger)
            return ret;

        // '^' must be followed by a regular (non-superscript) integer.
        if (next == Tok.exp)
            tokens.advance(Tok.integer);

        Rational r = parseRationalOrInteger();
        return ret ^^ r;
    }

    // Parses an exponent: an integer, optionally followed by '/' and a second
    // integer that is the denominator of a rational exponent.
    Rational parseRationalOrInteger()
    {
        int num = parseInteger();
        int den = 1;

        // A '/' is part of the exponent only when an integer follows it.
        // Otherwise it is a division between units (e.g. "m^2/s") and must be
        // left to parseCompoundUnit.
        if (tokens.length >= 2 && tokens[0].type == Tok.div
                && (tokens[1].type == Tok.integer || tokens[1].type == Tok.supinteger))
        {
            tokens.advance();
            den = parseInteger();
            // Report invalid input as a parsing error instead of handing a
            // zero denominator over to Rational.
            enforceEx!ParsingException(den != 0,
                    "The denominator of an exponent cannot be zero");
        }
        return Rational(num, den);
    }

    // Consumes an integer or superscript-integer token and returns its value.
    int parseInteger()
    {
        tokens.check(Tok.integer, Tok.supinteger);
        int n = tokens.front.integer;
        tokens.advance();
        return n;
    }

    // Parses either a parenthesized compound unit or a (prefixed) unit symbol.
    // With no token left, the result is the dimensionless value 1.
    QVariant!N parseUnit()
    {
        if (!tokens.length)
            return QVariant!N(1, Dimensions.init);

        if (tokens.front.type == Tok.lparen)
        {
            tokens.advance();
            auto ret = parseCompoundUnit(true);
            tokens.check(Tok.rparen);
            tokens.advance();
            return ret;
        }
        else
            return parsePrefixUnit();
    }

    // Consumes a symbol token and resolves it, first as a standalone unit,
    // then as a prefix immediately followed by a unit.
    QVariant!N parsePrefixUnit()
    {
        tokens.check(Tok.symbol);
        auto str = tokens.front.slice;
        tokens.advance();

        // Try a standalone unit symbol (no prefix)
        auto uptr = str in symbolList.units;
        if (uptr)
            return *uptr;

        // Try with prefixes, the longest prefix first
        N* factor;
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length >= i)
            {
                string prefix = str[0 .. i].to!string;
                factor = prefix in symbolList.prefixes;
                if (factor)
                {
                    string unit = str[i .. $].to!string;
                    enforceEx!ParsingException(unit.length,
                            "Expecting a unit after the prefix " ~ prefix);
                    uptr = unit in symbolList.units;
                    if (uptr)
                        return *factor * *uptr;
                }
            }
        }

        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }
}
424 
// The kinds of tokens produced by lex() and consumed by QuantityParser.
enum Tok
{
    none, // default value; never emitted by lex()
    symbol, // run of characters that have no other meaning (unit text)
    mul, // '*', '.', '⋅' or '×'
    div, // '/' or '÷'
    exp, // '^'
    integer, // run of ASCII digits and '+'/'-' signs
    supinteger, // run of superscript digits and superscript signs
    rparen, // ')'
    lparen // '('
}
437 
// A lexical token: its kind, the text it covers and, for integer tokens, the
// parsed value.
struct Token
{
    // Kind of the token.
    Tok type;
    // The part of the lexed input covered by the token.
    const(char)[] slice;
    // Value of an integer or supinteger token; stays int.max for other kinds.
    int integer = int.max;
}
444 
// Splits a unit expression into tokens.
//
// Whitespace separates tokens and is discarded. Parentheses, operators and '^'
// are one-character tokens. Runs of ASCII digits/signs and runs of superscript
// digits/signs become integer tokens that carry their numeric value. Any other
// run of characters becomes a symbol token.
//
// Throws a ParsingException when a run of digits and signs is not a valid int.
Token[] lex(const(char)[] input)
{
    // Kind of the multi-character token that is being accumulated, if any.
    enum State
    {
        none,
        symbol,
        integer,
        supinteger
    }

    // True for the characters that are treated as whitespace.
    static bool isBlank(dchar c)
    {
        switch (c)
        {
        case ' ':
        case '\t':
        case '\u00A0':
        case '\u2000': .. case '\u200A':
        case '\u202F':
        case '\u205F':
            return true;
        default:
            return false;
        }
    }

    // Kind of the one-character token spelled by c, or Tok.none.
    static Tok operatorOf(dchar c)
    {
        switch (c)
        {
        case '(':
            return Tok.lparen;
        case ')':
            return Tok.rparen;
        case '*':
        case '.':
        case '⋅':
        case '×':
            return Tok.mul;
        case '/':
        case '÷':
            return Tok.div;
        case '^':
            return Tok.exp;
        default:
            return Tok.none;
        }
    }

    // ASCII counterpart of a superscript digit or sign, or '\0' for any other
    // character.
    static char plainOf(dchar c)
    {
        switch (c)
        {
        case '⁰':
            return '0';
        case '¹':
            return '1';
        case '²':
            return '2';
        case '³':
            return '3';
        case '⁴':
            return '4';
        case '⁵':
            return '5';
        case '⁶':
            return '6';
        case '⁷':
            return '7';
        case '⁸':
            return '8';
        case '⁹':
            return '9';
        case '⁺':
            return '+';
        case '⁻':
            return '-';
        default:
            return '\0';
        }
    }

    auto sink = appender!(Token[])();

    // input[begin .. end] is the text of the token being accumulated; both are
    // offsets in UTF-8 code units.
    size_t begin, end;
    State state = State.none;

    // Emits the pending text as a token without numeric value.
    void emit(Tok type)
    {
        sink.put(Token(type, input[begin .. end]));
        begin = end;
        state = State.none;
    }

    // Emits the pending text as an integer token, after converting superscript
    // characters to ASCII when needed.
    void emitInteger(Tok type)
    {
        const(char)[] digits = input[begin .. end];

        if (type == Tok.supinteger)
        {
            auto ascii = appender!string;
            foreach (dchar c; digits)
            {
                immutable plain = plainOf(c);
                if (plain == '\0')
                    assert(false, "Error in pushInteger()");
                ascii.put(plain);
            }
            digits = ascii.data;
        }

        int n;
        try
        {
            n = std.conv.parse!int(digits);
            // The whole run must be consumed: this rejects e.g. "2-3".
            enforce(digits.empty);
        }
        catch (Exception)
            throw new ParsingException("Unexpected integer format: %s".format(input[begin .. end]));

        sink.put(Token(type, input[begin .. end], n));
        begin = end;
        state = State.none;
    }

    // Emits the token being accumulated, if there is one.
    void flush()
    {
        final switch (state)
        {
        case State.none:
            break;
        case State.symbol:
            emit(Tok.symbol);
            break;
        case State.integer:
            emitInteger(Tok.integer);
            break;
        case State.supinteger:
            emitInteger(Tok.supinteger);
            break;
        }
    }

    // Adds a character of the given width to a token of the given kind,
    // closing the pending token first when its kind is different.
    void extend(State kind, size_t width)
    {
        if (state != kind)
            flush();
        state = kind;
        end += width;
    }

    foreach (dchar ch; input)
    {
        immutable width = ch.codeLength!char;
        immutable op = operatorOf(ch);

        if (isBlank(ch))
        {
            // Whitespace closes the pending token and is skipped.
            flush();
            end += width;
            begin = end;
        }
        else if (op != Tok.none)
        {
            flush();
            end += width;
            emit(op);
        }
        else if ((ch >= '0' && ch <= '9') || ch == '-' || ch == '+')
            extend(State.integer, width);
        else if (plainOf(ch) != '\0')
            extend(State.supinteger, width);
        else
            extend(State.symbol, width);
    }
    flush();

    return sink.data;
}
637 
// Drops the first token of the list, then optionally checks that the new
// first token has one of the expected types.
// Throws a ParsingException if the list is empty or if the check fails.
void advance(Types...)(ref Token[] tokens, Types types)
{
    if (tokens.empty)
        throw new ParsingException("Unexpected end of input");
    tokens.popFront();

    static if (Types.length > 0)
    {
        check(tokens, types);
    }
}
646 
// Throws a ParsingException if there is no token left.
void check(Token[] tokens)
{
    if (tokens.length == 0)
        throw new ParsingException("Unexpected end of input");
}
651 
// Throws a ParsingException if there is no token left or if the first token
// is not of type tok.
void check(Token[] tokens, Tok tok)
{
    tokens.check();

    auto head = tokens[0];
    if (head.type != tok)
        throw new ParsingException(format("Found '%s' while expecting %s", head.slice, tok));
}
658 
// Throws a ParsingException if there is no token left or if the first token
// is neither of type tok1 nor of type tok2.
void check(Token[] tokens, Tok tok1, Tok tok2)
{
    tokens.check();

    auto head = tokens[0];
    if (head.type != tok1 && head.type != tok2)
        throw new ParsingException(format("Found '%s' while expecting %s or %s",
                head.slice, tok1, tok2));
}