1 /++
2 This module defines functions to parse units and quantities. The text
3 input is parsed according to the following grammar. For example:
4 $(DL
5 $(DT Prefixes and unit symbols must be joined:)
6     $(DD "1 mm" = 1 millimeter)
7     $(DD "1 m m" = 1 square meter)
8 $(BR)
9 $(DT Standalone units are preferred over prefixed ones:)
10     $(DD "1 cd" = 1 candela, not 1 centiday)
11 $(BR)
12 $(DT Powers of units:)
13     $(DD "1 m^2")
14     $(DD "1 m²" $(I (superscript integer)))
15 $(BR)
$(DT Multiplication of two units:)
17     $(DD "1 N m" $(I (whitespace)))
18     $(DD "1 N . m")
19     $(DD "1 N ⋅ m" $(I (centered dot)))
20     $(DD "1 N * m")
21     $(DD "1 N × m" $(I (times sign)))
22 $(BR)
$(DT Division of two units:)
24     $(DD "1 mol / s")
25     $(DD "1 mol ÷ s")
26 $(BR)
27 $(DT Grouping of units with parentheses:)
28     $(DD "1 kg/(m.s^2)" = 1 kg m⁻¹ s⁻²)
29 )
30 
31 Grammar: (whitespace not significant)
32 $(DL
33 $(DT Quantity:)
34     $(DD Units)
35     $(DD Number Units)
36 $(BR)
37 $(DT Number:)
38     $(DD $(I Numeric value parsed by std.conv.parse!double))
39 $(BR)
40 $(DT Units:)
41     $(DD Unit)
42     $(DD Unit Units)
43     $(DD Unit Operator Units)
44 $(BR)
45 $(DT Operator:)
46     $(DD $(B *))
47     $(DD $(B .))
48     $(DD $(B ⋅))
49     $(DD $(B ×))
50     $(DD $(B /))
51     $(DD $(B ÷))
52 $(BR)
53 $(DT Unit:)
54     $(DD Base)
55     $(DD Base $(B ^) Integer)
56     $(DD Base SupInteger)
57 $(BR)
58 $(DT Base:)
59     $(DD Symbol)
60     $(DD Prefix Symbol)
61     $(DD $(B $(LPAREN)) Units $(B $(RPAREN)))
62 $(BR)
63 $(DT Symbol:)
64     $(DD $(I The symbol of a valid unit))
65 $(BR)
66 $(DT Prefix:)
67     $(DD $(I The symbol of a valid prefix))
68 $(BR)
69 $(DT Integer:)
70     $(DD $(I Integer value parsed by std.conv.parse!int))
71 $(BR)
72 $(DT SupInteger:)
73     $(DD $(I Superscript version of Integer))
74 )
75 
76 Copyright: Copyright 2013-2015, Nicolas Sicard
77 Authors: Nicolas Sicard
78 License: $(LINK www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
79 Source: $(LINK https://github.com/biozic/quantities)
80 +/
81 module quantities.parsing;
82 
83 import quantities.internal.dimensions;
84 import quantities.base;
85 import quantities.qvariant;
86 
87 import std.array;
88 import std.algorithm;
89 import std.conv;
90 import std.exception;
91 import std.math;
92 import std.range;
93 import std.string;
94 import std.traits;
95 import std.typetuple;
96 import std.utf;
97 
98 
/++
Exception thrown when operating on two units that are not interconvertible.
+/
class DimensionException : Exception
{
    /// Creates the exception from a message; the chained throwable comes last.
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }

    /// Creates the exception from a message immediately followed by the chained throwable.
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }
}
114 
/++
Contains the symbols of the units and the prefixes that a parser can handle.

Each `add*` method updates the list and returns it, so calls can be chained.
+/
struct SymbolList(N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    // Lookup tables, only visible inside the package.
    package QVariant!N[string] units;   // unit symbol -> unit
    package N[string] prefixes;         // prefix symbol -> multiplication factor
    package size_t maxPrefixLength;     // length (in code units) of the longest prefix symbol

    /// Adds (or replaces) a unit in the list
    auto addUnit(Q)(string symbol, Q unit)
        if (isQVariant!Q)
    {
        units[symbol] = unit;
        return this;
    }
    /// ditto
    auto addUnit(Q)(string symbol, Q unit)
        if (isQuantity!Q)
    {
        // Quantities are stored through their variant representation.
        return addUnit(symbol, unit.qVariant);
    }

    /// Adds (or replaces) a prefix in the list
    auto addPrefix(N)(string symbol, N factor)
        if (isNumeric!N)
    {
        // Keep track of the longest prefix symbol registered so far.
        immutable symbolLength = symbol.length;
        if (maxPrefixLength < symbolLength)
            maxPrefixLength = symbolLength;

        prefixes[symbol] = factor;
        return this;
    }
}
153 
/// Type of a function that can parse a string for a numeric value of type N.
/// The string is taken by reference; the parsers in this module continue
/// with whatever remains in it after the call.
alias NumberParser(N) = N function(ref string s) pure @safe;
156 
/// A quantity parser
struct Parser(N)
{
    SymbolList!N symbolList; /// A list of registered symbols for units and prefixes.
    NumberParser!N numberParser; /// A function that can parse a string for a numeric value of type N.

    /++
    Parses a QVariant from a string.

    Throws: ParsingException if the unit part of the string cannot be parsed.
    +/
    QVariant!N parseVariant(string str)
    {
        return parseQuantityImpl!N(str, symbolList, numberParser);
    }

    /++
    Parses a quantity of a known type Q from a string.

    The numeric type N of the parser must be implicitly convertible to the
    value type of Q.

    Throws: ParsingException if the unit part of the string cannot be parsed;
    DimensionException if the parsed dimensions differ from those of Q.
    +/
    Q parse(Q)(string str)
        if (isQuantity!Q)
    {
        static assert(is(N : Q.valueType), "Incompatible value type: " ~ Q.valueType.stringof);

        // Parse with the parser's own numeric type: the symbol list and the
        // number parser are both parameterized on N, so instantiating the
        // implementation with Q.valueType would only compile when both types
        // are identical, contradicting the convertibility check above.
        auto q = parseQuantityImpl!N(str, symbolList, numberParser);
        enforceEx!DimensionException(Q.dimensions == q.dimensions,
            "Dimension error: [%s] is not compatible with [%s]".format(
                Q.dimensions.toString, q.dimensions.toString));

        // Implicit conversion, guaranteed to exist by the static assert.
        Q.valueType raw = q.rawValue;
        return Q.make(raw);
    }
}
///
pure @safe unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    // The length of a lecture is measured in (micro)centuries.
    auto century = unit!(real, "century");
    alias LectureLength = typeof(century);

    // A run-time parser that knows the century and the micro prefix.
    import std.conv;
    auto parser = Parser!real(
        SymbolList!real().addUnit("Cy", century).addPrefix("µ", 1e-6L),
        &std.conv.parse!(real, string));

    auto expected = 1e-6L * century;
    assert(expected == parser.parse!LectureLength("1 µCy"));
    assert(expected == parser.parseVariant("1 µCy"));
}
205 
/// Creates a compile-time parser that parses a string for a quantity and
/// automatically deduces the quantity type.
template compileTimeParser(N, alias symbolList, alias numberParser)
{
    template compileTimeParser(string str)
    {
        // The string is parsed during compilation...
        enum parsed = parseQuantityImpl!N(str, symbolList, &numberParser);
        // ...and the parsed dimensions select the resulting quantity type.
        alias Deduced = Quantity!(N, cast(Dimensions) parsed.dimensions);
        enum compileTimeParser = Deduced.make(parsed.rawValue);
    }
}
///
pure @safe unittest
{
    enum century = unit!(real, "century");
    alias LectureLength = typeof(century);

    // Everything, including the symbol list, is known at compile time.
    enum symbols = SymbolList!real().addUnit("Cy", century).addPrefix("µ", 1e-6L);
    alias parseAtCompileTime = compileTimeParser!(real, symbols, std.conv.parse!(real, string));

    enum expected = 1e-6L * century;
    static assert(expected == parseAtCompileTime!"1 µCy");
}
230 
/++
Exception thrown when parsing encounters an unexpected token.
+/
class ParsingException : Exception
{
    /// Creates the exception from a message; the chained throwable comes last.
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }

    /// Creates the exception from a message immediately followed by the chained throwable.
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }
}
246 
247 private:
248 
// Parses `input` as an optional number followed by an optional compound unit.
// The number parser consumes the leading number from `input`; when it throws,
// the magnitude defaults to 1. Whatever remains is lexed and parsed as units.
QVariant!N parseQuantityImpl(N)(string input, SymbolList!N symbolList, NumberParser!N parseFun)
{
    N magnitude;
    try
    {
        magnitude = parseFun(input);
    }
    catch (Exception)
    {
        magnitude = 1;
    }

    if (!input.empty)
    {
        auto unitParser = QuantityParser!N(lex(input), symbolList);
        return magnitude * unitParser.parseCompoundUnit();
    }

    // Nothing follows the number: the result is dimensionless.
    return QVariant!N.make(magnitude, Dimensions.init);
}
265 
pure @safe unittest // Test parsing
{
    // Three base units, a dimensionless unit, and a placeholder used for
    // inputs that are expected to fail before any comparison happens.
    auto meter = unit!(double, "L");
    auto kilogram = unit!(double, "M");
    auto second = unit!(double, "T");
    auto one = meter / meter;
    auto unknown = one;

    // "m" is registered both as a unit (meter) and as a prefix (milli).
    auto siSL = SymbolList!double()
        .addUnit("m", meter)
        .addUnit("kg", kilogram)
        .addUnit("s", second)
        .addPrefix("c", 0.01L)
        .addPrefix("m", 0.001L);

    // Returns true when parsing `input` yields exactly `quantity`.
    bool checkParse(Q)(string input, Q quantity)
    {
        return parseQuantityImpl!double(input, siSL, &std.conv.parse!(double, string))
            == quantity.qVariant;
    }

    // Whitespace, prefixes and exponents
    assert(checkParse("1    m    ", meter));
    assert(checkParse("1m", meter));
    assert(checkParse("1 mm", 0.001 * meter));
    assert(checkParse("1 m^-1", 1 / meter));
    assert(checkParse("1 m²", meter * meter));
    assert(checkParse("1 m⁺²", meter * meter));
    assert(checkParse("1 m⁻¹", 1 / meter));
    // Parentheses
    assert(checkParse("1 (m)", meter));
    assert(checkParse("1 (m^-1)", 1 / meter));
    assert(checkParse("1 ((m)^-1)^-1", meter));
    assert(checkParse("1 (s/(s/m))", meter));
    // Multiplication and division operators
    assert(checkParse("1 m*m", meter * meter));
    assert(checkParse("1 m m", meter * meter));
    assert(checkParse("1 m.m", meter * meter));
    assert(checkParse("1 m⋅m", meter * meter));
    assert(checkParse("1 m×m", meter * meter));
    assert(checkParse("1 m/m", meter / meter));
    assert(checkParse("1 m÷m", meter / meter));
    assert(checkParse("1 m.s", second * meter));
    assert(checkParse("1 m s", second * meter));
    assert(checkParse("1 m²s", meter * meter * second));
    assert(checkParse("1 m*m/m", meter));
    // Dimensionless results
    assert(checkParse("0.8 m⁰", 0.8 * one));
    assert(checkParse("0.8", 0.8 * one));
    assert(checkParse("0.8 ", 0.8 * one));

    // Malformed inputs must be rejected with a ParsingException
    assertThrown!ParsingException(checkParse("1 c m", unknown));
    assertThrown!ParsingException(checkParse("1 c", unknown));
    assertThrown!ParsingException(checkParse("1 Qm", unknown));
    assertThrown!ParsingException(checkParse("1 m + m", unknown));
    assertThrown!ParsingException(checkParse("1 m/", unknown));
    assertThrown!ParsingException(checkParse("1 m^", unknown));
    assertThrown!ParsingException(checkParse("1 m^m", unknown));
    assertThrown!ParsingException(checkParse("1 m ) m", unknown));
    assertThrown!ParsingException(checkParse("1 m * m) m", unknown));
    assertThrown!ParsingException(checkParse("1 m^²", unknown));
    assertThrown!ParsingException(checkParse("1-⁺⁵", unknown));
}
325 
// A parser that can parse a text for a unit or a quantity.
// It is a recursive-descent parser working on the token list produced by
// lex(); tokens are consumed from the front as parsing proceeds.
struct QuantityParser(N)
{
    private
    {
        Token[] tokens;          // Tokens that remain to be parsed
        SymbolList!N symbolList; // Known unit and prefix symbols
    }

    // Parses a sequence of units combined by multiplication (explicit
    // operator or simple juxtaposition) or division, from left to right.
    // When inParens is true, parsing stops in front of a closing parenthesis,
    // which is left for the caller to consume.
    // Throws a ParsingException on unexpected tokens or end of input.
    QVariant!N parseCompoundUnit(bool inParens = false) pure @safe
    {
        QVariant!N ret = parseExponentUnit();

        while (!tokens.empty && !(inParens && tokens.front.type == Tok.rparen))
        {
            // No explicit operator means multiplication.
            immutable op = tokens.front.type;
            if (op == Tok.mul || op == Tok.div)
            {
                tokens.advance();
                tokens.check(); // an operator must be followed by an operand
            }

            QVariant!N rhs = parseExponentUnit();
            if (op == Tok.div)
                ret /= rhs;
            else
                ret *= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by an exponent, written either
    // as '^' followed by an integer or as a superscript integer.
    QVariant!N parseExponentUnit() pure @safe
    {
        QVariant!N ret = parseUnit();

        if (tokens.empty)
            return ret;

        immutable nextType = tokens.front.type;
        if (nextType != Tok.exp && nextType != Tok.supinteger)
            return ret;

        // After '^', only a regular integer is accepted.
        if (nextType == Tok.exp)
            tokens.advance(Tok.integer);

        int n = parseInteger();

        // Cannot use ret ^^ n because of CTFE limitation
        static if (__traits(compiles, std.math.pow(ret.value, n)))
            ret._value = std.math.pow(ret.value, n);
        else
        {
            // Manual integer power: multiply |n| times from a saved copy of
            // the base, then invert for negative exponents. (Multiplying the
            // running value by itself would square it at each step.)
            immutable base = ret._value;
            N power = 1;
            foreach (i; 0 .. (n < 0 ? -cast(long) n : cast(long) n))
                power *= base;
            ret._value = n < 0 ? cast(N) (1 / power) : power;
        }
        ret.dimensions = ret.dimensions.pow(n);
        return ret;
    }

    // Consumes an integer or superscript-integer token and returns its value.
    int parseInteger() pure @safe
    {
        tokens.check(Tok.integer, Tok.supinteger);
        int n = tokens.front.integer;
        tokens.advance();
        return n;
    }

    // Parses either a parenthesized compound unit or a single, possibly
    // prefixed, unit symbol. With no token left, the result is the
    // dimensionless value 1.
    QVariant!N parseUnit() pure @safe
    {
        if (tokens.empty)
            return QVariant!N.make(1, Dimensions.init);

        if (tokens.front.type != Tok.lparen)
            return parsePrefixUnit();

        tokens.advance();
        QVariant!N ret = parseCompoundUnit(true);
        tokens.check(Tok.rparen);
        tokens.advance();
        return ret;
    }

    // Consumes a symbol token and resolves it to a registered unit, first as
    // a standalone symbol, then as a prefix followed by a unit symbol.
    // Throws a ParsingException when the symbol cannot be resolved.
    QVariant!N parsePrefixUnit() pure @safe
    {
        tokens.check(Tok.symbol);
        auto str = tokens.front.slice;
        tokens.advance();

        // Try a standalone unit symbol (no prefix)
        auto uptr = str in symbolList.units;
        if (uptr)
            return *uptr;

        // Try with prefixes, the longest prefix first
        string barePrefix; // set when the whole symbol is itself a prefix
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length < i)
                continue;

            string prefix = str[0 .. i];
            auto factor = prefix in symbolList.prefixes;
            if (!factor)
                continue;

            string unit = str[i .. $];
            if (!unit.length)
            {
                // The symbol is only a prefix. Do not give up yet: a shorter
                // prefix followed by a unit symbol may still match.
                barePrefix = prefix;
                continue;
            }

            uptr = unit in symbolList.units;
            if (uptr)
                return *factor * *uptr;
        }

        enforceEx!ParsingException(!barePrefix.length,
            "Expecting a unit after the prefix " ~ barePrefix);
        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }
}
460 
// Kinds of tokens handled by the parser.
enum Tok
{
    none,       // default value; lex() never emits it
    symbol,     // run of characters not classified otherwise (unit or prefix+unit symbol)
    mul,        // multiplication operator: * . ⋅ ×
    div,        // division operator: / ÷
    exp,        // exponentiation operator: ^
    integer,    // run of ASCII digits and + or - signs
    supinteger, // run of superscript digits and superscript + or - signs
    rparen,     // closing parenthesis
    lparen      // opening parenthesis
}
473 
// A token produced by lex().
struct Token
{
    Tok type;               // kind of token
    string slice;           // the part of the lexed text covered by the token
    int integer = int.max;  // parsed value of integer/supinteger tokens; int.max otherwise
}
480 
// Splits the text of a compound unit into tokens.
//
// Whitespace separates tokens and is otherwise dropped; operators and
// parentheses are single-character tokens; runs of ASCII digits/signs and runs
// of superscript digits/signs become integer tokens; every other run of
// characters becomes a symbol token.
//
// Throws a ParsingException when a run of digits and signs is not a valid
// integer.
Token[] lex(string input) pure @safe
{
    // Kind of multi-character token currently being accumulated.
    enum State
    {
        none,
        symbol,
        integer,
        supinteger
    }

    Token[] tokens;
    auto tokapp = appender(tokens);

    auto original = input;
    // original[i .. j] is the text of the token being accumulated;
    // both are indices in UTF-8 code units.
    size_t i, j;
    State state = State.none;

    // Emits original[i .. j] as a token of the given type.
    void pushToken(Tok type)
    {
        tokapp.put(Token(type, original[i .. j]));
        i = j;
        state = State.none;
    }

    // Emits original[i .. j] as an integer token, after converting
    // superscript characters to their ASCII counterparts if needed.
    void pushInteger(Tok type)
    {
        auto slice = original[i .. j];

        if (type == Tok.supinteger)
        {
            auto a = appender!string;
            foreach (dchar c; slice)
            {
                switch (c)
                {
                    case '⁰': a.put('0'); break;
                    case '¹': a.put('1'); break;
                    case '²': a.put('2'); break;
                    case '³': a.put('3'); break;
                    case '⁴': a.put('4'); break;
                    case '⁵': a.put('5'); break;
                    case '⁶': a.put('6'); break;
                    case '⁷': a.put('7'); break;
                    case '⁸': a.put('8'); break;
                    case '⁹': a.put('9'); break;
                    case '⁺': a.put('+'); break;
                    case '⁻': a.put('-'); break;
                    default: assert(false, "Error in pushInteger()");
                }
            }
            slice = a.data;
        }

        int n;
        try
        {
            n = std.conv.parse!int(slice);
            // The whole run must have been consumed (rejects e.g. "2-3").
            enforce(slice.empty);
        }
        catch (Exception)
            throw new ParsingException("Unexpected integer format: " ~ original[i .. j]);
            
        tokapp.put(Token(type, original[i .. j], n));
        i = j;
        state = State.none;
    }

    // Emits the token being accumulated, if any.
    void push()
    {
        if (state == State.symbol)
            pushToken(Tok.symbol);
        else if (state == State.integer)
            pushInteger(Tok.integer);
        else if (state == State.supinteger)
            pushInteger(Tok.supinteger);
    }

    foreach (dchar cur; input)
    {
        auto len = cur.codeLength!char;
        switch (cur)
        {
            // Whitespace (including line breaks, so that text read line by
            // line can be parsed without being stripped first)
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\v':
            case '\f':
            case '\u00A0':
            case '\u2000': .. case '\u200A':
            case '\u202F':
            case '\u205F':
                push();
                j += len;
                i = j;
                break;

            case '(':
                push();
                j += len;
                pushToken(Tok.lparen);
                break;

            case ')':
                push();
                j += len;
                pushToken(Tok.rparen);
                break;

            case '*':
            case '.':
            case '⋅':
            case '×':
                push();
                j += len;
                pushToken(Tok.mul);
                break;

            case '/':
            case '÷':
                push();
                j += len;
                pushToken(Tok.div);
                break;

            case '^':
                push();
                j += len;
                pushToken(Tok.exp);
                break;

            case '0': .. case '9':
            case '-':
            case '+':
                if (state != State.integer)
                    push();
                state = State.integer;
                j += len;
                break;

            case '⁰':
            case '¹':
            case '²':
            case '³':
            case '⁴':
            case '⁵':
            case '⁶':
            case '⁷':
            case '⁸':
            case '⁹':
            case '⁻':
            case '⁺':
                if (state != State.supinteger)
                    push();
                state = State.supinteger;
                j += len;
                break;

            // Any other character is part of a symbol
            default:
                if (state == State.integer || state == State.supinteger)
                    push();
                state = State.symbol;
                j += len;
                break;
        }
    }
    // Emit the last pending token.
    push();

    return tokapp.data;
}
648 
// Drops the front token. When token types are passed, the new front token
// is then verified against them with check().
// Throws a ParsingException if there is no token to drop.
void advance(Types...)(ref Token[] tokens, Types types)
{
    enforceEx!ParsingException(tokens.length != 0, "Unexpected end of input");
    tokens = tokens[1 .. $];

    static if (Types.length > 0)
        check(tokens, types);
}
657 
// Throws a ParsingException if no token is left.
void check(Token[] tokens) pure @safe
{
    enforceEx!ParsingException(!tokens.empty, "Unexpected end of input");
}
662 
// Throws a ParsingException unless the front token exists and is of type tok.
void check(Token[] tokens, Tok tok) pure @safe
{
    check(tokens);
    auto head = tokens.front;
    enforceEx!ParsingException(head.type == tok,
        "Found '%s' while expecting %s".format(head.slice, tok));
}
670 
// Throws a ParsingException unless the front token exists and is of type
// tok1 or tok2.
void check(Token[] tokens, Tok tok1, Tok tok2) pure @safe
{
    check(tokens);
    auto head = tokens.front;
    enforceEx!ParsingException(head.type == tok1 || head.type == tok2,
        "Found '%s' while expecting %s or %s".format(head.slice, tok1, tok2));
}