1 /++
This module defines functions to parse units and quantities. The text
input is parsed according to the grammar given further below. For example:
4 $(DL
5 $(DT Prefixes and unit symbols must be joined:)
6     $(DD "1 mm" = 1 millimeter)
7     $(DD "1 m m" = 1 square meter)
8 $(BR)
9 $(DT Standalone units are preferred over prefixed ones:)
10     $(DD "1 cd" = 1 candela, not 1 centiday)
11 $(BR)
12 $(DT Powers of units:)
13     $(DD "1 m^2")
14     $(DD "1 m²" $(I (superscript integer)))
15 $(BR)
$(DT Multiplication of two units:)
17     $(DD "1 N m" $(I (whitespace)))
18     $(DD "1 N . m")
19     $(DD "1 N ⋅ m" $(I (centered dot)))
20     $(DD "1 N * m")
21     $(DD "1 N × m" $(I (times sign)))
22 $(BR)
$(DT Division of two units:)
24     $(DD "1 mol / s")
25     $(DD "1 mol ÷ s")
26 $(BR)
27 $(DT Grouping of units with parentheses:)
28     $(DD "1 kg/(m.s^2)" = 1 kg m⁻¹ s⁻²)
29 )
30 
31 Grammar: (whitespace not significant)
32 $(DL
33 $(DT Quantity:)
34     $(DD Units)
35     $(DD Number Units)
36 $(BR)
37 $(DT Number:)
38     $(DD $(I Numeric value parsed by std.conv.parse!double))
39 $(BR)
40 $(DT Units:)
41     $(DD Unit)
42     $(DD Unit Units)
43     $(DD Unit Operator Units)
44 $(BR)
45 $(DT Operator:)
46     $(DD $(B *))
47     $(DD $(B .))
48     $(DD $(B ⋅))
49     $(DD $(B ×))
50     $(DD $(B /))
51     $(DD $(B ÷))
52 $(BR)
53 $(DT Unit:)
54     $(DD Base)
55     $(DD Base $(B ^) Integer)
56     $(DD Base SupInteger)
57 $(BR)
58 $(DT Base:)
59     $(DD Symbol)
60     $(DD Prefix Symbol)
61     $(DD $(B $(LPAREN)) Units $(B $(RPAREN)))
62 $(BR)
63 $(DT Symbol:)
64     $(DD $(I The symbol of a valid unit))
65 $(BR)
66 $(DT Prefix:)
67     $(DD $(I The symbol of a valid prefix))
68 $(BR)
69 $(DT Integer:)
70     $(DD $(I Integer value parsed by std.conv.parse!int))
71 $(BR)
72 $(DT SupInteger:)
73     $(DD $(I Superscript version of Integer))
74 )
75 
76 Copyright: Copyright 2013-2015, Nicolas Sicard
77 Authors: Nicolas Sicard
License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
79 Source: $(LINK https://github.com/biozic/quantities)
80 +/
81 module quantities.parsing;
82 
83 import quantities.base;
84 import quantities.qvariant;
85 
86 import std.array;
87 import std.algorithm;
88 import std.conv;
89 import std.exception;
90 import std.math;
91 import std.range;
92 import std.string;
93 import std.traits;
94 import std.typetuple;
95 import std.utf;
96 
97 
/// Exception thrown when operating on two units that are not interconvertible.
class DimensionException : Exception
{
    /// Builds the exception from a message, an optional source location
    /// and an optional chained throwable.
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }

    /// Same as above, with the chained throwable given before the
    /// source location.
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }
}
113 
/++
Holds the unit symbols and the prefix symbols that a parser can recognize.

Units are stored as QVariant!N values and prefixes as numeric factors, both
keyed by their symbol. The length of the longest registered prefix is tracked
alongside them.
+/
struct SymbolList(N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    package
    {
        QVariant!N[string] units;
        N[string] prefixes;
        size_t maxPrefixLength;
    }

    /// Registers a unit under `symbol`, replacing any unit already stored
    /// under the same symbol. Returns a copy of the updated list so that
    /// calls can be chained.
    auto addUnit(Q)(string symbol, Q unit)
        if (isQVariant!Q)
    {
        units[symbol] = unit;
        return this;
    }
    /// ditto
    auto addUnit(Q)(string symbol, Q unit)
        if (isQuantity!Q)
    {
        // Quantities are stored through their QVariant representation.
        auto asVariant = unit.qVariant;
        return addUnit(symbol, asVariant);
    }

    /// Registers a prefix under `symbol`, replacing any prefix already stored
    /// under the same symbol. Returns a copy of the updated list so that
    /// calls can be chained.
    auto addPrefix(N)(string symbol, N factor)
        if (isNumeric!N)
    {
        prefixes[symbol] = factor;
        // Remember the longest prefix length (symbol.length is in code units).
        maxPrefixLength = max(maxPrefixLength, symbol.length);
        return this;
    }
}
152 
/// Type of a function that can parse a string for a numeric value of type N.
/// The string is passed by reference; the function must be pure and @safe.
template NumberParser(N)
{
    alias NumberParser = N function(ref string s) pure @safe;
}
155 
/// A quantity parser
struct Parser(N)
{
    SymbolList!N symbolList; /// A list of registered symbols for units and prefixes.
    NumberParser!N numberParser; /// A function that can parse a string for a numeric value of type N.

    /++
    Parses a QVariant from a string.

    Params:
        str = the text to parse.
    Returns: the parsed value as a QVariant!N.
    Throws: ParsingException if the unit part of the text cannot be parsed.
    +/
    QVariant!N parseVariant(string str)
    {
        return parseQuantityImpl!N(str, symbolList, numberParser);
    }

    /++
    Parses a quantity of a known type Q from a string.

    Params:
        str = the text to parse.
    Returns: a quantity of type Q built from the parsed raw value.
    Throws: ParsingException if the unit part of the text cannot be parsed;
        DimensionException if the parsed dimensions differ from those of Q.
    +/
    Q parse(Q)(string str)
        if (isQuantity!Q)
    {
        static assert(is(N : Q.valueType), "Incompatible value type: " ~ Q.valueType.stringof);

        // The symbol list and the number parser are typed on N, so the
        // implementation must be instantiated with N (not Q.valueType);
        // the static assert above guarantees that the resulting raw value
        // converts implicitly to Q.valueType.
        auto q = parseQuantityImpl!N(str, symbolList, numberParser);
        enforce!DimensionException(equals(Q.dimensions, q.dimensions),
            "Dimension error: [%s] is not compatible with [%s]".format(
                quantities.base.toString(Q.dimensions), quantities.base.toString(q.dimensions)));
        return Q.make(q.rawValue);
    }
}
///
pure @safe unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    import std.conv;

    // A custom unit and the quantity type that goes with it.
    auto century = unit!(real, "century");
    alias LectureLength = typeof(century);

    // Symbols the parser will recognize: one unit and one prefix.
    auto symbols = SymbolList!real()
        .addUnit("Cy", century)
        .addPrefix("µ", 1e-6L);

    auto parser = Parser!real(symbols, &std.conv.parse!(real, string));

    // Both entry points must agree with the value computed directly.
    auto timing = 1e-6L * century;
    auto typed = parser.parse!LectureLength("1 µCy");
    auto untyped = parser.parseVariant("1 µCy");
    assert(timing == typed);
    assert(timing == untyped);
}
204 
/++
Creates a compile-time parser that parses a string for a quantity and
automatically deduces the quantity type.

The outer template binds the numeric type, the symbol list and the number
parsing function; the inner template takes the text to parse.
+/
template compileTimeParser(N, alias symbolList, alias numberParser)
{
    template compileTimeParser(string str)
    {
        // Parse at compile time, then rebuild a statically typed quantity
        // from the parsed dimensions and raw value.
        enum parsed = parseQuantityImpl!N(str, symbolList, &numberParser);
        alias Deduced = Quantity!(N, cast(Dimensions) parsed.dimensions);
        enum compileTimeParser = Deduced.make(parsed.rawValue);
    }
}
///
pure @safe unittest
{
    // A custom unit and the quantity type that goes with it.
    enum century = unit!(real, "century");
    alias LectureLength = typeof(century);

    // Symbols known at compile time: one unit and one prefix.
    enum symbols = SymbolList!real()
        .addUnit("Cy", century)
        .addPrefix("µ", 1e-6L);

    alias ctParser = compileTimeParser!(real, symbols, std.conv.parse!(real, string));

    // The parsed quantity is available as a compile-time constant.
    enum timing = 1e-6L * century;
    enum parsed = ctParser!"1 µCy";
    static assert(timing == parsed);
}
229 
/// Exception thrown when parsing encounters an unexpected token.
class ParsingException : Exception
{
    /// Builds the exception from a message, an optional source location
    /// and an optional chained throwable.
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }

    /// Same as above, with the chained throwable given before the
    /// source location.
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
        pure @safe nothrow
    {
        super(msg, file, line, next);
    }
}
245 
246 private:
247 
// Parses `input` as an optional number followed by optional units.
//
// The number is read with `parseFun`, which receives `input` by reference;
// whatever text remains afterwards is lexed and parsed as a compound unit.
// If `parseFun` throws, the numeric value defaults to 1 (so that a bare
// unit such as "m" parses as one unit).
//
// Returns the numeric value multiplied by the parsed unit, or the bare
// dimensionless value when nothing but whitespace follows the number.
// Throws ParsingException if the unit text is malformed.
QVariant!N parseQuantityImpl(N)(string input, SymbolList!N symbolList, NumberParser!N parseFun)
{
    N value;
    try
        value = parseFun(input);
    catch (Exception)
        value = 1;

    // lex() yields no token for empty or whitespace-only text. Returning
    // here keeps the unit parser from being run on an empty token list,
    // which it is not prepared for ("1 " used to fail with a range error).
    auto tokens = lex(input);
    if (tokens.empty)
        return QVariant!N.make(value, null);

    auto parser = QuantityParser!N(tokens, symbolList);

    return value * parser.parseCompoundUnit();
}
264 
pure @safe unittest // Test parsing
{
    // Three base units, a dimensionless "one", and a placeholder quantity
    // used where the parse is expected to throw before any comparison.
    auto meter = unit!(double, "L");
    auto kilogram = unit!(double, "M");
    auto second = unit!(double, "T");
    auto one = meter / meter;
    auto unknown = one;

    // Note that "m" is registered both as a unit and as a prefix.
    auto siSL = SymbolList!double()
        .addUnit("m", meter)
        .addUnit("kg", kilogram)
        .addUnit("s", second)
        .addPrefix("c", 0.01L)
        .addPrefix("m", 0.001L);

    // Parses `input` and compares the result with the expected quantity.
    bool checkParse(Q)(string input, Q quantity)
    {
        return parseQuantityImpl!double(input, siSL, &std.conv.parse!(double, string))
            == quantity.qVariant;
    }

    // Inputs that must parse successfully.
    assert(checkParse("1    m    ", meter));
    assert(checkParse("1 mm", 0.001 * meter));
    assert(checkParse("1 m^-1", 1 / meter));
    assert(checkParse("1 m²", meter * meter));
    assert(checkParse("1 m⁺²", meter * meter));
    assert(checkParse("1 m⁻¹", 1 / meter));
    assert(checkParse("1 (m)", meter));
    assert(checkParse("1 (m^-1)", 1 / meter));
    assert(checkParse("1 ((m)^-1)^-1", meter));
    assert(checkParse("1 (s/(s/m))", meter));
    assert(checkParse("1 m*m", meter * meter));
    assert(checkParse("1 m m", meter * meter));
    assert(checkParse("1 m.m", meter * meter));
    assert(checkParse("1 m⋅m", meter * meter));
    assert(checkParse("1 m×m", meter * meter));
    assert(checkParse("1 m/m", meter / meter));
    assert(checkParse("1 m÷m", meter / meter));
    assert(checkParse("1 m.s", second * meter));
    assert(checkParse("1 m s", second * meter));
    assert(checkParse("1 m²s", meter * meter * second));
    assert(checkParse("1 m*m/m", meter));
    assert(checkParse("0.8 m⁰", 0.8 * one));
    assert(checkParse("0.8", 0.8 * one));

    // Malformed inputs that must be rejected with a ParsingException.
    assertThrown!ParsingException(checkParse("1 c m", unknown));
    assertThrown!ParsingException(checkParse("1 c", unknown));
    assertThrown!ParsingException(checkParse("1 Qm", unknown));
    assertThrown!ParsingException(checkParse("1 m + m", unknown));
    assertThrown!ParsingException(checkParse("1 m/", unknown));
    assertThrown!ParsingException(checkParse("1 m^", unknown));
    assertThrown!ParsingException(checkParse("1 m^m", unknown));
    assertThrown!ParsingException(checkParse("1 m ) m", unknown));
    assertThrown!ParsingException(checkParse("1 m * m) m", unknown));
    assertThrown!ParsingException(checkParse("1 m^²", unknown));
    assertThrown!ParsingException(checkParse("1-⁺⁵", unknown));
}
322 
// A parser that can parse a text for a unit or a quantity.
//
// It consumes a list of tokens (as produced by lex) from the front and
// resolves symbols against a SymbolList. Every malformed input is reported
// with a ParsingException.
struct QuantityParser(N)
{
    private 
    {
        Token[] tokens;
        SymbolList!N symbolList;
    }

    // Parses a sequence of units combined by multiplication or division,
    // applied from left to right. Two adjacent units without an operator
    // are multiplied. When `inParens` is true, parsing stops in front of a
    // closing parenthesis, which is left for the caller to consume.
    QVariant!N parseCompoundUnit(bool inParens = false) pure @safe
    {
        QVariant!N ret = parseExponentUnit();

        while (!tokens.empty && !(inParens && tokens.front.type == Tok.rparen))
        {
            immutable op = tokens.front.type;

            if (op == Tok.mul || op == Tok.div)
            {
                // An explicit operator must be followed by another unit.
                tokens.advance();
                tokens.check();
            }

            QVariant!N rhs = parseExponentUnit();
            if (op == Tok.div)
                ret /= rhs;
            else
                ret *= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by an integer power, written either
    // as '^' followed by an integer or as a superscript integer.
    QVariant!N parseExponentUnit() pure @safe
    {
        QVariant!N ret = parseUnit();

        if (tokens.empty)
            return ret;

        immutable next = tokens.front.type;
        if (next != Tok.exp && next != Tok.supinteger)
            return ret;

        // After '^', only a plain integer is accepted.
        if (next == Tok.exp)
            tokens.advance(Tok.integer);

        int n = parseInteger();

        // Cannot use ret ^^ n because of CTFE limitation
        static if (__traits(compiles, std.math.pow(ret.value, n)))
            ret._value = std.math.pow(ret.value, n);
        else
        {
            // Fallback: integer power by repeated multiplication of the
            // original value, handling zero and negative exponents.
            auto base = ret._value;
            typeof(base) result = 1;
            immutable long count = n < 0 ? -cast(long) n : n;
            foreach (_; 0 .. count)
                result *= base;
            ret._value = n < 0 ? 1 / result : result;
        }
        ret.dimensions = ret.dimensions.pow(n);
        return ret;
    }

    // Consumes an integer or superscript-integer token and returns its value.
    int parseInteger() pure @safe
    {
        tokens.check(Tok.integer, Tok.supinteger);
        int n = tokens.front.integer;
        tokens.advance();
        return n;
    }

    // Parses either a parenthesized compound unit or a single, possibly
    // prefixed, unit symbol.
    QVariant!N parseUnit() pure @safe
    {
        // Report a premature end of input (e.g. "1 (") as a ParsingException
        // instead of reading the front of an empty token list.
        tokens.check();

        QVariant!N ret;

        if (tokens.front.type == Tok.lparen)
        {
            tokens.advance();
            ret = parseCompoundUnit(true);
            tokens.check(Tok.rparen);
            tokens.advance();
        }
        else
            ret = parsePrefixUnit();

        return ret;
    }

    // Consumes a symbol token and resolves it to a unit, first as a
    // standalone unit symbol, then as a prefix followed by a unit symbol.
    QVariant!N parsePrefixUnit() pure @safe
    {
        tokens.check(Tok.symbol);
        auto str = tokens.front.slice;
        tokens.advance();

        // Try a standalone unit symbol (no prefix)
        auto uptr = str in symbolList.units;
        if (uptr)
            return *uptr;

        // Try with prefixes, the longest prefix first
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length < i)
                continue;

            string prefix = str[0 .. i];
            auto factor = prefix in symbolList.prefixes;
            if (!factor)
                continue;

            string unit = str[i .. $];
            enforce!ParsingException(unit.length, "Expecting a unit after the prefix " ~ prefix);
            uptr = unit in symbolList.units;
            if (uptr)
                return *factor * *uptr;
        }

        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }
}
455 
// Kinds of tokens produced by the lexer.
enum Tok
{
    none,       // default value; lex does not produce tokens of this kind
    symbol,     // run of characters not recognized as anything else
    mul,        // multiplication operator
    div,        // division operator
    exp,        // exponentiation operator '^'
    integer,    // integer written with ASCII digits and signs
    supinteger, // integer written with superscript digits and signs
    rparen,     // closing parenthesis
    lparen      // opening parenthesis
}
468 
// A lexical token: its kind, the slice of the input text it was read from
// and, for integer tokens, the parsed value.
struct Token
{
    Tok type;               // kind of token
    string slice;           // slice of the lexed text covered by the token
    int integer = int.max;  // value of integer/supinteger tokens; int.max unless set
}
475 
// Splits `input` into tokens.
//
// Whitespace separates tokens and is otherwise dropped. Parentheses,
// operators and '^' each form a token on their own. Consecutive ASCII
// digits and signs form an integer token, consecutive superscript digits
// and signs form a superscript-integer token, and any other run of
// characters forms a symbol token.
//
// Throws ParsingException if an integer token cannot be converted to int.
Token[] lex(string input) pure @safe
{
    enum State
    {
        none,
        symbol,
        integer,
        supinteger
    }

    auto tokapp = appender!(Token[])();

    // `i .. j` delimits, in code units of `original`, the token being built.
    auto original = input;
    size_t i, j;
    State state = State.none;

    // Emits the pending slice as a token of the given type.
    void pushToken(Tok type)
    {
        tokapp.put(Token(type, original[i .. j]));
        i = j;
        state = State.none;
    }

    // Emits the pending slice as an integer token, converting it to int.
    void pushInteger(Tok type)
    {
        auto slice = original[i .. j];

        if (type == Tok.supinteger)
        {
            // Translate superscript characters to their ASCII counterparts
            // so that the standard integer parser can be used.
            auto a = appender!string;
            foreach (dchar c; slice)
            {
                switch (c)
                {
                    case '⁰': a.put('0'); break;
                    case '¹': a.put('1'); break;
                    case '²': a.put('2'); break;
                    case '³': a.put('3'); break;
                    case '⁴': a.put('4'); break;
                    case '⁵': a.put('5'); break;
                    case '⁶': a.put('6'); break;
                    case '⁷': a.put('7'); break;
                    case '⁸': a.put('8'); break;
                    case '⁹': a.put('9'); break;
                    case '⁺': a.put('+'); break;
                    case '⁻': a.put('-'); break;
                    default: assert(false, "Error in pushInteger()");
                }
            }
            slice = a.data;
        }

        int n;
        try
            n = std.conv.parse!int(slice);
        catch (Exception)
            throw new ParsingException("Unexpected integer format: " ~ original[i .. j]);

        // The whole slice must have been consumed by the integer parser.
        enforce!ParsingException(slice.empty, "Unexpected integer format: " ~ slice);

        tokapp.put(Token(type, original[i .. j], n));
        i = j;
        state = State.none;
    }

    // Emits the pending token, if any, according to the current state.
    void push()
    {
        if (state == State.symbol)
            pushToken(Tok.symbol);
        else if (state == State.integer)
            pushInteger(Tok.integer);
        else if (state == State.supinteger)
            pushInteger(Tok.supinteger);
    }

    while (!input.empty)
    {
        auto cur = input.front;
        auto len = cur.codeLength!char;
        switch (cur)
        {
            // Whitespace (line breaks included)
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\u00A0':
            case '\u2000': .. case '\u200A':
            case '\u202F':
            case '\u205F':
                push();
                j += len;
                i = j;
                break;

            case '(':
                push();
                j += len;
                pushToken(Tok.lparen);
                break;

            case ')':
                push();
                j += len;
                pushToken(Tok.rparen);
                break;

            case '*':
            case '.':
            case '⋅':
            case '×':
                push();
                j += len;
                pushToken(Tok.mul);
                break;

            case '/':
            case '÷':
                push();
                j += len;
                pushToken(Tok.div);
                break;

            case '^':
                push();
                j += len;
                pushToken(Tok.exp);
                break;

            case '0': .. case '9':
            case '-':
            case '+':
                if (state != State.integer)
                    push();
                state = State.integer;
                j += len;
                break;

            case '⁰':
            case '¹':
            case '²':
            case '³':
            case '⁴':
            case '⁵':
            case '⁶':
            case '⁷':
            case '⁸':
            case '⁹':
            case '⁻':
            case '⁺':
                if (state != State.supinteger)
                    push();
                state = State.supinteger;
                j += len;
                break;

            default:
                // Any other character belongs to a symbol.
                if (state == State.integer || state == State.supinteger)
                    push();
                state = State.symbol;
                j += len;
                break;
        }
        input.popFront();
    }
    // Flush the token still pending at the end of the input.
    push();

    return tokapp.data;
}
644 
// Drops the front token of `tokens`. When token types are given, the new
// front token must exist and be of one of these types.
//
// Throws ParsingException if `tokens` is empty, or if the check on the new
// front token fails.
void advance(Types...)(ref Token[] tokens, Types types)
{
    enforce!ParsingException(!tokens.empty, "Unexpected end of input");
    tokens.popFront();

    static if (Types.length)
        check(tokens, types);
}
653 
// Verifies that `tokens` is not empty and, when token types are given, that
// the front token is of one of these types.
//
// Throws ParsingException otherwise; the message names the text of the
// offending token and every token type that would have been accepted.
void check(Types...)(Token[] tokens, Types types)
{
    enforce!ParsingException(!tokens.empty, "Unexpected end of input");

    static if (Types.length)
    {
        auto token = tokens.front;

        foreach (type; types)
        {
            if (token.type == type)
                return;
        }

        // Build the message only on failure, listing all accepted types
        // (for a single type this is identical to the former message).
        import std.conv : to;
        import std.string : format;

        string expected;
        foreach (idx, type; types)
        {
            static if (idx > 0)
                expected ~= " or ";
            expected ~= to!string(type);
        }

        throw new ParsingException(format("Found '%s' while expecting %s",
            token.slice, expected));
    }
}