/++
This module defines functions to parse units and quantities. The text
input is parsed according to the following grammar. For example:
$(DL
    $(DT Prefixes and unit symbols must be joined:)
        $(DD "1 mm" = 1 millimeter)
        $(DD "1 m m" = 1 square meter)
    $(BR)
    $(DT Standalone units are preferred over prefixed ones:)
        $(DD "1 cd" = 1 candela, not 1 centiday)
    $(BR)
    $(DT Powers of units:)
        $(DD "1 m^2")
        $(DD "1 m²" $(I (superscript integer)))
    $(BR)
    $(DT Multiplication of two units:)
        $(DD "1 N m" $(I (whitespace)))
        $(DD "1 N . m")
        $(DD "1 N ⋅ m" $(I (centered dot)))
        $(DD "1 N * m")
        $(DD "1 N × m" $(I (times sign)))
    $(BR)
    $(DT Division of two units:)
        $(DD "1 mol / s")
        $(DD "1 mol ÷ s")
    $(BR)
    $(DT Grouping of units with parentheses:)
        $(DD "1 kg/(m.s^2)" = 1 kg m⁻¹ s⁻²)
)

Grammar: (whitespace not significant)
$(DL
    $(DT Quantity:)
        $(DD Units)
        $(DD Number Units)
    $(BR)
    $(DT Number:)
        $(DD $(I Numeric value parsed by std.conv.parse!double))
    $(BR)
    $(DT Units:)
        $(DD Unit)
        $(DD Unit Units)
        $(DD Unit Operator Units)
    $(BR)
    $(DT Operator:)
        $(DD $(B *))
        $(DD $(B .))
        $(DD $(B ⋅))
        $(DD $(B ×))
        $(DD $(B /))
        $(DD $(B ÷))
    $(BR)
    $(DT Unit:)
        $(DD Base)
        $(DD Base $(B ^) Integer)
        $(DD Base SupInteger)
    $(BR)
    $(DT Base:)
        $(DD Symbol)
        $(DD Prefix Symbol)
        $(DD $(B $(LPAREN)) Units $(B $(RPAREN)))
    $(BR)
    $(DT Symbol:)
        $(DD $(I The symbol of a valid unit))
    $(BR)
    $(DT Prefix:)
        $(DD $(I The symbol of a valid prefix))
    $(BR)
    $(DT Integer:)
        $(DD $(I Integer value parsed by std.conv.parse!int))
    $(BR)
    $(DT SupInteger:)
        $(DD $(I Superscript version of Integer))
)

Copyright: Copyright 2013-2015, Nicolas Sicard
Authors: Nicolas Sicard
License: $(LINK www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
Source: $(LINK https://github.com/biozic/quantities)
+/
module quantities.parsing;

import quantities.internal.dimensions;
import quantities.base;
import quantities.qvariant;

import std.array;
import std.algorithm;
import std.conv;
import std.exception;
import std.math;
import std.range;
import std.string;
import std.traits;
import std.typetuple;
import std.utf;


/// Exception thrown when operating on two units that are not interconvertible.
class DimensionException : Exception
{
    /// Builds the exception from a message and an optional chained throwable.
    pure @safe nothrow
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        super(msg, file, line, next);
    }

    /// Same as above, with the chained throwable given before the location.
    pure @safe nothrow
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
    {
        super(msg, file, line, next);
    }
}

/++
Contains the symbols of the units and the prefixes that a parser can handle.

The `add*` methods return a copy of the list so that calls can be chained
(see the unittests of this module).
+/
struct SymbolList(N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    package
    {
        QVariant!N[string] units;  // unit symbol -> unit
        N[string] prefixes;        // prefix symbol -> multiplication factor
        size_t maxPrefixLength;    // longest registered prefix, in UTF-8 code units
    }

    /// Adds (or replaces) a unit in the list
    auto addUnit(Q)(string symbol, Q unit)
        if (isQVariant!Q)
    {
        units[symbol] = unit;
        return this;
    }
    /// ditto
    auto addUnit(Q)(string symbol, Q unit)
        if (isQuantity!Q)
    {
        return addUnit(symbol, unit.qVariant);
    }

    /// Adds (or replaces) a prefix in the list
    auto addPrefix(F)(string symbol, F factor)
        if (isNumeric!F)
    {
        // F is deliberately named differently from the struct parameter N:
        // the factor is converted to N when stored in `prefixes`.
        prefixes[symbol] = factor;
        if (symbol.length > maxPrefixLength)
            maxPrefixLength = symbol.length;
        return this;
    }
}

/// Type of a function that can parse a string for a numeric value of type N.
alias NumberParser(N) = N function(ref string s) pure @safe;

/// A quantity parser
struct Parser(N)
{
    SymbolList!N symbolList; /// A list of registered symbols for units and prefixes.
    NumberParser!N numberParser; /// A function that can parse a string for a numeric value of type N.

    /++
    Parses a QVariant from a string.
    +/
    QVariant!N parseVariant(string str)
    {
        return parseQuantityImpl!N(str, symbolList, numberParser);
    }

    /++
    Parses a quantity of a known type Q from a string.

    Throws: DimensionException if the dimensions of the parsed quantity
    differ from those of Q.
    +/
    Q parse(Q)(string str)
        if (isQuantity!Q)
    {
        static assert(is(N : Q.valueType), "Incompatible value type: " ~ Q.valueType.stringof);

        // NOTE(review): the static assert accepts any N implicitly convertible
        // to Q.valueType, but symbolList and numberParser are typed on N, so
        // this call presumably only compiles when N is Q.valueType -- confirm.
        auto q = parseQuantityImpl!(Q.valueType)(str, symbolList, numberParser);

        // enforce!E is the supported replacement for the deprecated enforceEx.
        enforce!DimensionException(Q.dimensions == q.dimensions,
            "Dimension error: [%s] is not compatible with [%s]".format(
                Q.dimensions.toString, q.dimensions.toString));
        return Q.make(q.rawValue);
    }
}
///
pure @safe unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    auto century = unit!(real, "century");
    alias LectureLength = typeof(century);

    auto symbolList = SymbolList!real()
        .addUnit("Cy", century)
        .addPrefix("µ", 1e-6L);

    import std.conv;
    auto parser = Parser!real(symbolList, &std.conv.parse!(real, string));

    auto timing = 1e-6L * century;
    assert(timing == parser.parse!LectureLength("1 µCy"));
    assert(timing == parser.parseVariant("1 µCy"));
}

/// Creates a compile-time parser that parses a string for a quantity and
/// automatically deduces the quantity type.
template compileTimeParser(N, alias symbolList, alias numberParser)
{
    template compileTimeParser(string str)
    {
        // The string is parsed during compilation; the resulting dimensions
        // become part of the deduced Quantity type.
        enum q = parseQuantityImpl!N(str, symbolList, &numberParser);
        enum compileTimeParser = Quantity!(N, cast(Dimensions) q.dimensions).make(q.rawValue);
    }
}
///
pure @safe unittest
{
    enum century = unit!(real, "century");
    alias LectureLength = typeof(century);

    enum symbolList = SymbolList!real()
        .addUnit("Cy", century)
        .addPrefix("µ", 1e-6L);

    alias ctParser = compileTimeParser!(real, symbolList, std.conv.parse!(real, string));
    enum timing = 1e-6L * century;
    static assert(timing == ctParser!"1 µCy");
}

/// Exception thrown when parsing encounters an unexpected token.
class ParsingException : Exception
{
    /// Builds the exception from a message and an optional chained throwable.
    pure @safe nothrow
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        super(msg, file, line, next);
    }

    /// Same as above, with the chained throwable given before the location.
    pure @safe nothrow
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
    {
        super(msg, file, line, next);
    }
}

private:

// Parses `input` as "[Number] [Units]".
//
// The leading number is read with `parseFun`, which consumes it from `input`.
// When no number can be read, the value defaults to 1 so that a bare unit
// expression is accepted. What remains is tokenized and parsed as a compound
// unit; an empty remainder yields a dimensionless quantity.
QVariant!N parseQuantityImpl(N)(string input, SymbolList!N symbolList, NumberParser!N parseFun)
{
    N value;
    try
        value = parseFun(input);
    catch (Exception)
        value = 1; // No leading number: the grammar allows "Units" alone

    if (input.empty)
        return QVariant!N.make(value, Dimensions.init);

    auto tokens = lex(input);
    auto parser = QuantityParser!N(tokens, symbolList);

    return value * parser.parseCompoundUnit();
}

pure @safe unittest // Test parsing
{
    auto meter = unit!(double, "L");
    auto kilogram = unit!(double, "M");
    auto second = unit!(double, "T");
    auto one = meter / meter;
    auto unknown = one;

    auto siSL = SymbolList!double()
        .addUnit("m", meter)
        .addUnit("kg", kilogram)
        .addUnit("s", second)
        .addPrefix("c", 0.01L)
        .addPrefix("m", 0.001L);

    bool checkParse(Q)(string input, Q quantity)
    {
        return parseQuantityImpl!double(input, siSL, &std.conv.parse!(double, string))
            == quantity.qVariant;
    }

    assert(checkParse("1    m    ", meter));
    assert(checkParse("1m", meter));
    assert(checkParse("1 mm", 0.001 * meter));
    assert(checkParse("1 m^-1", 1 / meter));
    assert(checkParse("1 m²", meter * meter));
    assert(checkParse("1 m⁺²", meter * meter));
    assert(checkParse("1 m⁻¹", 1 / meter));
    assert(checkParse("1 (m)", meter));
    assert(checkParse("1 (m^-1)", 1 / meter));
    assert(checkParse("1 ((m)^-1)^-1", meter));
    assert(checkParse("1 (s/(s/m))", meter));
    assert(checkParse("1 m*m", meter * meter));
    assert(checkParse("1 m m", meter * meter));
    assert(checkParse("1 m.m", meter * meter));
    assert(checkParse("1 m⋅m", meter * meter));
    assert(checkParse("1 m×m", meter * meter));
    assert(checkParse("1 m/m", meter / meter));
    assert(checkParse("1 m÷m", meter / meter));
    assert(checkParse("1 m.s", second * meter));
    assert(checkParse("1 m s", second * meter));
    assert(checkParse("1 m²s", meter * meter * second));
    assert(checkParse("1 m*m/m", meter));
    assert(checkParse("0.8 m⁰", 0.8 * one));
    assert(checkParse("0.8", 0.8 * one));
    assert(checkParse("0.8 ", 0.8 * one));

    assertThrown!ParsingException(checkParse("1 c m", unknown));
    assertThrown!ParsingException(checkParse("1 c", unknown));
    assertThrown!ParsingException(checkParse("1 Qm", unknown));
    assertThrown!ParsingException(checkParse("1 m + m", unknown));
    assertThrown!ParsingException(checkParse("1 m/", unknown));
    assertThrown!ParsingException(checkParse("1 m^", unknown));
    assertThrown!ParsingException(checkParse("1 m^m", unknown));
    assertThrown!ParsingException(checkParse("1 m ) m", unknown));
    assertThrown!ParsingException(checkParse("1 m * m) m", unknown));
    assertThrown!ParsingException(checkParse("1 m^²", unknown));
    assertThrown!ParsingException(checkParse("1-⁺⁵", unknown));
}

pure @safe unittest // A long prefix matching the whole symbol must not hide "shorter prefix + unit"
{
    auto second = unit!(double, "T");

    auto list = SymbolList!double()
        .addUnit("s", second)
        .addPrefix("m", 0.001L)
        .addPrefix("ms", 1.0L);

    // "ms" is itself a prefix, but it must still be readable as "m" + "s".
    auto parsed = parseQuantityImpl!double("1 ms", list, &std.conv.parse!(double, string));
    assert(parsed == (0.001 * second).qVariant);

    // A prefix with nothing after it is still an error.
    assertThrown!ParsingException(
        parseQuantityImpl!double("1 m", SymbolList!double().addPrefix("m", 0.001L),
            &std.conv.parse!(double, string)));
}

// A parser that can parse a text for a unit or a quantity
struct QuantityParser(N)
{
    private
    {
        Token[] tokens;          // remaining tokens; consumed from the front
        SymbolList!N symbolList; // known unit and prefix symbols
    }

    // Parses a sequence of units joined by explicit operators or by simple
    // juxtaposition (which multiplies), evaluated left to right.
    // When `inParens` is true, parsing stops in front of a closing
    // parenthesis, which is left for the caller to consume.
    QVariant!N parseCompoundUnit(bool inParens = false) pure @safe
    {
        QVariant!N ret = parseExponentUnit();

        while (!tokens.empty && !(inParens && tokens.front.type == Tok.rparen))
        {
            // No operator between two units means multiplication.
            bool multiply = true;
            immutable type = tokens.front.type;
            if (type == Tok.mul || type == Tok.div)
            {
                multiply = type == Tok.mul;
                tokens.advance();
                tokens.check(); // an operator must be followed by something
            }

            QVariant!N rhs = parseExponentUnit();
            if (multiply)
                ret *= rhs;
            else
                ret /= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by "^ Integer" or by a superscript
    // integer, and raises the unit to that power.
    QVariant!N parseExponentUnit() pure @safe
    {
        QVariant!N ret = parseUnit();

        if (tokens.empty)
            return ret;

        auto next = tokens.front;
        if (next.type != Tok.exp && next.type != Tok.supinteger)
            return ret;

        // After '^' only a plain integer is accepted (not a superscript one).
        if (next.type == Tok.exp)
            tokens.advance(Tok.integer);

        int n = parseInteger();

        // Cannot use ret ^^ n because of CTFE limitation
        static if (__traits(compiles, std.math.pow(ret.value, n)))
            ret._value = std.math.pow(ret.value, n);
        else
        {
            // Exponentiation by squaring on |n|; the magnitude is widened
            // first so that negating int.min cannot overflow.
            N base = ret._value;
            N result = 1;
            for (ulong e = n < 0 ? -cast(long) n : n; e != 0; e >>= 1)
            {
                if (e & 1)
                    result *= base;
                base *= base;
            }
            ret._value = n < 0 ? 1 / result : result;
        }
        ret.dimensions = ret.dimensions.pow(n);
        return ret;
    }

    // Consumes an integer or superscript-integer token and returns its value.
    int parseInteger() pure @safe
    {
        tokens.check(Tok.integer, Tok.supinteger);
        int n = tokens.front.integer;
        tokens.advance();
        return n;
    }

    // Parses either a parenthesized compound unit or a (prefixed) symbol.
    // With no token left, returns the dimensionless value 1.
    QVariant!N parseUnit() pure @safe
    {
        if (!tokens.length)
            return QVariant!N.make(1, Dimensions.init);

        QVariant!N ret;
        if (tokens.front.type == Tok.lparen)
        {
            tokens.advance();
            ret = parseCompoundUnit(true);
            tokens.check(Tok.rparen);
            tokens.advance();
        }
        else
            ret = parsePrefixUnit();

        return ret;
    }

    // Consumes a symbol token and resolves it to a unit, first as a
    // standalone unit symbol, then as "prefix + unit symbol".
    // Throws a ParsingException when the symbol cannot be resolved.
    QVariant!N parsePrefixUnit() pure @safe
    {
        tokens.check(Tok.symbol);
        auto str = tokens.front.slice;
        tokens.advance();

        // Try a standalone unit symbol (no prefix)
        if (auto uptr = str in symbolList.units)
            return *uptr;

        // Try with prefixes, the longest prefix first
        string danglingPrefix; // a prefix that consumed the whole symbol
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length < i)
                continue;

            string prefix = str[0 .. i];
            auto factor = prefix in symbolList.prefixes;
            if (!factor)
                continue;

            string unit = str[i .. $];
            if (!unit.length)
            {
                // Nothing follows this prefix. Remember it for the error
                // message, but keep trying shorter prefixes: the symbol may
                // still split into a shorter prefix followed by a unit.
                danglingPrefix = prefix;
                continue;
            }

            if (auto uptr = unit in symbolList.units)
                return *factor * *uptr;
        }

        enforce!ParsingException(!danglingPrefix.length,
            "Expecting a unit after the prefix " ~ danglingPrefix);
        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }
}

// Kinds of tokens produced by lex()
enum Tok
{
    none,
    symbol,
    mul,
    div,
    exp,
    integer,
    supinteger,
    rparen,
    lparen
}

// A token: its kind, the slice of the input it covers, and its integer value
// (only set for Tok.integer and Tok.supinteger tokens).
struct Token
{
    Tok type;
    string slice;
    int integer = int.max;
}

// Splits `input` into tokens. Whitespace separates tokens and is dropped.
// Throws a ParsingException when an integer or superscript integer is
// malformed.
Token[] lex(string input) pure @safe
{
    enum State
    {
        none,
        symbol,
        integer,
        supinteger
    }

    auto tokapp = appender!(Token[]);

    auto original = input;
    size_t i, j; // original[i .. j] is the token being accumulated (byte offsets)
    State state = State.none;

    // Emits original[i .. j] as a token of the given type.
    void pushToken(Tok type)
    {
        tokapp.put(Token(type, original[i .. j]));
        i = j;
        state = State.none;
    }

    // Emits original[i .. j] as an integer token, converting superscript
    // characters to their ASCII equivalents first.
    void pushInteger(Tok type)
    {
        auto slice = original[i .. j];

        if (type == Tok.supinteger)
        {
            auto a = appender!string;
            foreach (dchar c; slice)
            {
                switch (c)
                {
                    case '⁰': a.put('0'); break;
                    case '¹': a.put('1'); break;
                    case '²': a.put('2'); break;
                    case '³': a.put('3'); break;
                    case '⁴': a.put('4'); break;
                    case '⁵': a.put('5'); break;
                    case '⁶': a.put('6'); break;
                    case '⁷': a.put('7'); break;
                    case '⁸': a.put('8'); break;
                    case '⁹': a.put('9'); break;
                    case '⁺': a.put('+'); break;
                    case '⁻': a.put('-'); break;
                    default: assert(false, "Error in pushInteger()");
                }
            }
            slice = a.data;
        }

        int n;
        try
        {
            n = std.conv.parse!int(slice);
            enforce(slice.empty); // the whole slice must be a single integer
        }
        catch (Exception)
            throw new ParsingException("Unexpected integer format: " ~ original[i .. j]);

        tokapp.put(Token(type, original[i .. j], n));
        i = j;
        state = State.none;
    }

    // Flushes the token being accumulated, if any.
    void push()
    {
        if (state == State.symbol)
            pushToken(Tok.symbol);
        else if (state == State.integer)
            pushInteger(Tok.integer);
        else if (state == State.supinteger)
            pushInteger(Tok.supinteger);
    }

    foreach (dchar cur; input)
    {
        // Offsets are in UTF-8 code units, so advance by the encoded length.
        auto len = cur.codeLength!char;
        switch (cur)
        {
            // Whitespace
            case ' ':
            case '\t':
            case '\u00A0':
            case '\u2000': .. case '\u200A':
            case '\u202F':
            case '\u205F':
                push();
                j += len;
                i = j;
                break;

            case '(':
                push();
                j += len;
                pushToken(Tok.lparen);
                break;

            case ')':
                push();
                j += len;
                pushToken(Tok.rparen);
                break;

            case '*':
            case '.':
            case '⋅':
            case '×':
                push();
                j += len;
                pushToken(Tok.mul);
                break;

            case '/':
            case '÷':
                push();
                j += len;
                pushToken(Tok.div);
                break;

            case '^':
                push();
                j += len;
                pushToken(Tok.exp);
                break;

            case '0': .. case '9':
            case '-':
            case '+':
                if (state != State.integer)
                    push();
                state = State.integer;
                j += len;
                break;

            case '⁰':
            case '¹':
            case '²':
            case '³':
            case '⁴':
            case '⁵':
            case '⁶':
            case '⁷':
            case '⁸':
            case '⁹':
            case '⁻':
            case '⁺':
                if (state != State.supinteger)
                    push();
                state = State.supinteger;
                j += len;
                break;

            default:
                if (state == State.integer || state == State.supinteger)
                    push();
                state = State.symbol;
                j += len;
                break;
        }
    }
    push();

    return tokapp.data;
}

// Drops the front token, then, if token types are given, checks that the new
// front token has one of them. Throws a ParsingException on end of input.
void advance(Types...)(ref Token[] tokens, Types types)
{
    enforce!ParsingException(!tokens.empty, "Unexpected end of input");
    tokens.popFront();

    static if (Types.length)
        check(tokens, types);
}

// Throws a ParsingException if there is no token left.
void check(Token[] tokens) pure @safe
{
    enforce!ParsingException(tokens.length, "Unexpected end of input");
}

// Throws a ParsingException unless the front token is of type `tok`.
void check(Token[] tokens, Tok tok) pure @safe
{
    tokens.check();
    enforce!ParsingException(tokens[0].type == tok,
        format("Found '%s' while expecting %s",
            tokens[0].slice, tok));
}

// Throws a ParsingException unless the front token is of type `tok1` or `tok2`.
void check(Token[] tokens, Tok tok1, Tok tok2) pure @safe
{
    tokens.check();
    enforce!ParsingException(tokens[0].type == tok1 || tokens[0].type == tok2,
        format("Found '%s' while expecting %s or %s",
            tokens[0].slice, tok1, tok2));
}