/++
This module defines functions to parse units and quantities. The text
input is parsed according to the following grammar. For example:
$(DL
$(DT Prefixes and unit symbols must be joined:)
    $(DD "1 mm" = 1 millimeter)
    $(DD "1 m m" = 1 square meter)
$(BR)
$(DT Standalone units are preferred over prefixed ones:)
    $(DD "1 cd" = 1 candela, not 1 centiday)
$(BR)
$(DT Powers of units:)
    $(DD "1 m^2")
    $(DD "1 m²" $(I (superscript integer)))
$(BR)
$(DT Multiplication of two units:)
    $(DD "1 N m" $(I (whitespace)))
    $(DD "1 N . m")
    $(DD "1 N ⋅ m" $(I (centered dot)))
    $(DD "1 N * m")
    $(DD "1 N × m" $(I (times sign)))
$(BR)
$(DT Division of two units:)
    $(DD "1 mol / s")
    $(DD "1 mol ÷ s")
$(BR)
$(DT Grouping of units with parentheses:)
    $(DD "1 kg/(m.s^2)" = 1 kg m⁻¹ s⁻²)
)

Grammar: (whitespace not significant)
$(DL
$(DT Quantity:)
    $(DD Units)
    $(DD Number Units)
$(BR)
$(DT Number:)
    $(DD $(I Numeric value parsed by std.conv.parse!double))
$(BR)
$(DT Units:)
    $(DD Unit)
    $(DD Unit Units)
    $(DD Unit Operator Units)
$(BR)
$(DT Operator:)
    $(DD $(B *))
    $(DD $(B .))
    $(DD $(B ⋅))
    $(DD $(B ×))
    $(DD $(B /))
    $(DD $(B ÷))
$(BR)
$(DT Unit:)
    $(DD Base)
    $(DD Base $(B ^) Integer)
    $(DD Base SupInteger)
$(BR)
$(DT Base:)
    $(DD Symbol)
    $(DD Prefix Symbol)
    $(DD $(B $(LPAREN)) Units $(B $(RPAREN)))
$(BR)
$(DT Symbol:)
    $(DD $(I The symbol of a valid unit))
$(BR)
$(DT Prefix:)
    $(DD $(I The symbol of a valid prefix))
$(BR)
$(DT Integer:)
    $(DD $(I Integer value parsed by std.conv.parse!int))
$(BR)
$(DT SupInteger:)
    $(DD $(I Superscript version of Integer))
)

Copyright: Copyright 2013-2015, Nicolas Sicard
Authors: Nicolas Sicard
License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
Source: $(LINK https://github.com/biozic/quantities)
+/
module quantities.parsing;

import quantities.base;
import quantities.qvariant;

import std.array;
import std.algorithm;
import std.conv;
import std.exception;
import std.math;
import std.range;
import std.string;
import std.traits;
import std.typetuple;
import std.utf;


/// Exception thrown when operating on two units that are not interconvertible.
class DimensionException : Exception
{
    /// Builds the exception from a message, a source location and an optional cause.
    pure @safe nothrow
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        super(msg, file, line, next);
    }

    /// Same as above, with the chained cause given before the source location.
    pure @safe nothrow
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
    {
        super(msg, file, line, next);
    }
}

/++
Contains the symbols of the units and the prefixes that a parser can handle.

The `add*` methods return a copy of the list so that calls can be chained
when building a list in a single expression.
+/
struct SymbolList(N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    package
    {
        QVariant!N[string] units;   // unit symbol -> unit
        N[string] prefixes;         // prefix symbol -> multiplication factor
        size_t maxPrefixLength;     // length (in code units) of the longest registered prefix
    }

    /// Adds (or replaces) a unit in the list
    auto addUnit(Q)(string symbol, Q unit)
        if (isQVariant!Q)
    {
        units[symbol] = unit;
        return this;
    }
    /// ditto
    auto addUnit(Q)(string symbol, Q unit)
        if (isQuantity!Q)
    {
        return addUnit(symbol, unit.qVariant);
    }

    /// Adds (or replaces) a prefix in the list
    // The template parameter is named T so that it no longer shadows the
    // struct's own N; the factor is implicitly converted to N on storage.
    auto addPrefix(T)(string symbol, T factor)
        if (isNumeric!T)
    {
        prefixes[symbol] = factor;
        // Remember the longest prefix so that lookups can try longest-first.
        if (symbol.length > maxPrefixLength)
            maxPrefixLength = symbol.length;
        return this;
    }
}

/// Type of a function that can parse a string for a numeric value of type N.
alias NumberParser(N) = N function(ref string s) pure @safe;

/// A quantity parser
struct Parser(N)
{
    SymbolList!N symbolList; /// A list of registered symbols for units and prefixes.
    NumberParser!N numberParser; /// A function that can parse a string for a numeric value of type N.

    /++
    Parses a QVariant from a string.
    +/
    QVariant!N parseVariant(string str)
    {
        return parseQuantityImpl!N(str, symbolList, numberParser);
    }

    /++
    Parses a quantity of a known type Q from a string.

    Throws: DimensionException if the dimensions of the parsed text do not
    match the dimensions of Q.
    +/
    Q parse(Q)(string str)
        if (isQuantity!Q)
    {
        static assert(is(N : Q.valueType), "Incompatible value type: " ~ Q.valueType.stringof);

        // Parse with N: symbolList and numberParser are instantiated with N,
        // so instantiating the implementation with Q.valueType only compiled
        // when both types were identical. The raw value is converted to
        // Q.valueType by Q.make below.
        auto q = parseQuantityImpl!N(str, symbolList, numberParser);

        // Explicit throw instead of std.exception.enforceEx, which is
        // deprecated in recent Phobos releases.
        if (!equals(Q.dimensions, q.dimensions))
            throw new DimensionException(
                "Dimension error: [%s] is not compatible with [%s]".format(
                    quantities.base.toString(Q.dimensions), quantities.base.toString(q.dimensions)));
        return Q.make(q.rawValue);
    }
}
///
pure @safe unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    auto century = unit!(real, "century");
    alias LectureLength = typeof(century);

    auto symbolList = SymbolList!real()
        .addUnit("Cy", century)
        .addPrefix("µ", 1e-6L);

    import std.conv;
    auto parser = Parser!real(symbolList, &std.conv.parse!(real, string));

    auto timing = 1e-6L * century;
    assert(timing == parser.parse!LectureLength("1 µCy"));
    assert(timing == parser.parseVariant("1 µCy"));
}

/// Creates a compile-time parser that parses a string for a quantity and
/// automatically deduces the quantity type.
template compileTimeParser(N, alias symbolList, alias numberParser)
{
    // Inner eponymous template: instantiated with the text to parse, it
    // evaluates the parse at compile time and yields a Quantity whose
    // dimensions are taken from the parsed result.
    template compileTimeParser(string str)
    {
        enum q = parseQuantityImpl!N(str, symbolList, &numberParser);
        enum compileTimeParser = Quantity!(N, cast(Dimensions) q.dimensions).make(q.rawValue);
    }
}
///
pure @safe unittest
{
    enum century = unit!(real, "century");
    alias LectureLength = typeof(century);

    enum symbolList = SymbolList!real()
        .addUnit("Cy", century)
        .addPrefix("µ", 1e-6L);

    alias ctParser = compileTimeParser!(real, symbolList, std.conv.parse!(real, string));
    enum timing = 1e-6L * century;
    static assert(timing == ctParser!"1 µCy");
}

/// Exception thrown when parsing encounters an unexpected token.
class ParsingException : Exception
{
    /// Builds the exception from a message, a source location and an optional cause.
    pure @safe nothrow
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        super(msg, file, line, next);
    }

    /// Same as above, with the chained cause given before the source location.
    pure @safe nothrow
    this(string msg, Throwable next, string file = __FILE__, size_t line = __LINE__)
    {
        super(msg, file, line, next);
    }
}

private:

/+
Parses `input` as an optional number followed by optional units.

Params:
    input = the text to parse
    symbolList = the unit and prefix symbols recognized in the text
    parseFun = function that consumes a leading number from the text

Returns: the parsed number (1 when the text does not start with a number)
multiplied by the parsed units; a dimensionless value when nothing but
whitespace follows the number.

Throws: ParsingException (from the lexer or the unit parser) when the unit
part of the text is malformed.
+/
QVariant!N parseQuantityImpl(N)(string input, SymbolList!N symbolList, NumberParser!N parseFun)
{
    N value;
    string untouched = input;
    try
        value = parseFun(input);
    catch (Exception)
    {
        // No leading number: the text is a bare unit, worth 1 of itself.
        // Put back anything the number parser may have consumed before
        // failing, so the unit parser sees the complete text.
        input = untouched;
        value = 1;
    }

    if (input.empty)
        return QVariant!N.make(value, null);

    auto tokens = lex(input);

    // Only whitespace was left after the number (e.g. "0.8 "): there is no
    // unit to parse, and the unit parser must not be run on an empty token
    // list.
    if (tokens.empty)
        return QVariant!N.make(value, null);

    auto parser = QuantityParser!N(tokens, symbolList);

    return value * parser.parseCompoundUnit();
}

pure @safe unittest // Test parsing
{
    auto meter = unit!(double, "L");
    auto kilogram = unit!(double, "M");
    auto second = unit!(double, "T");
    auto one = meter / meter;
    auto unknown = one;

    auto siSL = SymbolList!double()
        .addUnit("m", meter)
        .addUnit("kg", kilogram)
        .addUnit("s", second)
        .addPrefix("c", 0.01L)
        .addPrefix("m", 0.001L);

    bool checkParse(Q)(string input, Q quantity)
    {
        return parseQuantityImpl!double(input, siSL, &std.conv.parse!(double, string))
            == quantity.qVariant;
    }

    assert(checkParse("1    m    ", meter));
    assert(checkParse("1 mm", 0.001 * meter));
    assert(checkParse("1 m^-1", 1 / meter));
    assert(checkParse("1 m²", meter * meter));
    assert(checkParse("1 m⁺²", meter * meter));
    assert(checkParse("1 m⁻¹", 1 / meter));
    assert(checkParse("1 (m)", meter));
    assert(checkParse("1 (m^-1)", 1 / meter));
    assert(checkParse("1 ((m)^-1)^-1", meter));
    assert(checkParse("1 (s/(s/m))", meter));
    assert(checkParse("1 m*m", meter * meter));
    assert(checkParse("1 m m", meter * meter));
    assert(checkParse("1 m.m", meter * meter));
    assert(checkParse("1 m⋅m", meter * meter));
    assert(checkParse("1 m×m", meter * meter));
    assert(checkParse("1 m/m", meter / meter));
    assert(checkParse("1 m÷m", meter / meter));
    assert(checkParse("1 m.s", second * meter));
    assert(checkParse("1 m s", second * meter));
    assert(checkParse("1 m²s", meter * meter * second));
    assert(checkParse("1 m*m/m", meter));
    assert(checkParse("0.8 m⁰", 0.8 * one));
    assert(checkParse("0.8", 0.8 * one));

    // Whitespace after a bare number is not significant
    assert(checkParse("0.8 ", 0.8 * one));
    assert(checkParse("0.8 \t", 0.8 * one));

    assertThrown!ParsingException(checkParse("1 c m", unknown));
    assertThrown!ParsingException(checkParse("1 c", unknown));
    assertThrown!ParsingException(checkParse("1 Qm", unknown));
    assertThrown!ParsingException(checkParse("1 m + m", unknown));
    assertThrown!ParsingException(checkParse("1 m/", unknown));
    assertThrown!ParsingException(checkParse("1 m^", unknown));
    assertThrown!ParsingException(checkParse("1 m^m", unknown));
    assertThrown!ParsingException(checkParse("1 m ) m", unknown));
    assertThrown!ParsingException(checkParse("1 m * m) m", unknown));
    assertThrown!ParsingException(checkParse("1 m^²", unknown));
    assertThrown!ParsingException(checkParse("1-⁺⁵", unknown));
}

// A parser that can parse
// a text for a unit or a quantity
//
// It is a recursive-descent parser working on the tokens produced by lex().
// Every parse* method consumes the tokens it recognizes from the front of
// `tokens` and throws a ParsingException on unexpected or missing tokens.
struct QuantityParser(N)
{
    private
    {
        Token[] tokens;             // tokens that remain to be parsed
        SymbolList!N symbolList;    // known unit and prefix symbols
    }

    // Parses a sequence of units combined by multiplication (explicit
    // operator or simple juxtaposition) or division, from left to right.
    // When inParens is true, parsing stops in front of a closing parenthesis,
    // which is left for the caller to consume.
    QVariant!N parseCompoundUnit(bool inParens = false) pure @safe
    {
        QVariant!N ret = parseExponentUnit();

        while (!tokens.empty && !(inParens && tokens.front.type == Tok.rparen))
        {
            immutable op = tokens.front.type;

            // Without an explicit operator, juxtaposed units are multiplied.
            immutable multiply = op != Tok.div;

            if (op == Tok.mul || op == Tok.div)
            {
                tokens.advance();
                tokens.check(); // an operator must be followed by something
            }

            QVariant!N rhs = parseExponentUnit();
            if (multiply)
                ret *= rhs;
            else
                ret /= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by an exponent, written either as
    // '^' followed by an integer or as a superscript integer.
    QVariant!N parseExponentUnit() pure @safe
    {
        QVariant!N ret = parseUnit();

        if (tokens.empty)
            return ret;

        auto next = tokens.front;
        if (next.type != Tok.exp && next.type != Tok.supinteger)
            return ret;

        // After '^', only a plain integer is accepted.
        if (next.type == Tok.exp)
            tokens.advance(Tok.integer);

        int n = parseInteger();

        // Cannot use ret ^^ n because of CTFE limitation
        static if (__traits(compiles, std.math.pow(ret.value, n)))
            ret._value = std.math.pow(ret.value, n);
        else
        {
            // Fallback by repeated multiplication: multiply |n| times by the
            // original value, and invert the result for negative exponents.
            // (n == 0 yields 1.)
            immutable N base = ret._value;
            immutable long count = n < 0 ? -cast(long) n : n;
            N acc = 1;
            foreach (step; 0 .. count)
                acc *= base;
            ret._value = n < 0 ? 1 / acc : acc;
        }
        ret.dimensions = ret.dimensions.pow(n);
        return ret;
    }

    // Consumes an integer or superscript-integer token and returns its value.
    int parseInteger() pure @safe
    {
        tokens.check(Tok.integer, Tok.supinteger);
        int n = tokens.front.integer;
        tokens.advance();
        return n;
    }

    // Parses either a parenthesized compound unit or a single (possibly
    // prefixed) unit symbol.
    QVariant!N parseUnit() pure @safe
    {
        // Report a truncated input such as "1 (" as a ParsingException
        // instead of reading the front of an empty token array.
        tokens.check();

        QVariant!N ret;

        if (tokens.front.type == Tok.lparen)
        {
            tokens.advance();
            ret = parseCompoundUnit(true);
            tokens.check(Tok.rparen);
            tokens.advance();
        }
        else
            ret = parsePrefixUnit();

        return ret;
    }

    // Consumes a symbol token and resolves it to a registered unit, or to a
    // registered prefix immediately followed by a registered unit.
    QVariant!N parsePrefixUnit() pure @safe
    {
        tokens.check(Tok.symbol);
        auto str = tokens.front.slice;
        tokens.advance();

        // Try a standalone unit symbol (no prefix)
        if (auto standalone = str in symbolList.units)
            return *standalone;

        // Try with prefixes, the longest prefix first
        string barePrefix; // registered prefix that makes up the whole symbol
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length < i)
                continue;

            string prefix = str[0 .. i];
            auto factor = prefix in symbolList.prefixes;
            if (!factor)
                continue;

            string unit = str[i .. $];
            if (unit.length == 0)
            {
                // The symbol is a prefix and nothing else. Keep looking: a
                // shorter prefix followed by a unit may still match. The
                // specific error is reported below if nothing does.
                barePrefix = prefix;
                continue;
            }

            if (auto prefixed = unit in symbolList.units)
                return *factor * *prefixed;
        }

        if (barePrefix.length)
            throw new ParsingException("Expecting a unit after the prefix " ~ barePrefix);

        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }
}

// Kinds of tokens produced by lex()
enum Tok
{
    none,
    symbol,
    mul,
    div,
    exp,
    integer,
    supinteger,
    rparen,
    lparen
}

// A token: its kind, the text it was read from and, for integer and
// supinteger tokens, its numeric value.
struct Token
{
    Tok type;
    string slice;
    int integer = int.max;
}

// Splits the input into tokens. Whitespace separates tokens and is otherwise
// dropped. Throws a ParsingException if a run of digit/sign characters (plain
// or superscript) is not a valid integer.
Token[] lex(string input) pure @safe
{
    enum State
    {
        none,
        symbol,
        integer,
        supinteger
    }

    Token[] tokens;
    auto tokapp = appender(tokens);

    // The pending token is original[i .. j]; both are indices in code units.
    auto original = input;
    size_t i, j;
    State state = State.none;

    // Emits the pending slice as a token of the given type.
    void pushToken(Tok type)
    {
        tokapp.put(Token(type, original[i .. j]));
        i = j;
        state = State.none;
    }

    // Emits the pending slice as an integer token, converting superscript
    // characters to their ASCII equivalents first.
    void pushInteger(Tok type)
    {
        auto slice = original[i .. j];

        if (type == Tok.supinteger)
        {
            auto a = appender!string;
            foreach (dchar c; slice)
            {
                switch (c)
                {
                    case '⁰': a.put('0'); break;
                    case '¹': a.put('1'); break;
                    case '²': a.put('2'); break;
                    case '³': a.put('3'); break;
                    case '⁴': a.put('4'); break;
                    case '⁵': a.put('5'); break;
                    case '⁶': a.put('6'); break;
                    case '⁷': a.put('7'); break;
                    case '⁸': a.put('8'); break;
                    case '⁹': a.put('9'); break;
                    case '⁺': a.put('+'); break;
                    case '⁻': a.put('-'); break;
                    default: assert(false, "Error in pushInteger()");
                }
            }
            slice = a.data;
        }

        int n;
        try
            n = std.conv.parse!int(slice);
        catch (Exception)
            throw new ParsingException("Unexpected integer format: " ~ original[i .. j]);

        // std.conv.parse stops at the first character it cannot use: anything
        // left over (as in "2-3") means the slice was not a single integer.
        if (!slice.empty)
            throw new ParsingException("Unexpected integer format: " ~ slice);

        tokapp.put(Token(type, original[i .. j], n));
        i = j;
        state = State.none;
    }

    // Emits the pending token, if any, according to the current state.
    void push()
    {
        if (state == State.symbol)
            pushToken(Tok.symbol);
        else if (state == State.integer)
            pushInteger(Tok.integer);
        else if (state == State.supinteger)
            pushInteger(Tok.supinteger);
    }

    while (!input.empty)
    {
        auto cur = input.front;
        auto len = cur.codeLength!char;
        switch (cur)
        {
            // Whitespace
            case ' ':
            case '\t':
            case '\u00A0':
            case '\u2000': .. case '\u200A':
            case '\u202F':
            case '\u205F':
                push();
                j += len;
                i = j;
                break;

            case '(':
                push();
                j += len;
                pushToken(Tok.lparen);
                break;

            case ')':
                push();
                j += len;
                pushToken(Tok.rparen);
                break;

            case '*':
            case '.':
            case '⋅':
            case '×':
                push();
                j += len;
                pushToken(Tok.mul);
                break;

            case '/':
            case '÷':
                push();
                j += len;
                pushToken(Tok.div);
                break;

            case '^':
                push();
                j += len;
                pushToken(Tok.exp);
                break;

            case '0': .. case '9':
            case '-':
            case '+':
                if (state != State.integer)
                    push();
                state = State.integer;
                j += len;
                break;

            case '⁰':
            case '¹':
            case '²':
            case '³':
            case '⁴':
            case '⁵':
            case '⁶':
            case '⁷':
            case '⁸':
            case '⁹':
            case '⁻':
            case '⁺':
                if (state != State.supinteger)
                    push();
                state = State.supinteger;
                j += len;
                break;

            // Any other character is part of a symbol
            default:
                if (state == State.integer || state == State.supinteger)
                    push();
                state = State.symbol;
                j += len;
                break;
        }
        input.popFront();
    }
    push();

    return tokapp.data;
}

// Drops the front token. If token types are given, additionally checks that
// the new front token is of one of these types.
// Throws a ParsingException if there is no token to drop or if the check fails.
void advance(Types...)(ref Token[] tokens, Types types)
{
    if (tokens.empty)
        throw new ParsingException("Unexpected end of input");
    tokens.popFront();

    static if (Types.length)
        check(tokens, types);
}

// Checks that a token is available and, if token types are given, that the
// front token is of one of these types.
// Throws a ParsingException otherwise.
void check(Types...)(Token[] tokens, Types types)
{
    if (tokens.empty)
        throw new ParsingException("Unexpected end of input");

    static if (Types.length)
    {
        import std.string : format;

        auto token = tokens.front;

        bool ok = false;
        foreach (type; types)
        {
            if (token.type == type)
            {
                ok = true;
                break;
            }
        }

        if (!ok)
        {
            // Name every acceptable token type in the message, not only the
            // first one. The message is built on the failure path only.
            string expected;
            foreach (idx, type; types)
            {
                static if (idx > 0)
                    expected ~= " or ";
                expected ~= format("%s", type);
            }
            throw new ParsingException(format("Found '%s' while expecting %s",
                    token.slice, expected));
        }
    }
}