/++
This module defines functions to parse units and quantities.

The text input is parsed according to the grammar given below.
Some examples of accepted input:
$(DL
$(DT Prefixes and unit symbols must be joined:)
    $(DD "1 mm" = 1 millimeter)
    $(DD "1 m m" = 1 square meter)
$(BR)
$(DT Standalone units are preferred over prefixed ones:)
    $(DD "1 cd" = 1 candela, not 1 centiday)
$(BR)
$(DT Powers of units:)
    $(DD "1 m^2")
    $(DD "1 m^-1/2" $(I (rational exponent)))
    $(DD "1 m²" $(I (superscript integer)))
$(BR)
$(DT Multiplication of two units:)
    $(DD "1 N m" $(I (whitespace)))
    $(DD "1 N . m")
    $(DD "1 N ⋅ m" $(I (centered dot)))
    $(DD "1 N * m")
    $(DD "1 N × m" $(I (times sign)))
$(BR)
$(DT Division of two units:)
    $(DD "1 mol / s")
    $(DD "1 mol ÷ s")
$(BR)
$(DT Grouping of units with parentheses:)
    $(DD "1 kg/(m.s^2)" = 1 kg m⁻¹ s⁻²)
)

Grammar: (whitespace not significant)
$(DL
$(DT Quantity:)
    $(DD Units)
    $(DD Number Units)
$(BR)
$(DT Number:)
    $(DD $(I Numeric value parsed by std.conv.parse!double))
$(BR)
$(DT Units:)
    $(DD Unit)
    $(DD Unit Units)
    $(DD Unit Operator Units)
$(BR)
$(DT Operator:)
    $(DD $(B *))
    $(DD $(B .))
    $(DD $(B ⋅))
    $(DD $(B ×))
    $(DD $(B /))
    $(DD $(B ÷))
$(BR)
$(DT Unit:)
    $(DD Base)
    $(DD Base $(B ^) Integer)
    $(DD Base $(B ^) Rational)
    $(DD Base SupInteger)
$(BR)
$(DT Base:)
    $(DD Symbol)
    $(DD Prefix Symbol)
    $(DD $(B $(LPAREN)) Units $(B $(RPAREN)))
$(BR)
$(DT Symbol:)
    $(DD $(I The symbol of a valid unit))
$(BR)
$(DT Prefix:)
    $(DD $(I The symbol of a valid prefix))
$(BR)
$(DT Rational:)
    $(DD Integer $(B /) Integer)
$(BR)
$(DT Integer:)
    $(DD $(I Integer value parsed by std.conv.parse!int))
$(BR)
$(DT SupInteger:)
    $(DD $(I Superscript version of Integer))
)

Copyright: Copyright 2013-2018, Nicolas Sicard
Authors: Nicolas Sicard
License: $(LINK
www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
Source: $(LINK https://github.com/biozic/quantities)
+/
module quantities.runtime.parsing;

import quantities.internal.dimensions;
import quantities.runtime.qvariant;
import quantities.compiletime.quantity;

import std.array;
import std.algorithm;
import std.conv;
import std.exception;
import std.math;
import std.range;
import std.string;
import std.traits;
import std.typetuple;
import std.utf;

/++
Registry of the unit symbols and prefix symbols that a parser recognizes.

Both `addUnit` and `addPrefix` return the list by value, so calls can be
chained when building a list in a single expression.
+/
struct SymbolList(N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    private
    {
        // Unit symbol -> quantity it stands for.
        QVariant!N[string] units;
        // Prefix symbol -> multiplication factor.
        N[string] prefixes;
        // Length (in code units) of the longest registered prefix symbol.
        size_t maxPrefixLength;
    }

    /++
    Registers a unit under `symbol`, replacing any unit already stored
    under the same symbol.

    Params:
        symbol = the textual symbol of the unit.
        unit = a QVariant, or a Quantity (stored through its `qVariant`).

    Returns: this list, by value.
    +/
    auto addUnit(Q)(string symbol, Q unit)
            if (isQVariantOrQuantity!Q)
    {
        static if (isQVariant!Q)
        {
            units[symbol] = unit;
        }
        else static if (isQuantity!Q)
        {
            units[symbol] = unit.qVariant;
        }
        else
        {
            static assert(false);
        }

        return this;
    }

    /++
    Registers a prefix under `symbol`, replacing any prefix already stored
    under the same symbol.

    Params:
        symbol = the textual symbol of the prefix.
        factor = the multiplication factor the prefix stands for.

    Returns: this list, by value.
    +/
    auto addPrefix(F)(string symbol, F factor)
            if (isNumeric!F)
    {
        prefixes[symbol] = factor;
        // Remember the longest prefix so that the parser knows where to
        // start when it tries prefixes from longest to shortest.
        maxPrefixLength = max(maxPrefixLength, symbol.length);
        return this;
    }
}

/++
A quantity parser.

Params:
    N = The numeric type of the quantities.
    numberParser = a function that takes a reference to a string and returns the
        parsed number.
+/
struct Parser(N, alias numberParser)
        if (isNumeric!N)
{
    /// A list of registered symbols for units and prefixes.
    SymbolList!N symbolList;

    /++
    Parses a QVariant from str, using `numberParser` for the numeric part
    and `symbolList` for the units and prefixes.
    +/
    QVariant!N parse(S)(S str)
            if (isSomeString!S)
    {
        alias impl = parseQuantityImpl!(N, numberParser);
        return impl(str, symbolList);
    }
}
///
unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    import std.conv : parse;

    auto century = unit!real("T");
    alias LectureLength = typeof(century);

    auto symbolList = SymbolList!real().addUnit("Cy", century).addPrefix("µ", 1e-6L);
    alias numberParser = (ref string s) => std.conv.parse!real(s);
    auto parser = Parser!(real, numberParser)(symbolList);

    auto timing = 1e-6L * century;
    assert(timing == parser.parse("1 µCy"));
}

version (none) unittest  // Compile-time
{
    import std.conv : parse;

    enum euro = unit!int("@");
    alias Currency = typeof(euro);

    enum symbolList = SymbolList!int().addUnit("€", euro).addPrefix("k", 1000);
    alias intParser = (ref string s) => std.conv.parse!int(s);
    enum parser = Parser!(int, intParser)(symbolList);

    enum cost = 2_500_000 * euro;
    static assert(cost == parser.parse("2500 k€"));
}

/// Exception thrown when parsing encounters an unexpected token.
class ParsingException : Exception
{
    mixin basicExceptionCtors;
}

private:

// Parses `input` as an optional leading number followed by an optional
// compound unit expression.
//
// Params:
//     N          = numeric type of the resulting QVariant.
//     parseFun   = callable taking the string by reference, returning the
//                  parsed number and consuming it from the string.
//     input      = the text to parse.
//     symbolList = the units and prefixes that may appear in `input`.
//
// Returns: the parsed number multiplied by the parsed unit.
// Throws: ParsingException when the unit part is malformed or unknown.
QVariant!N parseQuantityImpl(N, alias parseFun, S)(S input, SymbolList!N symbolList)
        if (isSomeString!S)
{
    N value;
    auto str = input[];

    // A leading number is optional: when parseFun cannot read one
    // (e.g. the input is just "kg"), the value defaults to 1.
    try
        value = parseFun(str);
    catch (Exception)
        value = 1;

    // Nothing left after the number: a dimensionless quantity.
    if (str.empty)
        return QVariant!N(value, Dimensions.init);

    auto tokens = lex(str);
    auto parser = QuantityParser!N(tokens, symbolList);

    return value * parser.parseCompoundUnit();
}

unittest  // Test parsing
{
    auto meter = unit!double("L");
    auto kilogram = unit!double("M");
    auto second = unit!double("T");
    auto one = meter / meter;
    auto unknown = one;

    auto siSL = SymbolList!double().addUnit("m", meter).addUnit("kg", kilogram)
        .addUnit("s", second).addPrefix("c", 0.01L).addPrefix("m", 0.001L);

    bool checkParse(S, Q)(S input, Q quantity)
            if (isSomeString!S)
    {
        alias numberParser = std.conv.parse!(Q.valueType, S);
        return parseQuantityImpl!(double, numberParser)(input, siSL) == quantity;
    }

    assert(checkParse("1    m    ", meter));
    assert(checkParse("1m", meter));
    assert(checkParse("1 mm", 0.001 * meter));
    assert(checkParse("1 m^-1", 1 / meter));
    assert(checkParse("1 m^2/2", meter));
    assert(checkParse("1 m^-2/2", 1 / meter));
    assert(checkParse("1 m²", meter * meter));
    assert(checkParse("1 m⁺²", meter * meter));
    assert(checkParse("1 m⁻¹", 1 / meter));
    assert(checkParse("1 (m)", meter));
    assert(checkParse("1 (m^-1)", 1 / meter));
    assert(checkParse("1 ((m)^-1)^-1", meter));
    assert(checkParse("1 (s/(s/m))", meter));
    assert(checkParse("1 m*m", meter * meter));
    assert(checkParse("1 m m", meter * meter));
    assert(checkParse("1 m.m", meter * meter));
    assert(checkParse("1 m⋅m", meter * meter));
    assert(checkParse("1 m×m", meter * meter));
    assert(checkParse("1 m/m", meter / meter));
    assert(checkParse("1 m÷m", meter / meter));
    assert(checkParse("1 m.s", second * meter));
    assert(checkParse("1 m s", second * meter));
    assert(checkParse("1 m²s", meter * meter * second));
    assert(checkParse("1 m*m/m", meter));
    assert(checkParse("0.8 m⁰", 0.8 * one));
    assert(checkParse("0.8", 0.8 * one));
    assert(checkParse("0.8 ", 0.8 * one));

    // A '/' after an exponent is a division unless an integer follows it.
    assert(checkParse("1 m^2/s", meter * meter / second));
    assert(checkParse("1 m²/s", meter * meter / second));
    assert(checkParse("1 m/s^2", meter / (second * second)));

    assertThrown!ParsingException(checkParse("1 c m", unknown));
    assertThrown!ParsingException(checkParse("1 c", unknown));
    assertThrown!ParsingException(checkParse("1 Qm", unknown));
    assertThrown!ParsingException(checkParse("1 m + m", unknown));
    assertThrown!ParsingException(checkParse("1 m/", unknown));
    assertThrown!ParsingException(checkParse("1 m^", unknown));
    assertThrown!ParsingException(checkParse("1 m^m", unknown));
    assertThrown!ParsingException(checkParse("1 m ) m", unknown));
    assertThrown!ParsingException(checkParse("1 m * m) m", unknown));
    assertThrown!ParsingException(checkParse("1 m^²", unknown));
    assertThrown!ParsingException(checkParse("1-⁺⁵", unknown));
    assertThrown!ParsingException(checkParse("1 m^1/0", unknown));

    // A long prefix that swallows the whole symbol must not hide a valid
    // shorter-prefix + unit reading ("da" = prefix "d" + unit "a").
    auto overlapSL = SymbolList!double().addUnit("m", meter).addUnit("a", second)
        .addPrefix("d", 0.1).addPrefix("da", 10.0);
    alias doubleParser = std.conv.parse!(double, string);
    assert(parseQuantityImpl!(double, doubleParser)("1 da", overlapSL) == 0.1 * second);
    assert(parseQuantityImpl!(double, doubleParser)("1 dam", overlapSL) == 10.0 * meter);
}

// A recursive-descent parser that turns a token list into a unit or a quantity.
struct QuantityParser(N)
{
    private
    {
        Token[] tokens;
        SymbolList!N symbolList;
    }

    // Parses a sequence of units combined by multiplication (explicit
    // operator or plain juxtaposition) or division.
    // When `inParens` is true, parsing stops in front of a closing
    // parenthesis, which is left for the caller to consume.
    QVariant!N parseCompoundUnit(bool inParens = false)
    {
        QVariant!N ret = parseExponentUnit();

        while (!tokens.empty && !(inParens && tokens.front.type == Tok.rparen))
        {
            immutable op = tokens.front.type;

            // An explicit operator must be followed by an operand;
            // without an operator, juxtaposition means multiplication.
            if (op == Tok.mul || op == Tok.div)
            {
                tokens.advance();
                tokens.check();
            }

            QVariant!N rhs = parseExponentUnit();
            if (op == Tok.div)
                ret /= rhs;
            else
                ret *= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by an exponent, written either
    // as `^` + integer/rational or as a superscript integer.
    QVariant!N parseExponentUnit()
    {
        QVariant!N ret = parseUnit();

        if (tokens.empty)
            return ret;

        auto next = tokens.front;
        if (next.type != Tok.exp && next.type != Tok.supinteger)
            return ret;

        // Skip the caret; it must be followed by a plain integer.
        if (next.type == Tok.exp)
            tokens.advance(Tok.integer);

        Rational r = parseRationalOrInteger();
        return ret ^^ r;
    }

    // Parses `Integer` or `Integer / Integer`.
    // The slash is treated as part of the exponent only when an integer
    // token follows it; otherwise it is left in place so that the caller
    // parses it as a division of units (e.g. "m^2/s").
    // Throws: ParsingException if the denominator is zero.
    Rational parseRationalOrInteger()
    {
        int num = parseInteger();
        int den = 1;

        immutable isRational = tokens.length >= 2 && tokens[0].type == Tok.div
            && (tokens[1].type == Tok.integer || tokens[1].type == Tok.supinteger);
        if (isRational)
        {
            tokens.advance();
            den = parseInteger();
            // NOTE(review): Rational is declared elsewhere; its handling of
            // a zero denominator is not visible here, so reject it up front
            // with the exception type callers of the parser expect.
            enforce!ParsingException(den != 0, "Exponent denominator must not be zero");
        }
        return Rational(num, den);
    }

    // Consumes an integer or superscript-integer token and returns its value.
    int parseInteger()
    {
        tokens.check(Tok.integer, Tok.supinteger);
        int n = tokens.front.integer;
        tokens.advance();
        return n;
    }

    // Parses either a parenthesized compound unit or a (prefixed) symbol.
    // With no tokens left, returns the dimensionless value 1.
    QVariant!N parseUnit()
    {
        if (!tokens.length)
            return QVariant!N(1, Dimensions.init);

        if (tokens.front.type == Tok.lparen)
        {
            tokens.advance();
            auto ret = parseCompoundUnit(true);
            tokens.check(Tok.rparen);
            tokens.advance();
            return ret;
        }
        else
            return parsePrefixUnit();
    }

    // Resolves a symbol token to a unit, trying in order:
    //   1. the whole symbol as a standalone unit;
    //   2. prefix + unit, from the longest registered prefix to the shortest.
    // Throws: ParsingException if no reading of the symbol is known.
    QVariant!N parsePrefixUnit()
    {
        tokens.check(Tok.symbol);
        auto str = tokens.front.slice;
        tokens.advance();

        // Try a standalone unit symbol (no prefix)
        if (auto uptr = str in symbolList.units)
            return *uptr;

        // Try with prefixes, the longest prefix first
        string danglingPrefix; // a prefix that matched the entire symbol
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length < i)
                continue;

            string prefix = str[0 .. i].to!string;
            auto factor = prefix in symbolList.prefixes;
            if (factor is null)
                continue;

            string unit = str[i .. $].to!string;
            if (!unit.length)
            {
                // The prefix covers the whole symbol. Keep looking: a
                // shorter prefix may still leave a valid unit behind.
                danglingPrefix = prefix;
                continue;
            }

            if (auto uptr = unit in symbolList.units)
                return *factor * *uptr;
        }

        if (danglingPrefix.length)
            throw new ParsingException("Expecting a unit after the prefix " ~ danglingPrefix);

        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }
}

// Kinds of tokens produced by lex().
enum Tok
{
    none,
    symbol,
    mul,
    div,
    exp,
    integer,
    supinteger,
    rparen,
    lparen
}

// A token: its kind, the slice of the input it covers and, for integer
// and superscript-integer tokens, its numeric value.
struct Token
{
    Tok type;
    const(char)[] slice;
    int integer = int.max;
}

// Splits `input` into tokens. Whitespace separates tokens and is dropped.
// Throws: ParsingException when a run of digits/signs is not a valid integer.
Token[] lex(const(char)[] input)
{
    enum State
    {
        none,
        symbol,
        integer,
        supinteger
    }

    auto tokapp = appender!(Token[]);

    auto original = input;
    // original[i .. j] is the text of the token being accumulated.
    size_t i, j;
    State state = State.none;

    // Maps a superscript digit or sign to its ASCII counterpart.
    static char superscriptToAscii(dchar c)
    {
        switch (c)
        {
        case '⁰':
            return '0';
        case '¹':
            return '1';
        case '²':
            return '2';
        case '³':
            return '3';
        case '⁴':
            return '4';
        case '⁵':
            return '5';
        case '⁶':
            return '6';
        case '⁷':
            return '7';
        case '⁸':
            return '8';
        case '⁹':
            return '9';
        case '⁺':
            return '+';
        case '⁻':
            return '-';
        default:
            assert(false, "Error in pushInteger()");
        }
    }

    // Emits original[i .. j] as a token of the given type.
    void pushToken(Tok type)
    {
        tokapp.put(Token(type, original[i .. j]));
        i = j;
        state = State.none;
    }

    // Emits original[i .. j] as an integer token, converting superscript
    // characters to ASCII first when needed.
    void pushInteger(Tok type)
    {
        auto slice = original[i .. j];

        if (type == Tok.supinteger)
        {
            auto a = appender!string;
            foreach (dchar c; slice)
                a.put(superscriptToAscii(c));
            slice = a.data;
        }

        int n;
        try
        {
            n = std.conv.parse!int(slice);
            // The whole run must have been consumed ("2-3" is not an integer).
            enforce(slice.empty);
        }
        catch (Exception)
            throw new ParsingException("Unexpected integer format: %s".format(original[i .. j]));

        tokapp.put(Token(type, original[i .. j], n));
        i = j;
        state = State.none;
    }

    // Flushes the token being accumulated, if any.
    void push()
    {
        if (state == State.symbol)
            pushToken(Tok.symbol);
        else if (state == State.integer)
            pushInteger(Tok.integer);
        else if (state == State.supinteger)
            pushInteger(Tok.supinteger);
    }

    foreach (dchar cur; input)
    {
        // Offsets are in UTF-8 code units, so advance by the encoded length.
        auto len = cur.codeLength!char;
        switch (cur)
        {
            // Whitespace
        case ' ':
        case '\t':
        case '\u00A0':
        case '\u2000': .. case '\u200A':
        case '\u202F':
        case '\u205F':
            push();
            j += len;
            i = j;
            break;

        case '(':
            push();
            j += len;
            pushToken(Tok.lparen);
            break;

        case ')':
            push();
            j += len;
            pushToken(Tok.rparen);
            break;

        case '*':
        case '.':
        case '⋅':
        case '×':
            push();
            j += len;
            pushToken(Tok.mul);
            break;

        case '/':
        case '÷':
            push();
            j += len;
            pushToken(Tok.div);
            break;

        case '^':
            push();
            j += len;
            pushToken(Tok.exp);
            break;

        case '0': .. case '9':
        case '-':
        case '+':
            if (state != State.integer)
                push();
            state = State.integer;
            j += len;
            break;

        case '⁰':
        case '¹':
        case '²':
        case '³':
        case '⁴':
        case '⁵':
        case '⁶':
        case '⁷':
        case '⁸':
        case '⁹':
        case '⁻':
        case '⁺':
            if (state != State.supinteger)
                push();
            state = State.supinteger;
            j += len;
            break;

        default:
            // Any other character belongs to a symbol.
            if (state == State.integer || state == State.supinteger)
                push();
            state = State.symbol;
            j += len;
            break;
        }
    }
    push();

    return tokapp.data;
}

// Drops the first token. When token types are given, also checks that
// the new first token is of one of these types.
// Throws: ParsingException if there is no token to drop or the check fails.
void advance(Types...)(ref Token[] tokens, Types types)
{
    enforce!ParsingException(!tokens.empty, "Unexpected end of input");
    tokens.popFront();

    static if (Types.length)
        check(tokens, types);
}

// Throws ParsingException if there are no tokens left.
void check(Token[] tokens)
{
    enforce!ParsingException(tokens.length, "Unexpected end of input");
}

// Throws ParsingException unless the first token is of type `tok`.
void check(Token[] tokens, Tok tok)
{
    tokens.check();
    enforce!ParsingException(tokens[0].type == tok,
            format("Found '%s' while expecting %s", tokens[0].slice, tok));
}

// Throws ParsingException unless the first token is of type `tok1` or `tok2`.
void check(Token[] tokens, Tok tok1, Tok tok2)
{
    tokens.check();
    enforce!ParsingException(tokens[0].type == tok1 || tokens[0].type == tok2,
            format("Found '%s' while expecting %s or %s", tokens[0].slice, tok1, tok2));
}