/++
This module defines functions to parse units and quantities.

Copyright: Copyright 2013-2018, Nicolas Sicard
Authors: Nicolas Sicard
License: $(LINK www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
Source: $(LINK https://github.com/biozic/quantities)
+/
module quantities.parsing;

import quantities.internal.dimensions;
import quantities.runtime;
import quantities.compiletime;
import std.conv : parse;
import std.exception : basicExceptionCtors, enforce;
import std.format : format;
import std.traits : isNumeric, isSomeString;

/++
Contains the symbols of the units and the prefixes that a parser can handle.

Units are stored as `QVariant!N` values keyed by their symbol; prefixes are
stored as numeric factors keyed by their symbol. The length of the longest
registered prefix is tracked so that the parser can try the longest prefixes
first.
+/
struct SymbolList(N)
        if (isNumeric!N)
{
    static assert(isNumeric!N, "Incompatible type: " ~ N.stringof);

    package
    {
        QVariant!N[string] units;
        N[string] prefixes;
        size_t maxPrefixLength; // Length (in code units) of the longest prefix symbol
    }

    /++
    Adds (or replaces) a unit in the list.

    Params:
        symbol = the symbol of the unit.
        unit = a QVariant, or a quantity that is stored through its `qVariant`
            property.

    Returns: a copy of this list, so that calls can be chained.
    +/
    auto addUnit(Q)(string symbol, Q unit)
            if (isQVariantOrQuantity!Q)
    {
        static if (isQVariant!Q)
            units[symbol] = unit;
        else static if (isQuantity!Q)
            units[symbol] = unit.qVariant;
        else
            static assert(false);
        return this;
    }

    /++
    Adds (or replaces) a prefix in the list.

    Note: the template parameter `N` of this function shadows the `N` of the
    struct, so `factor` may be of another numeric type than the stored factors.

    Params:
        symbol = the symbol of the prefix.
        factor = the multiplication factor associated with the prefix.

    Returns: a copy of this list, so that calls can be chained.
    +/
    auto addPrefix(N)(string symbol, N factor)
            if (isNumeric!N)
    {
        prefixes[symbol] = factor;
        if (symbol.length > maxPrefixLength)
            maxPrefixLength = symbol.length;
        return this;
    }
}

/++
A quantity parser.

Params:
    N = The numeric type of the quantities.
    numberParser = a function that takes a reference to any kind of string and
        returns the parsed number.
+/
struct Parser(N, alias numberParser = (ref s) => parse!N(s))
        if (isNumeric!N)
{
    /// A list of registered symbols for units and prefixes.
    SymbolList!N symbolList;

    /++
    Parses a QVariant from str.

    Throws: ParsingException if the unit part of str cannot be parsed.
    +/
    QVariant!N parse(S)(S str)
            if (isSomeString!S)
    {
        return parseQuantityImpl!(N, numberParser)(str, symbolList);
    }
}
///
unittest
{
    // From http://en.wikipedia.org/wiki/List_of_humorous_units_of_measurement

    import std.conv : parse;

    auto century = unit!real("T");
    alias LectureLength = typeof(century);

    auto symbolList = SymbolList!real().addUnit("Cy", century).addPrefix("µ", 1e-6L);
    alias numberParser = (ref s) => parse!real(s);
    auto parser = Parser!(real, numberParser)(symbolList);

    auto timing = 1e-6L * century;
    assert(timing == parser.parse("1 µCy"));
}

/// Exception thrown when parsing encounters an unexpected token.
class ParsingException : Exception
{
    mixin basicExceptionCtors;
}

package(quantities):

// Parses a quantity from `input`: an optional leading number (read with
// `numberParser`, which consumes it from `input`) followed by an optional unit
// expression resolved against `symbolList`.
//
// If `numberParser` throws, the numeric value defaults to 1, so that a bare
// unit expression such as "m/s" can be parsed. If nothing remains after the
// number, a dimensionless QVariant is returned.
QVariant!N parseQuantityImpl(N, alias numberParser, S)(S input, SymbolList!N symbolList)
        if (isSomeString!S)
{
    import std.range.primitives : empty;

    N value;
    try
        value = numberParser(input);
    catch (Exception)
        value = 1; // No leading number: treat the input as a unit expression

    if (input.empty)
        return QVariant!N(value, Dimensions.init);

    auto parser = QuantityParser!(N, S)(input, symbolList);
    return value * parser.parsedQuantity();
}

// A parser that can parse a text for a unit or a quantity.
//
// The constructor tokenizes the input; the parse* methods then consume the
// tokens with a recursive descent over this grammar:
//
//   compound := exponent-unit { [ mul | div ] exponent-unit }
//   exponent-unit := unit [ [ '^' ] rational ]
//   rational := integer [ '/' integer ]
//   unit := '(' compound ')' | symbol
struct QuantityParser(N, S)
        if (isNumeric!N && isSomeString!S)
{
    import std.conv : to;
    import std.exception : enforce;
    import std.format : format;
    import std.range.primitives : empty, front, popFront;

    private
    {
        S input;
        SymbolList!N symbolList;
        Token[] tokens; // Remaining tokens; consumed from the front
    }

    this(S input, SymbolList!N symbolList)
    {
        this.input = input;
        this.symbolList = symbolList;
        lex(input);
    }

    // Parses the whole token list as a compound unit.
    QVariant!N parsedQuantity()
    {
        return parseCompoundUnit();
    }

    // Parses a sequence of units separated by a multiplication sign, a division
    // sign, or nothing (implicit multiplication). When `inParens` is true,
    // parsing stops in front of a closing parenthesis, which is left for the
    // caller to consume.
    QVariant!N parseCompoundUnit(bool inParens = false)
    {
        QVariant!N ret = parseExponentUnit();

        while (!tokens.empty && !(inParens && tokens.front.type == Tok.rparen))
        {
            immutable op = tokens.front.type;
            if (op == Tok.mul || op == Tok.div)
            {
                advance();
                check(); // An operand must follow an explicit operator
            }

            QVariant!N rhs = parseExponentUnit();
            if (op == Tok.div)
                ret /= rhs;
            else
                ret *= rhs;
        }

        return ret;
    }

    // Parses a unit optionally followed by an exponent, which is written
    // "^n", "n" or with superscript digits, n being an integer or a rational.
    QVariant!N parseExponentUnit()
    {
        QVariant!N ret = parseUnit();

        // If no exponent is found
        if (tokens.empty)
            return ret;

        // The next token should be '^', an integer or a superior integer
        auto next = tokens.front;
        if (next.type != Tok.exp && next.type != Tok.integer && next.type != Tok.supinteger)
            return ret;

        // Skip the '^' if present, and expect an integer
        if (next.type == Tok.exp)
            advance(Tok.integer);

        Rational r = parseRationalOrInteger();
        return ret ^^ r;
    }

    // Parses an integer exponent, or a rational exponent written "num/den".
    //
    // A '/' is only taken as the separator of a rational when it is directly
    // followed by an integer token. Otherwise it is left in the token list as
    // a division operator, so that an input like "m²/s" is parsed as m² divided
    // by s instead of failing while looking for a denominator.
    Rational parseRationalOrInteger()
    {
        int num = parseInteger();
        int den = 1;
        if (tokens.length >= 2 && tokens[0].type == Tok.div
                && (tokens[1].type == Tok.integer || tokens[1].type == Tok.supinteger))
        {
            advance();
            den = parseInteger();
        }
        return Rational(num, den);
    }

    // Consumes an integer or superscript integer token and returns its value.
    int parseInteger()
    {
        check(Tok.integer, Tok.supinteger);
        int n = tokens.front.integer;
        if (tokens.length)
            advance();
        return n;
    }

    // Parses a parenthesized compound unit or a single (prefixed) unit symbol.
    // Returns a dimensionless 1 if there is no token left.
    QVariant!N parseUnit()
    {
        if (!tokens.length)
            return QVariant!N(1, Dimensions.init);

        if (tokens.front.type == Tok.lparen)
        {
            advance();
            auto ret = parseCompoundUnit(true);
            check(Tok.rparen);
            advance();
            return ret;
        }
        else
            return parsePrefixUnit();
    }

    // Consumes a symbol token and resolves it against the symbol list, first as
    // a standalone unit, then as a prefix followed by a unit.
    //
    // Throws: ParsingException if the symbol is a prefix alone or is unknown.
    QVariant!N parsePrefixUnit()
    {
        check(Tok.symbol);
        auto str = input[tokens.front.begin .. tokens.front.end].to!string;
        if (tokens.length)
            advance();

        // Try a standalone unit symbol (no prefix)
        auto uptr = str in symbolList.units;
        if (uptr)
            return *uptr;

        // Try with prefixes, the longest prefix first
        N* factor;
        for (size_t i = symbolList.maxPrefixLength; i > 0; i--)
        {
            if (str.length >= i)
            {
                string prefix = str[0 .. i].to!string;
                factor = prefix in symbolList.prefixes;
                if (factor)
                {
                    string unit = str[i .. $].to!string;
                    enforce!ParsingException(unit.length,
                            "Expecting a unit after the prefix " ~ prefix);
                    uptr = unit in symbolList.units;
                    if (uptr)
                        return *factor * *uptr;
                }
            }
        }

        throw new ParsingException("Unknown unit symbol: '%s'".format(str));
    }

    // The kinds of tokens produced by the lexer
    enum Tok
    {
        none,
        symbol,
        mul,
        div,
        exp,
        integer,
        supinteger,
        rparen,
        lparen
    }

    // A token: its kind, the slice input[begin .. end] it was read from and,
    // for integer and supinteger tokens, its value.
    struct Token
    {
        Tok type;
        size_t begin;
        size_t end;
        int integer = int.max;
    }

    // Splits `input` into tokens and stores them in `tokens`.
    //
    // Whitespace separates tokens and is otherwise ignored. Runs of ASCII
    // digits and signs form `integer` tokens, runs of superscript digits and
    // signs form `supinteger` tokens, and runs of any other characters form
    // `symbol` tokens.
    //
    // Throws: ParsingException if a run of digits and signs is not a valid
    // integer.
    void lex(S input) @safe
    {
        import std.array : appender;
        import std.conv : parse;
        import std.exception : enforce;
        import std.traits : Unqual;
        import std.utf : codeLength;

        // Token offsets index `input`, so they must be counted in code units
        // of S, not of UTF-8, for wstring and dstring inputs to be sliced
        // correctly.
        alias Char = Unqual!(typeof(input[0]));

        enum State
        {
            none,
            symbol,
            integer,
            supinteger
        }

        auto tokapp = appender(tokens);
        size_t i, j; // Begin and end offsets of the token being read
        State state = State.none;
        auto intapp = appender!string; // Digits and sign of the integer being read

        void pushToken(Tok type)
        {
            tokapp.put(Token(type, i, j));
            i = j;
            state = State.none;
        }

        void pushInteger(Tok type)
        {
            string text = intapp.data;
            string slice = text; // Consumed by parse; `text` is kept for messages
            int n;
            try
                n = parse!int(slice);
            catch (Exception)
                throw new ParsingException("Unexpected integer format: %s".format(text));

            // Trailing characters (e.g. "2-1") are an input error, which has to
            // be reported in all build modes and not through an assertion.
            if (!slice.empty)
                throw new ParsingException("Unexpected integer format: %s".format(text));

            tokapp.put(Token(type, i, j, n));
            i = j;
            state = State.none;
            intapp = appender!string;
        }

        // Emits the token being read, if any
        void push()
        {
            if (state == State.symbol)
                pushToken(Tok.symbol);
            else if (state == State.integer)
                pushInteger(Tok.integer);
            else if (state == State.supinteger)
                pushInteger(Tok.supinteger);
        }

        foreach (dchar cur; input)
        {
            auto len = cur.codeLength!Char;
            switch (cur)
            {
            case ' ':
            case '\t':
            case '\u00A0':
            case '\u2000': .. case '\u200A':
            case '\u202F':
            case '\u205F':
                push();
                j += len;
                i = j;
                break;

            case '(':
                push();
                j += len;
                pushToken(Tok.lparen);
                break;
            case ')':
                push();
                j += len;
                pushToken(Tok.rparen);
                break;

            case '*': // Asterisk
            case '.': // Dot
            case '\u00B7': // Middle dot (·)
            case '\u00D7': // Multiplication sign (×)
            case '\u2219': // Bullet operator (∙)
            case '\u22C5': // Dot operator (⋅)
            case '\u2022': // Bullet (•)
            case '\u2715': // Multiplication X (✕)
                push();
                j += len;
                pushToken(Tok.mul);
                break;

            case '/': // Slash
            case '\u00F7': // Division sign (÷)
            case '\u2215': // Division slash (∕)
                push();
                j += len;
                pushToken(Tok.div);
                break;

            case '^':
                push();
                j += len;
                pushToken(Tok.exp);
                break;

            case '-': // Hyphen
            case '\u2212': // Minus sign (−)
            case '\u2012': // Figure dash (‒)
            case '\u2013': // En dash (–)
                intapp.put('-');
                goto PushIntChar;
            case '+': // Plus sign
                intapp.put('+');
                goto PushIntChar;
            case '0': .. case '9':
                intapp.put(cur);
            PushIntChar:
                if (state != State.integer)
                    push();
                state = State.integer;
                j += len;
                break;

            case '⁰':
                intapp.put('0');
                goto PushSupIntChar;
            case '¹':
                intapp.put('1');
                goto PushSupIntChar;
            case '²':
                intapp.put('2');
                goto PushSupIntChar;
            case '³':
                intapp.put('3');
                goto PushSupIntChar;
            case '⁴':
                intapp.put('4');
                goto PushSupIntChar;
            case '⁵':
                intapp.put('5');
                goto PushSupIntChar;
            case '⁶':
                intapp.put('6');
                goto PushSupIntChar;
            case '⁷':
                intapp.put('7');
                goto PushSupIntChar;
            case '⁸':
                intapp.put('8');
                goto PushSupIntChar;
            case '⁹':
                intapp.put('9');
                goto PushSupIntChar;
            case '⁻':
                intapp.put('-');
                goto PushSupIntChar;
            case '⁺':
                intapp.put('+');
            PushSupIntChar:
                if (state != State.supinteger)
                    push();
                state = State.supinteger;
                j += len;
                break;

            default:
                if (state == State.integer || state == State.supinteger)
                    push();
                state = State.symbol;
                j += len;
                break;
            }
        }
        push();
        tokens = tokapp.data;
    }

    // Consumes the current token. If token types are given, also checks that
    // the new current token is of one of these types.
    void advance(Types...)(Types types)
    {
        enforce!ParsingException(!tokens.empty, "Unexpected end of input");
        tokens.popFront();

        static if (Types.length)
            check(types);
    }

    // Checks that there is at least one token left.
    void check()
    {
        enforce!ParsingException(tokens.length, "Unexpected end of input");
    }

    // Checks that the current token is of type `tok`.
    void check(Tok tok)
    {
        check();
        enforce!ParsingException(tokens[0].type == tok,
                format("Found '%s' while expecting %s", input[tokens[0].begin .. tokens[0].end],
                    tok));
    }

    // Checks that the current token is of type `tok1` or `tok2`.
    void check(Tok tok1, Tok tok2)
    {
        check();
        enforce!ParsingException(tokens[0].type == tok1 || tokens[0].type == tok2,
                format("Found '%s' while expecting %s or %s",
                    input[tokens[0].begin .. tokens[0].end], tok1, tok2));
    }
}

// Tests

@("Generic parsing")
unittest
{
    import std.exception : assertThrown;

    auto meter = unit!double("L");
    auto kilogram = unit!double("M");
    auto second = unit!double("T");
    auto one = meter / meter;
    auto unknown = one;

    auto siSL = SymbolList!double().addUnit("m", meter).addUnit("kg", kilogram)
        .addUnit("s", second).addPrefix("c", 0.01L).addPrefix("m", 0.001L);

    bool checkParse(S, Q)(S input, Q quantity)
    {
        import std.conv : parse;

        return parseQuantityImpl!(double, (ref s) => parse!double(s))(input, siSL) == quantity;
    }

    assert(checkParse("1 m ", meter));
    assert(checkParse("1m", meter));
    assert(checkParse("1 mm", 0.001 * meter));
    assert(checkParse("1 m2", meter * meter));
    assert(checkParse("1 m^-1", 1 / meter));
    assert(checkParse("1 m-1", 1 / meter));
    assert(checkParse("1 m^1/1", meter));
    assert(checkParse("1 m^-1/1", 1 / meter));
    assert(checkParse("1 m²", meter * meter));
    assert(checkParse("1 m⁺²", meter * meter));
    assert(checkParse("1 m⁻¹", 1 / meter));
    assert(checkParse("1 (m)", meter));
    assert(checkParse("1 (m^-1)", 1 / meter));
    assert(checkParse("1 ((m)^-1)^-1", meter));
    assert(checkParse("1 (s/(s/m))", meter));
    assert(checkParse("1 m*m", meter * meter));
    assert(checkParse("1 m m", meter * meter));
    assert(checkParse("1 m.m", meter * meter));
    assert(checkParse("1 m⋅m", meter * meter));
    assert(checkParse("1 m×m", meter * meter));
    assert(checkParse("1 m/m", meter / meter));
    assert(checkParse("1 m÷m", meter / meter));
    assert(checkParse("1 m.s", second * meter));
    assert(checkParse("1 m s", second * meter));
    assert(checkParse("1 m²s", meter * meter * second));
    assert(checkParse("1 m*m/m", meter));
    assert(checkParse("0.8 m⁰", 0.8 * one));
    assert(checkParse("0.8", 0.8 * one));
    assert(checkParse("0.8 ", 0.8 * one));

    // A division following an exponent is not a rational exponent
    assert(checkParse("1 m²/s", meter * meter / second));
    assert(checkParse("1 m2/s", meter * meter / second));

    // Token offsets are counted in the code units of the input string type
    assert(checkParse("1 m²s"d, meter * meter * second));

    assertThrown!ParsingException(checkParse("1 c m", unknown));
    assertThrown!ParsingException(checkParse("1 c", unknown));
    assertThrown!ParsingException(checkParse("1 Qm", unknown));
    assertThrown!ParsingException(checkParse("1 m + m", unknown));
    assertThrown!ParsingException(checkParse("1 m/", unknown));
    assertThrown!ParsingException(checkParse("1 m^", unknown));
    assertThrown!ParsingException(checkParse("1 m^m", unknown));
    assertThrown!ParsingException(checkParse("1 m ) m", unknown));
    assertThrown!ParsingException(checkParse("1 m * m) m", unknown));
    assertThrown!ParsingException(checkParse("1 m^²", unknown));
    assertThrown!ParsingException(checkParse("1-⁺⁵", unknown));

    // A malformed integer is a parsing error, not an assertion failure
    assertThrown!ParsingException(checkParse("1 m2-1", unknown));
}