This function scans a number literal from the input according to Section 6 ("Numbers") of RFC 8259.
The function is realized with a deterministic finite state machine derived from the grammar described in RFC 8259. Starting in state "init", the input is read and used to determine the next state. Only state "done" accepts the number. State "error" is a trap state to model errors. In the table below, "anything" means any character but the ones listed before.
The state machine is realized with one label per state (prefixed with "scan_number_") and goto statements between them.
The state machine contains cycles, but every cycle can be left when EOF is read. Therefore, the function is guaranteed to terminate.
During scanning, the read bytes are stored in token_buffer. This string is then converted to a signed integer, an unsigned integer, or a floating-point number.
7567 {
7568
7570
7571
7572 token_type number_type = token_type::value_unsigned;
7573
7574
7576 {
7577 case '-':
7578 {
7580 goto scan_number_minus;
7581 }
7582
7583 case '0':
7584 {
7586 goto scan_number_zero;
7587 }
7588
7589 case '1':
7590 case '2':
7591 case '3':
7592 case '4':
7593 case '5':
7594 case '6':
7595 case '7':
7596 case '8':
7597 case '9':
7598 {
7600 goto scan_number_any1;
7601 }
7602
7603
7604 default:
7605 JSON_ASSERT(false);
7606 }
7607
7608scan_number_minus:
7609
7610 number_type = token_type::value_integer;
7611
7612 switch (get())
7613 {
7614 case '0':
7615 {
7617 goto scan_number_zero;
7618 }
7619
7620 case '1':
7621 case '2':
7622 case '3':
7623 case '4':
7624 case '5':
7625 case '6':
7626 case '7':
7627 case '8':
7628 case '9':
7629 {
7631 goto scan_number_any1;
7632 }
7633
7634 default:
7635 {
7637 return token_type::parse_error;
7638 }
7639 }
7640
7641scan_number_zero:
7642
7643
7644 switch (get())
7645 {
7646 case '.':
7647 {
7649 goto scan_number_decimal1;
7650 }
7651
7652 case 'e':
7653 case 'E':
7654 {
7656 goto scan_number_exponent;
7657 }
7658
7659 default:
7660 goto scan_number_done;
7661 }
7662
7663scan_number_any1:
7664
7665
7666 switch (get())
7667 {
7668 case '0':
7669 case '1':
7670 case '2':
7671 case '3':
7672 case '4':
7673 case '5':
7674 case '6':
7675 case '7':
7676 case '8':
7677 case '9':
7678 {
7680 goto scan_number_any1;
7681 }
7682
7683 case '.':
7684 {
7686 goto scan_number_decimal1;
7687 }
7688
7689 case 'e':
7690 case 'E':
7691 {
7693 goto scan_number_exponent;
7694 }
7695
7696 default:
7697 goto scan_number_done;
7698 }
7699
7700scan_number_decimal1:
7701
7702 number_type = token_type::value_float;
7703
7704 switch (get())
7705 {
7706 case '0':
7707 case '1':
7708 case '2':
7709 case '3':
7710 case '4':
7711 case '5':
7712 case '6':
7713 case '7':
7714 case '8':
7715 case '9':
7716 {
7718 goto scan_number_decimal2;
7719 }
7720
7721 default:
7722 {
7724 return token_type::parse_error;
7725 }
7726 }
7727
7728scan_number_decimal2:
7729
7730
7731 switch (get())
7732 {
7733 case '0':
7734 case '1':
7735 case '2':
7736 case '3':
7737 case '4':
7738 case '5':
7739 case '6':
7740 case '7':
7741 case '8':
7742 case '9':
7743 {
7745 goto scan_number_decimal2;
7746 }
7747
7748 case 'e':
7749 case 'E':
7750 {
7752 goto scan_number_exponent;
7753 }
7754
7755 default:
7756 goto scan_number_done;
7757 }
7758
7759scan_number_exponent:
7760
7761 number_type = token_type::value_float;
7762
7763 switch (get())
7764 {
7765 case '+':
7766 case '-':
7767 {
7769 goto scan_number_sign;
7770 }
7771
7772 case '0':
7773 case '1':
7774 case '2':
7775 case '3':
7776 case '4':
7777 case '5':
7778 case '6':
7779 case '7':
7780 case '8':
7781 case '9':
7782 {
7784 goto scan_number_any2;
7785 }
7786
7787 default:
7788 {
7790 "invalid number; expected '+', '-', or digit after exponent";
7791 return token_type::parse_error;
7792 }
7793 }
7794
7795scan_number_sign:
7796
7797
7798 switch (get())
7799 {
7800 case '0':
7801 case '1':
7802 case '2':
7803 case '3':
7804 case '4':
7805 case '5':
7806 case '6':
7807 case '7':
7808 case '8':
7809 case '9':
7810 {
7812 goto scan_number_any2;
7813 }
7814
7815 default:
7816 {
7817 error_message =
"invalid number; expected digit after exponent sign";
7818 return token_type::parse_error;
7819 }
7820 }
7821
7822scan_number_any2:
7823
7824
7825 switch (get())
7826 {
7827 case '0':
7828 case '1':
7829 case '2':
7830 case '3':
7831 case '4':
7832 case '5':
7833 case '6':
7834 case '7':
7835 case '8':
7836 case '9':
7837 {
7839 goto scan_number_any2;
7840 }
7841
7842 default:
7843 goto scan_number_done;
7844 }
7845
7846scan_number_done:
7847
7848
7850 char *endptr = nullptr;
7851 errno = 0;
7852
7853
7854 if (number_type == token_type::value_unsigned)
7855 {
7856 const auto x = std::strtoull(
token_buffer.data(), &endptr, 10);
7857
7859
7860 if (errno == 0)
7861 {
7862 value_unsigned = static_cast<number_unsigned_t>(x);
7863
7864 if (value_unsigned == x)
7865 {
7866 return token_type::value_unsigned;
7867 }
7868 }
7869 }
7870
7871 else if (number_type == token_type::value_integer)
7872 {
7873 const auto x = std::strtoll(
token_buffer.data(), &endptr, 10);
7874
7876
7877 if (errno == 0)
7878 {
7879 value_integer = static_cast<number_integer_t>(x);
7880
7881 if (value_integer == x)
7882 {
7883 return token_type::value_integer;
7884 }
7885 }
7886 }
7887
7888
7889
7891
7893 return token_type::value_float;
7894 }
void add(char_int_type c)
add a character to token_buffer
void reset() noexcept
reset token_buffer; current character is beginning of token
char_int_type current
the current character
const char_int_type decimal_point_char
the decimal point
const char * error_message
a description of the lexer errors that have occurred
void unget()
unget current character (read it again on next get)
string_t token_buffer
buffer for variable-length tokens (numbers, strings)