17 #include <initializer_list>
27 #include <unordered_map>
30 #if !defined(__cplusplus) || __cplusplus < 201703L
31 #error "Requires complete C++17 support"
// Runs a callable when the guard goes out of scope, unless release() was
// called first. Move-constructing a guard transfers the pending call to the
// new object and disarms the source. Copying and assignment are disabled.
template <typename EF> struct scope_exit {
  explicit scope_exit(EF &&f)
      : exit_function(std::move(f)), execute_on_destruction{true} {}

  scope_exit(scope_exit &&rhs) noexcept(
      std::is_nothrow_move_constructible<EF>::value)
      : exit_function(std::move(rhs.exit_function)),
        execute_on_destruction{rhs.execute_on_destruction} {
    // The source must not fire as well once ownership has moved here.
    rhs.release();
  }

  ~scope_exit() {
    if (execute_on_destruction) { this->exit_function(); }
  }

  // Disarm the guard: the callable will not be invoked on destruction.
  void release() noexcept { this->execute_on_destruction = false; }

  scope_exit(const scope_exit &) = delete;
  void operator=(const scope_exit &) = delete;
  scope_exit &operator=(scope_exit &&) = delete;

private:
  EF exit_function;
  bool execute_on_destruction;
};
// Returns the byte length (1-4) of the UTF-8 sequence that starts at s8[0],
// judged from the lead byte only, or 0 when the buffer is empty, the lead
// byte is not a valid sequence start, or fewer than the required bytes remain
// in the `l` available bytes. Continuation bytes are not inspected.
inline size_t codepoint_length(const char *s8, size_t l) {
  if (l) {
    auto b = static_cast<uint8_t>(s8[0]);
    if ((b & 0x80) == 0) {
      return 1;
    } else if ((b & 0xE0) == 0xC0 && l >= 2) {
      return 2;
    } else if ((b & 0xF0) == 0xE0 && l >= 3) {
      return 3;
    } else if ((b & 0xF8) == 0xF0 && l >= 4) {
      return 4;
    }
  }
  return 0;
}
// Encodes `cp` as UTF-8 into `buff` (which must have room for 4 bytes) and
// returns the number of bytes written. Returns 0 and writes nothing for
// values in the surrogate range [0xD800, 0xE000) and for values >= 0x110000.
inline size_t encode_codepoint(char32_t cp, char *buff) {
  if (cp < 0x0080) {
    buff[0] = static_cast<char>(cp & 0x7F);
    return 1;
  } else if (cp < 0x0800) {
    buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
    buff[1] = static_cast<char>(0x80 | (cp & 0x3F));
    return 2;
  } else if (cp < 0xD800) {
    buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
    buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
    return 3;
  } else if (cp < 0xE000) {
    // Surrogate range: not encodable.
    return 0;
  } else if (cp < 0x10000) {
    buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
    buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
    return 3;
  } else if (cp < 0x110000) {
    buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
    buff[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
    buff[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    buff[3] = static_cast<char>(0x80 | (cp & 0x3F));
    return 4;
  }
  return 0;
}

// Convenience overload: returns the UTF-8 encoding of `cp` as a string, which
// is empty when `cp` cannot be encoded.
inline std::string encode_codepoint(char32_t cp) {
  char buff[4];
  auto l = encode_codepoint(cp, buff);
  return std::string(buff, l);
}
// Decodes the UTF-8 sequence at the start of `s8` (at most `l` bytes).
// On success stores the sequence length in `bytes`, the code point in `cp`,
// and returns true. Returns false, leaving the outputs untouched, when the
// buffer is empty, the lead byte is not a sequence start, or the buffer is
// shorter than the sequence the lead byte announces. Continuation bytes are
// masked but not validated.
inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
                             char32_t &cp) {
  if (l) {
    auto b = static_cast<uint8_t>(s8[0]);
    if ((b & 0x80) == 0) {
      bytes = 1;
      cp = b;
      return true;
    } else if ((b & 0xE0) == 0xC0) {
      if (l >= 2) {
        bytes = 2;
        cp = ((static_cast<char32_t>(s8[0] & 0x1F)) << 6) |
             (static_cast<char32_t>(s8[1] & 0x3F));
        return true;
      }
    } else if ((b & 0xF0) == 0xE0) {
      if (l >= 3) {
        bytes = 3;
        cp = ((static_cast<char32_t>(s8[0] & 0x0F)) << 12) |
             ((static_cast<char32_t>(s8[1] & 0x3F)) << 6) |
             (static_cast<char32_t>(s8[2] & 0x3F));
        return true;
      }
    } else if ((b & 0xF8) == 0xF0) {
      if (l >= 4) {
        bytes = 4;
        cp = ((static_cast<char32_t>(s8[0] & 0x07)) << 18) |
             ((static_cast<char32_t>(s8[1] & 0x3F)) << 12) |
             ((static_cast<char32_t>(s8[2] & 0x3F)) << 6) |
             (static_cast<char32_t>(s8[3] & 0x3F));
        return true;
      }
    }
  }
  return false;
}

// Decodes one code point into `out` and returns the number of bytes consumed,
// or 0 when decoding fails (in which case `out` is left unchanged).
inline size_t decode_codepoint(const char *s8, size_t l, char32_t &out) {
  size_t bytes = 0;
  if (decode_codepoint(s8, l, bytes, out)) { return bytes; }
  return 0;
}

// Decodes and returns one code point; yields 0 when decoding fails.
inline char32_t decode_codepoint(const char *s8, size_t l) {
  char32_t out = 0;
  decode_codepoint(s8, l, out);
  return out;
}
174 inline std::u32string decode(
const char *s8,
size_t l) {
179 while (i < l && (s8[i] & 0xc0) == 0x80) {
182 out += decode_codepoint(&s8[beg], (i - beg));
// Returns a copy of the first `n` bytes of `s` in which newline, carriage
// return and tab are replaced by the two-character sequences \n, \r and \t.
// All other bytes are copied through unchanged.
inline std::string escape_characters(const char *s, size_t n) {
  std::string str;
  for (size_t i = 0; i < n; i++) {
    auto c = s[i];
    switch (c) {
    case '\n': str += "\\n"; break;
    case '\r': str += "\\r"; break;
    case '\t': str += "\\t"; break;
    default: str += c; break;
    }
  }
  return str;
}

// string_view convenience overload of the above.
inline std::string escape_characters(std::string_view sv) {
  return escape_characters(sv.data(), sv.size());
}
// Returns true when `c` is a hexadecimal digit (0-9, a-f, A-F) and stores its
// numeric value (0-15) in `v`. `v` is left untouched otherwise.
inline bool is_hex(char c, int &v) {
  if ('0' <= c && c <= '9') {
    v = c - '0';
    return true;
  } else if ('a' <= c && c <= 'f') {
    v = c - 'a' + 10;
    return true;
  } else if ('A' <= c && c <= 'F') {
    v = c - 'A' + 10;
    return true;
  }
  return false;
}
// Returns true when `c` is a decimal digit and stores its value (0-9) in `v`.
// `v` is left untouched otherwise.
inline bool is_digit(char c, int &v) {
  if ('0' <= c && c <= '9') {
    v = c - '0';
    return true;
  }
  return false;
}
235 inline std::pair<int, size_t> parse_hex_number(
const char *s,
size_t n,
239 while (i < n && is_hex(s[i], val)) {
240 ret =
static_cast<int>(ret * 16 + val);
243 return std::pair(ret, i);
246 inline std::pair<int, size_t> parse_octal_number(
const char *s,
size_t n,
250 while (i < n && is_digit(s[i], val)) {
251 ret =
static_cast<int>(ret * 8 + val);
254 return std::pair(ret, i);
257 inline std::string resolve_escape_sequence(
const char *s,
size_t n) {
266 if (i == n) {
throw std::runtime_error(
"Invalid escape sequence..."); }
303 std::tie(cp, i) = parse_hex_number(s, n, i + 1);
304 r += encode_codepoint(cp);
309 std::tie(cp, i) = parse_octal_number(s, n, i);
310 r += encode_codepoint(cp);
329 Trie(
const Trie &) =
default;
331 Trie(
const std::vector<std::string> &items) {
332 for (
const auto &item : items) {
333 for (
size_t len = 1; len <= item.size(); len++) {
334 auto last = len == item.size();
335 std::string_view sv(item.data(), len);
336 auto it = dic_.find(sv);
337 if (it == dic_.end()) {
338 dic_.emplace(sv, Info{last, last});
340 it->second.match =
true;
342 it->second.done =
false;
348 size_t match(
const char *text,
size_t text_len)
const {
349 size_t match_len = 0;
352 while (!done && len <= text_len) {
353 std::string_view sv(text, len);
354 auto it = dic_.find(sv);
355 if (it == dic_.end()) {
358 if (it->second.match) { match_len = len; }
359 if (it->second.done) { done =
true; }
374 std::map<std::string, Info, std::less<>> dic_;
384 inline std::pair<size_t, size_t> line_info(
const char *start,
const char *cur) {
397 auto col = p - col_ptr + 1;
399 return std::pair(no, col);
// Folds the `l` bytes at `s` into the running hash `h` using
// h = (h * 33) ^ byte, and returns the result. Written as a loop so constant
// evaluation of long strings does not depend on recursion depth; the value is
// the same as the recursive formulation.
inline constexpr unsigned int str2tag_core(const char *s, size_t l,
                                           unsigned int h) {
  for (size_t i = 0; i < l; ++i) {
    h = (h * 33) ^ static_cast<unsigned char>(s[i]);
  }
  return h;
}

// Hash of a string view, starting from 0.
inline constexpr unsigned int str2tag(std::string_view sv) {
  return str2tag_core(sv.data(), sv.size(), 0);
}

// User-defined literal: "text"_ yields the same value as str2tag("text").
// Spelled without whitespace before the suffix; the spaced form is deprecated.
inline constexpr unsigned int operator""_(const char *s, size_t l) {
  return str2tag_core(s, l, 0);
}
427 struct SemanticValues :
protected std::vector<std::any> {
429 const char *path =
nullptr;
430 const char *ss =
nullptr;
431 const std::vector<size_t> *source_line_index =
nullptr;
434 std::string_view sv()
const {
return sv_; }
437 const std::string &name()
const {
return name_; }
439 std::vector<unsigned int> tags;
442 std::pair<size_t, size_t> line_info()
const {
443 const auto &idx = *source_line_index;
445 auto cur =
static_cast<size_t>(std::distance(ss, sv_.data()));
446 auto it = std::lower_bound(
447 idx.begin(), idx.end(), cur,
448 [](
size_t element,
size_t value) { return element < value; });
450 auto id =
static_cast<size_t>(std::distance(idx.begin(), it));
451 auto off = cur - (
id == 0 ? 0 : idx[
id - 1] + 1);
452 return std::pair(
id + 1, off + 1);
456 size_t choice_count()
const {
return choice_count_; }
459 size_t choice()
const {
return choice_; }
462 std::vector<std::string_view> tokens;
464 std::string_view token(
size_t id = 0)
const {
465 if (tokens.empty()) {
return sv_; }
466 assert(
id < tokens.size());
471 std::string token_to_string(
size_t id = 0)
const {
472 return std::string(token(
id));
475 template <
typename T> T token_to_number()
const {
477 if constexpr (std::is_floating_point<T>::value) {
479 std::istringstream ss(token_to_string());
484 std::from_chars(sv.data(), sv.data() + sv.size(), n);
490 template <
typename T>
491 std::vector<T> transform(
size_t beg = 0,
492 size_t end =
static_cast<size_t>(-1))
const {
494 end = (std::min)(end, size());
495 for (
size_t i = beg; i < end; i++) {
496 r.emplace_back(std::any_cast<T>((*
this)[i]));
501 using std::vector<std::any>::iterator;
502 using std::vector<std::any>::const_iterator;
503 using std::vector<std::any>::size;
504 using std::vector<std::any>::empty;
505 using std::vector<std::any>::assign;
506 using std::vector<std::any>::begin;
507 using std::vector<std::any>::end;
508 using std::vector<std::any>::rbegin;
509 using std::vector<std::any>::rend;
510 using std::vector<std::any>::operator[];
511 using std::vector<std::any>::at;
512 using std::vector<std::any>::resize;
513 using std::vector<std::any>::front;
514 using std::vector<std::any>::back;
515 using std::vector<std::any>::push_back;
516 using std::vector<std::any>::pop_back;
517 using std::vector<std::any>::insert;
518 using std::vector<std::any>::erase;
519 using std::vector<std::any>::clear;
520 using std::vector<std::any>::swap;
521 using std::vector<std::any>::emplace;
522 using std::vector<std::any>::emplace_back;
525 friend class Context;
526 friend class Sequence;
527 friend class PrioritizedChoice;
529 friend class PrecedenceClimbing;
531 std::string_view sv_;
532 size_t choice_count_ = 0;
// Invokes `fn` with the forwarded arguments and normalises the result to
// std::any:
//   - void result      -> an empty std::any
//   - std::any result  -> returned as is (not wrapped a second time)
//   - anything else    -> wrapped in a std::any
template <typename F, typename... Args> std::any call(F fn, Args &&... args) {
  using R = decltype(fn(std::forward<Args>(args)...));
  if constexpr (std::is_void<R>::value) {
    fn(std::forward<Args>(args)...);
    return std::any();
  } else if constexpr (std::is_same<typename std::remove_cv<R>::type,
                                    std::any>::value) {
    return fn(std::forward<Args>(args)...);
  } else {
    return std::any(fn(std::forward<Args>(args)...));
  }
}
// Compile-time arity of a callable. The primary template handles functors and
// lambdas by delegating to the type of their operator(); the specialisations
// cover plain function pointers and (const / non-const) member functions.
template <typename T>
struct argument_count : argument_count<decltype(&T::operator())> {};

template <typename R, typename... Args>
struct argument_count<R (*)(Args...)>
    : std::integral_constant<unsigned, sizeof...(Args)> {};

template <typename R, typename C, typename... Args>
struct argument_count<R (C::*)(Args...)>
    : std::integral_constant<unsigned, sizeof...(Args)> {};

template <typename R, typename C, typename... Args>
struct argument_count<R (C::*)(Args...) const>
    : std::integral_constant<unsigned, sizeof...(Args)> {};
568 Action(Action &&rhs) =
default;
569 template <
typename F> Action(F fn) : fn_(make_adaptor(fn)) {}
570 template <
typename F>
void operator=(F fn) { fn_ = make_adaptor(fn); }
571 Action &operator=(
const Action &rhs) =
default;
573 operator bool()
const {
return bool(fn_); }
575 std::any operator()(SemanticValues &vs, std::any &dt)
const {
580 using Fty = std::function<std::any(SemanticValues &vs, std::any &dt)>;
582 template <
typename F> Fty make_adaptor(F fn) {
583 if constexpr (argument_count<F>::value == 1) {
584 return [fn](
auto &vs,
auto & ) {
return call(fn, vs); };
586 return [fn](
auto &vs,
auto &dt) {
return call(fn, vs, dt); };
599 parse_error() =
default;
600 parse_error(
const char *s) : s_(s) {}
601 const char *what()
const {
return s_.empty() ? nullptr : s_.data(); }
// A parse length is a success unless it equals the failure sentinel
// static_cast<size_t>(-1).
inline bool success(size_t len) { return len != static_cast<size_t>(-1); }
// True exactly when `len` is the failure sentinel static_cast<size_t>(-1).
inline bool fail(size_t len) { return len == static_cast<size_t>(-1); }
617 using Log = std::function<void(
size_t,
size_t,
const std::string &)>;
623 const char *error_pos =
nullptr;
624 std::vector<std::pair<const char *, bool>> expected_tokens;
625 const char *message_pos =
nullptr;
630 expected_tokens.clear();
631 message_pos =
nullptr;
635 void add(
const char *token,
bool is_literal) {
636 for (
const auto &x : expected_tokens) {
637 if (x.first == token && x.second == is_literal) {
return; }
639 expected_tokens.push_back(std::make_pair(token, is_literal));
642 void output_log(
const Log &log,
const char *s,
size_t n)
const {
644 auto line = line_info(s, message_pos);
646 if (
auto unexpected_token = heuristic_error_token(s, n, message_pos);
647 !unexpected_token.empty()) {
648 msg = replace_all(message,
"%t", unexpected_token);
652 log(line.first, line.second, msg);
653 }
else if (error_pos) {
654 auto line = line_info(s, error_pos);
657 if (expected_tokens.empty()) {
658 msg =
"syntax error.";
660 msg =
"syntax error";
663 if (
auto unexpected_token = heuristic_error_token(s, n, error_pos);
664 !unexpected_token.empty()) {
665 msg +=
", unexpected '";
666 msg += unexpected_token;
670 auto first_item =
true;
672 while (i < expected_tokens.size()) {
673 auto [token, is_literal] =
674 expected_tokens[expected_tokens.size() - i - 1];
677 if (!is_literal && token[0] !=
'_') {
678 msg += (first_item ?
", expecting " :
", ");
696 log(line.first, line.second, msg);
701 std::string heuristic_error_token(
const char *s,
size_t n,
702 const char *error_pos)
const {
703 auto len = n - std::distance(s, error_pos);
706 int c = error_pos[i++];
707 if (!std::ispunct(c) && !std::isspace(c)) {
708 while (i < len && !std::ispunct(error_pos[i]) &&
709 !std::isspace(error_pos[i])) {
713 return escape_characters(error_pos, std::min<size_t>(i, 8));
715 return std::string();
718 std::string replace_all(std::string str,
const std::string &from,
719 const std::string &to)
const {
721 while ((pos = str.find(from, pos)) != std::string::npos) {
722 str.replace(pos, from.length(), to);
736 using TracerEnter = std::function<void(
const Ope &name,
const char *s,
size_t n,
737 const SemanticValues &vs,
738 const Context &c,
const std::any &dt)>;
740 using TracerLeave = std::function<void(
741 const Ope &ope,
const char *s,
size_t n,
const SemanticValues &vs,
742 const Context &c,
const std::any &dt,
size_t)>;
749 std::vector<size_t> source_line_index;
751 ErrorInfo error_info;
752 bool recovered =
false;
754 std::vector<std::shared_ptr<SemanticValues>> value_stack;
755 size_t value_stack_size = 0;
757 std::vector<Definition *> rule_stack;
758 std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
760 size_t in_token_boundary_count = 0;
762 std::shared_ptr<Ope> whitespaceOpe;
763 bool in_whitespace =
false;
765 std::shared_ptr<Ope> wordOpe;
767 std::vector<std::map<std::string_view, std::string>> capture_scope_stack;
768 size_t capture_scope_stack_size = 0;
770 const size_t def_count;
771 const bool enablePackratParsing;
772 std::vector<bool> cache_registered;
773 std::vector<bool> cache_success;
775 std::map<std::pair<size_t, size_t>, std::tuple<size_t, std::any>>
778 TracerEnter tracer_enter;
779 TracerLeave tracer_leave;
783 Context(
const char *path,
const char *s,
size_t l,
size_t def_count,
784 std::shared_ptr<Ope> whitespaceOpe, std::shared_ptr<Ope> wordOpe,
785 bool enablePackratParsing, TracerEnter tracer_enter,
786 TracerLeave tracer_leave, Log log)
787 : path(path), s(s), l(l), whitespaceOpe(whitespaceOpe), wordOpe(wordOpe),
788 def_count(def_count), enablePackratParsing(enablePackratParsing),
789 cache_registered(enablePackratParsing ? def_count * (l + 1) : 0),
790 cache_success(enablePackratParsing ? def_count * (l + 1) : 0),
791 tracer_enter(tracer_enter), tracer_leave(tracer_leave), log(log) {
793 for (
size_t pos = 0; pos < l; pos++) {
794 if (s[pos] ==
'\n') { source_line_index.push_back(pos); }
796 source_line_index.push_back(l);
798 args_stack.resize(1);
800 push_capture_scope();
803 ~Context() { assert(!value_stack_size); }
805 Context(
const Context &) =
delete;
806 Context(Context &&) =
delete;
807 Context operator=(
const Context &) =
delete;
809 template <
typename T>
810 void packrat(
const char *a_s,
size_t def_id,
size_t &len, std::any &val,
812 if (!enablePackratParsing) {
818 auto idx = def_count *
static_cast<size_t>(col) + def_id;
820 if (cache_registered[idx]) {
821 if (cache_success[idx]) {
822 auto key = std::pair(col, def_id);
823 std::tie(len, val) = cache_values[key];
826 len =
static_cast<size_t>(-1);
831 cache_registered[idx] =
true;
832 cache_success[idx] = success(len);
834 auto key = std::pair(col, def_id);
835 cache_values[key] = std::pair(len, val);
841 SemanticValues &push() {
842 assert(value_stack_size <= value_stack.size());
843 if (value_stack_size == value_stack.size()) {
844 value_stack.emplace_back(std::make_shared<SemanticValues>());
846 auto &vs = *value_stack[value_stack_size];
849 if (!vs.tags.empty()) { vs.tags.clear(); }
851 vs.sv_ = std::string_view();
852 vs.choice_count_ = 0;
854 if (!vs.tokens.empty()) { vs.tokens.clear(); }
857 auto &vs = *value_stack[value_stack_size++];
860 vs.source_line_index = &source_line_index;
864 void pop() { value_stack_size--; }
866 void push_args(std::vector<std::shared_ptr<Ope>> &&args) {
867 args_stack.emplace_back(args);
870 void pop_args() { args_stack.pop_back(); }
872 const std::vector<std::shared_ptr<Ope>> &top_args()
const {
873 return args_stack[args_stack.size() - 1];
876 void push_capture_scope() {
877 assert(capture_scope_stack_size <= capture_scope_stack.size());
878 if (capture_scope_stack_size == capture_scope_stack.size()) {
879 capture_scope_stack.emplace_back(
880 std::map<std::string_view, std::string>());
882 auto &cs = capture_scope_stack[capture_scope_stack_size];
883 if (!cs.empty()) { cs.clear(); }
885 capture_scope_stack_size++;
888 void pop_capture_scope() { capture_scope_stack_size--; }
890 void shift_capture_values() {
891 assert(capture_scope_stack.size() >= 2);
892 auto curr = &capture_scope_stack[capture_scope_stack_size - 1];
893 auto prev = curr - 1;
894 for (
const auto &kv : *curr) {
895 (*prev)[kv.first] = kv.second;
899 void set_error_pos(
const char *a_s,
const char *literal =
nullptr);
902 void trace_enter(
const Ope &ope,
const char *a_s,
size_t n,
903 SemanticValues &vs, std::any &dt)
const;
905 void trace_leave(
const Ope &ope,
const char *a_s,
size_t n,
906 SemanticValues &vs, std::any &dt,
size_t len)
const;
907 bool is_traceable(
const Ope &ope)
const;
909 mutable size_t next_trace_id = 0;
910 mutable std::list<size_t> trace_ids;
921 size_t parse(
const char *s,
size_t n, SemanticValues &vs, Context &c,
923 virtual size_t parse_core(
const char *s,
size_t n, SemanticValues &vs,
924 Context &c, std::any &dt)
const = 0;
925 virtual void accept(Visitor &v) = 0;
928 class Sequence :
public Ope {
930 template <
typename... Args>
931 Sequence(
const Args &... args)
932 : opes_{
static_cast<std::shared_ptr<Ope>
>(args)...} {}
933 Sequence(
const std::vector<std::shared_ptr<Ope>> &opes) : opes_(opes) {}
934 Sequence(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
936 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
937 std::any &dt)
const override {
938 auto &chldsv = c.push();
939 auto pop_se = scope_exit([&]() { c.pop(); });
941 for (
const auto &ope : opes_) {
942 const auto &rule = *ope;
943 auto len = rule.parse(s + i, n - i, chldsv, c, dt);
944 if (fail(len)) {
return len; }
947 if (!chldsv.empty()) {
948 for (
size_t j = 0; j < chldsv.size(); j++) {
949 vs.emplace_back(std::move(chldsv[j]));
952 if (!chldsv.tags.empty()) {
953 for (
size_t j = 0; j < chldsv.tags.size(); j++) {
954 vs.tags.emplace_back(std::move(chldsv.tags[j]));
958 if (!chldsv.tokens.empty()) {
959 for (
size_t j = 0; j < chldsv.tokens.size(); j++) {
960 vs.tokens.emplace_back(std::move(chldsv.tokens[j]));
966 void accept(Visitor &v)
override;
968 std::vector<std::shared_ptr<Ope>> opes_;
971 class PrioritizedChoice :
public Ope {
973 template <
typename... Args>
974 PrioritizedChoice(
const Args &... args)
975 : opes_{
static_cast<std::shared_ptr<Ope>
>(args)...} {}
976 PrioritizedChoice(
const std::vector<std::shared_ptr<Ope>> &opes)
978 PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
980 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
981 std::any &dt)
const override {
983 for (
const auto &ope : opes_) {
984 auto &chldsv = c.push();
985 c.push_capture_scope();
986 auto se = scope_exit([&]() {
988 c.pop_capture_scope();
991 auto len = ope->parse(s, n, chldsv, c, dt);
993 if (!chldsv.empty()) {
994 for (
size_t i = 0; i < chldsv.size(); i++) {
995 vs.emplace_back(std::move(chldsv[i]));
998 if (!chldsv.tags.empty()) {
999 for (
size_t i = 0; i < chldsv.tags.size(); i++) {
1000 vs.tags.emplace_back(std::move(chldsv.tags[i]));
1003 vs.sv_ = chldsv.sv_;
1004 vs.choice_count_ = opes_.size();
1006 if (!chldsv.tokens.empty()) {
1007 for (
size_t i = 0; i < chldsv.tokens.size(); i++) {
1008 vs.tokens.emplace_back(std::move(chldsv.tokens[i]));
1012 c.shift_capture_values();
1018 return static_cast<size_t>(-1);
1021 void accept(Visitor &v)
override;
1023 size_t size()
const {
return opes_.size(); }
1025 std::vector<std::shared_ptr<Ope>> opes_;
1028 class Repetition :
public Ope {
1030 Repetition(
const std::shared_ptr<Ope> &ope,
size_t min,
size_t max)
1031 : ope_(ope), min_(min), max_(max) {}
1033 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1034 std::any &dt)
const override {
1037 while (count < min_) {
1038 c.push_capture_scope();
1039 auto se = scope_exit([&]() { c.pop_capture_scope(); });
1040 const auto &rule = *ope_;
1041 auto len = rule.parse(s + i, n - i, vs, c, dt);
1043 c.shift_capture_values();
1051 while (n - i > 0 && count < max_) {
1052 c.push_capture_scope();
1053 auto se = scope_exit([&]() { c.pop_capture_scope(); });
1054 auto save_sv_size = vs.size();
1055 auto save_tok_size = vs.tokens.size();
1056 const auto &rule = *ope_;
1057 auto len = rule.parse(s + i, n - i, vs, c, dt);
1059 c.shift_capture_values();
1061 if (vs.size() != save_sv_size) {
1062 vs.erase(vs.begin() +
static_cast<std::ptrdiff_t
>(save_sv_size));
1063 vs.tags.erase(vs.tags.begin() +
1064 static_cast<std::ptrdiff_t
>(save_sv_size));
1066 if (vs.tokens.size() != save_tok_size) {
1067 vs.tokens.erase(vs.tokens.begin() +
1068 static_cast<std::ptrdiff_t
>(save_tok_size));
1078 void accept(Visitor &v)
override;
1080 bool is_zom()
const {
1081 return min_ == 0 && max_ == std::numeric_limits<size_t>::max();
1084 static std::shared_ptr<Repetition> zom(
const std::shared_ptr<Ope> &ope) {
1085 return std::make_shared<Repetition>(ope, 0,
1086 std::numeric_limits<size_t>::max());
1089 static std::shared_ptr<Repetition> oom(
const std::shared_ptr<Ope> &ope) {
1090 return std::make_shared<Repetition>(ope, 1,
1091 std::numeric_limits<size_t>::max());
1094 static std::shared_ptr<Repetition> opt(
const std::shared_ptr<Ope> &ope) {
1095 return std::make_shared<Repetition>(ope, 0, 1);
1098 std::shared_ptr<Ope> ope_;
1103 class AndPredicate :
public Ope {
1105 AndPredicate(
const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1107 size_t parse_core(
const char *s,
size_t n, SemanticValues & ,
1108 Context &c, std::any &dt)
const override {
1109 auto &chldsv = c.push();
1110 c.push_capture_scope();
1111 auto se = scope_exit([&]() {
1113 c.pop_capture_scope();
1115 const auto &rule = *ope_;
1116 auto len = rule.parse(s, n, chldsv, c, dt);
1124 void accept(Visitor &v)
override;
1126 std::shared_ptr<Ope> ope_;
1129 class NotPredicate :
public Ope {
1131 NotPredicate(
const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1133 size_t parse_core(
const char *s,
size_t n, SemanticValues & ,
1134 Context &c, std::any &dt)
const override {
1135 auto &chldsv = c.push();
1136 c.push_capture_scope();
1137 auto se = scope_exit([&]() {
1139 c.pop_capture_scope();
1141 auto len = ope_->parse(s, n, chldsv, c, dt);
1144 return static_cast<size_t>(-1);
1150 void accept(Visitor &v)
override;
1152 std::shared_ptr<Ope> ope_;
1155 class Dictionary :
public Ope,
public std::enable_shared_from_this<Dictionary> {
1157 Dictionary(
const std::vector<std::string> &v) : trie_(v) {}
1159 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1160 std::any &dt)
const override;
1162 void accept(Visitor &v)
override;
1167 class LiteralString :
public Ope,
1168 public std::enable_shared_from_this<LiteralString> {
1170 LiteralString(std::string &&s,
bool ignore_case)
1171 : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1173 LiteralString(
const std::string &s,
bool ignore_case)
1174 : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1176 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1177 std::any &dt)
const override;
1179 void accept(Visitor &v)
override;
1183 mutable std::once_flag init_is_word_;
1184 mutable bool is_word_;
1187 class CharacterClass :
public Ope,
1188 public std::enable_shared_from_this<CharacterClass> {
1190 CharacterClass(
const std::string &s,
bool negated) : negated_(negated) {
1191 auto chars = decode(s.data(), s.length());
1193 while (i < chars.size()) {
1194 if (i + 2 < chars.size() && chars[i + 1] ==
'-') {
1195 auto cp1 = chars[i];
1196 auto cp2 = chars[i + 2];
1197 ranges_.emplace_back(std::pair(cp1, cp2));
1201 ranges_.emplace_back(std::pair(cp, cp));
1205 assert(!ranges_.empty());
1208 CharacterClass(
const std::vector<std::pair<char32_t, char32_t>> &ranges,
1210 : ranges_(ranges), negated_(negated) {
1211 assert(!ranges_.empty());
1214 size_t parse_core(
const char *s,
size_t n, SemanticValues & ,
1215 Context &c, std::any & )
const override {
1218 return static_cast<size_t>(-1);
1222 auto len = decode_codepoint(s, n, cp);
1224 for (
const auto &range : ranges_) {
1225 if (range.first <= cp && cp <= range.second) {
1228 return static_cast<size_t>(-1);
1239 return static_cast<size_t>(-1);
1243 void accept(Visitor &v)
override;
1245 std::vector<std::pair<char32_t, char32_t>> ranges_;
1249 class Character :
public Ope,
public std::enable_shared_from_this<Character> {
1251 Character(
char ch) : ch_(ch) {}
1253 size_t parse_core(
const char *s,
size_t n, SemanticValues & ,
1254 Context &c, std::any & )
const override {
1255 if (n < 1 || s[0] != ch_) {
1257 return static_cast<size_t>(-1);
1262 void accept(Visitor &v)
override;
1267 class AnyCharacter :
public Ope,
1268 public std::enable_shared_from_this<AnyCharacter> {
1270 size_t parse_core(
const char *s,
size_t n, SemanticValues & ,
1271 Context &c, std::any & )
const override {
1272 auto len = codepoint_length(s, n);
1275 return static_cast<size_t>(-1);
1280 void accept(Visitor &v)
override;
1283 class CaptureScope :
public Ope {
1285 CaptureScope(
const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1287 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1288 std::any &dt)
const override {
1289 c.push_capture_scope();
1290 auto se = scope_exit([&]() { c.pop_capture_scope(); });
1291 const auto &rule = *ope_;
1292 auto len = rule.parse(s, n, vs, c, dt);
1296 void accept(Visitor &v)
override;
1298 std::shared_ptr<Ope> ope_;
1301 class Capture :
public Ope {
1303 using MatchAction = std::function<void(
const char *s,
size_t n, Context &c)>;
1305 Capture(
const std::shared_ptr<Ope> &ope, MatchAction ma)
1306 : ope_(ope), match_action_(ma) {}
1308 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1309 std::any &dt)
const override {
1310 const auto &rule = *ope_;
1311 auto len = rule.parse(s, n, vs, c, dt);
1312 if (success(len) && match_action_) { match_action_(s, len, c); }
1316 void accept(Visitor &v)
override;
1318 std::shared_ptr<Ope> ope_;
1319 MatchAction match_action_;
1322 class TokenBoundary :
public Ope {
1324 TokenBoundary(
const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1326 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1327 std::any &dt)
const override;
1329 void accept(Visitor &v)
override;
1331 std::shared_ptr<Ope> ope_;
1334 class Ignore :
public Ope {
1336 Ignore(
const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1338 size_t parse_core(
const char *s,
size_t n, SemanticValues & ,
1339 Context &c, std::any &dt)
const override {
1340 const auto &rule = *ope_;
1341 auto &chldsv = c.push();
1342 auto se = scope_exit([&]() { c.pop(); });
1343 return rule.parse(s, n, chldsv, c, dt);
1346 void accept(Visitor &v)
override;
1348 std::shared_ptr<Ope> ope_;
1351 using Parser = std::function<size_t(
const char *s,
size_t n, SemanticValues &vs,
1354 class User :
public Ope {
1356 User(Parser fn) : fn_(fn) {}
1357 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs,
1358 Context & , std::any &dt)
const override {
1360 return fn_(s, n, vs, dt);
1362 void accept(Visitor &v)
override;
1363 std::function<size_t(
const char *s,
size_t n, SemanticValues &vs,
1368 class WeakHolder :
public Ope {
1370 WeakHolder(
const std::shared_ptr<Ope> &ope) : weak_(ope) {}
1372 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1373 std::any &dt)
const override {
1374 auto ope = weak_.lock();
1376 const auto &rule = *ope;
1377 return rule.parse(s, n, vs, c, dt);
1380 void accept(Visitor &v)
override;
1382 std::weak_ptr<Ope> weak_;
1385 class Holder :
public Ope {
1387 Holder(Definition *outer) : outer_(outer) {}
1389 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1390 std::any &dt)
const override;
1392 void accept(Visitor &v)
override;
1394 std::any reduce(SemanticValues &vs, std::any &dt)
const;
1396 const char *trace_name()
const;
1398 std::shared_ptr<Ope> ope_;
1400 mutable std::string trace_name_;
1402 friend class Definition;
1405 using Grammar = std::unordered_map<std::string, Definition>;
1407 class Reference :
public Ope,
public std::enable_shared_from_this<Reference> {
1409 Reference(
const Grammar &grammar,
const std::string &name,
const char *s,
1410 bool is_macro,
const std::vector<std::shared_ptr<Ope>> &args)
1411 : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args),
1412 rule_(nullptr), iarg_(0) {}
1414 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1415 std::any &dt)
const override;
1417 void accept(Visitor &v)
override;
1419 std::shared_ptr<Ope> get_core_operator()
const;
1421 const Grammar &grammar_;
1422 const std::string name_;
1425 const bool is_macro_;
1426 const std::vector<std::shared_ptr<Ope>> args_;
1432 class Whitespace :
public Ope {
1434 Whitespace(
const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1436 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1437 std::any &dt)
const override {
1438 if (c.in_whitespace) {
return 0; }
1439 c.in_whitespace =
true;
1440 auto se = scope_exit([&]() { c.in_whitespace =
false; });
1441 const auto &rule = *ope_;
1442 return rule.parse(s, n, vs, c, dt);
1445 void accept(Visitor &v)
override;
1447 std::shared_ptr<Ope> ope_;
1450 class BackReference :
public Ope {
1452 BackReference(std::string &&name) : name_(name) {}
1454 BackReference(
const std::string &name) : name_(name) {}
1456 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1457 std::any &dt)
const override;
1459 void accept(Visitor &v)
override;
1464 class PrecedenceClimbing :
public Ope {
1466 using BinOpeInfo = std::map<std::string_view, std::pair<size_t, char>>;
1468 PrecedenceClimbing(
const std::shared_ptr<Ope> &atom,
1469 const std::shared_ptr<Ope> &binop,
const BinOpeInfo &info,
1470 const Definition &rule)
1471 : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
1473 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1474 std::any &dt)
const override {
1475 return parse_expression(s, n, vs, c, dt, 0);
1478 void accept(Visitor &v)
override;
1480 std::shared_ptr<Ope> atom_;
1481 std::shared_ptr<Ope> binop_;
1483 const Definition &rule_;
1486 size_t parse_expression(
const char *s,
size_t n, SemanticValues &vs,
1487 Context &c, std::any &dt,
size_t min_prec)
const;
1489 Definition &get_reference_for_binop(Context &c)
const;
1492 class Recovery :
public Ope {
1494 Recovery(
const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1496 size_t parse_core(
const char *s,
size_t n, SemanticValues &vs, Context &c,
1497 std::any &dt)
const override;
1499 void accept(Visitor &v)
override;
1501 std::shared_ptr<Ope> ope_;
1507 template <
typename... Args> std::shared_ptr<Ope> seq(Args &&... args) {
1508 return std::make_shared<Sequence>(
static_cast<std::shared_ptr<Ope>
>(args)...);
1511 template <
typename... Args> std::shared_ptr<Ope> cho(Args &&... args) {
1512 return std::make_shared<PrioritizedChoice>(
1513 static_cast<std::shared_ptr<Ope>
>(args)...);
1516 inline std::shared_ptr<Ope> zom(
const std::shared_ptr<Ope> &ope) {
1517 return Repetition::zom(ope);
1520 inline std::shared_ptr<Ope> oom(
const std::shared_ptr<Ope> &ope) {
1521 return Repetition::oom(ope);
1524 inline std::shared_ptr<Ope> opt(
const std::shared_ptr<Ope> &ope) {
1525 return Repetition::opt(ope);
1528 inline std::shared_ptr<Ope> rep(
const std::shared_ptr<Ope> &ope,
size_t min,
1530 return std::make_shared<Repetition>(ope, min, max);
1533 inline std::shared_ptr<Ope> apd(
const std::shared_ptr<Ope> &ope) {
1534 return std::make_shared<AndPredicate>(ope);
1537 inline std::shared_ptr<Ope> npd(
const std::shared_ptr<Ope> &ope) {
1538 return std::make_shared<NotPredicate>(ope);
1541 inline std::shared_ptr<Ope> dic(
const std::vector<std::string> &v) {
1542 return std::make_shared<Dictionary>(v);
1545 inline std::shared_ptr<Ope> lit(std::string &&s) {
1546 return std::make_shared<LiteralString>(s,
false);
1549 inline std::shared_ptr<Ope> liti(std::string &&s) {
1550 return std::make_shared<LiteralString>(s,
true);
1553 inline std::shared_ptr<Ope> cls(
const std::string &s) {
1554 return std::make_shared<CharacterClass>(s,
false);
1557 inline std::shared_ptr<Ope>
1558 cls(
const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1559 return std::make_shared<CharacterClass>(ranges,
false);
1562 inline std::shared_ptr<Ope> ncls(
const std::string &s) {
1563 return std::make_shared<CharacterClass>(s,
true);
1566 inline std::shared_ptr<Ope>
1567 ncls(
const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1568 return std::make_shared<CharacterClass>(ranges,
true);
1571 inline std::shared_ptr<Ope> chr(
char dt) {
1572 return std::make_shared<Character>(dt);
1575 inline std::shared_ptr<Ope> dot() {
return std::make_shared<AnyCharacter>(); }
1577 inline std::shared_ptr<Ope> csc(
const std::shared_ptr<Ope> &ope) {
1578 return std::make_shared<CaptureScope>(ope);
1581 inline std::shared_ptr<Ope> cap(
const std::shared_ptr<Ope> &ope,
1582 Capture::MatchAction ma) {
1583 return std::make_shared<Capture>(ope, ma);
1586 inline std::shared_ptr<Ope> tok(
const std::shared_ptr<Ope> &ope) {
1587 return std::make_shared<TokenBoundary>(ope);
1590 inline std::shared_ptr<Ope> ign(
const std::shared_ptr<Ope> &ope) {
1591 return std::make_shared<Ignore>(ope);
// Builds a User operator from the callable `fn`.
// NOTE(review): the tail of the std::function parameter list and the
// parameter name are not visible in this chunk; confirm the full signature
// against the User class before changing it.
1594 inline std::shared_ptr<Ope>
1595 usr(std::function<
size_t(
const char *s,
size_t n, SemanticValues &vs,
1598 return std::make_shared<User>(fn);
1601 inline std::shared_ptr<Ope> ref(
const Grammar &grammar,
const std::string &name,
1602 const char *s,
bool is_macro,
1603 const std::vector<std::shared_ptr<Ope>> &args) {
1604 return std::make_shared<Reference>(grammar, name, s, is_macro, args);
1607 inline std::shared_ptr<Ope> wsp(
const std::shared_ptr<Ope> &ope) {
1608 return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
1611 inline std::shared_ptr<Ope> bkr(std::string &&name) {
1612 return std::make_shared<BackReference>(name);
1615 inline std::shared_ptr<Ope> pre(
const std::shared_ptr<Ope> &atom,
1616 const std::shared_ptr<Ope> &binop,
1617 const PrecedenceClimbing::BinOpeInfo &info,
1618 const Definition &rule) {
1619 return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
1622 inline std::shared_ptr<Ope> rec(
const std::shared_ptr<Ope> &ope) {
1623 return std::make_shared<Recovery>(ope);
1629 struct Ope::Visitor {
1630 virtual ~Visitor() {}
1631 virtual void visit(Sequence &) {}
1632 virtual void visit(PrioritizedChoice &) {}
1633 virtual void visit(Repetition &) {}
1634 virtual void visit(AndPredicate &) {}
1635 virtual void visit(NotPredicate &) {}
1636 virtual void visit(Dictionary &) {}
1637 virtual void visit(LiteralString &) {}
1638 virtual void visit(CharacterClass &) {}
1639 virtual void visit(Character &) {}
1640 virtual void visit(AnyCharacter &) {}
1641 virtual void visit(CaptureScope &) {}
1642 virtual void visit(Capture &) {}
1643 virtual void visit(TokenBoundary &) {}
1644 virtual void visit(Ignore &) {}
1645 virtual void visit(User &) {}
1646 virtual void visit(WeakHolder &) {}
1647 virtual void visit(Holder &) {}
1648 virtual void visit(Reference &) {}
1649 virtual void visit(Whitespace &) {}
1650 virtual void visit(BackReference &) {}
1651 virtual void visit(PrecedenceClimbing &) {}
1652 virtual void visit(Recovery &) {}
// Visitor that records whether the visited operator is a Reference.
// Only the Reference overload is overridden; every other node type falls
// through to the no-op base implementation and leaves the flag false.
1655 struct IsReference :
public Ope::Visitor {
1656 void visit(Reference &)
override { is_reference_ =
true; }
// Convenience wrapper: returns the flag of a local visitor `vis`.
1658 static bool check(Ope &ope) {
1661 return vis.is_reference_;
// Result flag; set to true only by visit(Reference &).
1665 bool is_reference_ =
false;
// Visitor that maps an operator to a display name. Each overload stores a
// string literal naming the operator type in name_, except Holder, which
// stores the result of its own trace_name().
1668 struct TraceOpeName :
public Ope::Visitor {
1669 void visit(Sequence &)
override { name_ =
"Sequence"; }
1670 void visit(PrioritizedChoice &)
override { name_ =
"PrioritizedChoice"; }
1671 void visit(Repetition &)
override { name_ =
"Repetition"; }
1672 void visit(AndPredicate &)
override { name_ =
"AndPredicate"; }
1673 void visit(NotPredicate &)
override { name_ =
"NotPredicate"; }
1674 void visit(Dictionary &)
override { name_ =
"Dictionary"; }
1675 void visit(LiteralString &)
override { name_ =
"LiteralString"; }
1676 void visit(CharacterClass &)
override { name_ =
"CharacterClass"; }
1677 void visit(Character &)
override { name_ =
"Character"; }
1678 void visit(AnyCharacter &)
override { name_ =
"AnyCharacter"; }
1679 void visit(CaptureScope &)
override { name_ =
"CaptureScope"; }
1680 void visit(Capture &)
override { name_ =
"Capture"; }
1681 void visit(TokenBoundary &)
override { name_ =
"TokenBoundary"; }
1682 void visit(Ignore &)
override { name_ =
"Ignore"; }
1683 void visit(User &)
override { name_ =
"User"; }
1684 void visit(WeakHolder &)
override { name_ =
"WeakHolder"; }
// Holder supplies its own name instead of a fixed literal.
1685 void visit(Holder &ope)
override { name_ = ope.trace_name(); }
1686 void visit(Reference &)
override { name_ =
"Reference"; }
1687 void visit(Whitespace &)
override { name_ =
"Whitespace"; }
1688 void visit(BackReference &)
override { name_ =
"BackReference"; }
1689 void visit(PrecedenceClimbing &)
override { name_ =
"PrecedenceClimbing"; }
1690 void visit(Recovery &)
override { name_ =
"Recovery"; }
// Static helper returning the name as a std::string (body not visible in
// this chunk).
1692 static std::string get(Ope &ope) {
// Name selected by the last visit() call; null until a visit happens.
1699 const char *name_ =
nullptr;
// Visitor that walks an operator tree and fills `ids`, a map from a void*
// key to a size_t id. The wrapper overloads below only forward the traversal
// to the child operator; the Holder, Reference and PrecedenceClimbing
// overloads are defined out of line.
// NOTE(review): visit(WeakHolder &) dereferences weak_.lock() without
// checking for an expired pointer.
1702 struct AssignIDToDefinition :
public Ope::Visitor {
1703 void visit(Sequence &ope)
override {
1704 for (
auto op : ope.opes_) {
1708 void visit(PrioritizedChoice &ope)
override {
1709 for (
auto op : ope.opes_) {
1713 void visit(Repetition &ope)
override { ope.ope_->accept(*
this); }
1714 void visit(AndPredicate &ope)
override { ope.ope_->accept(*
this); }
1715 void visit(NotPredicate &ope)
override { ope.ope_->accept(*
this); }
1716 void visit(CaptureScope &ope)
override { ope.ope_->accept(*
this); }
1717 void visit(Capture &ope)
override { ope.ope_->accept(*
this); }
1718 void visit(TokenBoundary &ope)
override { ope.ope_->accept(*
this); }
1719 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
1720 void visit(WeakHolder &ope)
override { ope.weak_.lock()->accept(*
this); }
1721 void visit(Holder &ope)
override;
1722 void visit(Reference &ope)
override;
1723 void visit(Whitespace &ope)
override { ope.ope_->accept(*
this); }
1724 void visit(PrecedenceClimbing &ope)
override;
1725 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
// Collected ids; swapped into Definition::definition_ids_ by
// Definition::initialize_definition_ids().
1727 std::unordered_map<void *, size_t> ids;
// Visitor that reports whether an operator is a literal token. Dictionary
// and LiteralString set result_ to true directly; for a PrioritizedChoice
// the visit returns early as soon as one alternative fails
// IsLiteralToken::check.
1730 struct IsLiteralToken :
public Ope::Visitor {
1731 void visit(PrioritizedChoice &ope)
override {
1732 for (
auto op : ope.opes_) {
// Bail out (leaving result_ untouched) on the first non-literal alternative.
1733 if (!IsLiteralToken::check(*op)) {
return; }
1738 void visit(Dictionary &)
override { result_ =
true; }
1739 void visit(LiteralString &)
override { result_ =
true; }
// Static entry point (body not visible in this chunk).
1741 static bool check(Ope &ope) {
1748 bool result_ =
false;
// Visitor used by is_token() to classify an operator tree. It records two
// facts while traversing: whether a TokenBoundary was seen
// (has_token_boundary_) and whether a WeakHolder was seen (has_rule_).
// Traversal does not descend into TokenBoundary or WeakHolder, and for
// PrecedenceClimbing only the atom_ operand is visited.
1751 struct TokenChecker :
public Ope::Visitor {
1752 void visit(Sequence &ope)
override {
1753 for (
auto op : ope.opes_) {
1757 void visit(PrioritizedChoice &ope)
override {
1758 for (
auto op : ope.opes_) {
1762 void visit(Repetition &ope)
override { ope.ope_->accept(*
this); }
1763 void visit(CaptureScope &ope)
override { ope.ope_->accept(*
this); }
1764 void visit(Capture &ope)
override { ope.ope_->accept(*
this); }
1765 void visit(TokenBoundary &)
override { has_token_boundary_ =
true; }
1766 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
1767 void visit(WeakHolder &)
override { has_rule_ =
true; }
1768 void visit(Holder &ope)
override { ope.ope_->accept(*
this); }
1769 void visit(Reference &ope)
override;
1770 void visit(Whitespace &ope)
override { ope.ope_->accept(*
this); }
1771 void visit(PrecedenceClimbing &ope)
override { ope.atom_->accept(*
this); }
1772 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
// An operator is a token if it is a literal token, or if the traversal saw
// a token boundary or saw no rule at all.
1774 static bool is_token(Ope &ope) {
1775 if (IsLiteralToken::check(ope)) {
return true; }
1779 return vis.has_token_boundary_ || !vis.has_rule_;
1783 bool has_token_boundary_ =
false;
1784 bool has_rule_ =
false;
// Visitor that looks for a literal string inside an operator. It descends
// through TokenBoundary, Ignore and Recovery wrappers; on a LiteralString it
// stores a pointer to that operator's lit_ text. The Reference overload is
// defined out of line.
// The stored pointer refers to the LiteralString's own buffer (c_str()), so
// it is only valid while that operator is alive and unmodified.
1787 struct FindLiteralToken :
public Ope::Visitor {
1788 void visit(LiteralString &ope)
override { token_ = ope.lit_.c_str(); }
1789 void visit(TokenBoundary &ope)
override { ope.ope_->accept(*
this); }
1790 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
1791 void visit(Reference &ope)
override;
1792 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
// Static entry point (remainder of the body not visible in this chunk).
1794 static const char *token(Ope &ope) {
1795 FindLiteralToken vis;
// Null when no literal was found.
1801 const char *token_ =
nullptr;
// Visitor used to detect left recursion for the rule called `name`.
// `done_` is the traversal state: terminal matchers (Dictionary,
// CharacterClass, Character, AnyCharacter, User, BackReference) set it to
// true, a LiteralString sets it only when the literal is non-empty, and a
// Repetition sets it only when at least one iteration is required
// (min_ > 0). `error_s` is non-null once a problem has been recorded; the
// Reference overload is defined out of line.
// NOTE(review): visit(WeakHolder &) dereferences weak_.lock() without
// checking for an expired pointer.
1804 struct DetectLeftRecursion :
public Ope::Visitor {
1805 DetectLeftRecursion(
const std::string &name) : name_(name) {}
1807 void visit(Sequence &ope)
override {
1808 for (
auto op : ope.opes_) {
1812 }
else if (error_s) {
1818 void visit(PrioritizedChoice &ope)
override {
1819 for (
auto op : ope.opes_) {
1827 void visit(Repetition &ope)
override {
1828 ope.ope_->accept(*
this);
// Overrides whatever the child set: done only if min_ is greater than zero.
1829 done_ = ope.min_ > 0;
1831 void visit(AndPredicate &ope)
override {
1832 ope.ope_->accept(*
this);
1835 void visit(NotPredicate &ope)
override {
1836 ope.ope_->accept(*
this);
1839 void visit(Dictionary &)
override { done_ =
true; }
1840 void visit(LiteralString &ope)
override { done_ = !ope.lit_.empty(); }
1841 void visit(CharacterClass &)
override { done_ =
true; }
1842 void visit(Character &)
override { done_ =
true; }
1843 void visit(AnyCharacter &)
override { done_ =
true; }
1844 void visit(CaptureScope &ope)
override { ope.ope_->accept(*
this); }
1845 void visit(Capture &ope)
override { ope.ope_->accept(*
this); }
1846 void visit(TokenBoundary &ope)
override { ope.ope_->accept(*
this); }
1847 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
1848 void visit(User &)
override { done_ =
true; }
1849 void visit(WeakHolder &ope)
override { ope.weak_.lock()->accept(*
this); }
1850 void visit(Holder &ope)
override { ope.ope_->accept(*
this); }
1851 void visit(Reference &ope)
override;
1852 void visit(Whitespace &ope)
override { ope.ope_->accept(*
this); }
1853 void visit(BackReference &)
override { done_ =
true; }
1854 void visit(PrecedenceClimbing &ope)
override { ope.atom_->accept(*
this); }
1855 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
// Null until a problem has been recorded.
1857 const char *error_s =
nullptr;
1861 std::set<std::string> refs_;
// Visitor that determines whether an operator contains an "empty" element;
// the result is published through `is_empty`, `error_s` and `error_name`.
// And/Not predicates and empty literal strings call set_error(). The
// reference list `refs_` is owned by the caller and shared by reference;
// error_s / error_name are taken from its last entry.
// NOTE(review): visit(WeakHolder &) dereferences weak_.lock() without
// checking for an expired pointer.
1865 struct HasEmptyElement :
public Ope::Visitor {
1866 HasEmptyElement(std::list<std::pair<const char *, std::string>> &refs)
// Sequence: returns early as soon as one element is not empty; otherwise the
// saved state is written back to is_empty / error_s / error_name at the end.
1869 void visit(Sequence &ope)
override {
1870 auto save_is_empty =
false;
1871 const char *save_error_s =
nullptr;
1872 std::string save_error_name;
1873 for (
auto op : ope.opes_) {
1875 if (!is_empty) {
return; }
1876 save_is_empty = is_empty;
1877 save_error_s = error_s;
1878 save_error_name = error_name;
1882 is_empty = save_is_empty;
1883 error_s = save_error_s;
1884 error_name = save_error_name;
// PrioritizedChoice: stops at the first alternative that is empty.
1886 void visit(PrioritizedChoice &ope)
override {
1887 for (
auto op : ope.opes_) {
1889 if (is_empty) {
return; }
1892 void visit(Repetition &ope)
override {
1893 if (ope.min_ == 0) {
1896 ope.ope_->accept(*
this);
1899 void visit(AndPredicate &)
override { set_error(); }
1900 void visit(NotPredicate &)
override { set_error(); }
1901 void visit(LiteralString &ope)
override {
1902 if (ope.lit_.empty()) { set_error(); }
1904 void visit(CaptureScope &ope)
override { ope.ope_->accept(*
this); }
1905 void visit(Capture &ope)
override { ope.ope_->accept(*
this); }
1906 void visit(TokenBoundary &ope)
override { ope.ope_->accept(*
this); }
1907 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
1908 void visit(WeakHolder &ope)
override { ope.weak_.lock()->accept(*
this); }
1909 void visit(Holder &ope)
override { ope.ope_->accept(*
this); }
1910 void visit(Reference &ope)
override;
1911 void visit(Whitespace &ope)
override { ope.ope_->accept(*
this); }
1912 void visit(PrecedenceClimbing &ope)
override { ope.atom_->accept(*
this); }
1913 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
1915 bool is_empty =
false;
1916 const char *error_s =
nullptr;
1917 std::string error_name;
// Error location and name come from the most recent entry of refs_.
1922 error_s = refs_.back().first;
1923 error_name = refs_.back().second;
1925 std::list<std::pair<const char *, std::string>> &refs_;
// Visitor that searches an operator tree for repetitions that could loop
// forever. For a Repetition whose max_ equals
// std::numeric_limits<size_t>::max(), it runs HasEmptyElement on the
// repeated operator and copies that visitor's error_s / error_name.
// Sequence and PrioritizedChoice stop iterating once has_error is set.
// The constructor seeds refs_ with the (s, name) pair it was given.
// NOTE(review): visit(WeakHolder &) dereferences weak_.lock() without
// checking for an expired pointer.
1928 struct DetectInfiniteLoop :
public Ope::Visitor {
1929 DetectInfiniteLoop(
const char *s,
const std::string &name) {
1930 refs_.emplace_back(s, name);
1933 void visit(Sequence &ope)
override {
1934 for (
auto op : ope.opes_) {
1936 if (has_error) {
return; }
1939 void visit(PrioritizedChoice &ope)
override {
1940 for (
auto op : ope.opes_) {
1942 if (has_error) {
return; }
1945 void visit(Repetition &ope)
override {
// Only repetitions without an upper bound are checked.
1946 if (ope.max_ == std::numeric_limits<size_t>::max()) {
1947 HasEmptyElement vis(refs_);
1948 ope.ope_->accept(vis);
1951 error_s = vis.error_s;
1952 error_name = vis.error_name;
1955 ope.ope_->accept(*
this);
1958 void visit(AndPredicate &ope)
override { ope.ope_->accept(*
this); }
1959 void visit(NotPredicate &ope)
override { ope.ope_->accept(*
this); }
1960 void visit(CaptureScope &ope)
override { ope.ope_->accept(*
this); }
1961 void visit(Capture &ope)
override { ope.ope_->accept(*
this); }
1962 void visit(TokenBoundary &ope)
override { ope.ope_->accept(*
this); }
1963 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
1964 void visit(WeakHolder &ope)
override { ope.weak_.lock()->accept(*
this); }
1965 void visit(Holder &ope)
override { ope.ope_->accept(*
this); }
1966 void visit(Reference &ope)
override;
1967 void visit(Whitespace &ope)
override { ope.ope_->accept(*
this); }
1968 void visit(PrecedenceClimbing &ope)
override { ope.atom_->accept(*
this); }
1969 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
1971 bool has_error =
false;
1972 const char *error_s =
nullptr;
1973 std::string error_name;
// List of (position, name) pairs; handed by reference to HasEmptyElement.
1976 std::list<std::pair<const char *, std::string>> refs_;
1979 struct ReferenceChecker :
public Ope::Visitor {
1980 ReferenceChecker(
const Grammar &grammar,
1981 const std::vector<std::string> ¶ms)
1982 : grammar_(grammar), params_(params) {}
// Traversal overloads: wrapper operators forward to their child, and
// PrecedenceClimbing forwards to atom_ only. The Reference overload is
// defined out of line.
// NOTE(review): visit(WeakHolder &) dereferences weak_.lock() without
// checking for an expired pointer.
1984 void visit(Sequence &ope)
override {
1985 for (
auto op : ope.opes_) {
1989 void visit(PrioritizedChoice &ope)
override {
1990 for (
auto op : ope.opes_) {
1994 void visit(Repetition &ope)
override { ope.ope_->accept(*
this); }
1995 void visit(AndPredicate &ope)
override { ope.ope_->accept(*
this); }
1996 void visit(NotPredicate &ope)
override { ope.ope_->accept(*
this); }
1997 void visit(CaptureScope &ope)
override { ope.ope_->accept(*
this); }
1998 void visit(Capture &ope)
override { ope.ope_->accept(*
this); }
1999 void visit(TokenBoundary &ope)
override { ope.ope_->accept(*
this); }
2000 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
2001 void visit(WeakHolder &ope)
override { ope.weak_.lock()->accept(*
this); }
2002 void visit(Holder &ope)
override { ope.ope_->accept(*
this); }
2003 void visit(Reference &ope)
override;
2004 void visit(Whitespace &ope)
override { ope.ope_->accept(*
this); }
2005 void visit(PrecedenceClimbing &ope)
override { ope.atom_->accept(*
this); }
2006 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
// Results, both keyed by std::string: a source position and a message.
// presumably the key is the offending rule name; confirm in the Reference
// overload.
2008 std::unordered_map<std::string, const char *> error_s;
2009 std::unordered_map<std::string, std::string> error_message;
2012 const Grammar &grammar_;
2013 const std::vector<std::string> ¶ms_;
2016 struct LinkReferences :
public Ope::Visitor {
2017 LinkReferences(Grammar &grammar,
const std::vector<std::string> ¶ms)
2018 : grammar_(grammar), params_(params) {}
// Traversal overloads: wrapper operators forward to their child, and
// PrecedenceClimbing forwards to atom_ only. The Reference overload is
// defined out of line.
// NOTE(review): visit(WeakHolder &) dereferences weak_.lock() without
// checking for an expired pointer.
2020 void visit(Sequence &ope)
override {
2021 for (
auto op : ope.opes_) {
2025 void visit(PrioritizedChoice &ope)
override {
2026 for (
auto op : ope.opes_) {
2030 void visit(Repetition &ope)
override { ope.ope_->accept(*
this); }
2031 void visit(AndPredicate &ope)
override { ope.ope_->accept(*
this); }
2032 void visit(NotPredicate &ope)
override { ope.ope_->accept(*
this); }
2033 void visit(CaptureScope &ope)
override { ope.ope_->accept(*
this); }
2034 void visit(Capture &ope)
override { ope.ope_->accept(*
this); }
2035 void visit(TokenBoundary &ope)
override { ope.ope_->accept(*
this); }
2036 void visit(Ignore &ope)
override { ope.ope_->accept(*
this); }
2037 void visit(WeakHolder &ope)
override { ope.weak_.lock()->accept(*
this); }
2038 void visit(Holder &ope)
override { ope.ope_->accept(*
this); }
2039 void visit(Reference &ope)
override;
2040 void visit(Whitespace &ope)
override { ope.ope_->accept(*
this); }
2041 void visit(PrecedenceClimbing &ope)
override { ope.atom_->accept(*
this); }
2042 void visit(Recovery &ope)
override { ope.ope_->accept(*
this); }
2046 const std::vector<std::string> ¶ms_;
2049 struct FindReference :
public Ope::Visitor {
2050 FindReference(
const std::vector<std::shared_ptr<Ope>> &args,
2051 const std::vector<std::string> ¶ms)
2052 : args_(args), params_(params) {}
// Rebuilding traversal: each overload leaves its result in `found_ope`.
// Leaf operators (Dictionary, LiteralString, CharacterClass, Character,
// AnyCharacter) are reused via shared_from_this(). Wrapper operators first
// visit their child and then wrap the child's result in a freshly
// constructed operator of the same kind. Sequence and PrioritizedChoice
// collect the per-child results and build a new composite.
// WeakHolder and Holder just forward to the wrapped operator. The Reference
// overload is defined out of line.
// NOTE(review): visit(WeakHolder &) dereferences weak_.lock() without
// checking for an expired pointer.
2054 void visit(Sequence &ope)
override {
2055 std::vector<std::shared_ptr<Ope>> opes;
2056 for (
auto o : ope.opes_) {
2058 opes.push_back(found_ope);
2060 found_ope = std::make_shared<Sequence>(opes);
2062 void visit(PrioritizedChoice &ope)
override {
2063 std::vector<std::shared_ptr<Ope>> opes;
2064 for (
auto o : ope.opes_) {
2066 opes.push_back(found_ope);
2068 found_ope = std::make_shared<PrioritizedChoice>(opes);
2070 void visit(Repetition &ope)
override {
2071 ope.ope_->accept(*
this);
2072 found_ope = rep(found_ope, ope.min_, ope.max_);
2074 void visit(AndPredicate &ope)
override {
2075 ope.ope_->accept(*
this);
2076 found_ope = apd(found_ope);
2078 void visit(NotPredicate &ope)
override {
2079 ope.ope_->accept(*
this);
2080 found_ope = npd(found_ope);
2082 void visit(Dictionary &ope)
override { found_ope = ope.shared_from_this(); }
2083 void visit(LiteralString &ope)
override {
2084 found_ope = ope.shared_from_this();
2086 void visit(CharacterClass &ope)
override {
2087 found_ope = ope.shared_from_this();
2089 void visit(Character &ope)
override { found_ope = ope.shared_from_this(); }
2090 void visit(AnyCharacter &ope)
override { found_ope = ope.shared_from_this(); }
2091 void visit(CaptureScope &ope)
override {
2092 ope.ope_->accept(*
this);
2093 found_ope = csc(found_ope);
2095 void visit(Capture &ope)
override {
2096 ope.ope_->accept(*
this);
2097 found_ope = cap(found_ope, ope.match_action_);
2099 void visit(TokenBoundary &ope)
override {
2100 ope.ope_->accept(*
this);
2101 found_ope = tok(found_ope);
2103 void visit(Ignore &ope)
override {
2104 ope.ope_->accept(*
this);
2105 found_ope = ign(found_ope);
2107 void visit(WeakHolder &ope)
override { ope.weak_.lock()->accept(*
this); }
2108 void visit(Holder &ope)
override { ope.ope_->accept(*
this); }
2109 void visit(Reference &ope)
override;
2110 void visit(Whitespace &ope)
override {
2111 ope.ope_->accept(*
this);
2112 found_ope = wsp(found_ope);
// NOTE(review): only atom_ is visited and the result is wrapped with csc(),
// not rebuilt as a PrecedenceClimbing; confirm this is intentional.
2114 void visit(PrecedenceClimbing &ope)
override {
2115 ope.atom_->accept(*
this);
2116 found_ope = csc(found_ope);
2118 void visit(Recovery &ope)
override {
2119 ope.ope_->accept(*
this);
2120 found_ope = rec(found_ope);
// Result of the most recent visit.
2123 std::shared_ptr<Ope> found_ope;
2126 const std::vector<std::shared_ptr<Ope>> &args_;
2127 const std::vector<std::string> ¶ms_;
// Visitor that records whether the visited operator is a PrioritizedChoice.
// Only that overload is overridden; every other node type leaves result_
// false.
2130 struct IsPrioritizedChoice :
public Ope::Visitor {
2131 void visit(PrioritizedChoice &)
override { result_ =
true; }
// Static entry point (remainder of the body not visible in this chunk).
2133 static bool check(Ope &ope) {
2134 IsPrioritizedChoice vis;
2140 bool result_ =
false;
// Reserved definition names.
// Declared `inline constexpr` instead of `static const char *`: the pointers
// themselves are now immutable, and every translation unit that includes
// this header shares one definition instead of getting its own mutable copy
// (which also triggers unused-variable warnings where a name is not used).
inline constexpr const char *WHITESPACE_DEFINITION_NAME = "%whitespace";
inline constexpr const char *WORD_DEFINITION_NAME = "%word";
inline constexpr const char *RECOVER_DEFINITION_NAME = "%recover";
2159 ErrorInfo error_info;
2162 Definition() : holder_(std::make_shared<Holder>(this)) {}
2164 Definition(
const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) {
2165 holder_->outer_ =
this;
// Constructs a definition from an operator: creates the backing Holder,
// passing it a pointer to this definition (the rest of the body is not
// visible in this chunk).
2168 Definition(
const std::shared_ptr<Ope> &ope)
2169 : holder_(std::make_shared<Holder>(this)) {
2173 operator std::shared_ptr<Ope>() {
2174 return std::make_shared<WeakHolder>(holder_);
// Assigns the operator that this definition's Holder wraps (used as a
// "define rule" operator; the return statement is not visible in this
// chunk).
2177 Definition &operator<=(
const std::shared_ptr<Ope> &ope) {
2178 holder_->ope_ = ope;
// Parses the n bytes starting at s. `path` and `log` are optional and
// default to nullptr. Forwards to parse_core and returns its Result.
2182 Result parse(
const char *s,
size_t n,
const char *path =
nullptr,
2183 Log log =
nullptr)
const {
2186 return parse_core(s, n, vs, dt, path, log);
// Overload without an explicit length; forwards to parse(s, n, path, log).
// presumably n is the length of the NUL-terminated string s; the line that
// computes it is not visible here, so confirm.
2189 Result parse(
const char *s,
const char *path =
nullptr,
2190 Log log =
nullptr)
const {
2192 return parse(s, n, path, log);
// Parses the n bytes starting at s, passing the caller-supplied `dt` through
// to parse_core. `path` and `log` default to nullptr.
2195 Result parse(
const char *s,
size_t n, std::any &dt,
2196 const char *path =
nullptr, Log log =
nullptr)
const {
2198 return parse_core(s, n, vs, dt, path, log);
// Overload without an explicit length that takes caller-supplied `dt`;
// forwards to parse(s, n, dt, path, log).
// presumably n is the length of the NUL-terminated string s; the line that
// computes it is not visible here, so confirm.
2201 Result parse(
const char *s, std::any &dt,
const char *path =
nullptr,
2202 Log log =
nullptr)
const {
2204 return parse(s, n, dt, path, log);
// Parses the n bytes at s and, on success, extracts the first semantic value
// into `val`. `val` is only assigned when the parse succeeded (r.ret), at
// least one semantic value exists, and the first one holds a value.
// std::any_cast<T> throws std::bad_any_cast if the stored type is not T.
2207 template <
typename T>
2208 Result parse_and_get_value(
const char *s,
size_t n, T &val,
2209 const char *path =
nullptr,
2210 Log log =
nullptr)
const {
2213 auto r = parse_core(s, n, vs, dt, path, log);
2214 if (r.ret && !vs.empty() && vs.front().has_value()) {
2215 val = std::any_cast<T>(vs[0]);
// Overload without an explicit length; forwards to
// parse_and_get_value(s, n, val, path, log).
// presumably n is the length of the NUL-terminated string s; the line that
// computes it is not visible here, so confirm.
2220 template <
typename T>
2221 Result parse_and_get_value(
const char *s, T &val,
const char *path =
nullptr,
2222 Log log =
nullptr)
const {
2224 return parse_and_get_value(s, n, val, path, log);
// Same as the (s, n, val) overload, but passes the caller-supplied `dt`
// through to parse_core. `val` is only assigned when the parse succeeded
// (r.ret), at least one semantic value exists, and the first one holds a
// value. std::any_cast<T> throws std::bad_any_cast if the stored type is
// not T.
2227 template <
typename T>
2228 Result parse_and_get_value(
const char *s,
size_t n, std::any &dt, T &val,
2229 const char *path =
nullptr,
2230 Log log =
nullptr)
const {
2232 auto r = parse_core(s, n, vs, dt, path, log);
2233 if (r.ret && !vs.empty() && vs.front().has_value()) {
2234 val = std::any_cast<T>(vs[0]);
// Overload without an explicit length that takes caller-supplied `dt`;
// forwards to parse_and_get_value(s, n, dt, val, path, log).
// presumably n is the length of the NUL-terminated string s; the line that
// computes it is not visible here, so confirm.
2239 template <
typename T>
2240 Result parse_and_get_value(
const char *s, std::any &dt, T &val,
2241 const char *path =
nullptr,
2242 Log log =
nullptr)
const {
2244 return parse_and_get_value(s, n, dt, val, path, log);
2247 void operator=(Action a) { action = a; }
// Overloaded comma operator taking an arbitrary callable/value `fn` and
// returning a Definition reference (body not visible in this chunk).
2249 template <
typename T> Definition &operator,(T fn) {
// Marks this definition so that its semantic value is ignored: Holder's
// parse_core only appends the value when ignoreSemanticValue is false.
2254 Definition &operator~() {
2255 ignoreSemanticValue =
true;
2259 void accept(Ope::Visitor &v) { holder_->accept(v); }
2261 std::shared_ptr<Ope> get_core_operator()
const {
return holder_->ope_; }
// Reports whether this definition is a token. The answer is computed by
// TokenChecker::is_token on the core operator inside std::call_once, so the
// computation runs at most once per Definition and is cached in is_token_.
2263 bool is_token()
const {
2264 std::call_once(is_token_init_, [
this]() {
2265 is_token_ = TokenChecker::is_token(*get_core_operator());
2271 const char *s_ =
nullptr;
2275 std::function<void(
const char *s,
size_t n, std::any &dt)> enter;
2276 std::function<void(
const char *s,
size_t n,
size_t matchlen, std::any &value,
2279 bool ignoreSemanticValue =
false;
2280 std::shared_ptr<Ope> whitespaceOpe;
2281 std::shared_ptr<Ope> wordOpe;
2282 bool enablePackratParsing =
false;
2283 bool is_macro =
false;
2284 std::vector<std::string> params;
2285 TracerEnter tracer_enter;
2286 TracerLeave tracer_leave;
2287 bool disable_action =
false;
2289 std::string error_message;
2290 bool no_ast_opt =
false;
2293 friend class Reference;
2294 friend class ParserGenerator;
2296 Definition &operator=(
const Definition &rhs);
2297 Definition &operator=(Definition &&rhs);
// Builds definition_ids_ inside std::call_once, so the work runs at most
// once per Definition. An AssignIDToDefinition visitor is run over the
// Holder and, when set, over whitespaceOpe and wordOpe; its `ids` map is
// then swapped into definition_ids_.
2299 void initialize_definition_ids()
const {
2300 std::call_once(definition_ids_init_, [&]() {
2301 AssignIDToDefinition vis;
2302 holder_->accept(vis);
2303 if (whitespaceOpe) { whitespaceOpe->accept(vis); }
2304 if (wordOpe) { wordOpe->accept(vis); }
2305 definition_ids_.swap(vis.ids);
2309 Result parse_core(
const char *s,
size_t n, SemanticValues &vs, std::any &dt,
2310 const char *path, Log log)
const {
2311 initialize_definition_ids();
2313 std::shared_ptr<Ope> ope = holder_;
2314 if (whitespaceOpe) { ope = std::make_shared<Sequence>(whitespaceOpe, ope); }
2316 Context cxt(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe,
2317 enablePackratParsing, tracer_enter, tracer_leave, log);
2319 auto len = ope->parse(s, n, vs, cxt, dt);
2320 return Result{success(len), cxt.recovered, len, cxt.error_info};
2323 std::shared_ptr<Holder> holder_;
2324 mutable std::once_flag is_token_init_;
2325 mutable bool is_token_ =
false;
2326 mutable std::once_flag assign_id_to_definition_init_;
2327 mutable std::once_flag definition_ids_init_;
2328 mutable std::unordered_map<void *, size_t> definition_ids_;
// Matches the literal `lit` against the input at `s` (n bytes available).
// Shared by LiteralString::parse_core and BackReference::parse_core.
// Steps visible here:
//   1. Compare byte by byte (through std::tolower when ignore_case is set);
//      on a mismatch or when the input is too short, record the error
//      position and return size_t(-1).
//   2. Once per `init_is_word` flag, parse the literal itself with
//      c.wordOpe and cache whether it matched in `is_word`.
//   3. Apply a NotPredicate over c.wordOpe to the input that follows the
//      literal and propagate its failure.
//   4. Outside a token boundary, consume trailing whitespace with
//      c.whitespaceOpe and propagate its failure.
// NOTE(review): std::tolower is called with a plain `char`; for negative
// values (bytes >= 0x80 where char is signed) that is undefined behaviour.
// Consider casting to unsigned char first.
2335 inline size_t parse_literal(
const char *s,
size_t n, SemanticValues &vs,
2336 Context &c, std::any &dt,
const std::string &lit,
2337 std::once_flag &init_is_word,
bool &is_word,
2340 for (; i < lit.size(); i++) {
2341 if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i]))
2342 : (s[i] != lit[i]))) {
2343 c.set_error_pos(s, lit.c_str());
2344 return static_cast<size_t>(-1);
// Scratch values and context so the word checks below do not touch the real
// semantic values or parse state.
2349 SemanticValues dummy_vs;
2350 Context dummy_c(
nullptr, c.s, c.l, 0,
nullptr,
nullptr,
false,
nullptr,
2354 std::call_once(init_is_word, [&]() {
2357 c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt);
2358 is_word = success(len);
2363 NotPredicate ope(c.wordOpe);
2364 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2365 if (fail(len)) {
return len; }
2370 if (!c.in_token_boundary_count) {
2371 if (c.whitespaceOpe) {
2372 auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
2373 if (fail(len)) {
return len; }
// Records a parse failure at position `a_s`. Only positions at or beyond the
// currently recorded error position are considered; a strictly later
// position replaces the old one and clears the expected-token list.
// What gets added as "expected":
//   - the given `literal`, when that branch is taken;
//   - otherwise, if the rule stack is not empty, the literal token found in
//     the innermost rule's core operator (when non-empty), or else that
//     rule's name.
2381 inline void Context::set_error_pos(
const char *a_s,
const char *literal) {
2383 if (error_info.error_pos <= a_s) {
2384 if (error_info.error_pos < a_s) {
2385 error_info.error_pos = a_s;
2386 error_info.expected_tokens.clear();
2389 error_info.add(literal,
true);
2390 }
else if (!rule_stack.empty()) {
2391 auto rule = rule_stack.back();
2392 auto ope = rule->get_core_operator();
2393 if (
auto token = FindLiteralToken::token(*ope);
2394 token && token[0] !=
'\0') {
2395 error_info.add(token,
true);
2397 error_info.add(rule->name.c_str(),
false);
2404 inline void Context::trace_enter(
const Ope &ope,
const char *a_s,
size_t n,
2405 SemanticValues &vs, std::any &dt)
const {
2406 trace_ids.push_back(next_trace_id++);
2407 tracer_enter(ope, a_s, n, vs, *
this, dt);
// Called after an operator was parsed when tracing is active: invokes the
// tracer_leave callback with the parse result `len`, then pops the trace id
// that the matching trace_enter pushed.
2410 inline void Context::trace_leave(
const Ope &ope,
const char *a_s,
size_t n,
2411 SemanticValues &vs, std::any &dt,
2413 tracer_leave(ope, a_s, n, vs, *
this, dt, len);
2414 trace_ids.pop_back();
// Decides whether `ope` should be reported to the tracers. When both tracer
// callbacks are set, every operator except a Reference is traceable.
// The const_cast is needed because IsReference::check takes a non-const
// Ope &.
2417 inline bool Context::is_traceable(
const Ope &ope)
const {
2418 if (tracer_enter && tracer_leave) {
2419 return !IsReference::check(
const_cast<Ope &
>(ope));
// Public parse entry point for every operator. When the context says this
// operator is traceable, the call to parse_core is bracketed by trace_enter
// / trace_leave; otherwise parse_core is called directly.
2424 inline size_t Ope::parse(
const char *s,
size_t n, SemanticValues &vs,
2425 Context &c, std::any &dt)
const {
2426 if (c.is_traceable(*
this)) {
2427 c.trace_enter(*
this, s, n, vs, dt);
2428 auto len = parse_core(s, n, vs, c, dt);
2429 c.trace_leave(*
this, s, n, vs, dt, len);
2432 return parse_core(s, n, vs, c, dt);
// Matches the input against the dictionary's trie. Returns the match length
// when it is greater than zero; otherwise returns size_t(-1) to signal
// failure. The semantic-value and user-data parameters are unused.
2435 inline size_t Dictionary::parse_core(
const char *s,
size_t n,
2436 SemanticValues & , Context &c,
2437 std::any & )
const {
2438 auto len = trie_.match(s, n);
2439 if (len > 0) {
return len; }
2441 return static_cast<size_t>(-1);
// Delegates to parse_literal with this operator's literal text and its
// cached word-check state (init_is_word_ / is_word_).
2444 inline size_t LiteralString::parse_core(
const char *s,
size_t n,
2445 SemanticValues &vs, Context &c,
2446 std::any &dt)
const {
2447 return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_,
// Parses the wrapped operator as a token. While the child is parsed,
// c.in_token_boundary_count is incremented; a scope_exit guard decrements it
// again. The matched text is then appended to vs.tokens, and when this is
// not nested inside another token boundary, trailing whitespace is consumed
// with c.whitespaceOpe (a failure there is propagated).
2451 inline size_t TokenBoundary::parse_core(
const char *s,
size_t n,
2452 SemanticValues &vs, Context &c,
2453 std::any &dt)
const {
2456 c.in_token_boundary_count++;
2457 auto se = scope_exit([&]() { c.in_token_boundary_count--; });
2458 len = ope_->parse(s, n, vs, c, dt);
2462 vs.tokens.emplace_back(std::string_view(s, len));
2464 if (!c.in_token_boundary_count) {
2465 if (c.whitespaceOpe) {
2466 auto l = c.whitespaceOpe->parse(s + len, n - len, vs, c, dt);
2467 if (fail(l)) {
return l; }
// Parses the rule owned by the enclosing Definition (`outer_`).
// Visible behaviour:
//   - Throws std::logic_error when the definition has no operator yet.
//   - Macro rules are parsed directly, with outer_ pushed on the rule stack
//     for the duration of the child parse.
//   - Other rules go through c.packrat(): inside the callback the optional
//     enter / leave hooks run (leave via a scope_exit guard), a child
//     SemanticValues frame is pushed, the child is parsed, and on success
//     the frame is filled in and reduced to a value via reduce().
//   - A parse_error thrown during reduction is turned into a failure
//     (len = size_t(-1)); its message is stored when it lies beyond the
//     currently recorded message position.
//   - Unless the definition ignores its semantic value, the value and a tag
//     derived from the rule name are appended to `vs`.
2475 inline size_t Holder::parse_core(
const char *s,
size_t n, SemanticValues &vs,
2476 Context &c, std::any &dt)
const {
2478 throw std::logic_error(
"Uninitialized definition ope was used...");
2482 if (outer_->is_macro) {
2483 c.rule_stack.push_back(outer_);
2484 auto len = ope_->parse(s, n, vs, c, dt);
2485 c.rule_stack.pop_back();
2492 c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
2493 if (outer_->enter) { outer_->enter(s, n, dt); }
2495 auto se2 = scope_exit([&]() {
2497 if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
2500 auto &chldsv = c.push();
2502 c.rule_stack.push_back(outer_);
2503 len = ope_->parse(s, n, chldsv, c, dt);
2504 c.rule_stack.pop_back();
2508 chldsv.sv_ = std::string_view(s, len);
2509 chldsv.name_ = outer_->name;
// choice_count_ is reset to 0 when the rule's operator is not a
// PrioritizedChoice.
2511 if (!IsPrioritizedChoice::check(*ope_)) {
2512 chldsv.choice_count_ = 0;
2517 a_val = reduce(chldsv, dt);
2518 }
catch (
const parse_error &e) {
2520 if (c.error_info.message_pos < s) {
2521 c.error_info.message_pos = s;
2522 c.error_info.message = e.what();
2525 len =
static_cast<size_t>(-1);
2531 if (!outer_->ignoreSemanticValue) {
2532 vs.emplace_back(std::move(val));
2533 vs.tags.emplace_back(str2tag(outer_->name));
// Produces the semantic value for a matched rule. If the definition has an
// action and actions are not disabled, the action's result is returned.
// Otherwise the outcome depends on whether `vs` is empty; when it is not,
// the first child value is moved out of `vs` and returned.
2540 inline std::any Holder::reduce(SemanticValues &vs, std::any &dt)
const {
2541 if (outer_->action && !outer_->disable_action) {
2542 return outer_->action(vs, dt);
2543 }
else if (vs.empty()) {
2546 return std::move(vs.front());
2550 inline const char *Holder::trace_name()
const {
2551 if (trace_name_.empty()) { trace_name_ =
"[" + outer_->name +
"]"; }
2552 return trace_name_.data();
// Parses whatever this reference points at.
// Visible cases:
//   - Reference to a macro rule: each argument operator is rewritten with a
//     FindReference visitor built from the current top-of-stack arguments
//     and the innermost rule's parameter names; the rewritten arguments are
//     pushed for the duration of the call (popped by a scope_exit guard) and
//     the referenced rule's core operator is parsed.
//   - Reference to an ordinary rule: an empty argument list is pushed and
//     the rule's core operator is parsed.
//   - Otherwise the reference names a macro parameter: the operator at index
//     iarg_ of the current argument list is parsed.
2555 inline size_t Reference::parse_core(
const char *s,
size_t n, SemanticValues &vs,
2556 Context &c, std::any &dt)
const {
2559 if (rule_->is_macro) {
2561 FindReference vis(c.top_args(), c.rule_stack.back()->params);
2564 std::vector<std::shared_ptr<Ope>> args;
2565 for (
auto arg : args_) {
2567 args.emplace_back(std::move(vis.found_ope));
2570 c.push_args(std::move(args));
2571 auto se = scope_exit([&]() { c.pop_args(); });
2572 auto ope = get_core_operator();
2573 return ope->parse(s, n, vs, c, dt);
2576 c.push_args(std::vector<std::shared_ptr<Ope>>());
2577 auto se = scope_exit([&]() { c.pop_args(); });
2578 auto ope = get_core_operator();
2579 return ope->parse(s, n, vs, c, dt);
2583 const auto &args = c.top_args();
2584 return args[iarg_]->parse(s, n, vs, c, dt);
2588 inline std::shared_ptr<Ope> Reference::get_core_operator()
const {
2589 return rule_->holder_;
// Matches the text previously captured under name_. Capture scopes are
// searched from the innermost (highest index) outwards; the first scope that
// contains name_ supplies the literal, which is then matched with
// parse_literal (case-sensitively, with a fresh word-check state).
// Throws std::runtime_error when no scope contains the name.
// NOTE(review): the scope count is narrowed to int for the countdown loop;
// that is fine for realistic depths but would misbehave beyond INT_MAX.
2592 inline size_t BackReference::parse_core(
const char *s,
size_t n,
2593 SemanticValues &vs, Context &c,
2594 std::any &dt)
const {
2595 auto size =
static_cast<int>(c.capture_scope_stack_size);
2596 for (
auto i = size - 1; i >= 0; i--) {
2597 auto index =
static_cast<size_t>(i);
2598 const auto &cs = c.capture_scope_stack[index];
2599 if (cs.find(name_) != cs.end()) {
2600 const auto &lit = cs.at(name_);
2601 std::once_flag init_is_word;
2602 auto is_word =
false;
2603 return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word,
false);
2606 throw std::runtime_error(
"Invalid back reference...");
// Resolves the Definition that the binary-operator operand refers to.
// Inside a macro rule, binop_ is a Reference to a macro parameter: its iarg_
// selects the actual argument from the current argument list, and that
// argument's referenced rule is returned. Otherwise binop_'s own referenced
// rule is returned.
// The dynamic_casts to Reference & throw std::bad_cast if the operand is not
// a Reference.
2610 PrecedenceClimbing::get_reference_for_binop(Context &c)
const {
2611 if (rule_.is_macro) {
2613 const auto &args = c.top_args();
2614 auto iarg =
dynamic_cast<Reference &
>(*binop_).iarg_;
2615 auto arg = args[iarg];
2616 return *
dynamic_cast<Reference &
>(*arg).rule_;
2619 return *
dynamic_cast<Reference &
>(*binop_).rule_;
// Parses an operand followed by operator/operand pairs, honouring operator
// precedence and associativity.
// Visible behaviour:
//   - The atom is parsed first; its failure is returned unchanged.
//   - The binary-operator rule's action is temporarily replaced by a wrapper
//     lambda; a scope_exit guard restores the original action on exit.
//   - For each operator parsed with binop_, its entry in info_ supplies the
//     precedence level and associativity. Scanning stops when the operator
//     fails to parse, is not listed in info_, or has a level below min_prec.
//   - The right-hand side is parsed recursively with min_prec set to `level`
//     itself, or `level + 1` for operators whose associativity is 'L'.
//   - `vs` (values and tokens) is snapshotted before each operator so it can
//     be restored from the snapshot.
//   - Finally the rule's action is applied to the collected values and the
//     resulting value is appended to `vs`.
2622 inline size_t PrecedenceClimbing::parse_expression(
const char *s,
size_t n,
2624 Context &c, std::any &dt,
2625 size_t min_prec)
const {
2626 auto len = atom_->parse(s, n, vs, c, dt);
2627 if (fail(len)) {
return len; }
2630 auto &rule = get_reference_for_binop(c);
2631 auto action = std::move(rule.action);
2633 rule.action = [&](SemanticValues &vs2, std::any &dt2) {
2636 return action(vs2, dt2);
2637 }
else if (!vs2.empty()) {
2642 auto action_se = scope_exit([&]() { rule.action = std::move(action); });
2646 std::vector<std::any> save_values(vs.begin(), vs.end());
2647 auto save_tokens = vs.tokens;
2649 auto chv = c.push();
2650 auto chl = binop_->parse(s + i, n - i, chv, c, dt);
2653 if (fail(chl)) {
break; }
2655 auto it = info_.find(tok);
2656 if (it == info_.end()) {
break; }
2658 auto level = std::get<0>(it->second);
2659 auto assoc = std::get<1>(it->second);
2661 if (level < min_prec) {
break; }
2663 vs.emplace_back(std::move(chv[0]));
2666 auto next_min_prec = level;
2667 if (assoc ==
'L') { next_min_prec = level + 1; }
2670 chl = parse_expression(s + i, n - i, chv, c, dt, next_min_prec);
2674 vs.assign(save_values.begin(), save_values.end());
2675 vs.tokens = save_tokens;
2679 vs.emplace_back(std::move(chv[0]));
2684 vs.sv_ = std::string_view(s, i);
2685 val = rule_.action(vs, dt);
2686 }
else if (!vs.empty()) {
2690 vs.emplace_back(std::move(val));
// Parses the wrapped operator, which must be a Reference (the dynamic_cast
// to Reference & throws std::bad_cast otherwise), using throw-away semantic
// values.
// Visible behaviour:
//   - The context's log callback is saved at the start.
//   - The first argument of the reference is treated as a label; when the
//     label's rule carries a non-empty error_message, that message is
//     recorded at the current error position.
//   - The error info is written to the log and then cleared.
// NOTE(review): `label` comes from a pointer dynamic_cast and is
// dereferenced without a visible null check; confirm that rule.args_[0] is
// always a Reference.
2696 inline size_t Recovery::parse_core(
const char *s,
size_t n,
2697 SemanticValues & , Context &c,
2698 std::any & )
const {
2699 auto save_log = c.log;
2702 const auto &rule =
dynamic_cast<Reference &
>(*ope_);
2704 SemanticValues dummy_vs;
2706 auto len = rule.parse(s, n, dummy_vs, c, dummy_dt);
2713 auto label =
dynamic_cast<Reference *
>(rule.args_[0].get());
2715 if (!label->rule_->error_message.empty()) {
2716 c.error_info.message_pos = c.error_info.error_pos;
2717 c.error_info.message = label->rule_->error_message;
2720 c.error_info.output_log(c.log, c.s, c.l);
2723 c.error_info.clear();
2728 inline void Sequence::accept(Visitor &v) { v.visit(*
this); }
2729 inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*
this); }
2730 inline void Repetition::accept(Visitor &v) { v.visit(*
this); }
2731 inline void AndPredicate::accept(Visitor &v) { v.visit(*
this); }
2732 inline void NotPredicate::accept(Visitor &v) { v.visit(*
this); }
2733 inline void Dictionary::accept(Visitor &v) { v.visit(*
this); }
2734 inline void LiteralString::accept(Visitor &v) { v.visit(*
this); }
2735 inline void CharacterClass::accept(Visitor &v) { v.visit(*
this); }
2736 inline void Character::accept(Visitor &v) { v.visit(*
this); }
2737 inline void AnyCharacter::accept(Visitor &v) { v.visit(*
this); }
2738 inline void CaptureScope::accept(Visitor &v) { v.visit(*
this); }
2739 inline void Capture::accept(Visitor &v) { v.visit(*
this); }
2740 inline void TokenBoundary::accept(Visitor &v) { v.visit(*
this); }
2741 inline void Ignore::accept(Visitor &v) { v.visit(*
this); }
2742 inline void User::accept(Visitor &v) { v.visit(*
this); }
2743 inline void WeakHolder::accept(Visitor &v) { v.visit(*
this); }
2744 inline void Holder::accept(Visitor &v) { v.visit(*
this); }
2745 inline void Reference::accept(Visitor &v) { v.visit(*
this); }
2746 inline void Whitespace::accept(Visitor &v) { v.visit(*
this); }
2747 inline void BackReference::accept(Visitor &v) { v.visit(*
this); }
2748 inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*
this); }
2749 inline void Recovery::accept(Visitor &v) { v.visit(*
this); }
// Assigns an id to the definition that owns this Holder and then descends
// into the held operator. A definition already present in ids is skipped.
// NOTE(review): the statement that records p in ids is not visible in this
// excerpt.
2751 inline void AssignIDToDefinition::visit(Holder &ope) {
2752 auto p =
static_cast<void *
>(ope.outer_);
2753 if (ids.count(p)) {
return; }
// The id is the number of definitions registered so far.
2754 auto id = ids.size();
2756 ope.outer_->id = id;
2757 ope.ope_->accept(*
this);
// Walks the arguments of a reference and then the referenced rule.
// NOTE(review): the loop body and any guard around these statements are not
// visible in this excerpt.
2760 inline void AssignIDToDefinition::visit(Reference &ope) {
2762 for (
auto arg : ope.args_) {
2765 ope.rule_->accept(*
this);
2769 inline void AssignIDToDefinition::visit(PrecedenceClimbing &ope) {
2770 ope.atom_->accept(*
this);
2771 ope.binop_->accept(*
this);
// Handles a Reference node; for macro references the arguments are iterated.
// NOTE(review): the loop body and the non-macro branch are not visible in this
// excerpt.
2774 inline void TokenChecker::visit(Reference &ope) {
2775 if (ope.is_macro_) {
2776 for (
auto arg : ope.args_) {
// Handles a Reference node; for macro references the referenced rule is
// visited first and then the arguments are iterated.
// NOTE(review): the loop body and the non-macro branch are not visible in this
// excerpt.
2784 inline void FindLiteralToken::visit(Reference &ope) {
2785 if (ope.is_macro_) {
2786 ope.rule_->accept(*
this);
2787 for (
auto arg : ope.args_) {
// Handles a Reference node during left-recursion detection.
// A reference whose name equals name_ takes the first branch (its body is not
// visible in this excerpt). Any other name is followed only once: it is
// recorded in refs_ before the referenced rule is visited.
2793 inline void DetectLeftRecursion::visit(Reference &ope) {
2794 if (ope.name_ == name_) {
2796 }
else if (!refs_.count(ope.name_)) {
2797 refs_.insert(ope.name_);
2799 ope.rule_->accept(*
this);
// Leave early while done_ is still false after the nested visit.
2800 if (done_ ==
false) {
return; }
// Follows a reference unless a reference with the same name is already on the
// refs_ list, which stops the traversal from looping on recursive rules.
// NOTE(review): statements after the nested visit are not visible in this
// excerpt.
2806 inline void HasEmptyElement::visit(Reference &ope) {
2807 auto it = std::find_if(refs_.begin(), refs_.end(),
2808 [&](
const std::pair<const char *, std::string> &ref) {
2809 return ope.name_ == ref.second;
2811 if (it != refs_.end()) {
return; }
// Record the source position and name of this reference, then descend.
2814 refs_.emplace_back(ope.s_, ope.name_);
2815 ope.rule_->accept(*
this);
// Follows a reference unless a reference with the same name is already on the
// refs_ list, which stops the traversal from looping on recursive rules.
// NOTE(review): statements after the nested visit are not visible in this
// excerpt.
2820 inline void DetectInfiniteLoop::visit(Reference &ope) {
2821 auto it = std::find_if(refs_.begin(), refs_.end(),
2822 [&](
const std::pair<const char *, std::string> &ref) {
2823 return ope.name_ == ref.second;
2825 if (it != refs_.end()) {
return; }
// Record the source position and name of this reference, then descend.
2828 refs_.emplace_back(ope.s_, ope.name_);
2829 ope.rule_->accept(*
this);
// Validates a single reference against the grammar and records problems in
// error_s (source position) and error_message, both keyed by reference name.
// Names that match one of params_ are accepted without further checks.
2834 inline void ReferenceChecker::visit(Reference &ope) {
2835 auto it = std::find(params_.begin(), params_.end(), ope.name_);
2836 if (it != params_.end()) {
return; }
// Unknown rule name.
2838 if (!grammar_.count(ope.name_)) {
2839 error_s[ope.name_] = ope.s_;
2840 error_message[ope.name_] =
"'" + ope.name_ +
"' is not defined.";
2842 const auto &rule = grammar_.at(ope.name_);
// A macro rule must be referenced as a macro with a matching argument count.
2843 if (rule.is_macro) {
2844 if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) {
2845 error_s[ope.name_] = ope.s_;
2846 error_message[ope.name_] =
"incorrect number of arguments.";
// A plain rule must not be referenced with macro syntax.
2848 }
else if (ope.is_macro_) {
2849 error_s[ope.name_] = ope.s_;
2850 error_message[ope.name_] =
"'" + ope.name_ +
"' is not macro.";
// NOTE(review): the body of this loop is not visible in this excerpt.
2852 for (
auto arg : ope.args_) {
2858 inline void LinkReferences::visit(Reference &ope) {
2860 auto found_param =
false;
2861 for (
size_t i = 0; i < params_.size(); i++) {
2862 const auto ¶m = params_[i];
2863 if (param == ope.name_) {
2871 if (!found_param && grammar_.count(ope.name_)) {
2872 auto &rule = grammar_.at(ope.name_);
2876 for (
auto arg : ope.args_) {
// Maps a reference to the operator it stands for: when the name of the
// reference equals the i-th entry of params_, the i-th entry of args_ is
// stored in found_ope; the last visible statement stores the reference itself.
// NOTE(review): the control flow between these two assignments is not visible
// in this excerpt.
2881 inline void FindReference::visit(Reference &ope) {
2882 for (
size_t i = 0; i < args_.size(); i++) {
2883 const auto &name = params_[i];
2884 if (name == ope.name_) {
2885 found_ope = args_[i];
2889 found_ope = ope.shared_from_this();
// Rule name to operator map; passed to ParserGenerator::parse as the rules
// argument.
2896 using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
// Turns grammar text into a Grammar object.
2898 class ParserGenerator {
// Parses the grammar text s of length n together with the extra rules and
// forwards to perform_core on the shared instance. start is passed through by
// reference.
// NOTE(review): the parameter line declaring log is not visible in this
// excerpt.
2900 static std::shared_ptr<Grammar> parse(
const char *s,
size_t n,
2901 const Rules &rules, std::string &start,
2903 return get_instance().perform_core(s, n, rules, start, log);
// Convenience overload without extra rules; forwards to the overload that
// takes a Rules argument.
// NOTE(review): the declaration of dummy is not visible in this excerpt.
2906 static std::shared_ptr<Grammar> parse(
const char *s,
size_t n,
2907 std::string &start, Log log) {
2909 return parse(s, n, dummy, start, log);
2913 static Grammar &grammar() {
return get_instance().g; }
// Accessor for the single ParserGenerator object, kept as a function-local
// static.
// NOTE(review): the return statement is not visible in this excerpt.
2916 static ParserGenerator &get_instance() {
2917 static ParserGenerator instance;
// Per-rule instruction parsed from the grammar text. setup_actions assigns its
// type and data members.
// NOTE(review): the member declarations of Instruction and the header of the
// following Data struct are not visible in this excerpt.
2926 struct Instruction {
// State collected while a grammar text is parsed: the grammar being built, the
// source position of the start rule, duplicate rule names with their
// positions, and the instructions keyed by rule name.
2932 std::shared_ptr<Grammar> grammar;
2934 const char *start_pos =
nullptr;
2935 std::vector<std::pair<std::string, const char *>> duplicates;
2936 std::map<std::string, Instruction> instructions;
2938 Data() : grammar(std::make_shared<Grammar>()) {}
// Populates g with the rules of the grammar-description language itself.
// Each statement binds a parsing expression, built from the combinators seq,
// cho, zom, oom, opt, npd, cls, chr, lit, tok, rep, ign and dot, to a named
// entry of g.
// NOTE(review): for several statements the line that names the target rule is
// not visible in this excerpt; those expressions are left uncommented.
2941 void make_grammar() {
// Top level: leading spacing, one or more definitions, then end of input.
2943 g[
"Grammar"] <= seq(g[
"Spacing"], oom(g[
"Definition"]), g[
"EndOfFile"]);
2945 cho(seq(g[
"Ignore"], g[
"IdentCont"], g[
"Parameters"], g[
"LEFTARROW"],
2946 g[
"Expression"], opt(g[
"Instruction"])),
2947 seq(g[
"Ignore"], g[
"Identifier"], g[
"LEFTARROW"], g[
"Expression"],
2948 opt(g[
"Instruction"])));
// Expression structure: alternatives separated by SLASH, each a run of
// prefixed, suffixed and optionally labelled primaries.
2949 g[
"Expression"] <= seq(g[
"Sequence"], zom(seq(g[
"SLASH"], g[
"Sequence"])));
2950 g[
"Sequence"] <= zom(g[
"Prefix"]);
2951 g[
"Prefix"] <= seq(opt(cho(g[
"AND"], g[
"NOT"])), g[
"SuffixWithLabel"]);
2952 g[
"SuffixWithLabel"] <=
2953 seq(g[
"Suffix"], opt(seq(g[
"HAT"], g[
"Identifier"])));
2954 g[
"Suffix"] <= seq(g[
"Primary"], opt(g[
"Loop"]));
2955 g[
"Loop"] <= cho(g[
"QUESTION"], g[
"STAR"], g[
"PLUS"], g[
"Repetition"]);
2957 cho(seq(g[
"Ignore"], g[
"IdentCont"], g[
"Arguments"],
2958 npd(g[
"LEFTARROW"])),
2959 seq(g[
"Ignore"], g[
"Identifier"],
2960 npd(seq(opt(g[
"Parameters"]), g[
"LEFTARROW"]))),
2961 seq(g[
"OPEN"], g[
"Expression"], g[
"CLOSE"]),
2962 seq(g[
"BeginTok"], g[
"Expression"], g[
"EndTok"]),
2963 seq(g[
"BeginCapScope"], g[
"Expression"], g[
"EndCapScope"]),
2964 seq(g[
"BeginCap"], g[
"Expression"], g[
"EndCap"]), g[
"BackRef"],
2965 g[
"LiteralI"], g[
"Dictionary"], g[
"Literal"], g[
"NegatedClass"],
2966 g[
"Class"], g[
"DOT"]);
// Identifiers.
2968 g[
"Identifier"] <= seq(g[
"IdentCont"], g[
"Spacing"]);
2969 g[
"IdentCont"] <= seq(g[
"IdentStart"], zom(g[
"IdentRest"]));
// Extra code point ranges accepted as the first character of an identifier
// (used by IdentStart below); the range values are not visible in this
// excerpt.
2971 const static std::vector<std::pair<char32_t, char32_t>> range = {
2973 g[
"IdentStart"] <= cho(cls(
"a-zA-Z_%"), cls(range));
2975 g[
"IdentRest"] <= cho(g[
"IdentStart"], cls(
"0-9"));
// A dictionary is two or more literals separated by PIPE.
2977 g[
"Dictionary"] <= seq(g[
"LiteralD"], oom(seq(g[
"PIPE"], g[
"LiteralD"])));
// Single-quoted or double-quoted literal; the same expression is bound to both
// Literal and LiteralD.
2979 auto lit_ope = cho(seq(cls(
"'"), tok(zom(seq(npd(cls(
"'")), g[
"Char"]))),
2980 cls(
"'"), g[
"Spacing"]),
2981 seq(cls(
"\""), tok(zom(seq(npd(cls(
"\"")), g[
"Char"]))),
2982 cls(
"\""), g[
"Spacing"]));
2983 g[
"Literal"] <= lit_ope;
2984 g[
"LiteralD"] <= lit_ope;
2987 cho(seq(cls(
"'"), tok(zom(seq(npd(cls(
"'")), g[
"Char"]))), lit(
"'i"),
2989 seq(cls(
"\""), tok(zom(seq(npd(cls(
"\"")), g[
"Char"]))), lit(
"\"i"),
// Character classes: Class must not start with a caret after the opening
// bracket, NegatedClass must.
2993 g[
"Class"] <= seq(chr(
'['), npd(chr(
'^')),
2994 tok(oom(seq(npd(chr(
']')), g[
"Range"]))), chr(
']'),
2996 g[
"NegatedClass"] <= seq(lit(
"[^"),
2997 tok(oom(seq(npd(chr(
']')), g[
"Range"]))), chr(
']'),
3000 g[
"Range"] <= cho(seq(g[
"Char"], chr(
'-'), g[
"Char"]), g[
"Char"]);
3002 cho(seq(chr(
'\\'), cls(
"nrt'\"[]\\^")),
3003 seq(chr(
'\\'), cls(
"0-3"), cls(
"0-7"), cls(
"0-7")),
3004 seq(chr(
'\\'), cls(
"0-7"), opt(cls(
"0-7"))),
3005 seq(lit(
"\\x"), cls(
"0-9a-fA-F"), opt(cls(
"0-9a-fA-F"))),
3007 cho(seq(cho(seq(chr(
'0'), cls(
"0-9a-fA-F")), lit(
"10")),
3008 rep(cls(
"0-9a-fA-F"), 4, 4)),
3009 rep(cls(
"0-9a-fA-F"), 4, 5))),
3010 seq(npd(chr(
'\\')), dot()));
3013 seq(g[
"BeginBlacket"], g[
"RepetitionRange"], g[
"EndBlacket"]);
// Four range forms: min and max, min only, exact count, max only. The order
// matters because setup_actions dispatches on the index of the chosen
// alternative.
3014 g[
"RepetitionRange"] <= cho(seq(g[
"Number"], g[
"COMMA"], g[
"Number"]),
3015 seq(g[
"Number"], g[
"COMMA"]), g[
"Number"],
3016 seq(g[
"COMMA"], g[
"Number"]));
3017 g[
"Number"] <= seq(oom(cls(
"0-9")), g[
"Spacing"]);
3020 seq(cho(lit(
"<-"), lit(
reinterpret_cast<const char *
>(u8
"←"))),
// Punctuation tokens, each followed by optional spacing.
3022 ~g[
"SLASH"] <= seq(chr(
'/'), g[
"Spacing"]);
3023 ~g[
"PIPE"] <= seq(chr(
'|'), g[
"Spacing"]);
3024 g[
"AND"] <= seq(chr(
'&'), g[
"Spacing"]);
3025 g[
"NOT"] <= seq(chr(
'!'), g[
"Spacing"]);
3026 ~g[
"HAT"] <= seq(chr(
'^'), g[
"Spacing"]);
3027 g[
"QUESTION"] <= seq(chr(
'?'), g[
"Spacing"]);
3028 g[
"STAR"] <= seq(chr(
'*'), g[
"Spacing"]);
3029 g[
"PLUS"] <= seq(chr(
'+'), g[
"Spacing"]);
3030 ~g[
"OPEN"] <= seq(chr(
'('), g[
"Spacing"]);
3031 ~g[
"CLOSE"] <= seq(chr(
')'), g[
"Spacing"]);
3032 g[
"DOT"] <= seq(chr(
'.'), g[
"Spacing"]);
// Whitespace, comments and line endings.
3034 ~g[
"Spacing"] <= zom(cho(g[
"Space"], g[
"Comment"]));
3036 seq(chr(
'#'), zom(seq(npd(g[
"EndOfLine"]), dot())), g[
"EndOfLine"]);
3037 g[
"Space"] <= cho(chr(
' '), chr(
'\t'), g[
"EndOfLine"]);
3038 g[
"EndOfLine"] <= cho(lit(
"\r\n"), chr(
'\n'), chr(
'\r'));
3039 g[
"EndOfFile"] <= npd(dot());
// Token boundary, capture scope, named capture and back reference markers.
3041 ~g[
"BeginTok"] <= seq(chr(
'<'), g[
"Spacing"]);
3042 ~g[
"EndTok"] <= seq(chr(
'>'), g[
"Spacing"]);
3044 ~g[
"BeginCapScope"] <= seq(chr(
'$'), chr(
'('), g[
"Spacing"]);
3045 ~g[
"EndCapScope"] <= seq(chr(
')'), g[
"Spacing"]);
3047 g[
"BeginCap"] <= seq(chr(
'$'), tok(g[
"IdentCont"]), chr(
'<'), g[
"Spacing"]);
3048 ~g[
"EndCap"] <= seq(chr(
'>'), g[
"Spacing"]);
3050 g[
"BackRef"] <= seq(chr(
'$'), tok(g[
"IdentCont"]), g[
"Spacing"]);
// Ignore marker and macro parameter / argument lists.
3052 g[
"IGNORE"] <= chr(
'~');
3054 g[
"Ignore"] <= opt(g[
"IGNORE"]);
3055 g[
"Parameters"] <= seq(g[
"OPEN"], g[
"Identifier"],
3056 zom(seq(g[
"COMMA"], g[
"Identifier"])), g[
"CLOSE"]);
3057 g[
"Arguments"] <= seq(g[
"OPEN"], g[
"Expression"],
3058 zom(seq(g[
"COMMA"], g[
"Expression"])), g[
"CLOSE"]);
3059 ~g[
"COMMA"] <= seq(chr(
','), g[
"Spacing"]);
// Instruction block attached to a definition: one of precedence, message or
// no_ast_opt, enclosed by BeginBlacket and (per the other rules here)
// presumably EndBlacket; the closing part is not visible in this excerpt.
3062 g[
"Instruction"] <= seq(g[
"BeginBlacket"],
3063 cho(cho(g[
"PrecedenceClimbing"]),
3064 cho(g[
"ErrorMessage"]), cho(g[
"NoAstOpt"])),
3067 ~g[
"SpacesZom"] <= zom(g[
"Space"]);
3068 ~g[
"SpacesOom"] <= oom(g[
"Space"]);
3069 ~g[
"BeginBlacket"] <= seq(chr(
'{'), g[
"Spacing"]);
3070 ~g[
"EndBlacket"] <= seq(chr(
'}'), g[
"Spacing"]);
// Precedence instruction: the keyword followed by one or more groups, each an
// associativity letter (L or R) followed by operator tokens.
3073 g[
"PrecedenceClimbing"] <=
3074 seq(lit(
"precedence"), g[
"SpacesOom"], g[
"PrecedenceInfo"],
3075 zom(seq(g[
"SpacesOom"], g[
"PrecedenceInfo"])), g[
"SpacesZom"]);
3076 g[
"PrecedenceInfo"] <=
3077 seq(g[
"PrecedenceAssoc"],
3078 oom(seq(ign(g[
"SpacesOom"]), g[
"PrecedenceOpe"])));
3079 g[
"PrecedenceOpe"] <=
3081 seq(npd(cho(g[
"PrecedenceAssoc"], g[
"Space"], chr(
'}'))), dot())));
3082 g[
"PrecedenceAssoc"] <= cls(
"LR");
// Error message instruction.
3085 g[
"ErrorMessage"] <=
3086 seq(lit(
"message"), g[
"SpacesOom"], g[
"LiteralD"], g[
"SpacesZom"]);
// Instruction that sets no_ast_opt on the rule (see perform_core).
3089 g[
"NoAstOpt"] <= seq(lit(
"no_ast_opt"), g[
"SpacesZom"]);
// NOTE(review): the loop header is not visible in this excerpt; presumably
// this runs for every entry of g so each rule carries its key as its name.
3093 x.second.name = x.first;
// Installs the semantic actions of the meta-grammar rules in g. Most actions
// turn the matched pieces into std::shared_ptr<Ope> operator trees; the
// Definition action stores finished rules in the Data object passed through dt.
// NOTE(review): closing braces, some branch headers and a few statements of
// the individual lambdas are not visible in this excerpt.
3097 void setup_actions() {
// Definition: registers a rule. Alternative 0 is the macro form with a
// parameter list; an optional trailing Instruction is stored by rule name.
3098 g[
"Definition"] = [&](
const SemanticValues &vs, std::any &dt) {
3099 auto &data = *std::any_cast<Data *>(dt);
3101 auto is_macro = vs.choice() == 0;
3102 auto ignore = std::any_cast<bool>(vs[0]);
3103 auto name = std::any_cast<std::string>(vs[1]);
3105 std::vector<std::string> params;
3106 std::shared_ptr<Ope> ope;
3108 params = std::any_cast<std::vector<std::string>>(vs[2]);
3109 ope = std::any_cast<std::shared_ptr<Ope>>(vs[4]);
3110 if (vs.size() == 6) {
3111 data.instructions[name] = std::any_cast<Instruction>(vs[5]);
3114 ope = std::any_cast<std::shared_ptr<Ope>>(vs[3]);
3115 if (vs.size() == 5) {
3116 data.instructions[name] = std::any_cast<Instruction>(vs[4]);
// First definition of a name fills in the rule; a repeated name is recorded in
// data.duplicates together with its source position.
3120 auto &grammar = *data.grammar;
3121 if (!grammar.count(name)) {
3122 auto &rule = grammar[name];
3125 rule.s_ = vs.sv().data();
3126 rule.ignoreSemanticValue = ignore;
3127 rule.is_macro = is_macro;
3128 rule.params = params;
3130 if (data.start.empty()) {
3132 data.start_pos = vs.sv().data();
3135 data.duplicates.emplace_back(name, vs.sv().data());
// Expression: a single alternative is passed through unchanged, several are
// wrapped in a PrioritizedChoice.
3139 g[
"Expression"] = [&](
const SemanticValues &vs) {
3140 if (vs.size() == 1) {
3141 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3143 std::vector<std::shared_ptr<Ope>> opes;
3144 for (
auto i = 0u; i < vs.size(); i++) {
3145 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(vs[i]));
3147 const std::shared_ptr<Ope> ope =
3148 std::make_shared<PrioritizedChoice>(opes);
// Sequence: a single element is passed through unchanged, several are wrapped
// in a Sequence operator.
3153 g[
"Sequence"] = [&](
const SemanticValues &vs) {
3155 return npd(lit(
""));
3156 }
else if (vs.size() == 1) {
3157 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3159 std::vector<std::shared_ptr<Ope>> opes;
3160 for (
const auto &x : vs) {
3161 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(x));
3163 const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
// Prefix: with two values the first is the prefix character and the second the
// operand.
3168 g[
"Prefix"] = [&](
const SemanticValues &vs) {
3169 std::shared_ptr<Ope> ope;
3170 if (vs.size() == 1) {
3171 ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3173 assert(vs.size() == 2);
3174 auto tok = std::any_cast<char>(vs[0]);
3175 ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
// SuffixWithLabel: with a label, the result is a choice between the operand
// and a recovery operator that references RECOVER_DEFINITION_NAME with the
// label as its argument.
3185 g[
"SuffixWithLabel"] = [&](
const SemanticValues &vs, std::any &dt) {
3186 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3187 if (vs.size() == 1) {
3190 assert(vs.size() == 2);
3191 auto &data = *std::any_cast<Data *>(dt);
3192 const auto &ident = std::any_cast<std::string>(vs[1]);
3193 auto label = ref(*data.grammar, ident, vs.sv().data(),
false, {});
3194 auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
3195 vs.sv().data(),
true, {label}));
3196 return cho(ope, recovery);
// Loop descriptor produced by the Loop action and consumed by the Suffix
// action; range is only meaningful for Type::rep.
3201 enum class Type { opt = 0, zom, oom, rep };
3203 std::pair<size_t, size_t> range;
3206 g[
"Suffix"] = [&](
const SemanticValues &vs) {
3207 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3208 if (vs.size() == 1) {
3211 assert(vs.size() == 2);
3212 auto loop = std::any_cast<Loop>(vs[1]);
3213 switch (loop.type) {
3214 case Loop::Type::opt:
return opt(ope);
3215 case Loop::Type::zom:
return zom(ope);
3216 case Loop::Type::oom:
return oom(ope);
3218 return rep(ope, loop.range.first, loop.range.second);
3223 g[
"Loop"] = [&](
const SemanticValues &vs) {
3224 switch (vs.choice()) {
3226 return Loop{Loop::Type::opt, std::pair<size_t, size_t>()};
3228 return Loop{Loop::Type::zom, std::pair<size_t, size_t>()};
3230 return Loop{Loop::Type::oom, std::pair<size_t, size_t>()};
3232 return Loop{Loop::Type::rep,
3233 std::any_cast<std::pair<size_t, size_t>>(vs[0])};
// RepetitionRange: yields a (min, max) pair; an open upper bound becomes the
// largest size_t, an open lower bound the smallest.
3237 g[
"RepetitionRange"] = [&](
const SemanticValues &vs) {
3238 switch (vs.choice()) {
3240 auto min = std::any_cast<size_t>(vs[0]);
3241 auto max = std::any_cast<size_t>(vs[1]);
3242 return std::pair(min, max);
3245 return std::pair(std::any_cast<size_t>(vs[0]),
3246 std::numeric_limits<size_t>::max());
3248 auto n = std::any_cast<size_t>(vs[0]);
3249 return std::pair(n, n);
3252 return std::pair(std::numeric_limits<size_t>::min(),
3253 std::any_cast<size_t>(vs[0]));
3256 g[
"Number"] = [&](
const SemanticValues &vs) {
3257 return vs.token_to_number<
size_t>();
// Primary: dispatches on the chosen alternative of the Primary rule.
3260 g[
"Primary"] = [&](
const SemanticValues &vs, std::any &dt) {
3261 auto &data = *std::any_cast<Data *>(dt);
3263 switch (vs.choice()) {
3266 auto is_macro = vs.choice() == 0;
3267 auto ignore = std::any_cast<bool>(vs[0]);
3268 const auto &ident = std::any_cast<std::string>(vs[1]);
3270 std::vector<std::shared_ptr<Ope>> args;
3272 args = std::any_cast<std::vector<std::shared_ptr<Ope>>>(vs[2]);
// A reference to the recovery rule itself is wrapped in a recovery operator.
3275 auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
3276 if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
3285 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3288 return tok(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3291 return csc(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
// Named capture: the callback stores the matched text under the capture name
// in the innermost capture scope of the context.
3294 const auto &name = std::any_cast<std::string_view>(vs[0]);
3295 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3296 return cap(ope, [name](
const char *a_s,
size_t a_n, Context &c) {
3297 auto &cs = c.capture_scope_stack[c.capture_scope_stack_size - 1];
3298 cs[name] = std::string(a_s, a_n);
3302 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3307 g[
"IdentCont"] = [](
const SemanticValues &vs) {
3308 return std::string(vs.sv().data(), vs.sv().length());
3311 g[
"Dictionary"] = [](
const SemanticValues &vs) {
3312 auto items = vs.transform<std::string>();
// Literal actions: the first token is the quoted text; escape sequences are
// resolved before the operator (or, for LiteralD, the plain string) is built.
3316 g[
"Literal"] = [](
const SemanticValues &vs) {
3317 const auto &tok = vs.tokens.front();
3318 return lit(resolve_escape_sequence(tok.data(), tok.size()));
3320 g[
"LiteralI"] = [](
const SemanticValues &vs) {
3321 const auto &tok = vs.tokens.front();
3322 return liti(resolve_escape_sequence(tok.data(), tok.size()));
3324 g[
"LiteralD"] = [](
const SemanticValues &vs) {
3325 auto &tok = vs.tokens.front();
3326 return resolve_escape_sequence(tok.data(), tok.size());
3329 g[
"Class"] = [](
const SemanticValues &vs) {
3330 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3333 g[
"NegatedClass"] = [](
const SemanticValues &vs) {
3334 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3335 return ncls(ranges);
// Range: yields a pair of code points; a single character gives a pair with
// both ends equal.
3337 g[
"Range"] = [](
const SemanticValues &vs) {
3338 switch (vs.choice()) {
3340 auto s1 = std::any_cast<std::string>(vs[0]);
3341 auto s2 = std::any_cast<std::string>(vs[1]);
3342 auto cp1 = decode_codepoint(s1.data(), s1.length());
3343 auto cp2 = decode_codepoint(s2.data(), s2.length());
3344 return std::pair(cp1, cp2);
3347 auto s = std::any_cast<std::string>(vs[0]);
3348 auto cp = decode_codepoint(s.data(), s.length());
3349 return std::pair(cp, cp);
3352 return std::pair<char32_t, char32_t>(0, 0);
3354 g[
"Char"] = [](
const SemanticValues &vs) {
3355 return resolve_escape_sequence(vs.sv().data(), vs.sv().length());
// Operator tokens yield the first character of their matched text.
3358 g[
"AND"] = [](
const SemanticValues &vs) {
return *vs.sv().data(); };
3359 g[
"NOT"] = [](
const SemanticValues &vs) {
return *vs.sv().data(); };
3360 g[
"QUESTION"] = [](
const SemanticValues &vs) {
return *vs.sv().data(); };
3361 g[
"STAR"] = [](
const SemanticValues &vs) {
return *vs.sv().data(); };
3362 g[
"PLUS"] = [](
const SemanticValues &vs) {
return *vs.sv().data(); };
3364 g[
"DOT"] = [](
const SemanticValues & ) {
return dot(); };
3366 g[
"BeginCap"] = [](
const SemanticValues &vs) {
return vs.token(); };
3368 g[
"BackRef"] = [&](
const SemanticValues &vs) {
3369 return bkr(vs.token_to_string());
// Ignore yields true when the optional marker produced a value.
3372 g[
"Ignore"] = [](
const SemanticValues &vs) {
return vs.size() > 0; };
3374 g[
"Parameters"] = [](
const SemanticValues &vs) {
3375 return vs.transform<std::string>();
3378 g[
"Arguments"] = [](
const SemanticValues &vs) {
3379 return vs.transform<std::shared_ptr<Ope>>();
// PrecedenceClimbing: builds the operator table. In each group the first token
// supplies the associativity character and the remaining tokens are operators
// mapped to (level, assoc).
// NOTE(review): the loop over groups and the handling of level are not visible
// in this excerpt.
3382 g[
"PrecedenceClimbing"] = [](
const SemanticValues &vs) {
3383 PrecedenceClimbing::BinOpeInfo binOpeInfo;
3386 auto tokens = std::any_cast<std::vector<std::string_view>>(v);
3387 auto assoc = tokens[0][0];
3388 for (
size_t i = 1; i < tokens.size(); i++) {
3389 binOpeInfo[tokens[i]] = std::pair(level, assoc);
3393 Instruction instruction;
3394 instruction.type =
"precedence";
3395 instruction.data = binOpeInfo;
3398 g[
"PrecedenceInfo"] = [](
const SemanticValues &vs) {
3399 return vs.transform<std::string_view>();
3401 g[
"PrecedenceOpe"] = [](
const SemanticValues &vs) {
return vs.token(); };
3402 g[
"PrecedenceAssoc"] = [](
const SemanticValues &vs) {
return vs.token(); };
// ErrorMessage: instruction of type message carrying the literal text.
3404 g[
"ErrorMessage"] = [](
const SemanticValues &vs) {
3405 Instruction instruction;
3406 instruction.type =
"message";
3407 instruction.data = std::any_cast<std::string>(vs[0]);
// NoAstOpt: instruction of type no_ast_opt.
3411 g[
"NoAstOpt"] = [](
const SemanticValues & ) {
3412 Instruction instruction;
3413 instruction.type =
"no_ast_opt";
// Rewrites a rule for precedence climbing. The core operator of the rule is
// cast to a Sequence of an atom and a Repetition whose body is a Sequence of
// (binop, atom1); all three operands are cast to Reference.
//   rule - definition to rewrite
//   info - operator table for the new precedence-climbing operator
//   s    - start of the grammar text, used to compute line info for the log
//   log  - receives the diagnostic when the instruction cannot be applied
// NOTE(review): the return statements and the construct that separates the two
// logging sites (dynamic_cast on references can throw; presumably a try/catch)
// are not visible in this excerpt.
3418 bool apply_precedence_instruction(Definition &rule,
3419 const PrecedenceClimbing::BinOpeInfo &info,
3420 const char *s, Log log) {
3422 auto &seq =
dynamic_cast<Sequence &
>(*rule.get_core_operator());
3423 auto atom = seq.opes_[0];
3424 auto &rep =
dynamic_cast<Repetition &
>(*seq.opes_[1]);
3425 auto &seq1 =
dynamic_cast<Sequence &
>(*rep.ope_);
3426 auto binop = seq1.opes_[0];
3427 auto atom1 = seq1.opes_[1];
3429 auto atom_name =
dynamic_cast<Reference &
>(*atom).name_;
3430 auto binop_name =
dynamic_cast<Reference &
>(*binop).name_;
3431 auto atom1_name =
dynamic_cast<Reference &
>(*atom1).name_;
// Reject the rule unless the repetition is zero-or-more, both atoms name the
// same rule, and the operator is a different rule than the atom.
3433 if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) {
3435 auto line = line_info(s, rule.s_);
3436 log(line.first, line.second,
3437 "'precedence' instruction cannot be applied to '" + rule.name +
// Replace the operator held by the rule and disable the action of the rule.
3443 rule.holder_->ope_ = pre(atom, binop, info, rule);
3444 rule.disable_action =
true;
3447 auto line = line_info(s, rule.s_);
3448 log(line.first, line.second,
3449 "'precedence' instruction cannot be applied to '" + rule.name +
// Parses grammar text and runs the validation and linking passes over the
// resulting rules. Returns the finished grammar, or nullptr at the visible
// check points when an earlier pass reported a problem through log.
//   s, n  - grammar text and its length
//   rules - additional rules keyed by name; a leading tilde in the key is
//           detected below
//   start - passed in by reference (its use is not visible in this excerpt)
// NOTE(review): the declaration of data, the log parameter line, several loop
// bodies and the places where ret is cleared are not visible in this excerpt.
3457 std::shared_ptr<Grammar> perform_core(
const char *s,
size_t n,
3458 const Rules &rules, std::string &start,
3461 auto &grammar = *data.grammar;
// Built-in recovery rule: a macro with one parameter x whose body is a
// reference to x.
3467 auto &rule = grammar[RECOVER_DEFINITION_NAME];
3468 rule <= ref(grammar,
"x",
"",
false, {});
3469 rule.name = RECOVER_DEFINITION_NAME;
3470 rule.s_ =
"[native]";
3471 rule.ignoreSemanticValue =
true;
3472 rule.is_macro =
true;
3473 rule.params = {
"x"};
// Parse the grammar text with the meta-grammar; the actions fill data via dt.
3477 std::any dt = &data;
3478 auto r = g[
"Grammar"].parse(s, n, dt,
nullptr, log);
// Report either the recorded message or a generic syntax error.
3482 if (r.error_info.message_pos) {
3483 auto line = line_info(s, r.error_info.message_pos);
3484 log(line.first, line.second, r.error_info.message);
3486 auto line = line_info(s, r.error_info.error_pos);
3487 log(line.first, line.second,
"syntax error");
// User-supplied rules.
3494 for (
const auto &x : rules) {
3495 auto name = x.first;
3496 auto ignore =
false;
3497 if (!name.empty() && name[0] ==
'~') {
3501 if (!name.empty()) {
3502 auto &rule = grammar[name];
3505 rule.ignoreSemanticValue = ignore;
// Duplicate definitions: ret starts out true only when there are none.
3510 auto ret = data.duplicates.empty();
3512 for (
const auto &x : data.duplicates) {
3514 const auto &name = x.first;
3515 auto ptr = x.second;
3516 auto line = line_info(s, ptr);
3517 log(line.first, line.second,
"'" + name +
"' is already defined.");
// The start rule must not carry the ignore operator.
3523 auto &rule = grammar[data.start];
3524 if (rule.ignoreSemanticValue) {
3526 auto line = line_info(s, rule.s_);
3527 log(line.first, line.second,
3528 "Ignore operator cannot be applied to '" + rule.name +
"'.");
3534 if (!ret) {
return nullptr; }
// Check that every reference names a known rule or parameter.
3537 for (
auto &x : grammar) {
3538 auto &rule = x.second;
3540 ReferenceChecker vis(grammar, rule.params);
3542 for (
const auto &y : vis.error_s) {
3543 const auto &name = y.first;
3544 const auto ptr = y.second;
3546 auto line = line_info(s, ptr);
3547 log(line.first, line.second, vis.error_message[name]);
3553 if (!ret) {
return nullptr; }
// Link references to their definitions.
3556 for (
auto &x : grammar) {
3557 auto &rule = x.second;
3558 LinkReferences vis(grammar, rule.params);
// Left recursion check.
3565 for (
auto &x : grammar) {
3566 const auto &name = x.first;
3567 auto &rule = x.second;
3569 DetectLeftRecursion vis(name);
3573 auto line = line_info(s, vis.error_s);
3574 log(line.first, line.second,
"'" + name +
"' is left recursive.");
3580 if (!ret) {
return nullptr; }
3583 auto &start_rule = grammar[data.start];
// Infinite loop check.
3586 for (
auto &[name, rule] : grammar) {
3587 DetectInfiniteLoop vis(rule.s_, name);
3589 if (vis.has_error) {
3591 auto line = line_info(s, vis.error_s);
3592 log(line.first, line.second,
3593 "infinite loop is detected in '" + vis.error_name +
"'.");
// With a whitespace rule present, rules whose core operator passes
// IsLiteralToken::check are wrapped in a token boundary, and the whitespace
// operator is attached to the start rule.
3600 if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
3601 for (
auto &x : grammar) {
3602 auto &rule = x.second;
3603 auto ope = rule.get_core_operator();
3604 if (IsLiteralToken::check(*ope)) { rule <= tok(ope); }
3607 start_rule.whitespaceOpe =
3608 wsp(grammar[WHITESPACE_DEFINITION_NAME].get_core_operator());
// Optional word rule.
3612 if (grammar.count(WORD_DEFINITION_NAME)) {
3613 start_rule.wordOpe = grammar[WORD_DEFINITION_NAME].get_core_operator();
// Apply the per-rule instructions collected while parsing.
3617 for (
const auto &item : data.instructions) {
3618 const auto &name = item.first;
3619 const auto &instruction = item.second;
3620 auto &rule = grammar[name];
3622 if (instruction.type ==
"precedence") {
3624 std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
3626 if (!apply_precedence_instruction(rule, info, s, log)) {
3629 }
else if (instruction.type ==
"message") {
3630 rule.error_message = std::any_cast<std::string>(instruction.data);
3631 }
else if (instruction.type ==
"no_ast_opt") {
3632 rule.no_ast_opt =
true;
3639 return data.grammar;
// AST node type. Derives from the Annotation template parameter.
// A node is either an inner node (is_token false, children in nodes) or a
// token node (is_token true, text in token).
// NOTE(review): a few member declarations (for example position and length)
// and one constructor parameter line are not visible in this excerpt.
3649 template <
typename Annotation>
struct AstBase :
public Annotation {
// Inner-node constructor. A null path is stored as an empty string; the
// original_* members start out equal to their plain counterparts.
3650 AstBase(
const char *path,
size_t line,
size_t column,
const char *name,
3651 const std::vector<std::shared_ptr<AstBase>> &nodes,
3652 size_t position = 0,
size_t length = 0,
size_t choice_count = 0,
3654 : path(path ? path :
""), line(line), column(column), name(name),
3655 position(position), length(length), choice_count(choice_count),
3656 choice(choice), original_name(name),
3657 original_choice_count(choice_count), original_choice(choice),
3658 tag(str2tag(name)), original_tag(tag), is_token(false), nodes(nodes) {}
// Token-node constructor; same conventions as above.
3660 AstBase(
const char *path,
size_t line,
size_t column,
const char *name,
3661 const std::string_view &token,
size_t position = 0,
size_t length = 0,
3662 size_t choice_count = 0,
size_t choice = 0)
3663 : path(path ? path :
""), line(line), column(column), name(name),
3664 position(position), length(length), choice_count(choice_count),
3665 choice(choice), original_name(name),
3666 original_choice_count(choice_count), original_choice(choice),
3667 tag(str2tag(name)), original_tag(tag), is_token(true), token(token) {}
// Copies ast but replaces the original_* members, position and length with
// the given values. Argument order after original_name is: position, length,
// original_choice_count, original_choise.
3669 AstBase(
const AstBase &ast,
const char *original_name,
size_t position = 0,
3670 size_t length = 0,
size_t original_choice_count = 0,
3671 size_t original_choise = 0)
3672 : path(ast.path), line(ast.line), column(ast.column), name(ast.name),
3673 position(position), length(length), choice_count(ast.choice_count),
3674 choice(ast.choice), original_name(original_name),
3675 original_choice_count(original_choice_count),
3676 original_choice(original_choise), tag(ast.tag),
3677 original_tag(str2tag(original_name)), is_token(ast.is_token),
3678 token(ast.token), nodes(ast.nodes), parent(ast.parent) {}
// Source location of the node.
3680 const std::string path;
3681 const size_t line = 1;
3682 const size_t column = 1;
3684 const std::string name;
3687 const size_t choice_count;
3688 const size_t choice;
3689 const std::string original_name;
3690 const size_t original_choice_count;
3691 const size_t original_choice;
// tag and original_tag are computed with str2tag from name and original_name.
3692 const unsigned int tag;
3693 const unsigned int original_tag;
3695 const bool is_token;
3696 const std::string_view token;
// Children and a non-owning link to the parent node.
3698 std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
3699 std::weak_ptr<AstBase<Annotation>> parent;
// Returns a copy of the token text.
3701 std::string token_to_string()
const {
3703 return std::string(token);
// Converts the token text to a number: floating point types go through a
// string stream, other types through std::from_chars.
3706 template <
typename T> T token_to_number()
const {
3708 if constexpr (std::is_floating_point<T>::value) {
3710 std::istringstream ss(token_to_string());
3715 std::from_chars(token.data(), token.data() + token.size(), n);
// Appends a textual dump of the tree rooted at ptr to s, recursing into child
// nodes with level + 1.
//   ptr   - node to print
//   s     - output string, appended to
//   level - nesting depth of this node
//   fn    - optional callback whose result is appended after the node line
// NOTE(review): the indentation statement, the branch condition between the
// two output forms and the token output are not visible in this excerpt.
3721 template <
typename T>
3722 void ast_to_s_core(
const std::shared_ptr<T> &ptr, std::string &s,
int level,
3723 std::function<std::string(
const T &ast,
int level)> fn) {
3724 const auto &ast = *ptr;
3725 for (
auto i = 0; i < level; i++) {
// Label: original name, then /choice when the rule had alternatives, then the
// current name in brackets when it differs from the original one.
3728 auto name = ast.original_name;
3729 if (ast.original_choice_count > 0) {
3730 name +=
"/" + std::to_string(ast.original_choice);
3732 if (ast.name != ast.original_name) { name +=
"[" + ast.name +
"]"; }
3734 s +=
"- " + name +
" (";
3738 s +=
"+ " + name +
"\n";
3740 if (fn) { s += fn(ast, level + 1); }
3741 for (
auto node : ast.nodes) {
3742 ast_to_s_core(node, s, level + 1, fn);
// Entry point for dumping a tree: calls ast_to_s_core on ptr starting at
// level 0. fn is optional and defaults to nullptr.
// NOTE(review): the return-type line, the declaration of s and the return
// statement are not visible in this excerpt.
3746 template <
typename T>
3748 ast_to_s(
const std::shared_ptr<T> &ptr,
3749 std::function<std::string(
const T &ast,
int level)> fn =
nullptr) {
3751 ast_to_s_core(ptr, s, 0, fn);
// Collapses chains of single-child AST nodes.
//
// mode  - when true, every node is eligible for folding except those whose
//         name is listed in rules; when false, only the listed names are
//         eligible.
// rules - rule names that invert the default chosen by mode.
struct AstOptimizer {
  AstOptimizer(bool mode, const std::vector<std::string> &rules = {})
      : mode_(mode), rules_(rules) {}

  // Returns an optimized copy of the tree rooted at original; the input tree
  // is left untouched. parent becomes the parent link of the returned node.
  //
  // An eligible node with exactly one child is replaced by its (optimized)
  // child, which keeps its own name but records the folded node's name,
  // position, length, choice count and choice as its original_* data.
  template <typename T>
  std::shared_ptr<T> optimize(std::shared_ptr<T> original,
                              std::shared_ptr<T> parent = nullptr) {
    const auto found =
        std::find(rules_.begin(), rules_.end(), original->name) != rules_.end();
    const bool opt = mode_ ? !found : found;

    if (opt && original->nodes.size() == 1) {
      auto child = optimize(original->nodes[0], parent);
      // Argument order must follow the copying constructor of the node type:
      // (ast, original_name, position, length, original_choice_count,
      // original_choice).
      return std::make_shared<T>(*child, original->name.data(),
                                 original->position, original->length,
                                 original->choice_count, original->choice);
    }

    auto ast = std::make_shared<T>(*original);
    ast->parent = parent;
    // The copy above also copied the unoptimized children; drop them so only
    // the optimized ones are attached.
    ast->nodes.clear();
    for (const auto &node : original->nodes) {
      ast->nodes.push_back(optimize(node, ast));
    }
    return ast;
  }

private:
  const bool mode_;
  const std::vector<std::string> rules_;
};
// Empty annotation type; Ast is AstBase instantiated with it.
3788 struct EmptyType {};
3789 using Ast = AstBase<EmptyType>;
// Installs an action on rule that builds an AST node of type T (Ast unless
// specified) for every match. Token rules produce a token node; other rules
// produce an inner node whose children are the semantic values converted to
// std::shared_ptr<T>. The position passed to the node is the distance from
// vs.ss to the start of the matched text.
// The lambda captures rule by reference.
// NOTE(review): the statements after the child loop header are not visible in
// this excerpt.
3791 template <
typename T = Ast>
void add_ast_action(Definition &rule) {
3792 rule.action = [&](
const SemanticValues &vs) {
3793 auto line = vs.line_info();
3795 if (rule.is_token()) {
3796 return std::make_shared<T>(
3797 vs.path, line.first, line.second, rule.name.data(), vs.token(),
3798 std::distance(vs.ss, vs.sv().data()), vs.sv().length(),
3799 vs.choice_count(), vs.choice());
3803 std::make_shared<T>(vs.path, line.first, line.second, rule.name.data(),
3804 vs.transform<std::shared_ptr<T>>(),
3805 std::distance(vs.ss, vs.sv().data()),
3806 vs.sv().length(), vs.choice_count(), vs.choice());
3808 for (
auto node : ast->nodes) {
// Preprocessor helpers used by AST_DEFINITIONS.
//   PEG_EXPAND(...)   expands to its arguments unchanged.
//   PEG_CONCAT(a, b)  pastes two tokens together.
//   PEG_CONCAT2(a, b) adds one level of indirection so that `a` and `b` are
//                     macro-expanded before PEG_CONCAT pastes them.
//   PEG_PICK(...)     takes 101 named parameters (a0..a100) plus a variadic
//                     tail.
//   PEG_COUNT(...)    passes its arguments followed by 100, 99, ..., 0 to
//                     PEG_PICK. NOTE(review): presumably PEG_PICK selects the
//                     parameter that lands on the argument count - its
//                     replacement text is not visible here, confirm.
3815 #define PEG_EXPAND(...) __VA_ARGS__
3816 #define PEG_CONCAT(a, b) a##b
3817 #define PEG_CONCAT2(a, b) PEG_CONCAT(a, b)
3820 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \
3821 a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \
3822 a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \
3823 a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \
3824 a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \
3825 a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \
3826 a92, a93, a94, a95, a96, a97, a98, a99, a100, ...) \
3829 #define PEG_COUNT(...) \
3830 PEG_EXPAND(PEG_PICK( \
3831 __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, \
3832 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, \
3833 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
3834 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
3835 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
3836 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
// PEG_DEF_1(r): declares a `peg::Definition` variable named `r` and installs
// the default AST-building action on it via peg::add_ast_action.
3838 #define PEG_DEF_1(r) \
3839 peg::Definition r; \
3841 peg::add_ast_action(r);
// PEG_DEF_N(r1, ...) for N = 2..100: applies PEG_DEF_1 to the first name and
// hands the remaining names to PEG_DEF_(N-1), so that N names end up as N
// PEG_DEF_1 expansions. The preprocessor has no recursion, hence one macro per
// supported argument count.
3843 #define PEG_DEF_2(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_1(__VA_ARGS__))
3844 #define PEG_DEF_3(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_2(__VA_ARGS__))
3845 #define PEG_DEF_4(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_3(__VA_ARGS__))
3846 #define PEG_DEF_5(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_4(__VA_ARGS__))
3847 #define PEG_DEF_6(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_5(__VA_ARGS__))
3848 #define PEG_DEF_7(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_6(__VA_ARGS__))
3849 #define PEG_DEF_8(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_7(__VA_ARGS__))
3850 #define PEG_DEF_9(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_8(__VA_ARGS__))
3851 #define PEG_DEF_10(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_9(__VA_ARGS__))
3852 #define PEG_DEF_11(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_10(__VA_ARGS__))
3853 #define PEG_DEF_12(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_11(__VA_ARGS__))
3854 #define PEG_DEF_13(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_12(__VA_ARGS__))
3855 #define PEG_DEF_14(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_13(__VA_ARGS__))
3856 #define PEG_DEF_15(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_14(__VA_ARGS__))
3857 #define PEG_DEF_16(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_15(__VA_ARGS__))
3858 #define PEG_DEF_17(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_16(__VA_ARGS__))
3859 #define PEG_DEF_18(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_17(__VA_ARGS__))
3860 #define PEG_DEF_19(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_18(__VA_ARGS__))
3861 #define PEG_DEF_20(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_19(__VA_ARGS__))
3862 #define PEG_DEF_21(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_20(__VA_ARGS__))
3863 #define PEG_DEF_22(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_21(__VA_ARGS__))
3864 #define PEG_DEF_23(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_22(__VA_ARGS__))
3865 #define PEG_DEF_24(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_23(__VA_ARGS__))
3866 #define PEG_DEF_25(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_24(__VA_ARGS__))
3867 #define PEG_DEF_26(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_25(__VA_ARGS__))
3868 #define PEG_DEF_27(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_26(__VA_ARGS__))
3869 #define PEG_DEF_28(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_27(__VA_ARGS__))
3870 #define PEG_DEF_29(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_28(__VA_ARGS__))
3871 #define PEG_DEF_30(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_29(__VA_ARGS__))
3872 #define PEG_DEF_31(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_30(__VA_ARGS__))
3873 #define PEG_DEF_32(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_31(__VA_ARGS__))
3874 #define PEG_DEF_33(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_32(__VA_ARGS__))
3875 #define PEG_DEF_34(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_33(__VA_ARGS__))
3876 #define PEG_DEF_35(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_34(__VA_ARGS__))
3877 #define PEG_DEF_36(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_35(__VA_ARGS__))
3878 #define PEG_DEF_37(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_36(__VA_ARGS__))
3879 #define PEG_DEF_38(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_37(__VA_ARGS__))
3880 #define PEG_DEF_39(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_38(__VA_ARGS__))
3881 #define PEG_DEF_40(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_39(__VA_ARGS__))
3882 #define PEG_DEF_41(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_40(__VA_ARGS__))
3883 #define PEG_DEF_42(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_41(__VA_ARGS__))
3884 #define PEG_DEF_43(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_42(__VA_ARGS__))
3885 #define PEG_DEF_44(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_43(__VA_ARGS__))
3886 #define PEG_DEF_45(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_44(__VA_ARGS__))
3887 #define PEG_DEF_46(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_45(__VA_ARGS__))
3888 #define PEG_DEF_47(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_46(__VA_ARGS__))
3889 #define PEG_DEF_48(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_47(__VA_ARGS__))
3890 #define PEG_DEF_49(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_48(__VA_ARGS__))
3891 #define PEG_DEF_50(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_49(__VA_ARGS__))
3892 #define PEG_DEF_51(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_50(__VA_ARGS__))
3893 #define PEG_DEF_52(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_51(__VA_ARGS__))
3894 #define PEG_DEF_53(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_52(__VA_ARGS__))
3895 #define PEG_DEF_54(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_53(__VA_ARGS__))
3896 #define PEG_DEF_55(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_54(__VA_ARGS__))
3897 #define PEG_DEF_56(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_55(__VA_ARGS__))
3898 #define PEG_DEF_57(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_56(__VA_ARGS__))
3899 #define PEG_DEF_58(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_57(__VA_ARGS__))
3900 #define PEG_DEF_59(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_58(__VA_ARGS__))
3901 #define PEG_DEF_60(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_59(__VA_ARGS__))
3902 #define PEG_DEF_61(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_60(__VA_ARGS__))
3903 #define PEG_DEF_62(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_61(__VA_ARGS__))
3904 #define PEG_DEF_63(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_62(__VA_ARGS__))
3905 #define PEG_DEF_64(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_63(__VA_ARGS__))
3906 #define PEG_DEF_65(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_64(__VA_ARGS__))
3907 #define PEG_DEF_66(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_65(__VA_ARGS__))
3908 #define PEG_DEF_67(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_66(__VA_ARGS__))
3909 #define PEG_DEF_68(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_67(__VA_ARGS__))
3910 #define PEG_DEF_69(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_68(__VA_ARGS__))
3911 #define PEG_DEF_70(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_69(__VA_ARGS__))
3912 #define PEG_DEF_71(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_70(__VA_ARGS__))
3913 #define PEG_DEF_72(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_71(__VA_ARGS__))
3914 #define PEG_DEF_73(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_72(__VA_ARGS__))
3915 #define PEG_DEF_74(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_73(__VA_ARGS__))
3916 #define PEG_DEF_75(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_74(__VA_ARGS__))
3917 #define PEG_DEF_76(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_75(__VA_ARGS__))
3918 #define PEG_DEF_77(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_76(__VA_ARGS__))
3919 #define PEG_DEF_78(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_77(__VA_ARGS__))
3920 #define PEG_DEF_79(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_78(__VA_ARGS__))
3921 #define PEG_DEF_80(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_79(__VA_ARGS__))
3922 #define PEG_DEF_81(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_80(__VA_ARGS__))
3923 #define PEG_DEF_82(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_81(__VA_ARGS__))
3924 #define PEG_DEF_83(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_82(__VA_ARGS__))
3925 #define PEG_DEF_84(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_83(__VA_ARGS__))
3926 #define PEG_DEF_85(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_84(__VA_ARGS__))
3927 #define PEG_DEF_86(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_85(__VA_ARGS__))
3928 #define PEG_DEF_87(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_86(__VA_ARGS__))
3929 #define PEG_DEF_88(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_87(__VA_ARGS__))
3930 #define PEG_DEF_89(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_88(__VA_ARGS__))
3931 #define PEG_DEF_90(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_89(__VA_ARGS__))
3932 #define PEG_DEF_91(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_90(__VA_ARGS__))
3933 #define PEG_DEF_92(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_91(__VA_ARGS__))
3934 #define PEG_DEF_93(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_92(__VA_ARGS__))
3935 #define PEG_DEF_94(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_93(__VA_ARGS__))
3936 #define PEG_DEF_95(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_94(__VA_ARGS__))
3937 #define PEG_DEF_96(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_95(__VA_ARGS__))
3938 #define PEG_DEF_97(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_96(__VA_ARGS__))
3939 #define PEG_DEF_98(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_97(__VA_ARGS__))
3940 #define PEG_DEF_99(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_98(__VA_ARGS__))
3941 #define PEG_DEF_100(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_99(__VA_ARGS__))
// AST_DEFINITIONS(a, b, ...): builds the macro name PEG_DEF_<k> by pasting
// PEG_DEF_ with the result of PEG_COUNT(...) and invokes it with the same
// argument list, i.e. it applies the PEG_DEF_ family to every listed name.
3943 #define AST_DEFINITIONS(...) \
3944 PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__))
3954 parser(
const char *s,
size_t n,
const Rules &rules) {
3955 load_grammar(s, n, rules);
3958 parser(
const char *s,
const Rules &rules) : parser(s, strlen(s), rules) {}
3960 parser(
const char *s,
size_t n) : parser(s, n, Rules()) {}
3962 parser(
const char *s) : parser(s, strlen(s), Rules()) {}
3964 operator bool() {
return grammar_ !=
nullptr; }
3966 bool load_grammar(
const char *s,
size_t n,
const Rules &rules) {
3967 grammar_ = ParserGenerator::parse(s, n, rules, start_, log);
3968 return grammar_ !=
nullptr;
3971 bool load_grammar(
const char *s,
size_t n) {
3972 return load_grammar(s, n, Rules());
3975 bool load_grammar(
const char *s,
const Rules &rules) {
3977 return load_grammar(s, n, rules);
3980 bool load_grammar(
const char *s) {
3982 return load_grammar(s, n);
3985 bool parse_n(
const char *s,
size_t n,
const char *path =
nullptr)
const {
3986 if (grammar_ !=
nullptr) {
3987 const auto &rule = (*grammar_)[start_];
3988 return post_process(s, n, rule.parse(s, n, path, log));
3993 bool parse(
const char *s,
const char *path =
nullptr)
const {
3995 return parse_n(s, n, path);
3998 bool parse_n(
const char *s,
size_t n, std::any &dt,
3999 const char *path =
nullptr)
const {
4000 if (grammar_ !=
nullptr) {
4001 const auto &rule = (*grammar_)[start_];
4002 return post_process(s, n, rule.parse(s, n, dt, path, log));
4007 bool parse(
const char *s, std::any &dt,
const char *path =
nullptr)
const {
4009 return parse_n(s, n, dt, path);
4012 template <
typename T>
4013 bool parse_n(
const char *s,
size_t n, T &val,
4014 const char *path =
nullptr)
const {
4015 if (grammar_ !=
nullptr) {
4016 const auto &rule = (*grammar_)[start_];
4017 return post_process(s, n, rule.parse_and_get_value(s, n, val, path, log));
4022 template <
typename T>
4023 bool parse(
const char *s, T &val,
const char *path =
nullptr)
const {
4025 return parse_n(s, n, val, path);
4028 template <
typename T>
4029 bool parse_n(
const char *s,
size_t n, std::any &dt, T &val,
4030 const char *path =
nullptr)
const {
4031 if (grammar_ !=
nullptr) {
4032 const auto &rule = (*grammar_)[start_];
4033 return post_process(s, n,
4034 rule.parse_and_get_value(s, n, dt, val, path, log));
4039 template <
typename T>
4040 bool parse(
const char *s, std::any &dt, T &val,
4041 const char *path =
nullptr)
const {
4043 return parse_n(s, n, dt, val, path);
4046 Definition &operator[](
const char *s) {
return (*grammar_)[s]; }
4048 const Definition &operator[](
const char *s)
const {
return (*grammar_)[s]; }
4050 std::vector<std::string> get_rule_names()
const {
4051 std::vector<std::string> rules;
4052 for (
auto &[name, _] : *grammar_) {
4053 rules.push_back(name);
4058 void enable_packrat_parsing() {
4059 if (grammar_ !=
nullptr) {
4060 auto &rule = (*grammar_)[start_];
4061 rule.enablePackratParsing =
true;
4065 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) {
4066 if (grammar_ !=
nullptr) {
4067 auto &rule = (*grammar_)[start_];
4068 rule.tracer_enter = tracer_enter;
4069 rule.tracer_leave = tracer_leave;
4073 template <
typename T = Ast> parser &enable_ast() {
4074 for (
auto &[_, rule] : *grammar_) {
4075 if (!rule.action) { add_ast_action<T>(rule); }
4080 template <
typename T> std::shared_ptr<T> optimize_ast(std::shared_ptr<T> ast,
bool opt_mode =
true)
const {
4081 return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast);
4087 bool post_process(
const char *s,
size_t n,
4088 const Definition::Result &r)
const {
4089 auto ret = r.ret && r.len == n;
4090 if (log && !ret) { r.error_info.output_log(log, s, n); }
4091 return ret && !r.recovered;
4094 std::vector<std::string> get_no_ast_opt_rules()
const {
4095 std::vector<std::string> rules;
4096 for (
auto &[name, rule] : *grammar_) {
4097 if (rule.no_ast_opt) { rules.push_back(name); }
// Grammar currently in use; assigned by load_grammar and null when
// ParserGenerator::parse did not return a grammar.
4102 std::shared_ptr<Grammar> grammar_;