Pteros  2.0
Molecular modeling library for human beings!
peglib.h
1 //
2 // peglib.h
3 //
4 // Copyright (c) 2020 Yuji Hirose. All rights reserved.
5 // MIT License
6 //
7 
8 #pragma once
9 
10 #include <algorithm>
11 #include <any>
12 #include <cassert>
13 #include <cctype>
14 #include <charconv>
15 #include <cstring>
16 #include <functional>
17 #include <initializer_list>
18 #include <iostream>
19 #include <limits>
20 #include <list>
21 #include <map>
22 #include <memory>
23 #include <mutex>
24 #include <set>
25 #include <sstream>
26 #include <string>
27 #include <unordered_map>
28 #include <vector>
29 
30 #if !defined(__cplusplus) || __cplusplus < 201703L
31 #error "Requires complete C++17 support"
32 #endif
33 
34 namespace peg {
35 
36 /*-----------------------------------------------------------------------------
37  * scope_exit
38  *---------------------------------------------------------------------------*/
39 
40 // This is based on
41 // "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".
42 
43 template <typename EF> struct scope_exit {
44  explicit scope_exit(EF &&f)
45  : exit_function(std::move(f)), execute_on_destruction{true} {}
46 
47  scope_exit(scope_exit &&rhs)
48  : exit_function(std::move(rhs.exit_function)),
49  execute_on_destruction{rhs.execute_on_destruction} {
50  rhs.release();
51  }
52 
53  ~scope_exit() {
54  if (execute_on_destruction) { this->exit_function(); }
55  }
56 
57  void release() { this->execute_on_destruction = false; }
58 
59 private:
60  scope_exit(const scope_exit &) = delete;
61  void operator=(const scope_exit &) = delete;
62  scope_exit &operator=(scope_exit &&) = delete;
63 
64  EF exit_function;
65  bool execute_on_destruction;
66 };
67 
68 /*-----------------------------------------------------------------------------
69  * UTF8 functions
70  *---------------------------------------------------------------------------*/
71 
72 inline size_t codepoint_length(const char *s8, size_t l) {
73  if (l) {
74  auto b = static_cast<uint8_t>(s8[0]);
75  if ((b & 0x80) == 0) {
76  return 1;
77  } else if ((b & 0xE0) == 0xC0 && l >= 2) {
78  return 2;
79  } else if ((b & 0xF0) == 0xE0 && l >= 3) {
80  return 3;
81  } else if ((b & 0xF8) == 0xF0 && l >= 4) {
82  return 4;
83  }
84  }
85  return 0;
86 }
87 
88 inline size_t encode_codepoint(char32_t cp, char *buff) {
89  if (cp < 0x0080) {
90  buff[0] = static_cast<char>(cp & 0x7F);
91  return 1;
92  } else if (cp < 0x0800) {
93  buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
94  buff[1] = static_cast<char>(0x80 | (cp & 0x3F));
95  return 2;
96  } else if (cp < 0xD800) {
97  buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
98  buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
99  buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
100  return 3;
101  } else if (cp < 0xE000) {
102  // D800 - DFFF is invalid...
103  return 0;
104  } else if (cp < 0x10000) {
105  buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
106  buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
107  buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
108  return 3;
109  } else if (cp < 0x110000) {
110  buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
111  buff[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
112  buff[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
113  buff[3] = static_cast<char>(0x80 | (cp & 0x3F));
114  return 4;
115  }
116  return 0;
117 }
118 
119 inline std::string encode_codepoint(char32_t cp) {
120  char buff[4];
121  auto l = encode_codepoint(cp, buff);
122  return std::string(buff, l);
123 }
124 
125 inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
126  char32_t &cp) {
127  if (l) {
128  auto b = static_cast<uint8_t>(s8[0]);
129  if ((b & 0x80) == 0) {
130  bytes = 1;
131  cp = b;
132  return true;
133  } else if ((b & 0xE0) == 0xC0) {
134  if (l >= 2) {
135  bytes = 2;
136  cp = ((static_cast<char32_t>(s8[0] & 0x1F)) << 6) |
137  (static_cast<char32_t>(s8[1] & 0x3F));
138  return true;
139  }
140  } else if ((b & 0xF0) == 0xE0) {
141  if (l >= 3) {
142  bytes = 3;
143  cp = ((static_cast<char32_t>(s8[0] & 0x0F)) << 12) |
144  ((static_cast<char32_t>(s8[1] & 0x3F)) << 6) |
145  (static_cast<char32_t>(s8[2] & 0x3F));
146  return true;
147  }
148  } else if ((b & 0xF8) == 0xF0) {
149  if (l >= 4) {
150  bytes = 4;
151  cp = ((static_cast<char32_t>(s8[0] & 0x07)) << 18) |
152  ((static_cast<char32_t>(s8[1] & 0x3F)) << 12) |
153  ((static_cast<char32_t>(s8[2] & 0x3F)) << 6) |
154  (static_cast<char32_t>(s8[3] & 0x3F));
155  return true;
156  }
157  }
158  }
159  return false;
160 }
161 
162 inline size_t decode_codepoint(const char *s8, size_t l, char32_t &out) {
163  size_t bytes;
164  if (decode_codepoint(s8, l, bytes, out)) { return bytes; }
165  return 0;
166 }
167 
168 inline char32_t decode_codepoint(const char *s8, size_t l) {
169  char32_t out = 0;
170  decode_codepoint(s8, l, out);
171  return out;
172 }
173 
174 inline std::u32string decode(const char *s8, size_t l) {
175  std::u32string out;
176  size_t i = 0;
177  while (i < l) {
178  auto beg = i++;
179  while (i < l && (s8[i] & 0xc0) == 0x80) {
180  i++;
181  }
182  out += decode_codepoint(&s8[beg], (i - beg));
183  }
184  return out;
185 }
186 
187 /*-----------------------------------------------------------------------------
188  * escape_characters
189  *---------------------------------------------------------------------------*/
190 
191 inline std::string escape_characters(const char *s, size_t n) {
192  std::string str;
193  for (size_t i = 0; i < n; i++) {
194  auto c = s[i];
195  switch (c) {
196  case '\n': str += "\\n"; break;
197  case '\r': str += "\\r"; break;
198  case '\t': str += "\\t"; break;
199  default: str += c; break;
200  }
201  }
202  return str;
203 }
204 
205 inline std::string escape_characters(std::string_view sv) {
206  return escape_characters(sv.data(), sv.size());
207 }
208 
209 /*-----------------------------------------------------------------------------
210  * resolve_escape_sequence
211  *---------------------------------------------------------------------------*/
212 
213 inline bool is_hex(char c, int &v) {
214  if ('0' <= c && c <= '9') {
215  v = c - '0';
216  return true;
217  } else if ('a' <= c && c <= 'f') {
218  v = c - 'a' + 10;
219  return true;
220  } else if ('A' <= c && c <= 'F') {
221  v = c - 'A' + 10;
222  return true;
223  }
224  return false;
225 }
226 
227 inline bool is_digit(char c, int &v) {
228  if ('0' <= c && c <= '9') {
229  v = c - '0';
230  return true;
231  }
232  return false;
233 }
234 
235 inline std::pair<int, size_t> parse_hex_number(const char *s, size_t n,
236  size_t i) {
237  int ret = 0;
238  int val;
239  while (i < n && is_hex(s[i], val)) {
240  ret = static_cast<int>(ret * 16 + val);
241  i++;
242  }
243  return std::pair(ret, i);
244 }
245 
246 inline std::pair<int, size_t> parse_octal_number(const char *s, size_t n,
247  size_t i) {
248  int ret = 0;
249  int val;
250  while (i < n && is_digit(s[i], val)) {
251  ret = static_cast<int>(ret * 8 + val);
252  i++;
253  }
254  return std::pair(ret, i);
255 }
256 
257 inline std::string resolve_escape_sequence(const char *s, size_t n) {
258  std::string r;
259  r.reserve(n);
260 
261  size_t i = 0;
262  while (i < n) {
263  auto ch = s[i];
264  if (ch == '\\') {
265  i++;
266  if (i == n) { throw std::runtime_error("Invalid escape sequence..."); }
267  switch (s[i]) {
268  case 'n':
269  r += '\n';
270  i++;
271  break;
272  case 'r':
273  r += '\r';
274  i++;
275  break;
276  case 't':
277  r += '\t';
278  i++;
279  break;
280  case '\'':
281  r += '\'';
282  i++;
283  break;
284  case '"':
285  r += '"';
286  i++;
287  break;
288  case '[':
289  r += '[';
290  i++;
291  break;
292  case ']':
293  r += ']';
294  i++;
295  break;
296  case '\\':
297  r += '\\';
298  i++;
299  break;
300  case 'x':
301  case 'u': {
302  char32_t cp;
303  std::tie(cp, i) = parse_hex_number(s, n, i + 1);
304  r += encode_codepoint(cp);
305  break;
306  }
307  default: {
308  char32_t cp;
309  std::tie(cp, i) = parse_octal_number(s, n, i);
310  r += encode_codepoint(cp);
311  break;
312  }
313  }
314  } else {
315  r += ch;
316  i++;
317  }
318  }
319  return r;
320 }
321 
322 /*-----------------------------------------------------------------------------
323  * Trie
324  *---------------------------------------------------------------------------*/
325 
326 class Trie {
327 public:
328  Trie() = default;
329  Trie(const Trie &) = default;
330 
331  Trie(const std::vector<std::string> &items) {
332  for (const auto &item : items) {
333  for (size_t len = 1; len <= item.size(); len++) {
334  auto last = len == item.size();
335  std::string_view sv(item.data(), len);
336  auto it = dic_.find(sv);
337  if (it == dic_.end()) {
338  dic_.emplace(sv, Info{last, last});
339  } else if (last) {
340  it->second.match = true;
341  } else {
342  it->second.done = false;
343  }
344  }
345  }
346  }
347 
348  size_t match(const char *text, size_t text_len) const {
349  size_t match_len = 0;
350  auto done = false;
351  size_t len = 1;
352  while (!done && len <= text_len) {
353  std::string_view sv(text, len);
354  auto it = dic_.find(sv);
355  if (it == dic_.end()) {
356  done = true;
357  } else {
358  if (it->second.match) { match_len = len; }
359  if (it->second.done) { done = true; }
360  }
361  len += 1;
362  }
363  return match_len;
364  }
365 
366 private:
367  struct Info {
368  bool done;
369  bool match;
370  };
371 
372  // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
373  // std::unordered_map<std::string, Info> dic_;
374  std::map<std::string, Info, std::less<>> dic_;
375 };
376 
377 /*-----------------------------------------------------------------------------
378  * PEG
379  *---------------------------------------------------------------------------*/
380 
381 /*
382  * Line information utility function
383  */
384 inline std::pair<size_t, size_t> line_info(const char *start, const char *cur) {
385  auto p = start;
386  auto col_ptr = p;
387  auto no = 1;
388 
389  while (p < cur) {
390  if (*p == '\n') {
391  no++;
392  col_ptr = p + 1;
393  }
394  p++;
395  }
396 
397  auto col = p - col_ptr + 1;
398 
399  return std::pair(no, col);
400 }
401 
402 /*
403  * String tag
404  */
405 inline constexpr unsigned int str2tag_core(const char *s, size_t l,
406  unsigned int h) {
407  return (l == 0) ? h
408  : str2tag_core(s + 1, l - 1,
409  (h * 33) ^ static_cast<unsigned char>(*s));
410 }
411 
412 inline constexpr unsigned int str2tag(std::string_view sv) {
413  return str2tag_core(sv.data(), sv.size(), 0);
414 }
415 
416 namespace udl {
417 
418 inline constexpr unsigned int operator"" _(const char *s, size_t l) {
419  return str2tag_core(s, l, 0);
420 }
421 
422 } // namespace udl
423 
424 /*
425  * Semantic values
426  */
427 struct SemanticValues : protected std::vector<std::any> {
428  // Input text
429  const char *path = nullptr;
430  const char *ss = nullptr;
431  const std::vector<size_t> *source_line_index = nullptr;
432 
433  // Matched string
434  std::string_view sv() const { return sv_; }
435 
436  // Definition name
437  const std::string &name() const { return name_; }
438 
439  std::vector<unsigned int> tags;
440 
441  // Line number and column at which the matched string is
442  std::pair<size_t, size_t> line_info() const {
443  const auto &idx = *source_line_index;
444 
445  auto cur = static_cast<size_t>(std::distance(ss, sv_.data()));
446  auto it = std::lower_bound(
447  idx.begin(), idx.end(), cur,
448  [](size_t element, size_t value) { return element < value; });
449 
450  auto id = static_cast<size_t>(std::distance(idx.begin(), it));
451  auto off = cur - (id == 0 ? 0 : idx[id - 1] + 1);
452  return std::pair(id + 1, off + 1);
453  }
454 
455  // Choice count
456  size_t choice_count() const { return choice_count_; }
457 
458  // Choice number (0 based index)
459  size_t choice() const { return choice_; }
460 
461  // Tokens
462  std::vector<std::string_view> tokens;
463 
464  std::string_view token(size_t id = 0) const {
465  if (tokens.empty()) { return sv_; }
466  assert(id < tokens.size());
467  return tokens[id];
468  }
469 
470  // Token conversion
471  std::string token_to_string(size_t id = 0) const {
472  return std::string(token(id));
473  }
474 
475  template <typename T> T token_to_number() const {
476  T n = 0;
477  if constexpr (std::is_floating_point<T>::value) {
478  // TODO: The following code should be removed eventually.
479  std::istringstream ss(token_to_string());
480  ss >> n;
481  return n;
482  } else {
483  auto sv = token();
484  std::from_chars(sv.data(), sv.data() + sv.size(), n);
485  return n;
486  }
487  }
488 
489  // Transform the semantic value vector to another vector
490  template <typename T>
491  std::vector<T> transform(size_t beg = 0,
492  size_t end = static_cast<size_t>(-1)) const {
493  std::vector<T> r;
494  end = (std::min)(end, size());
495  for (size_t i = beg; i < end; i++) {
496  r.emplace_back(std::any_cast<T>((*this)[i]));
497  }
498  return r;
499  }
500 
501  using std::vector<std::any>::iterator;
502  using std::vector<std::any>::const_iterator;
503  using std::vector<std::any>::size;
504  using std::vector<std::any>::empty;
505  using std::vector<std::any>::assign;
506  using std::vector<std::any>::begin;
507  using std::vector<std::any>::end;
508  using std::vector<std::any>::rbegin;
509  using std::vector<std::any>::rend;
510  using std::vector<std::any>::operator[];
511  using std::vector<std::any>::at;
512  using std::vector<std::any>::resize;
513  using std::vector<std::any>::front;
514  using std::vector<std::any>::back;
515  using std::vector<std::any>::push_back;
516  using std::vector<std::any>::pop_back;
517  using std::vector<std::any>::insert;
518  using std::vector<std::any>::erase;
519  using std::vector<std::any>::clear;
520  using std::vector<std::any>::swap;
521  using std::vector<std::any>::emplace;
522  using std::vector<std::any>::emplace_back;
523 
524 private:
525  friend class Context;
526  friend class Sequence;
527  friend class PrioritizedChoice;
528  friend class Holder;
529  friend class PrecedenceClimbing;
530 
531  std::string_view sv_;
532  size_t choice_count_ = 0;
533  size_t choice_ = 0;
534  std::string name_;
535 };
536 
537 /*
538  * Semantic action
539  */
540 template <typename F, typename... Args> std::any call(F fn, Args &&... args) {
541  using R = decltype(fn(std::forward<Args>(args)...));
542  if constexpr (std::is_void<R>::value) {
543  fn(std::forward<Args>(args)...);
544  return std::any();
545  } else if constexpr (std::is_same<typename std::remove_cv<R>::type,
546  std::any>::value) {
547  return fn(std::forward<Args>(args)...);
548  } else {
549  return std::any(fn(std::forward<Args>(args)...));
550  }
551 }
552 
553 template <typename T>
554 struct argument_count : argument_count<decltype(&T::operator())> {};
555 template <typename R, typename... Args>
556 struct argument_count<R (*)(Args...)>
557  : std::integral_constant<unsigned, sizeof...(Args)> {};
558 template <typename R, typename C, typename... Args>
559 struct argument_count<R (C::*)(Args...)>
560  : std::integral_constant<unsigned, sizeof...(Args)> {};
561 template <typename R, typename C, typename... Args>
562 struct argument_count<R (C::*)(Args...) const>
563  : std::integral_constant<unsigned, sizeof...(Args)> {};
564 
565 class Action {
566 public:
567  Action() = default;
568  Action(Action &&rhs) = default;
569  template <typename F> Action(F fn) : fn_(make_adaptor(fn)) {}
570  template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
571  Action &operator=(const Action &rhs) = default;
572 
573  operator bool() const { return bool(fn_); }
574 
575  std::any operator()(SemanticValues &vs, std::any &dt) const {
576  return fn_(vs, dt);
577  }
578 
579 private:
580  using Fty = std::function<std::any(SemanticValues &vs, std::any &dt)>;
581 
582  template <typename F> Fty make_adaptor(F fn) {
583  if constexpr (argument_count<F>::value == 1) {
584  return [fn](auto &vs, auto & /*dt*/) { return call(fn, vs); };
585  } else {
586  return [fn](auto &vs, auto &dt) { return call(fn, vs, dt); };
587  }
588  }
589 
590  Fty fn_;
591 };
592 
593 /*
594  * Semantic predicate
595  */
596 // Note: 'parse_error' exception class should be be used in sematic action
597 // handlers to reject the rule.
598 struct parse_error {
599  parse_error() = default;
600  parse_error(const char *s) : s_(s) {}
601  const char *what() const { return s_.empty() ? nullptr : s_.data(); }
602 
603 private:
604  std::string s_;
605 };
606 
607 /*
608  * Parse result helper
609  */
610 inline bool success(size_t len) { return len != static_cast<size_t>(-1); }
611 
612 inline bool fail(size_t len) { return len == static_cast<size_t>(-1); }
613 
614 /*
615  * Log
616  */
617 using Log = std::function<void(size_t, size_t, const std::string &)>;
618 
619 /*
620  * ErrorInfo
621  */
622 struct ErrorInfo {
623  const char *error_pos = nullptr;
624  std::vector<std::pair<const char *, bool>> expected_tokens;
625  const char *message_pos = nullptr;
626  std::string message;
627 
628  void clear() {
629  error_pos = nullptr;
630  expected_tokens.clear();
631  message_pos = nullptr;
632  message.clear();
633  }
634 
635  void add(const char *token, bool is_literal) {
636  for (const auto &x : expected_tokens) {
637  if (x.first == token && x.second == is_literal) { return; }
638  }
639  expected_tokens.push_back(std::make_pair(token, is_literal));
640  }
641 
642  void output_log(const Log &log, const char *s, size_t n) const {
643  if (message_pos) {
644  auto line = line_info(s, message_pos);
645  std::string msg;
646  if (auto unexpected_token = heuristic_error_token(s, n, message_pos);
647  !unexpected_token.empty()) {
648  msg = replace_all(message, "%t", unexpected_token);
649  } else {
650  msg = message;
651  }
652  log(line.first, line.second, msg);
653  } else if (error_pos) {
654  auto line = line_info(s, error_pos);
655 
656  std::string msg;
657  if (expected_tokens.empty()) {
658  msg = "syntax error.";
659  } else {
660  msg = "syntax error";
661 
662  // unexpected token
663  if (auto unexpected_token = heuristic_error_token(s, n, error_pos);
664  !unexpected_token.empty()) {
665  msg += ", unexpected '";
666  msg += unexpected_token;
667  msg += "'";
668  }
669 
670  auto first_item = true;
671  size_t i = 0;
672  while (i < expected_tokens.size()) {
673  auto [token, is_literal] =
674  expected_tokens[expected_tokens.size() - i - 1];
675 
676  // Skip rules start with '_'
677  if (!is_literal && token[0] != '_') {
678  msg += (first_item ? ", expecting " : ", ");
679  if (is_literal) {
680  msg += "'";
681  msg += token;
682  msg += "'";
683  } else {
684  msg += "<";
685  msg += token;
686  msg += ">";
687  }
688  first_item = false;
689  }
690 
691  i++;
692  }
693  msg += ".";
694  }
695 
696  log(line.first, line.second, msg);
697  }
698  }
699 
700 private:
701  std::string heuristic_error_token(const char *s, size_t n,
702  const char *error_pos) const {
703  auto len = n - std::distance(s, error_pos);
704  if (len) {
705  size_t i = 0;
706  int c = error_pos[i++];
707  if (!std::ispunct(c) && !std::isspace(c)) {
708  while (i < len && !std::ispunct(error_pos[i]) &&
709  !std::isspace(error_pos[i])) {
710  i++;
711  }
712  }
713  return escape_characters(error_pos, std::min<size_t>(i, 8));
714  }
715  return std::string();
716  }
717 
718  std::string replace_all(std::string str, const std::string &from,
719  const std::string &to) const {
720  size_t pos = 0;
721  while ((pos = str.find(from, pos)) != std::string::npos) {
722  str.replace(pos, from.length(), to);
723  pos += to.length();
724  }
725  return str;
726  }
727 };
728 
729 /*
730  * Context
731  */
732 class Context;
733 class Ope;
734 class Definition;
735 
736 using TracerEnter = std::function<void(const Ope &name, const char *s, size_t n,
737  const SemanticValues &vs,
738  const Context &c, const std::any &dt)>;
739 
740 using TracerLeave = std::function<void(
741  const Ope &ope, const char *s, size_t n, const SemanticValues &vs,
742  const Context &c, const std::any &dt, size_t)>;
743 
744 class Context {
745 public:
746  const char *path;
747  const char *s;
748  const size_t l;
749  std::vector<size_t> source_line_index;
750 
751  ErrorInfo error_info;
752  bool recovered = false;
753 
754  std::vector<std::shared_ptr<SemanticValues>> value_stack;
755  size_t value_stack_size = 0;
756 
757  std::vector<Definition *> rule_stack;
758  std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
759 
760  size_t in_token_boundary_count = 0;
761 
762  std::shared_ptr<Ope> whitespaceOpe;
763  bool in_whitespace = false;
764 
765  std::shared_ptr<Ope> wordOpe;
766 
767  std::vector<std::map<std::string_view, std::string>> capture_scope_stack;
768  size_t capture_scope_stack_size = 0;
769 
770  const size_t def_count;
771  const bool enablePackratParsing;
772  std::vector<bool> cache_registered;
773  std::vector<bool> cache_success;
774 
775  std::map<std::pair<size_t, size_t>, std::tuple<size_t, std::any>>
776  cache_values;
777 
778  TracerEnter tracer_enter;
779  TracerLeave tracer_leave;
780 
781  Log log;
782 
783  Context(const char *path, const char *s, size_t l, size_t def_count,
784  std::shared_ptr<Ope> whitespaceOpe, std::shared_ptr<Ope> wordOpe,
785  bool enablePackratParsing, TracerEnter tracer_enter,
786  TracerLeave tracer_leave, Log log)
787  : path(path), s(s), l(l), whitespaceOpe(whitespaceOpe), wordOpe(wordOpe),
788  def_count(def_count), enablePackratParsing(enablePackratParsing),
789  cache_registered(enablePackratParsing ? def_count * (l + 1) : 0),
790  cache_success(enablePackratParsing ? def_count * (l + 1) : 0),
791  tracer_enter(tracer_enter), tracer_leave(tracer_leave), log(log) {
792 
793  for (size_t pos = 0; pos < l; pos++) {
794  if (s[pos] == '\n') { source_line_index.push_back(pos); }
795  }
796  source_line_index.push_back(l);
797 
798  args_stack.resize(1);
799 
800  push_capture_scope();
801  }
802 
803  ~Context() { assert(!value_stack_size); }
804 
805  Context(const Context &) = delete;
806  Context(Context &&) = delete;
807  Context operator=(const Context &) = delete;
808 
809  template <typename T>
810  void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val,
811  T fn) {
812  if (!enablePackratParsing) {
813  fn(val);
814  return;
815  }
816 
817  auto col = a_s - s;
818  auto idx = def_count * static_cast<size_t>(col) + def_id;
819 
820  if (cache_registered[idx]) {
821  if (cache_success[idx]) {
822  auto key = std::pair(col, def_id);
823  std::tie(len, val) = cache_values[key];
824  return;
825  } else {
826  len = static_cast<size_t>(-1);
827  return;
828  }
829  } else {
830  fn(val);
831  cache_registered[idx] = true;
832  cache_success[idx] = success(len);
833  if (success(len)) {
834  auto key = std::pair(col, def_id);
835  cache_values[key] = std::pair(len, val);
836  }
837  return;
838  }
839  }
840 
841  SemanticValues &push() {
842  assert(value_stack_size <= value_stack.size());
843  if (value_stack_size == value_stack.size()) {
844  value_stack.emplace_back(std::make_shared<SemanticValues>());
845  } else {
846  auto &vs = *value_stack[value_stack_size];
847  if (!vs.empty()) {
848  vs.clear();
849  if (!vs.tags.empty()) { vs.tags.clear(); }
850  }
851  vs.sv_ = std::string_view();
852  vs.choice_count_ = 0;
853  vs.choice_ = 0;
854  if (!vs.tokens.empty()) { vs.tokens.clear(); }
855  }
856 
857  auto &vs = *value_stack[value_stack_size++];
858  vs.path = path;
859  vs.ss = s;
860  vs.source_line_index = &source_line_index;
861  return vs;
862  }
863 
864  void pop() { value_stack_size--; }
865 
866  void push_args(std::vector<std::shared_ptr<Ope>> &&args) {
867  args_stack.emplace_back(args);
868  }
869 
870  void pop_args() { args_stack.pop_back(); }
871 
872  const std::vector<std::shared_ptr<Ope>> &top_args() const {
873  return args_stack[args_stack.size() - 1];
874  }
875 
876  void push_capture_scope() {
877  assert(capture_scope_stack_size <= capture_scope_stack.size());
878  if (capture_scope_stack_size == capture_scope_stack.size()) {
879  capture_scope_stack.emplace_back(
880  std::map<std::string_view, std::string>());
881  } else {
882  auto &cs = capture_scope_stack[capture_scope_stack_size];
883  if (!cs.empty()) { cs.clear(); }
884  }
885  capture_scope_stack_size++;
886  }
887 
888  void pop_capture_scope() { capture_scope_stack_size--; }
889 
890  void shift_capture_values() {
891  assert(capture_scope_stack.size() >= 2);
892  auto curr = &capture_scope_stack[capture_scope_stack_size - 1];
893  auto prev = curr - 1;
894  for (const auto &kv : *curr) {
895  (*prev)[kv.first] = kv.second;
896  }
897  }
898 
899  void set_error_pos(const char *a_s, const char *literal = nullptr);
900 
901  // void trace_enter(const char *name, const char *a_s, size_t n,
902  void trace_enter(const Ope &ope, const char *a_s, size_t n,
903  SemanticValues &vs, std::any &dt) const;
904  // void trace_leave(const char *name, const char *a_s, size_t n,
905  void trace_leave(const Ope &ope, const char *a_s, size_t n,
906  SemanticValues &vs, std::any &dt, size_t len) const;
907  bool is_traceable(const Ope &ope) const;
908 
909  mutable size_t next_trace_id = 0;
910  mutable std::list<size_t> trace_ids;
911 };
912 
913 /*
914  * Parser operators
915  */
916 class Ope {
917 public:
918  struct Visitor;
919 
920  virtual ~Ope() {}
921  size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c,
922  std::any &dt) const;
923  virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs,
924  Context &c, std::any &dt) const = 0;
925  virtual void accept(Visitor &v) = 0;
926 };
927 
928 class Sequence : public Ope {
929 public:
930  template <typename... Args>
931  Sequence(const Args &... args)
932  : opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
933  Sequence(const std::vector<std::shared_ptr<Ope>> &opes) : opes_(opes) {}
934  Sequence(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
935 
936  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
937  std::any &dt) const override {
938  auto &chldsv = c.push();
939  auto pop_se = scope_exit([&]() { c.pop(); });
940  size_t i = 0;
941  for (const auto &ope : opes_) {
942  const auto &rule = *ope;
943  auto len = rule.parse(s + i, n - i, chldsv, c, dt);
944  if (fail(len)) { return len; }
945  i += len;
946  }
947  if (!chldsv.empty()) {
948  for (size_t j = 0; j < chldsv.size(); j++) {
949  vs.emplace_back(std::move(chldsv[j]));
950  }
951  }
952  if (!chldsv.tags.empty()) {
953  for (size_t j = 0; j < chldsv.tags.size(); j++) {
954  vs.tags.emplace_back(std::move(chldsv.tags[j]));
955  }
956  }
957  vs.sv_ = chldsv.sv_;
958  if (!chldsv.tokens.empty()) {
959  for (size_t j = 0; j < chldsv.tokens.size(); j++) {
960  vs.tokens.emplace_back(std::move(chldsv.tokens[j]));
961  }
962  }
963  return i;
964  }
965 
966  void accept(Visitor &v) override;
967 
968  std::vector<std::shared_ptr<Ope>> opes_;
969 };
970 
971 class PrioritizedChoice : public Ope {
972 public:
973  template <typename... Args>
974  PrioritizedChoice(const Args &... args)
975  : opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
976  PrioritizedChoice(const std::vector<std::shared_ptr<Ope>> &opes)
977  : opes_(opes) {}
978  PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
979 
980  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
981  std::any &dt) const override {
982  size_t id = 0;
983  for (const auto &ope : opes_) {
984  auto &chldsv = c.push();
985  c.push_capture_scope();
986  auto se = scope_exit([&]() {
987  c.pop();
988  c.pop_capture_scope();
989  });
990 
991  auto len = ope->parse(s, n, chldsv, c, dt);
992  if (success(len)) {
993  if (!chldsv.empty()) {
994  for (size_t i = 0; i < chldsv.size(); i++) {
995  vs.emplace_back(std::move(chldsv[i]));
996  }
997  }
998  if (!chldsv.tags.empty()) {
999  for (size_t i = 0; i < chldsv.tags.size(); i++) {
1000  vs.tags.emplace_back(std::move(chldsv.tags[i]));
1001  }
1002  }
1003  vs.sv_ = chldsv.sv_;
1004  vs.choice_count_ = opes_.size();
1005  vs.choice_ = id;
1006  if (!chldsv.tokens.empty()) {
1007  for (size_t i = 0; i < chldsv.tokens.size(); i++) {
1008  vs.tokens.emplace_back(std::move(chldsv.tokens[i]));
1009  }
1010  }
1011 
1012  c.shift_capture_values();
1013  return len;
1014  }
1015 
1016  id++;
1017  }
1018  return static_cast<size_t>(-1);
1019  }
1020 
1021  void accept(Visitor &v) override;
1022 
1023  size_t size() const { return opes_.size(); }
1024 
1025  std::vector<std::shared_ptr<Ope>> opes_;
1026 };
1027 
1028 class Repetition : public Ope {
1029 public:
1030  Repetition(const std::shared_ptr<Ope> &ope, size_t min, size_t max)
1031  : ope_(ope), min_(min), max_(max) {}
1032 
1033  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1034  std::any &dt) const override {
1035  size_t count = 0;
1036  size_t i = 0;
1037  while (count < min_) {
1038  c.push_capture_scope();
1039  auto se = scope_exit([&]() { c.pop_capture_scope(); });
1040  const auto &rule = *ope_;
1041  auto len = rule.parse(s + i, n - i, vs, c, dt);
1042  if (success(len)) {
1043  c.shift_capture_values();
1044  } else {
1045  return len;
1046  }
1047  i += len;
1048  count++;
1049  }
1050 
1051  while (n - i > 0 && count < max_) {
1052  c.push_capture_scope();
1053  auto se = scope_exit([&]() { c.pop_capture_scope(); });
1054  auto save_sv_size = vs.size();
1055  auto save_tok_size = vs.tokens.size();
1056  const auto &rule = *ope_;
1057  auto len = rule.parse(s + i, n - i, vs, c, dt);
1058  if (success(len)) {
1059  c.shift_capture_values();
1060  } else {
1061  if (vs.size() != save_sv_size) {
1062  vs.erase(vs.begin() + static_cast<std::ptrdiff_t>(save_sv_size));
1063  vs.tags.erase(vs.tags.begin() +
1064  static_cast<std::ptrdiff_t>(save_sv_size));
1065  }
1066  if (vs.tokens.size() != save_tok_size) {
1067  vs.tokens.erase(vs.tokens.begin() +
1068  static_cast<std::ptrdiff_t>(save_tok_size));
1069  }
1070  break;
1071  }
1072  i += len;
1073  count++;
1074  }
1075  return i;
1076  }
1077 
1078  void accept(Visitor &v) override;
1079 
1080  bool is_zom() const {
1081  return min_ == 0 && max_ == std::numeric_limits<size_t>::max();
1082  }
1083 
1084  static std::shared_ptr<Repetition> zom(const std::shared_ptr<Ope> &ope) {
1085  return std::make_shared<Repetition>(ope, 0,
1086  std::numeric_limits<size_t>::max());
1087  }
1088 
1089  static std::shared_ptr<Repetition> oom(const std::shared_ptr<Ope> &ope) {
1090  return std::make_shared<Repetition>(ope, 1,
1091  std::numeric_limits<size_t>::max());
1092  }
1093 
1094  static std::shared_ptr<Repetition> opt(const std::shared_ptr<Ope> &ope) {
1095  return std::make_shared<Repetition>(ope, 0, 1);
1096  }
1097 
1098  std::shared_ptr<Ope> ope_;
1099  size_t min_;
1100  size_t max_;
1101 };
1102 
1103 class AndPredicate : public Ope {
1104 public:
1105  AndPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1106 
1107  size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1108  Context &c, std::any &dt) const override {
1109  auto &chldsv = c.push();
1110  c.push_capture_scope();
1111  auto se = scope_exit([&]() {
1112  c.pop();
1113  c.pop_capture_scope();
1114  });
1115  const auto &rule = *ope_;
1116  auto len = rule.parse(s, n, chldsv, c, dt);
1117  if (success(len)) {
1118  return 0;
1119  } else {
1120  return len;
1121  }
1122  }
1123 
1124  void accept(Visitor &v) override;
1125 
1126  std::shared_ptr<Ope> ope_;
1127 };
1128 
1129 class NotPredicate : public Ope {
1130 public:
1131  NotPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1132 
1133  size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1134  Context &c, std::any &dt) const override {
1135  auto &chldsv = c.push();
1136  c.push_capture_scope();
1137  auto se = scope_exit([&]() {
1138  c.pop();
1139  c.pop_capture_scope();
1140  });
1141  auto len = ope_->parse(s, n, chldsv, c, dt);
1142  if (success(len)) {
1143  c.set_error_pos(s);
1144  return static_cast<size_t>(-1);
1145  } else {
1146  return 0;
1147  }
1148  }
1149 
1150  void accept(Visitor &v) override;
1151 
1152  std::shared_ptr<Ope> ope_;
1153 };
1154 
1155 class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
1156 public:
1157  Dictionary(const std::vector<std::string> &v) : trie_(v) {}
1158 
1159  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1160  std::any &dt) const override;
1161 
1162  void accept(Visitor &v) override;
1163 
1164  Trie trie_;
1165 };
1166 
1167 class LiteralString : public Ope,
1168  public std::enable_shared_from_this<LiteralString> {
1169 public:
1170  LiteralString(std::string &&s, bool ignore_case)
1171  : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1172 
1173  LiteralString(const std::string &s, bool ignore_case)
1174  : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1175 
1176  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1177  std::any &dt) const override;
1178 
1179  void accept(Visitor &v) override;
1180 
1181  std::string lit_;
1182  bool ignore_case_;
1183  mutable std::once_flag init_is_word_;
1184  mutable bool is_word_;
1185 };
1186 
1187 class CharacterClass : public Ope,
1188  public std::enable_shared_from_this<CharacterClass> {
1189 public:
1190  CharacterClass(const std::string &s, bool negated) : negated_(negated) {
1191  auto chars = decode(s.data(), s.length());
1192  auto i = 0u;
1193  while (i < chars.size()) {
1194  if (i + 2 < chars.size() && chars[i + 1] == '-') {
1195  auto cp1 = chars[i];
1196  auto cp2 = chars[i + 2];
1197  ranges_.emplace_back(std::pair(cp1, cp2));
1198  i += 3;
1199  } else {
1200  auto cp = chars[i];
1201  ranges_.emplace_back(std::pair(cp, cp));
1202  i += 1;
1203  }
1204  }
1205  assert(!ranges_.empty());
1206  }
1207 
1208  CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1209  bool negated)
1210  : ranges_(ranges), negated_(negated) {
1211  assert(!ranges_.empty());
1212  }
1213 
1214  size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1215  Context &c, std::any & /*dt*/) const override {
1216  if (n < 1) {
1217  c.set_error_pos(s);
1218  return static_cast<size_t>(-1);
1219  }
1220 
1221  char32_t cp = 0;
1222  auto len = decode_codepoint(s, n, cp);
1223 
1224  for (const auto &range : ranges_) {
1225  if (range.first <= cp && cp <= range.second) {
1226  if (negated_) {
1227  c.set_error_pos(s);
1228  return static_cast<size_t>(-1);
1229  } else {
1230  return len;
1231  }
1232  }
1233  }
1234 
1235  if (negated_) {
1236  return len;
1237  } else {
1238  c.set_error_pos(s);
1239  return static_cast<size_t>(-1);
1240  }
1241  }
1242 
1243  void accept(Visitor &v) override;
1244 
1245  std::vector<std::pair<char32_t, char32_t>> ranges_;
1246  bool negated_;
1247 };
1248 
1249 class Character : public Ope, public std::enable_shared_from_this<Character> {
1250 public:
1251  Character(char ch) : ch_(ch) {}
1252 
1253  size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1254  Context &c, std::any & /*dt*/) const override {
1255  if (n < 1 || s[0] != ch_) {
1256  c.set_error_pos(s);
1257  return static_cast<size_t>(-1);
1258  }
1259  return 1;
1260  }
1261 
1262  void accept(Visitor &v) override;
1263 
1264  char ch_;
1265 };
1266 
1267 class AnyCharacter : public Ope,
1268  public std::enable_shared_from_this<AnyCharacter> {
1269 public:
1270  size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1271  Context &c, std::any & /*dt*/) const override {
1272  auto len = codepoint_length(s, n);
1273  if (len < 1) {
1274  c.set_error_pos(s);
1275  return static_cast<size_t>(-1);
1276  }
1277  return len;
1278  }
1279 
1280  void accept(Visitor &v) override;
1281 };
1282 
1283 class CaptureScope : public Ope {
1284 public:
1285  CaptureScope(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1286 
1287  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1288  std::any &dt) const override {
1289  c.push_capture_scope();
1290  auto se = scope_exit([&]() { c.pop_capture_scope(); });
1291  const auto &rule = *ope_;
1292  auto len = rule.parse(s, n, vs, c, dt);
1293  return len;
1294  }
1295 
1296  void accept(Visitor &v) override;
1297 
1298  std::shared_ptr<Ope> ope_;
1299 };
1300 
1301 class Capture : public Ope {
1302 public:
1303  using MatchAction = std::function<void(const char *s, size_t n, Context &c)>;
1304 
1305  Capture(const std::shared_ptr<Ope> &ope, MatchAction ma)
1306  : ope_(ope), match_action_(ma) {}
1307 
1308  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1309  std::any &dt) const override {
1310  const auto &rule = *ope_;
1311  auto len = rule.parse(s, n, vs, c, dt);
1312  if (success(len) && match_action_) { match_action_(s, len, c); }
1313  return len;
1314  }
1315 
1316  void accept(Visitor &v) override;
1317 
1318  std::shared_ptr<Ope> ope_;
1319  MatchAction match_action_;
1320 };
1321 
1322 class TokenBoundary : public Ope {
1323 public:
1324  TokenBoundary(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1325 
1326  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1327  std::any &dt) const override;
1328 
1329  void accept(Visitor &v) override;
1330 
1331  std::shared_ptr<Ope> ope_;
1332 };
1333 
1334 class Ignore : public Ope {
1335 public:
1336  Ignore(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1337 
1338  size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1339  Context &c, std::any &dt) const override {
1340  const auto &rule = *ope_;
1341  auto &chldsv = c.push();
1342  auto se = scope_exit([&]() { c.pop(); });
1343  return rule.parse(s, n, chldsv, c, dt);
1344  }
1345 
1346  void accept(Visitor &v) override;
1347 
1348  std::shared_ptr<Ope> ope_;
1349 };
1350 
1351 using Parser = std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1352  std::any &dt)>;
1353 
1354 class User : public Ope {
1355 public:
1356  User(Parser fn) : fn_(fn) {}
1357  size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1358  Context & /*c*/, std::any &dt) const override {
1359  assert(fn_);
1360  return fn_(s, n, vs, dt);
1361  }
1362  void accept(Visitor &v) override;
1363  std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1364  std::any &dt)>
1365  fn_;
1366 };
1367 
1368 class WeakHolder : public Ope {
1369 public:
1370  WeakHolder(const std::shared_ptr<Ope> &ope) : weak_(ope) {}
1371 
1372  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1373  std::any &dt) const override {
1374  auto ope = weak_.lock();
1375  assert(ope);
1376  const auto &rule = *ope;
1377  return rule.parse(s, n, vs, c, dt);
1378  }
1379 
1380  void accept(Visitor &v) override;
1381 
1382  std::weak_ptr<Ope> weak_;
1383 };
1384 
1385 class Holder : public Ope {
1386 public:
1387  Holder(Definition *outer) : outer_(outer) {}
1388 
1389  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1390  std::any &dt) const override;
1391 
1392  void accept(Visitor &v) override;
1393 
1394  std::any reduce(SemanticValues &vs, std::any &dt) const;
1395 
1396  const char *trace_name() const;
1397 
1398  std::shared_ptr<Ope> ope_;
1399  Definition *outer_;
1400  mutable std::string trace_name_;
1401 
1402  friend class Definition;
1403 };
1404 
1405 using Grammar = std::unordered_map<std::string, Definition>;
1406 
1407 class Reference : public Ope, public std::enable_shared_from_this<Reference> {
1408 public:
1409  Reference(const Grammar &grammar, const std::string &name, const char *s,
1410  bool is_macro, const std::vector<std::shared_ptr<Ope>> &args)
1411  : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args),
1412  rule_(nullptr), iarg_(0) {}
1413 
1414  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1415  std::any &dt) const override;
1416 
1417  void accept(Visitor &v) override;
1418 
1419  std::shared_ptr<Ope> get_core_operator() const;
1420 
1421  const Grammar &grammar_;
1422  const std::string name_;
1423  const char *s_;
1424 
1425  const bool is_macro_;
1426  const std::vector<std::shared_ptr<Ope>> args_;
1427 
1428  Definition *rule_;
1429  size_t iarg_;
1430 };
1431 
1432 class Whitespace : public Ope {
1433 public:
1434  Whitespace(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1435 
1436  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1437  std::any &dt) const override {
1438  if (c.in_whitespace) { return 0; }
1439  c.in_whitespace = true;
1440  auto se = scope_exit([&]() { c.in_whitespace = false; });
1441  const auto &rule = *ope_;
1442  return rule.parse(s, n, vs, c, dt);
1443  }
1444 
1445  void accept(Visitor &v) override;
1446 
1447  std::shared_ptr<Ope> ope_;
1448 };
1449 
1450 class BackReference : public Ope {
1451 public:
1452  BackReference(std::string &&name) : name_(name) {}
1453 
1454  BackReference(const std::string &name) : name_(name) {}
1455 
1456  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1457  std::any &dt) const override;
1458 
1459  void accept(Visitor &v) override;
1460 
1461  std::string name_;
1462 };
1463 
1464 class PrecedenceClimbing : public Ope {
1465 public:
1466  using BinOpeInfo = std::map<std::string_view, std::pair<size_t, char>>;
1467 
1468  PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
1469  const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
1470  const Definition &rule)
1471  : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
1472 
1473  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1474  std::any &dt) const override {
1475  return parse_expression(s, n, vs, c, dt, 0);
1476  }
1477 
1478  void accept(Visitor &v) override;
1479 
1480  std::shared_ptr<Ope> atom_;
1481  std::shared_ptr<Ope> binop_;
1482  BinOpeInfo info_;
1483  const Definition &rule_;
1484 
1485 private:
1486  size_t parse_expression(const char *s, size_t n, SemanticValues &vs,
1487  Context &c, std::any &dt, size_t min_prec) const;
1488 
1489  Definition &get_reference_for_binop(Context &c) const;
1490 };
1491 
1492 class Recovery : public Ope {
1493 public:
1494  Recovery(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1495 
1496  size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1497  std::any &dt) const override;
1498 
1499  void accept(Visitor &v) override;
1500 
1501  std::shared_ptr<Ope> ope_;
1502 };
1503 
1504 /*
1505  * Factories
1506  */
1507 template <typename... Args> std::shared_ptr<Ope> seq(Args &&... args) {
1508  return std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
1509 }
1510 
1511 template <typename... Args> std::shared_ptr<Ope> cho(Args &&... args) {
1512  return std::make_shared<PrioritizedChoice>(
1513  static_cast<std::shared_ptr<Ope>>(args)...);
1514 }
1515 
1516 inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope> &ope) {
1517  return Repetition::zom(ope);
1518 }
1519 
1520 inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope> &ope) {
1521  return Repetition::oom(ope);
1522 }
1523 
1524 inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope> &ope) {
1525  return Repetition::opt(ope);
1526 }
1527 
1528 inline std::shared_ptr<Ope> rep(const std::shared_ptr<Ope> &ope, size_t min,
1529  size_t max) {
1530  return std::make_shared<Repetition>(ope, min, max);
1531 }
1532 
1533 inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope> &ope) {
1534  return std::make_shared<AndPredicate>(ope);
1535 }
1536 
1537 inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
1538  return std::make_shared<NotPredicate>(ope);
1539 }
1540 
1541 inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v) {
1542  return std::make_shared<Dictionary>(v);
1543 }
1544 
1545 inline std::shared_ptr<Ope> lit(std::string &&s) {
1546  return std::make_shared<LiteralString>(s, false);
1547 }
1548 
1549 inline std::shared_ptr<Ope> liti(std::string &&s) {
1550  return std::make_shared<LiteralString>(s, true);
1551 }
1552 
1553 inline std::shared_ptr<Ope> cls(const std::string &s) {
1554  return std::make_shared<CharacterClass>(s, false);
1555 }
1556 
1557 inline std::shared_ptr<Ope>
1558 cls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1559  return std::make_shared<CharacterClass>(ranges, false);
1560 }
1561 
1562 inline std::shared_ptr<Ope> ncls(const std::string &s) {
1563  return std::make_shared<CharacterClass>(s, true);
1564 }
1565 
1566 inline std::shared_ptr<Ope>
1567 ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1568  return std::make_shared<CharacterClass>(ranges, true);
1569 }
1570 
1571 inline std::shared_ptr<Ope> chr(char dt) {
1572  return std::make_shared<Character>(dt);
1573 }
1574 
1575 inline std::shared_ptr<Ope> dot() { return std::make_shared<AnyCharacter>(); }
1576 
1577 inline std::shared_ptr<Ope> csc(const std::shared_ptr<Ope> &ope) {
1578  return std::make_shared<CaptureScope>(ope);
1579 }
1580 
1581 inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope> &ope,
1582  Capture::MatchAction ma) {
1583  return std::make_shared<Capture>(ope, ma);
1584 }
1585 
1586 inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope> &ope) {
1587  return std::make_shared<TokenBoundary>(ope);
1588 }
1589 
1590 inline std::shared_ptr<Ope> ign(const std::shared_ptr<Ope> &ope) {
1591  return std::make_shared<Ignore>(ope);
1592 }
1593 
1594 inline std::shared_ptr<Ope>
1595 usr(std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1596  std::any &dt)>
1597  fn) {
1598  return std::make_shared<User>(fn);
1599 }
1600 
1601 inline std::shared_ptr<Ope> ref(const Grammar &grammar, const std::string &name,
1602  const char *s, bool is_macro,
1603  const std::vector<std::shared_ptr<Ope>> &args) {
1604  return std::make_shared<Reference>(grammar, name, s, is_macro, args);
1605 }
1606 
1607 inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope> &ope) {
1608  return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
1609 }
1610 
1611 inline std::shared_ptr<Ope> bkr(std::string &&name) {
1612  return std::make_shared<BackReference>(name);
1613 }
1614 
1615 inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
1616  const std::shared_ptr<Ope> &binop,
1617  const PrecedenceClimbing::BinOpeInfo &info,
1618  const Definition &rule) {
1619  return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
1620 }
1621 
1622 inline std::shared_ptr<Ope> rec(const std::shared_ptr<Ope> &ope) {
1623  return std::make_shared<Recovery>(ope);
1624 }
1625 
1626 /*
1627  * Visitor
1628  */
1629 struct Ope::Visitor {
1630  virtual ~Visitor() {}
1631  virtual void visit(Sequence &) {}
1632  virtual void visit(PrioritizedChoice &) {}
1633  virtual void visit(Repetition &) {}
1634  virtual void visit(AndPredicate &) {}
1635  virtual void visit(NotPredicate &) {}
1636  virtual void visit(Dictionary &) {}
1637  virtual void visit(LiteralString &) {}
1638  virtual void visit(CharacterClass &) {}
1639  virtual void visit(Character &) {}
1640  virtual void visit(AnyCharacter &) {}
1641  virtual void visit(CaptureScope &) {}
1642  virtual void visit(Capture &) {}
1643  virtual void visit(TokenBoundary &) {}
1644  virtual void visit(Ignore &) {}
1645  virtual void visit(User &) {}
1646  virtual void visit(WeakHolder &) {}
1647  virtual void visit(Holder &) {}
1648  virtual void visit(Reference &) {}
1649  virtual void visit(Whitespace &) {}
1650  virtual void visit(BackReference &) {}
1651  virtual void visit(PrecedenceClimbing &) {}
1652  virtual void visit(Recovery &) {}
1653 };
1654 
1655 struct IsReference : public Ope::Visitor {
1656  void visit(Reference &) override { is_reference_ = true; }
1657 
1658  static bool check(Ope &ope) {
1659  IsReference vis;
1660  ope.accept(vis);
1661  return vis.is_reference_;
1662  }
1663 
1664 private:
1665  bool is_reference_ = false;
1666 };
1667 
1668 struct TraceOpeName : public Ope::Visitor {
1669  void visit(Sequence &) override { name_ = "Sequence"; }
1670  void visit(PrioritizedChoice &) override { name_ = "PrioritizedChoice"; }
1671  void visit(Repetition &) override { name_ = "Repetition"; }
1672  void visit(AndPredicate &) override { name_ = "AndPredicate"; }
1673  void visit(NotPredicate &) override { name_ = "NotPredicate"; }
1674  void visit(Dictionary &) override { name_ = "Dictionary"; }
1675  void visit(LiteralString &) override { name_ = "LiteralString"; }
1676  void visit(CharacterClass &) override { name_ = "CharacterClass"; }
1677  void visit(Character &) override { name_ = "Character"; }
1678  void visit(AnyCharacter &) override { name_ = "AnyCharacter"; }
1679  void visit(CaptureScope &) override { name_ = "CaptureScope"; }
1680  void visit(Capture &) override { name_ = "Capture"; }
1681  void visit(TokenBoundary &) override { name_ = "TokenBoundary"; }
1682  void visit(Ignore &) override { name_ = "Ignore"; }
1683  void visit(User &) override { name_ = "User"; }
1684  void visit(WeakHolder &) override { name_ = "WeakHolder"; }
1685  void visit(Holder &ope) override { name_ = ope.trace_name(); }
1686  void visit(Reference &) override { name_ = "Reference"; }
1687  void visit(Whitespace &) override { name_ = "Whitespace"; }
1688  void visit(BackReference &) override { name_ = "BackReference"; }
1689  void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; }
1690  void visit(Recovery &) override { name_ = "Recovery"; }
1691 
1692  static std::string get(Ope &ope) {
1693  TraceOpeName vis;
1694  ope.accept(vis);
1695  return vis.name_;
1696  }
1697 
1698 private:
1699  const char *name_ = nullptr;
1700 };
1701 
1702 struct AssignIDToDefinition : public Ope::Visitor {
1703  void visit(Sequence &ope) override {
1704  for (auto op : ope.opes_) {
1705  op->accept(*this);
1706  }
1707  }
1708  void visit(PrioritizedChoice &ope) override {
1709  for (auto op : ope.opes_) {
1710  op->accept(*this);
1711  }
1712  }
1713  void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1714  void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
1715  void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
1716  void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1717  void visit(Capture &ope) override { ope.ope_->accept(*this); }
1718  void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1719  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1720  void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1721  void visit(Holder &ope) override;
1722  void visit(Reference &ope) override;
1723  void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1724  void visit(PrecedenceClimbing &ope) override;
1725  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1726 
1727  std::unordered_map<void *, size_t> ids;
1728 };
1729 
1730 struct IsLiteralToken : public Ope::Visitor {
1731  void visit(PrioritizedChoice &ope) override {
1732  for (auto op : ope.opes_) {
1733  if (!IsLiteralToken::check(*op)) { return; }
1734  }
1735  result_ = true;
1736  }
1737 
1738  void visit(Dictionary &) override { result_ = true; }
1739  void visit(LiteralString &) override { result_ = true; }
1740 
1741  static bool check(Ope &ope) {
1742  IsLiteralToken vis;
1743  ope.accept(vis);
1744  return vis.result_;
1745  }
1746 
1747 private:
1748  bool result_ = false;
1749 };
1750 
1751 struct TokenChecker : public Ope::Visitor {
1752  void visit(Sequence &ope) override {
1753  for (auto op : ope.opes_) {
1754  op->accept(*this);
1755  }
1756  }
1757  void visit(PrioritizedChoice &ope) override {
1758  for (auto op : ope.opes_) {
1759  op->accept(*this);
1760  }
1761  }
1762  void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1763  void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1764  void visit(Capture &ope) override { ope.ope_->accept(*this); }
1765  void visit(TokenBoundary &) override { has_token_boundary_ = true; }
1766  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1767  void visit(WeakHolder &) override { has_rule_ = true; }
1768  void visit(Holder &ope) override { ope.ope_->accept(*this); }
1769  void visit(Reference &ope) override;
1770  void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1771  void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1772  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1773 
1774  static bool is_token(Ope &ope) {
1775  if (IsLiteralToken::check(ope)) { return true; }
1776 
1777  TokenChecker vis;
1778  ope.accept(vis);
1779  return vis.has_token_boundary_ || !vis.has_rule_;
1780  }
1781 
1782 private:
1783  bool has_token_boundary_ = false;
1784  bool has_rule_ = false;
1785 };
1786 
1787 struct FindLiteralToken : public Ope::Visitor {
1788  void visit(LiteralString &ope) override { token_ = ope.lit_.c_str(); }
1789  void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1790  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1791  void visit(Reference &ope) override;
1792  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1793 
1794  static const char *token(Ope &ope) {
1795  FindLiteralToken vis;
1796  ope.accept(vis);
1797  return vis.token_;
1798  }
1799 
1800 private:
1801  const char *token_ = nullptr;
1802 };
1803 
1804 struct DetectLeftRecursion : public Ope::Visitor {
1805  DetectLeftRecursion(const std::string &name) : name_(name) {}
1806 
1807  void visit(Sequence &ope) override {
1808  for (auto op : ope.opes_) {
1809  op->accept(*this);
1810  if (done_) {
1811  break;
1812  } else if (error_s) {
1813  done_ = true;
1814  break;
1815  }
1816  }
1817  }
1818  void visit(PrioritizedChoice &ope) override {
1819  for (auto op : ope.opes_) {
1820  op->accept(*this);
1821  if (error_s) {
1822  done_ = true;
1823  break;
1824  }
1825  }
1826  }
1827  void visit(Repetition &ope) override {
1828  ope.ope_->accept(*this);
1829  done_ = ope.min_ > 0;
1830  }
1831  void visit(AndPredicate &ope) override {
1832  ope.ope_->accept(*this);
1833  done_ = false;
1834  }
1835  void visit(NotPredicate &ope) override {
1836  ope.ope_->accept(*this);
1837  done_ = false;
1838  }
1839  void visit(Dictionary &) override { done_ = true; }
1840  void visit(LiteralString &ope) override { done_ = !ope.lit_.empty(); }
1841  void visit(CharacterClass &) override { done_ = true; }
1842  void visit(Character &) override { done_ = true; }
1843  void visit(AnyCharacter &) override { done_ = true; }
1844  void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1845  void visit(Capture &ope) override { ope.ope_->accept(*this); }
1846  void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1847  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1848  void visit(User &) override { done_ = true; }
1849  void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1850  void visit(Holder &ope) override { ope.ope_->accept(*this); }
1851  void visit(Reference &ope) override;
1852  void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1853  void visit(BackReference &) override { done_ = true; }
1854  void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1855  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1856 
1857  const char *error_s = nullptr;
1858 
1859 private:
1860  std::string name_;
1861  std::set<std::string> refs_;
1862  bool done_ = false;
1863 };
1864 
1865 struct HasEmptyElement : public Ope::Visitor {
1866  HasEmptyElement(std::list<std::pair<const char *, std::string>> &refs)
1867  : refs_(refs) {}
1868 
1869  void visit(Sequence &ope) override {
1870  auto save_is_empty = false;
1871  const char *save_error_s = nullptr;
1872  std::string save_error_name;
1873  for (auto op : ope.opes_) {
1874  op->accept(*this);
1875  if (!is_empty) { return; }
1876  save_is_empty = is_empty;
1877  save_error_s = error_s;
1878  save_error_name = error_name;
1879  is_empty = false;
1880  error_name.clear();
1881  }
1882  is_empty = save_is_empty;
1883  error_s = save_error_s;
1884  error_name = save_error_name;
1885  }
1886  void visit(PrioritizedChoice &ope) override {
1887  for (auto op : ope.opes_) {
1888  op->accept(*this);
1889  if (is_empty) { return; }
1890  }
1891  }
1892  void visit(Repetition &ope) override {
1893  if (ope.min_ == 0) {
1894  set_error();
1895  } else {
1896  ope.ope_->accept(*this);
1897  }
1898  }
1899  void visit(AndPredicate &) override { set_error(); }
1900  void visit(NotPredicate &) override { set_error(); }
1901  void visit(LiteralString &ope) override {
1902  if (ope.lit_.empty()) { set_error(); }
1903  }
1904  void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1905  void visit(Capture &ope) override { ope.ope_->accept(*this); }
1906  void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1907  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1908  void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1909  void visit(Holder &ope) override { ope.ope_->accept(*this); }
1910  void visit(Reference &ope) override;
1911  void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1912  void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1913  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1914 
1915  bool is_empty = false;
1916  const char *error_s = nullptr;
1917  std::string error_name;
1918 
1919 private:
1920  void set_error() {
1921  is_empty = true;
1922  error_s = refs_.back().first;
1923  error_name = refs_.back().second;
1924  }
1925  std::list<std::pair<const char *, std::string>> &refs_;
1926 };
1927 
1928 struct DetectInfiniteLoop : public Ope::Visitor {
1929  DetectInfiniteLoop(const char *s, const std::string &name) {
1930  refs_.emplace_back(s, name);
1931  }
1932 
1933  void visit(Sequence &ope) override {
1934  for (auto op : ope.opes_) {
1935  op->accept(*this);
1936  if (has_error) { return; }
1937  }
1938  }
1939  void visit(PrioritizedChoice &ope) override {
1940  for (auto op : ope.opes_) {
1941  op->accept(*this);
1942  if (has_error) { return; }
1943  }
1944  }
1945  void visit(Repetition &ope) override {
1946  if (ope.max_ == std::numeric_limits<size_t>::max()) {
1947  HasEmptyElement vis(refs_);
1948  ope.ope_->accept(vis);
1949  if (vis.is_empty) {
1950  has_error = true;
1951  error_s = vis.error_s;
1952  error_name = vis.error_name;
1953  }
1954  } else {
1955  ope.ope_->accept(*this);
1956  }
1957  }
1958  void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
1959  void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
1960  void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1961  void visit(Capture &ope) override { ope.ope_->accept(*this); }
1962  void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1963  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1964  void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1965  void visit(Holder &ope) override { ope.ope_->accept(*this); }
1966  void visit(Reference &ope) override;
1967  void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1968  void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1969  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1970 
1971  bool has_error = false;
1972  const char *error_s = nullptr;
1973  std::string error_name;
1974 
1975 private:
1976  std::list<std::pair<const char *, std::string>> refs_;
1977 };
1978 
1979 struct ReferenceChecker : public Ope::Visitor {
1980  ReferenceChecker(const Grammar &grammar,
1981  const std::vector<std::string> &params)
1982  : grammar_(grammar), params_(params) {}
1983 
1984  void visit(Sequence &ope) override {
1985  for (auto op : ope.opes_) {
1986  op->accept(*this);
1987  }
1988  }
1989  void visit(PrioritizedChoice &ope) override {
1990  for (auto op : ope.opes_) {
1991  op->accept(*this);
1992  }
1993  }
1994  void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1995  void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
1996  void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
1997  void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1998  void visit(Capture &ope) override { ope.ope_->accept(*this); }
1999  void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2000  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2001  void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2002  void visit(Holder &ope) override { ope.ope_->accept(*this); }
2003  void visit(Reference &ope) override;
2004  void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2005  void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2006  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2007 
2008  std::unordered_map<std::string, const char *> error_s;
2009  std::unordered_map<std::string, std::string> error_message;
2010 
2011 private:
2012  const Grammar &grammar_;
2013  const std::vector<std::string> &params_;
2014 };
2015 
2016 struct LinkReferences : public Ope::Visitor {
2017  LinkReferences(Grammar &grammar, const std::vector<std::string> &params)
2018  : grammar_(grammar), params_(params) {}
2019 
2020  void visit(Sequence &ope) override {
2021  for (auto op : ope.opes_) {
2022  op->accept(*this);
2023  }
2024  }
2025  void visit(PrioritizedChoice &ope) override {
2026  for (auto op : ope.opes_) {
2027  op->accept(*this);
2028  }
2029  }
2030  void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2031  void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2032  void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2033  void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2034  void visit(Capture &ope) override { ope.ope_->accept(*this); }
2035  void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2036  void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2037  void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2038  void visit(Holder &ope) override { ope.ope_->accept(*this); }
2039  void visit(Reference &ope) override;
2040  void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2041  void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2042  void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2043 
2044 private:
2045  Grammar &grammar_;
2046  const std::vector<std::string> &params_;
2047 };
2048 
2049 struct FindReference : public Ope::Visitor {
2050  FindReference(const std::vector<std::shared_ptr<Ope>> &args,
2051  const std::vector<std::string> &params)
2052  : args_(args), params_(params) {}
2053 
2054  void visit(Sequence &ope) override {
2055  std::vector<std::shared_ptr<Ope>> opes;
2056  for (auto o : ope.opes_) {
2057  o->accept(*this);
2058  opes.push_back(found_ope);
2059  }
2060  found_ope = std::make_shared<Sequence>(opes);
2061  }
2062  void visit(PrioritizedChoice &ope) override {
2063  std::vector<std::shared_ptr<Ope>> opes;
2064  for (auto o : ope.opes_) {
2065  o->accept(*this);
2066  opes.push_back(found_ope);
2067  }
2068  found_ope = std::make_shared<PrioritizedChoice>(opes);
2069  }
2070  void visit(Repetition &ope) override {
2071  ope.ope_->accept(*this);
2072  found_ope = rep(found_ope, ope.min_, ope.max_);
2073  }
2074  void visit(AndPredicate &ope) override {
2075  ope.ope_->accept(*this);
2076  found_ope = apd(found_ope);
2077  }
2078  void visit(NotPredicate &ope) override {
2079  ope.ope_->accept(*this);
2080  found_ope = npd(found_ope);
2081  }
2082  void visit(Dictionary &ope) override { found_ope = ope.shared_from_this(); }
2083  void visit(LiteralString &ope) override {
2084  found_ope = ope.shared_from_this();
2085  }
2086  void visit(CharacterClass &ope) override {
2087  found_ope = ope.shared_from_this();
2088  }
2089  void visit(Character &ope) override { found_ope = ope.shared_from_this(); }
2090  void visit(AnyCharacter &ope) override { found_ope = ope.shared_from_this(); }
2091  void visit(CaptureScope &ope) override {
2092  ope.ope_->accept(*this);
2093  found_ope = csc(found_ope);
2094  }
2095  void visit(Capture &ope) override {
2096  ope.ope_->accept(*this);
2097  found_ope = cap(found_ope, ope.match_action_);
2098  }
2099  void visit(TokenBoundary &ope) override {
2100  ope.ope_->accept(*this);
2101  found_ope = tok(found_ope);
2102  }
2103  void visit(Ignore &ope) override {
2104  ope.ope_->accept(*this);
2105  found_ope = ign(found_ope);
2106  }
2107  void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2108  void visit(Holder &ope) override { ope.ope_->accept(*this); }
2109  void visit(Reference &ope) override;
2110  void visit(Whitespace &ope) override {
2111  ope.ope_->accept(*this);
2112  found_ope = wsp(found_ope);
2113  }
2114  void visit(PrecedenceClimbing &ope) override {
2115  ope.atom_->accept(*this);
2116  found_ope = csc(found_ope);
2117  }
2118  void visit(Recovery &ope) override {
2119  ope.ope_->accept(*this);
2120  found_ope = rec(found_ope);
2121  }
2122 
2123  std::shared_ptr<Ope> found_ope;
2124 
2125 private:
2126  const std::vector<std::shared_ptr<Ope>> &args_;
2127  const std::vector<std::string> &params_;
2128 };
2129 
2130 struct IsPrioritizedChoice : public Ope::Visitor {
2131  void visit(PrioritizedChoice &) override { result_ = true; }
2132 
2133  static bool check(Ope &ope) {
2134  IsPrioritizedChoice vis;
2135  ope.accept(vis);
2136  return vis.result_;
2137  }
2138 
2139 private:
2140  bool result_ = false;
2141 };
2142 
2143 /*
2144  * Keywords
2145  */
2146 static const char *WHITESPACE_DEFINITION_NAME = "%whitespace";
2147 static const char *WORD_DEFINITION_NAME = "%word";
2148 static const char *RECOVER_DEFINITION_NAME = "%recover";
2149 
2150 /*
2151  * Definition
2152  */
2153 class Definition {
2154 public:
2155  struct Result {
2156  bool ret;
2157  bool recovered;
2158  size_t len;
2159  ErrorInfo error_info;
2160  };
2161 
2162  Definition() : holder_(std::make_shared<Holder>(this)) {}
2163 
2164  Definition(const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) {
2165  holder_->outer_ = this;
2166  }
2167 
2168  Definition(const std::shared_ptr<Ope> &ope)
2169  : holder_(std::make_shared<Holder>(this)) {
2170  *this <= ope;
2171  }
2172 
2173  operator std::shared_ptr<Ope>() {
2174  return std::make_shared<WeakHolder>(holder_);
2175  }
2176 
2177  Definition &operator<=(const std::shared_ptr<Ope> &ope) {
2178  holder_->ope_ = ope;
2179  return *this;
2180  }
2181 
2182  Result parse(const char *s, size_t n, const char *path = nullptr,
2183  Log log = nullptr) const {
2184  SemanticValues vs;
2185  std::any dt;
2186  return parse_core(s, n, vs, dt, path, log);
2187  }
2188 
2189  Result parse(const char *s, const char *path = nullptr,
2190  Log log = nullptr) const {
2191  auto n = strlen(s);
2192  return parse(s, n, path, log);
2193  }
2194 
2195  Result parse(const char *s, size_t n, std::any &dt,
2196  const char *path = nullptr, Log log = nullptr) const {
2197  SemanticValues vs;
2198  return parse_core(s, n, vs, dt, path, log);
2199  }
2200 
2201  Result parse(const char *s, std::any &dt, const char *path = nullptr,
2202  Log log = nullptr) const {
2203  auto n = strlen(s);
2204  return parse(s, n, dt, path, log);
2205  }
2206 
2207  template <typename T>
2208  Result parse_and_get_value(const char *s, size_t n, T &val,
2209  const char *path = nullptr,
2210  Log log = nullptr) const {
2211  SemanticValues vs;
2212  std::any dt;
2213  auto r = parse_core(s, n, vs, dt, path, log);
2214  if (r.ret && !vs.empty() && vs.front().has_value()) {
2215  val = std::any_cast<T>(vs[0]);
2216  }
2217  return r;
2218  }
2219 
2220  template <typename T>
2221  Result parse_and_get_value(const char *s, T &val, const char *path = nullptr,
2222  Log log = nullptr) const {
2223  auto n = strlen(s);
2224  return parse_and_get_value(s, n, val, path, log);
2225  }
2226 
2227  template <typename T>
2228  Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val,
2229  const char *path = nullptr,
2230  Log log = nullptr) const {
2231  SemanticValues vs;
2232  auto r = parse_core(s, n, vs, dt, path, log);
2233  if (r.ret && !vs.empty() && vs.front().has_value()) {
2234  val = std::any_cast<T>(vs[0]);
2235  }
2236  return r;
2237  }
2238 
2239  template <typename T>
2240  Result parse_and_get_value(const char *s, std::any &dt, T &val,
2241  const char *path = nullptr,
2242  Log log = nullptr) const {
2243  auto n = strlen(s);
2244  return parse_and_get_value(s, n, dt, val, path, log);
2245  }
2246 
2247  void operator=(Action a) { action = a; }
2248 
2249  template <typename T> Definition &operator,(T fn) {
2250  operator=(fn);
2251  return *this;
2252  }
2253 
2254  Definition &operator~() {
2255  ignoreSemanticValue = true;
2256  return *this;
2257  }
2258 
2259  void accept(Ope::Visitor &v) { holder_->accept(v); }
2260 
2261  std::shared_ptr<Ope> get_core_operator() const { return holder_->ope_; }
2262 
2263  bool is_token() const {
2264  std::call_once(is_token_init_, [this]() {
2265  is_token_ = TokenChecker::is_token(*get_core_operator());
2266  });
2267  return is_token_;
2268  }
2269 
2270  std::string name;
2271  const char *s_ = nullptr;
2272 
2273  size_t id = 0;
2274  Action action;
2275  std::function<void(const char *s, size_t n, std::any &dt)> enter;
2276  std::function<void(const char *s, size_t n, size_t matchlen, std::any &value,
2277  std::any &dt)>
2278  leave;
2279  bool ignoreSemanticValue = false;
2280  std::shared_ptr<Ope> whitespaceOpe;
2281  std::shared_ptr<Ope> wordOpe;
2282  bool enablePackratParsing = false;
2283  bool is_macro = false;
2284  std::vector<std::string> params;
2285  TracerEnter tracer_enter;
2286  TracerLeave tracer_leave;
2287  bool disable_action = false;
2288 
2289  std::string error_message;
2290  bool no_ast_opt = false;
2291 
2292 private:
2293  friend class Reference;
2294  friend class ParserGenerator;
2295 
2296  Definition &operator=(const Definition &rhs);
2297  Definition &operator=(Definition &&rhs);
2298 
2299  void initialize_definition_ids() const {
2300  std::call_once(definition_ids_init_, [&]() {
2301  AssignIDToDefinition vis;
2302  holder_->accept(vis);
2303  if (whitespaceOpe) { whitespaceOpe->accept(vis); }
2304  if (wordOpe) { wordOpe->accept(vis); }
2305  definition_ids_.swap(vis.ids);
2306  });
2307  }
2308 
2309  Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt,
2310  const char *path, Log log) const {
2311  initialize_definition_ids();
2312 
2313  std::shared_ptr<Ope> ope = holder_;
2314  if (whitespaceOpe) { ope = std::make_shared<Sequence>(whitespaceOpe, ope); }
2315 
2316  Context cxt(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe,
2317  enablePackratParsing, tracer_enter, tracer_leave, log);
2318 
2319  auto len = ope->parse(s, n, vs, cxt, dt);
2320  return Result{success(len), cxt.recovered, len, cxt.error_info};
2321  }
2322 
2323  std::shared_ptr<Holder> holder_;
2324  mutable std::once_flag is_token_init_;
2325  mutable bool is_token_ = false;
2326  mutable std::once_flag assign_id_to_definition_init_;
2327  mutable std::once_flag definition_ids_init_;
2328  mutable std::unordered_map<void *, size_t> definition_ids_;
2329 };
2330 
2331 /*
2332  * Implementations
2333  */
2334 
2335 inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
2336  Context &c, std::any &dt, const std::string &lit,
2337  std::once_flag &init_is_word, bool &is_word,
2338  bool ignore_case) {
2339  size_t i = 0;
2340  for (; i < lit.size(); i++) {
2341  if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i]))
2342  : (s[i] != lit[i]))) {
2343  c.set_error_pos(s, lit.c_str());
2344  return static_cast<size_t>(-1);
2345  }
2346  }
2347 
2348  // Word check
2349  SemanticValues dummy_vs;
2350  Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2351  nullptr, nullptr);
2352  std::any dummy_dt;
2353 
2354  std::call_once(init_is_word, [&]() {
2355  if (c.wordOpe) {
2356  auto len =
2357  c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt);
2358  is_word = success(len);
2359  }
2360  });
2361 
2362  if (is_word) {
2363  NotPredicate ope(c.wordOpe);
2364  auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2365  if (fail(len)) { return len; }
2366  i += len;
2367  }
2368 
2369  // Skip whiltespace
2370  if (!c.in_token_boundary_count) {
2371  if (c.whitespaceOpe) {
2372  auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
2373  if (fail(len)) { return len; }
2374  i += len;
2375  }
2376  }
2377 
2378  return i;
2379 }
2380 
2381 inline void Context::set_error_pos(const char *a_s, const char *literal) {
2382  if (log) {
2383  if (error_info.error_pos <= a_s) {
2384  if (error_info.error_pos < a_s) {
2385  error_info.error_pos = a_s;
2386  error_info.expected_tokens.clear();
2387  }
2388  if (literal) {
2389  error_info.add(literal, true);
2390  } else if (!rule_stack.empty()) {
2391  auto rule = rule_stack.back();
2392  auto ope = rule->get_core_operator();
2393  if (auto token = FindLiteralToken::token(*ope);
2394  token && token[0] != '\0') {
2395  error_info.add(token, true);
2396  } else {
2397  error_info.add(rule->name.c_str(), false);
2398  }
2399  }
2400  }
2401  }
2402 }
2403 
2404 inline void Context::trace_enter(const Ope &ope, const char *a_s, size_t n,
2405  SemanticValues &vs, std::any &dt) const {
2406  trace_ids.push_back(next_trace_id++);
2407  tracer_enter(ope, a_s, n, vs, *this, dt);
2408 }
2409 
2410 inline void Context::trace_leave(const Ope &ope, const char *a_s, size_t n,
2411  SemanticValues &vs, std::any &dt,
2412  size_t len) const {
2413  tracer_leave(ope, a_s, n, vs, *this, dt, len);
2414  trace_ids.pop_back();
2415 }
2416 
2417 inline bool Context::is_traceable(const Ope &ope) const {
2418  if (tracer_enter && tracer_leave) {
2419  return !IsReference::check(const_cast<Ope &>(ope));
2420  }
2421  return false;
2422 }
2423 
2424 inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
2425  Context &c, std::any &dt) const {
2426  if (c.is_traceable(*this)) {
2427  c.trace_enter(*this, s, n, vs, dt);
2428  auto len = parse_core(s, n, vs, c, dt);
2429  c.trace_leave(*this, s, n, vs, dt, len);
2430  return len;
2431  }
2432  return parse_core(s, n, vs, c, dt);
2433 }
2434 
2435 inline size_t Dictionary::parse_core(const char *s, size_t n,
2436  SemanticValues & /*vs*/, Context &c,
2437  std::any & /*dt*/) const {
2438  auto len = trie_.match(s, n);
2439  if (len > 0) { return len; }
2440  c.set_error_pos(s);
2441  return static_cast<size_t>(-1);
2442 }
2443 
2444 inline size_t LiteralString::parse_core(const char *s, size_t n,
2445  SemanticValues &vs, Context &c,
2446  std::any &dt) const {
2447  return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_,
2448  ignore_case_);
2449 }
2450 
2451 inline size_t TokenBoundary::parse_core(const char *s, size_t n,
2452  SemanticValues &vs, Context &c,
2453  std::any &dt) const {
2454  size_t len;
2455  {
2456  c.in_token_boundary_count++;
2457  auto se = scope_exit([&]() { c.in_token_boundary_count--; });
2458  len = ope_->parse(s, n, vs, c, dt);
2459  }
2460 
2461  if (success(len)) {
2462  vs.tokens.emplace_back(std::string_view(s, len));
2463 
2464  if (!c.in_token_boundary_count) {
2465  if (c.whitespaceOpe) {
2466  auto l = c.whitespaceOpe->parse(s + len, n - len, vs, c, dt);
2467  if (fail(l)) { return l; }
2468  len += l;
2469  }
2470  }
2471  }
2472  return len;
2473 }
2474 
2475 inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
2476  Context &c, std::any &dt) const {
2477  if (!ope_) {
2478  throw std::logic_error("Uninitialized definition ope was used...");
2479  }
2480 
2481  // Macro reference
2482  if (outer_->is_macro) {
2483  c.rule_stack.push_back(outer_);
2484  auto len = ope_->parse(s, n, vs, c, dt);
2485  c.rule_stack.pop_back();
2486  return len;
2487  }
2488 
2489  size_t len;
2490  std::any val;
2491 
2492  c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
2493  if (outer_->enter) { outer_->enter(s, n, dt); }
2494 
2495  auto se2 = scope_exit([&]() {
2496  c.pop();
2497  if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
2498  });
2499 
2500  auto &chldsv = c.push();
2501 
2502  c.rule_stack.push_back(outer_);
2503  len = ope_->parse(s, n, chldsv, c, dt);
2504  c.rule_stack.pop_back();
2505 
2506  // Invoke action
2507  if (success(len)) {
2508  chldsv.sv_ = std::string_view(s, len);
2509  chldsv.name_ = outer_->name;
2510 
2511  if (!IsPrioritizedChoice::check(*ope_)) {
2512  chldsv.choice_count_ = 0;
2513  chldsv.choice_ = 0;
2514  }
2515 
2516  try {
2517  a_val = reduce(chldsv, dt);
2518  } catch (const parse_error &e) {
2519  if (e.what()) {
2520  if (c.error_info.message_pos < s) {
2521  c.error_info.message_pos = s;
2522  c.error_info.message = e.what();
2523  }
2524  }
2525  len = static_cast<size_t>(-1);
2526  }
2527  }
2528  });
2529 
2530  if (success(len)) {
2531  if (!outer_->ignoreSemanticValue) {
2532  vs.emplace_back(std::move(val));
2533  vs.tags.emplace_back(str2tag(outer_->name));
2534  }
2535  }
2536 
2537  return len;
2538 }
2539 
2540 inline std::any Holder::reduce(SemanticValues &vs, std::any &dt) const {
2541  if (outer_->action && !outer_->disable_action) {
2542  return outer_->action(vs, dt);
2543  } else if (vs.empty()) {
2544  return std::any();
2545  } else {
2546  return std::move(vs.front());
2547  }
2548 }
2549 
2550 inline const char *Holder::trace_name() const {
2551  if (trace_name_.empty()) { trace_name_ = "[" + outer_->name + "]"; }
2552  return trace_name_.data();
2553 }
2554 
2555 inline size_t Reference::parse_core(const char *s, size_t n, SemanticValues &vs,
2556  Context &c, std::any &dt) const {
2557  if (rule_) {
2558  // Reference rule
2559  if (rule_->is_macro) {
2560  // Macro
2561  FindReference vis(c.top_args(), c.rule_stack.back()->params);
2562 
2563  // Collect arguments
2564  std::vector<std::shared_ptr<Ope>> args;
2565  for (auto arg : args_) {
2566  arg->accept(vis);
2567  args.emplace_back(std::move(vis.found_ope));
2568  }
2569 
2570  c.push_args(std::move(args));
2571  auto se = scope_exit([&]() { c.pop_args(); });
2572  auto ope = get_core_operator();
2573  return ope->parse(s, n, vs, c, dt);
2574  } else {
2575  // Definition
2576  c.push_args(std::vector<std::shared_ptr<Ope>>());
2577  auto se = scope_exit([&]() { c.pop_args(); });
2578  auto ope = get_core_operator();
2579  return ope->parse(s, n, vs, c, dt);
2580  }
2581  } else {
2582  // Reference parameter in macro
2583  const auto &args = c.top_args();
2584  return args[iarg_]->parse(s, n, vs, c, dt);
2585  }
2586 }
2587 
2588 inline std::shared_ptr<Ope> Reference::get_core_operator() const {
2589  return rule_->holder_;
2590 }
2591 
2592 inline size_t BackReference::parse_core(const char *s, size_t n,
2593  SemanticValues &vs, Context &c,
2594  std::any &dt) const {
2595  auto size = static_cast<int>(c.capture_scope_stack_size);
2596  for (auto i = size - 1; i >= 0; i--) {
2597  auto index = static_cast<size_t>(i);
2598  const auto &cs = c.capture_scope_stack[index];
2599  if (cs.find(name_) != cs.end()) {
2600  const auto &lit = cs.at(name_);
2601  std::once_flag init_is_word;
2602  auto is_word = false;
2603  return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word, false);
2604  }
2605  }
2606  throw std::runtime_error("Invalid back reference...");
2607 }
2608 
2609 inline Definition &
2610 PrecedenceClimbing::get_reference_for_binop(Context &c) const {
2611  if (rule_.is_macro) {
2612  // Reference parameter in macro
2613  const auto &args = c.top_args();
2614  auto iarg = dynamic_cast<Reference &>(*binop_).iarg_;
2615  auto arg = args[iarg];
2616  return *dynamic_cast<Reference &>(*arg).rule_;
2617  }
2618 
2619  return *dynamic_cast<Reference &>(*binop_).rule_;
2620 }
2621 
2622 inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
2623  SemanticValues &vs,
2624  Context &c, std::any &dt,
2625  size_t min_prec) const {
2626  auto len = atom_->parse(s, n, vs, c, dt);
2627  if (fail(len)) { return len; }
2628 
2629  std::string tok;
2630  auto &rule = get_reference_for_binop(c);
2631  auto action = std::move(rule.action);
2632 
2633  rule.action = [&](SemanticValues &vs2, std::any &dt2) {
2634  tok = vs2.token();
2635  if (action) {
2636  return action(vs2, dt2);
2637  } else if (!vs2.empty()) {
2638  return vs2[0];
2639  }
2640  return std::any();
2641  };
2642  auto action_se = scope_exit([&]() { rule.action = std::move(action); });
2643 
2644  auto i = len;
2645  while (i < n) {
2646  std::vector<std::any> save_values(vs.begin(), vs.end());
2647  auto save_tokens = vs.tokens;
2648 
2649  auto chv = c.push();
2650  auto chl = binop_->parse(s + i, n - i, chv, c, dt);
2651  c.pop();
2652 
2653  if (fail(chl)) { break; }
2654 
2655  auto it = info_.find(tok);
2656  if (it == info_.end()) { break; }
2657 
2658  auto level = std::get<0>(it->second);
2659  auto assoc = std::get<1>(it->second);
2660 
2661  if (level < min_prec) { break; }
2662 
2663  vs.emplace_back(std::move(chv[0]));
2664  i += chl;
2665 
2666  auto next_min_prec = level;
2667  if (assoc == 'L') { next_min_prec = level + 1; }
2668 
2669  chv = c.push();
2670  chl = parse_expression(s + i, n - i, chv, c, dt, next_min_prec);
2671  c.pop();
2672 
2673  if (fail(chl)) {
2674  vs.assign(save_values.begin(), save_values.end());
2675  vs.tokens = save_tokens;
2676  break;
2677  }
2678 
2679  vs.emplace_back(std::move(chv[0]));
2680  i += chl;
2681 
2682  std::any val;
2683  if (rule_.action) {
2684  vs.sv_ = std::string_view(s, i);
2685  val = rule_.action(vs, dt);
2686  } else if (!vs.empty()) {
2687  val = vs[0];
2688  }
2689  vs.clear();
2690  vs.emplace_back(std::move(val));
2691  }
2692 
2693  return i;
2694 }
2695 
2696 inline size_t Recovery::parse_core(const char *s, size_t n,
2697  SemanticValues & /*vs*/, Context &c,
2698  std::any & /*dt*/) const {
2699  auto save_log = c.log;
2700  c.log = nullptr;
2701 
2702  const auto &rule = dynamic_cast<Reference &>(*ope_);
2703 
2704  SemanticValues dummy_vs;
2705  std::any dummy_dt;
2706  auto len = rule.parse(s, n, dummy_vs, c, dummy_dt);
2707 
2708  c.log = save_log;
2709 
2710  if (success(len)) {
2711  c.recovered = true;
2712  if (c.log) {
2713  auto label = dynamic_cast<Reference *>(rule.args_[0].get());
2714  if (label) {
2715  if (!label->rule_->error_message.empty()) {
2716  c.error_info.message_pos = c.error_info.error_pos;
2717  c.error_info.message = label->rule_->error_message;
2718  }
2719  }
2720  c.error_info.output_log(c.log, c.s, c.l);
2721  }
2722  }
2723  c.error_info.clear();
2724 
2725  return len;
2726 }
2727 
2728 inline void Sequence::accept(Visitor &v) { v.visit(*this); }
2729 inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); }
2730 inline void Repetition::accept(Visitor &v) { v.visit(*this); }
2731 inline void AndPredicate::accept(Visitor &v) { v.visit(*this); }
2732 inline void NotPredicate::accept(Visitor &v) { v.visit(*this); }
2733 inline void Dictionary::accept(Visitor &v) { v.visit(*this); }
2734 inline void LiteralString::accept(Visitor &v) { v.visit(*this); }
2735 inline void CharacterClass::accept(Visitor &v) { v.visit(*this); }
2736 inline void Character::accept(Visitor &v) { v.visit(*this); }
2737 inline void AnyCharacter::accept(Visitor &v) { v.visit(*this); }
2738 inline void CaptureScope::accept(Visitor &v) { v.visit(*this); }
2739 inline void Capture::accept(Visitor &v) { v.visit(*this); }
2740 inline void TokenBoundary::accept(Visitor &v) { v.visit(*this); }
2741 inline void Ignore::accept(Visitor &v) { v.visit(*this); }
2742 inline void User::accept(Visitor &v) { v.visit(*this); }
2743 inline void WeakHolder::accept(Visitor &v) { v.visit(*this); }
2744 inline void Holder::accept(Visitor &v) { v.visit(*this); }
2745 inline void Reference::accept(Visitor &v) { v.visit(*this); }
2746 inline void Whitespace::accept(Visitor &v) { v.visit(*this); }
2747 inline void BackReference::accept(Visitor &v) { v.visit(*this); }
2748 inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); }
2749 inline void Recovery::accept(Visitor &v) { v.visit(*this); }
2750 
2751 inline void AssignIDToDefinition::visit(Holder &ope) {
2752  auto p = static_cast<void *>(ope.outer_);
2753  if (ids.count(p)) { return; }
2754  auto id = ids.size();
2755  ids[p] = id;
2756  ope.outer_->id = id;
2757  ope.ope_->accept(*this);
2758 }
2759 
2760 inline void AssignIDToDefinition::visit(Reference &ope) {
2761  if (ope.rule_) {
2762  for (auto arg : ope.args_) {
2763  arg->accept(*this);
2764  }
2765  ope.rule_->accept(*this);
2766  }
2767 }
2768 
2769 inline void AssignIDToDefinition::visit(PrecedenceClimbing &ope) {
2770  ope.atom_->accept(*this);
2771  ope.binop_->accept(*this);
2772 }
2773 
2774 inline void TokenChecker::visit(Reference &ope) {
2775  if (ope.is_macro_) {
2776  for (auto arg : ope.args_) {
2777  arg->accept(*this);
2778  }
2779  } else {
2780  has_rule_ = true;
2781  }
2782 }
2783 
2784 inline void FindLiteralToken::visit(Reference &ope) {
2785  if (ope.is_macro_) {
2786  ope.rule_->accept(*this);
2787  for (auto arg : ope.args_) {
2788  arg->accept(*this);
2789  }
2790  }
2791 }
2792 
2793 inline void DetectLeftRecursion::visit(Reference &ope) {
2794  if (ope.name_ == name_) {
2795  error_s = ope.s_;
2796  } else if (!refs_.count(ope.name_)) {
2797  refs_.insert(ope.name_);
2798  if (ope.rule_) {
2799  ope.rule_->accept(*this);
2800  if (done_ == false) { return; }
2801  }
2802  }
2803  done_ = true;
2804 }
2805 
2806 inline void HasEmptyElement::visit(Reference &ope) {
2807  auto it = std::find_if(refs_.begin(), refs_.end(),
2808  [&](const std::pair<const char *, std::string> &ref) {
2809  return ope.name_ == ref.second;
2810  });
2811  if (it != refs_.end()) { return; }
2812 
2813  if (ope.rule_) {
2814  refs_.emplace_back(ope.s_, ope.name_);
2815  ope.rule_->accept(*this);
2816  refs_.pop_back();
2817  }
2818 }
2819 
2820 inline void DetectInfiniteLoop::visit(Reference &ope) {
2821  auto it = std::find_if(refs_.begin(), refs_.end(),
2822  [&](const std::pair<const char *, std::string> &ref) {
2823  return ope.name_ == ref.second;
2824  });
2825  if (it != refs_.end()) { return; }
2826 
2827  if (ope.rule_) {
2828  refs_.emplace_back(ope.s_, ope.name_);
2829  ope.rule_->accept(*this);
2830  refs_.pop_back();
2831  }
2832 }
2833 
2834 inline void ReferenceChecker::visit(Reference &ope) {
2835  auto it = std::find(params_.begin(), params_.end(), ope.name_);
2836  if (it != params_.end()) { return; }
2837 
2838  if (!grammar_.count(ope.name_)) {
2839  error_s[ope.name_] = ope.s_;
2840  error_message[ope.name_] = "'" + ope.name_ + "' is not defined.";
2841  } else {
2842  const auto &rule = grammar_.at(ope.name_);
2843  if (rule.is_macro) {
2844  if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) {
2845  error_s[ope.name_] = ope.s_;
2846  error_message[ope.name_] = "incorrect number of arguments.";
2847  }
2848  } else if (ope.is_macro_) {
2849  error_s[ope.name_] = ope.s_;
2850  error_message[ope.name_] = "'" + ope.name_ + "' is not macro.";
2851  }
2852  for (auto arg : ope.args_) {
2853  arg->accept(*this);
2854  }
2855  }
2856 }
2857 
2858 inline void LinkReferences::visit(Reference &ope) {
2859  // Check if the reference is a macro parameter
2860  auto found_param = false;
2861  for (size_t i = 0; i < params_.size(); i++) {
2862  const auto &param = params_[i];
2863  if (param == ope.name_) {
2864  ope.iarg_ = i;
2865  found_param = true;
2866  break;
2867  }
2868  }
2869 
2870  // Check if the reference is a definition rule
2871  if (!found_param && grammar_.count(ope.name_)) {
2872  auto &rule = grammar_.at(ope.name_);
2873  ope.rule_ = &rule;
2874  }
2875 
2876  for (auto arg : ope.args_) {
2877  arg->accept(*this);
2878  }
2879 }
2880 
2881 inline void FindReference::visit(Reference &ope) {
2882  for (size_t i = 0; i < args_.size(); i++) {
2883  const auto &name = params_[i];
2884  if (name == ope.name_) {
2885  found_ope = args_[i];
2886  return;
2887  }
2888  }
2889  found_ope = ope.shared_from_this();
2890 }
2891 
2892 /*-----------------------------------------------------------------------------
2893  * PEG parser generator
2894  *---------------------------------------------------------------------------*/
2895 
2896 using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
2897 
2898 class ParserGenerator {
2899 public:
2900  static std::shared_ptr<Grammar> parse(const char *s, size_t n,
2901  const Rules &rules, std::string &start,
2902  Log log) {
2903  return get_instance().perform_core(s, n, rules, start, log);
2904  }
2905 
2906  static std::shared_ptr<Grammar> parse(const char *s, size_t n,
2907  std::string &start, Log log) {
2908  Rules dummy;
2909  return parse(s, n, dummy, start, log);
2910  }
2911 
2912  // For debuging purpose
2913  static Grammar &grammar() { return get_instance().g; }
2914 
2915 private:
2916  static ParserGenerator &get_instance() {
2917  static ParserGenerator instance;
2918  return instance;
2919  }
2920 
2921  ParserGenerator() {
2922  make_grammar();
2923  setup_actions();
2924  }
2925 
2926  struct Instruction {
2927  std::string type;
2928  std::any data;
2929  };
2930 
2931  struct Data {
2932  std::shared_ptr<Grammar> grammar;
2933  std::string start;
2934  const char *start_pos = nullptr;
2935  std::vector<std::pair<std::string, const char *>> duplicates;
2936  std::map<std::string, Instruction> instructions;
2937 
2938  Data() : grammar(std::make_shared<Grammar>()) {}
2939  };
2940 
2941  void make_grammar() {
2942  // Setup PEG syntax parser
2943  g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
2944  g["Definition"] <=
2945  cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"],
2946  g["Expression"], opt(g["Instruction"])),
2947  seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"],
2948  opt(g["Instruction"])));
2949  g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
2950  g["Sequence"] <= zom(g["Prefix"]);
2951  g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]);
2952  g["SuffixWithLabel"] <=
2953  seq(g["Suffix"], opt(seq(g["HAT"], g["Identifier"])));
2954  g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
2955  g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
2956  g["Primary"] <=
2957  cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
2958  npd(g["LEFTARROW"])),
2959  seq(g["Ignore"], g["Identifier"],
2960  npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
2961  seq(g["OPEN"], g["Expression"], g["CLOSE"]),
2962  seq(g["BeginTok"], g["Expression"], g["EndTok"]),
2963  seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]),
2964  seq(g["BeginCap"], g["Expression"], g["EndCap"]), g["BackRef"],
2965  g["LiteralI"], g["Dictionary"], g["Literal"], g["NegatedClass"],
2966  g["Class"], g["DOT"]);
2967 
2968  g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
2969  g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
2970 
2971  const static std::vector<std::pair<char32_t, char32_t>> range = {
2972  {0x0080, 0xFFFF}};
2973  g["IdentStart"] <= cho(cls("a-zA-Z_%"), cls(range));
2974 
2975  g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
2976 
2977  g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
2978 
2979  auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
2980  cls("'"), g["Spacing"]),
2981  seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
2982  cls("\""), g["Spacing"]));
2983  g["Literal"] <= lit_ope;
2984  g["LiteralD"] <= lit_ope;
2985 
2986  g["LiteralI"] <=
2987  cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
2988  g["Spacing"]),
2989  seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
2990  g["Spacing"]));
2991 
2992  // NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
2993  g["Class"] <= seq(chr('['), npd(chr('^')),
2994  tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
2995  g["Spacing"]);
2996  g["NegatedClass"] <= seq(lit("[^"),
2997  tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
2998  g["Spacing"]);
2999 
3000  g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
3001  g["Char"] <=
3002  cho(seq(chr('\\'), cls("nrt'\"[]\\^")),
3003  seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
3004  seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
3005  seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
3006  seq(lit("\\u"),
3007  cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")),
3008  rep(cls("0-9a-fA-F"), 4, 4)),
3009  rep(cls("0-9a-fA-F"), 4, 5))),
3010  seq(npd(chr('\\')), dot()));
3011 
3012  g["Repetition"] <=
3013  seq(g["BeginBlacket"], g["RepetitionRange"], g["EndBlacket"]);
3014  g["RepetitionRange"] <= cho(seq(g["Number"], g["COMMA"], g["Number"]),
3015  seq(g["Number"], g["COMMA"]), g["Number"],
3016  seq(g["COMMA"], g["Number"]));
3017  g["Number"] <= seq(oom(cls("0-9")), g["Spacing"]);
3018 
3019  g["LEFTARROW"] <=
3020  seq(cho(lit("<-"), lit(reinterpret_cast<const char *>(u8"←"))),
3021  g["Spacing"]);
3022  ~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
3023  ~g["PIPE"] <= seq(chr('|'), g["Spacing"]);
3024  g["AND"] <= seq(chr('&'), g["Spacing"]);
3025  g["NOT"] <= seq(chr('!'), g["Spacing"]);
3026  ~g["HAT"] <= seq(chr('^'), g["Spacing"]);
3027  g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
3028  g["STAR"] <= seq(chr('*'), g["Spacing"]);
3029  g["PLUS"] <= seq(chr('+'), g["Spacing"]);
3030  ~g["OPEN"] <= seq(chr('('), g["Spacing"]);
3031  ~g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
3032  g["DOT"] <= seq(chr('.'), g["Spacing"]);
3033 
3034  ~g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
3035  g["Comment"] <=
3036  seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]);
3037  g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
3038  g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
3039  g["EndOfFile"] <= npd(dot());
3040 
3041  ~g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
3042  ~g["EndTok"] <= seq(chr('>'), g["Spacing"]);
3043 
3044  ~g["BeginCapScope"] <= seq(chr('$'), chr('('), g["Spacing"]);
3045  ~g["EndCapScope"] <= seq(chr(')'), g["Spacing"]);
3046 
3047  g["BeginCap"] <= seq(chr('$'), tok(g["IdentCont"]), chr('<'), g["Spacing"]);
3048  ~g["EndCap"] <= seq(chr('>'), g["Spacing"]);
3049 
3050  g["BackRef"] <= seq(chr('$'), tok(g["IdentCont"]), g["Spacing"]);
3051 
3052  g["IGNORE"] <= chr('~');
3053 
3054  g["Ignore"] <= opt(g["IGNORE"]);
3055  g["Parameters"] <= seq(g["OPEN"], g["Identifier"],
3056  zom(seq(g["COMMA"], g["Identifier"])), g["CLOSE"]);
3057  g["Arguments"] <= seq(g["OPEN"], g["Expression"],
3058  zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]);
3059  ~g["COMMA"] <= seq(chr(','), g["Spacing"]);
3060 
3061  // Instruction grammars
3062  g["Instruction"] <= seq(g["BeginBlacket"],
3063  cho(cho(g["PrecedenceClimbing"]),
3064  cho(g["ErrorMessage"]), cho(g["NoAstOpt"])),
3065  g["EndBlacket"]);
3066 
3067  ~g["SpacesZom"] <= zom(g["Space"]);
3068  ~g["SpacesOom"] <= oom(g["Space"]);
3069  ~g["BeginBlacket"] <= seq(chr('{'), g["Spacing"]);
3070  ~g["EndBlacket"] <= seq(chr('}'), g["Spacing"]);
3071 
3072  // PrecedenceClimbing instruction
3073  g["PrecedenceClimbing"] <=
3074  seq(lit("precedence"), g["SpacesOom"], g["PrecedenceInfo"],
3075  zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]);
3076  g["PrecedenceInfo"] <=
3077  seq(g["PrecedenceAssoc"],
3078  oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"])));
3079  g["PrecedenceOpe"] <=
3080  tok(oom(
3081  seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))), dot())));
3082  g["PrecedenceAssoc"] <= cls("LR");
3083 
3084  // Error message instruction
3085  g["ErrorMessage"] <=
3086  seq(lit("message"), g["SpacesOom"], g["LiteralD"], g["SpacesZom"]);
3087 
3088  // No Ast node optimazation instruction
3089  g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
3090 
3091  // Set definition names
3092  for (auto &x : g) {
3093  x.second.name = x.first;
3094  }
3095  }
3096 
3097  void setup_actions() {
3098  g["Definition"] = [&](const SemanticValues &vs, std::any &dt) {
3099  auto &data = *std::any_cast<Data *>(dt);
3100 
3101  auto is_macro = vs.choice() == 0;
3102  auto ignore = std::any_cast<bool>(vs[0]);
3103  auto name = std::any_cast<std::string>(vs[1]);
3104 
3105  std::vector<std::string> params;
3106  std::shared_ptr<Ope> ope;
3107  if (is_macro) {
3108  params = std::any_cast<std::vector<std::string>>(vs[2]);
3109  ope = std::any_cast<std::shared_ptr<Ope>>(vs[4]);
3110  if (vs.size() == 6) {
3111  data.instructions[name] = std::any_cast<Instruction>(vs[5]);
3112  }
3113  } else {
3114  ope = std::any_cast<std::shared_ptr<Ope>>(vs[3]);
3115  if (vs.size() == 5) {
3116  data.instructions[name] = std::any_cast<Instruction>(vs[4]);
3117  }
3118  }
3119 
3120  auto &grammar = *data.grammar;
3121  if (!grammar.count(name)) {
3122  auto &rule = grammar[name];
3123  rule <= ope;
3124  rule.name = name;
3125  rule.s_ = vs.sv().data();
3126  rule.ignoreSemanticValue = ignore;
3127  rule.is_macro = is_macro;
3128  rule.params = params;
3129 
3130  if (data.start.empty()) {
3131  data.start = name;
3132  data.start_pos = vs.sv().data();
3133  }
3134  } else {
3135  data.duplicates.emplace_back(name, vs.sv().data());
3136  }
3137  };
3138 
3139  g["Expression"] = [&](const SemanticValues &vs) {
3140  if (vs.size() == 1) {
3141  return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3142  } else {
3143  std::vector<std::shared_ptr<Ope>> opes;
3144  for (auto i = 0u; i < vs.size(); i++) {
3145  opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(vs[i]));
3146  }
3147  const std::shared_ptr<Ope> ope =
3148  std::make_shared<PrioritizedChoice>(opes);
3149  return ope;
3150  }
3151  };
3152 
3153  g["Sequence"] = [&](const SemanticValues &vs) {
3154  if (vs.empty()) {
3155  return npd(lit(""));
3156  } else if (vs.size() == 1) {
3157  return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3158  } else {
3159  std::vector<std::shared_ptr<Ope>> opes;
3160  for (const auto &x : vs) {
3161  opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(x));
3162  }
3163  const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
3164  return ope;
3165  }
3166  };
3167 
3168  g["Prefix"] = [&](const SemanticValues &vs) {
3169  std::shared_ptr<Ope> ope;
3170  if (vs.size() == 1) {
3171  ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3172  } else {
3173  assert(vs.size() == 2);
3174  auto tok = std::any_cast<char>(vs[0]);
3175  ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3176  if (tok == '&') {
3177  ope = apd(ope);
3178  } else { // '!'
3179  ope = npd(ope);
3180  }
3181  }
3182  return ope;
3183  };
3184 
3185  g["SuffixWithLabel"] = [&](const SemanticValues &vs, std::any &dt) {
3186  auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3187  if (vs.size() == 1) {
3188  return ope;
3189  } else {
3190  assert(vs.size() == 2);
3191  auto &data = *std::any_cast<Data *>(dt);
3192  const auto &ident = std::any_cast<std::string>(vs[1]);
3193  auto label = ref(*data.grammar, ident, vs.sv().data(), false, {});
3194  auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
3195  vs.sv().data(), true, {label}));
3196  return cho(ope, recovery);
3197  }
3198  };
3199 
3200  struct Loop {
3201  enum class Type { opt = 0, zom, oom, rep };
3202  Type type;
3203  std::pair<size_t, size_t> range;
3204  };
3205 
3206  g["Suffix"] = [&](const SemanticValues &vs) {
3207  auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3208  if (vs.size() == 1) {
3209  return ope;
3210  } else {
3211  assert(vs.size() == 2);
3212  auto loop = std::any_cast<Loop>(vs[1]);
3213  switch (loop.type) {
3214  case Loop::Type::opt: return opt(ope);
3215  case Loop::Type::zom: return zom(ope);
3216  case Loop::Type::oom: return oom(ope);
3217  default: // Regex-like repetition
3218  return rep(ope, loop.range.first, loop.range.second);
3219  }
3220  }
3221  };
3222 
3223  g["Loop"] = [&](const SemanticValues &vs) {
3224  switch (vs.choice()) {
3225  case 0: // Option
3226  return Loop{Loop::Type::opt, std::pair<size_t, size_t>()};
3227  case 1: // Zero or More
3228  return Loop{Loop::Type::zom, std::pair<size_t, size_t>()};
3229  case 2: // One or More
3230  return Loop{Loop::Type::oom, std::pair<size_t, size_t>()};
3231  default: // Regex-like repetition
3232  return Loop{Loop::Type::rep,
3233  std::any_cast<std::pair<size_t, size_t>>(vs[0])};
3234  }
3235  };
3236 
3237  g["RepetitionRange"] = [&](const SemanticValues &vs) {
3238  switch (vs.choice()) {
3239  case 0: { // Number COMMA Number
3240  auto min = std::any_cast<size_t>(vs[0]);
3241  auto max = std::any_cast<size_t>(vs[1]);
3242  return std::pair(min, max);
3243  }
3244  case 1: // Number COMMA
3245  return std::pair(std::any_cast<size_t>(vs[0]),
3246  std::numeric_limits<size_t>::max());
3247  case 2: { // Number
3248  auto n = std::any_cast<size_t>(vs[0]);
3249  return std::pair(n, n);
3250  }
3251  default: // COMMA Number
3252  return std::pair(std::numeric_limits<size_t>::min(),
3253  std::any_cast<size_t>(vs[0]));
3254  }
3255  };
3256  g["Number"] = [&](const SemanticValues &vs) {
3257  return vs.token_to_number<size_t>();
3258  };
3259 
3260  g["Primary"] = [&](const SemanticValues &vs, std::any &dt) {
3261  auto &data = *std::any_cast<Data *>(dt);
3262 
3263  switch (vs.choice()) {
3264  case 0: // Macro Reference
3265  case 1: { // Reference
3266  auto is_macro = vs.choice() == 0;
3267  auto ignore = std::any_cast<bool>(vs[0]);
3268  const auto &ident = std::any_cast<std::string>(vs[1]);
3269 
3270  std::vector<std::shared_ptr<Ope>> args;
3271  if (is_macro) {
3272  args = std::any_cast<std::vector<std::shared_ptr<Ope>>>(vs[2]);
3273  }
3274 
3275  auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
3276  if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
3277 
3278  if (ignore) {
3279  return ign(ope);
3280  } else {
3281  return ope;
3282  }
3283  }
3284  case 2: { // (Expression)
3285  return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3286  }
3287  case 3: { // TokenBoundary
3288  return tok(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3289  }
3290  case 4: { // CaptureScope
3291  return csc(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3292  }
3293  case 5: { // Capture
3294  const auto &name = std::any_cast<std::string_view>(vs[0]);
3295  auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3296  return cap(ope, [name](const char *a_s, size_t a_n, Context &c) {
3297  auto &cs = c.capture_scope_stack[c.capture_scope_stack_size - 1];
3298  cs[name] = std::string(a_s, a_n);
3299  });
3300  }
3301  default: {
3302  return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3303  }
3304  }
3305  };
3306 
3307  g["IdentCont"] = [](const SemanticValues &vs) {
3308  return std::string(vs.sv().data(), vs.sv().length());
3309  };
3310 
3311  g["Dictionary"] = [](const SemanticValues &vs) {
3312  auto items = vs.transform<std::string>();
3313  return dic(items);
3314  };
3315 
3316  g["Literal"] = [](const SemanticValues &vs) {
3317  const auto &tok = vs.tokens.front();
3318  return lit(resolve_escape_sequence(tok.data(), tok.size()));
3319  };
3320  g["LiteralI"] = [](const SemanticValues &vs) {
3321  const auto &tok = vs.tokens.front();
3322  return liti(resolve_escape_sequence(tok.data(), tok.size()));
3323  };
3324  g["LiteralD"] = [](const SemanticValues &vs) {
3325  auto &tok = vs.tokens.front();
3326  return resolve_escape_sequence(tok.data(), tok.size());
3327  };
3328 
3329  g["Class"] = [](const SemanticValues &vs) {
3330  auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3331  return cls(ranges);
3332  };
3333  g["NegatedClass"] = [](const SemanticValues &vs) {
3334  auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3335  return ncls(ranges);
3336  };
3337  g["Range"] = [](const SemanticValues &vs) {
3338  switch (vs.choice()) {
3339  case 0: {
3340  auto s1 = std::any_cast<std::string>(vs[0]);
3341  auto s2 = std::any_cast<std::string>(vs[1]);
3342  auto cp1 = decode_codepoint(s1.data(), s1.length());
3343  auto cp2 = decode_codepoint(s2.data(), s2.length());
3344  return std::pair(cp1, cp2);
3345  }
3346  case 1: {
3347  auto s = std::any_cast<std::string>(vs[0]);
3348  auto cp = decode_codepoint(s.data(), s.length());
3349  return std::pair(cp, cp);
3350  }
3351  }
3352  return std::pair<char32_t, char32_t>(0, 0);
3353  };
3354  g["Char"] = [](const SemanticValues &vs) {
3355  return resolve_escape_sequence(vs.sv().data(), vs.sv().length());
3356  };
3357 
3358  g["AND"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3359  g["NOT"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3360  g["QUESTION"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3361  g["STAR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3362  g["PLUS"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3363 
3364  g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); };
3365 
3366  g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); };
3367 
3368  g["BackRef"] = [&](const SemanticValues &vs) {
3369  return bkr(vs.token_to_string());
3370  };
3371 
3372  g["Ignore"] = [](const SemanticValues &vs) { return vs.size() > 0; };
3373 
3374  g["Parameters"] = [](const SemanticValues &vs) {
3375  return vs.transform<std::string>();
3376  };
3377 
3378  g["Arguments"] = [](const SemanticValues &vs) {
3379  return vs.transform<std::shared_ptr<Ope>>();
3380  };
3381 
3382  g["PrecedenceClimbing"] = [](const SemanticValues &vs) {
3383  PrecedenceClimbing::BinOpeInfo binOpeInfo;
3384  size_t level = 1;
3385  for (auto v : vs) {
3386  auto tokens = std::any_cast<std::vector<std::string_view>>(v);
3387  auto assoc = tokens[0][0];
3388  for (size_t i = 1; i < tokens.size(); i++) {
3389  binOpeInfo[tokens[i]] = std::pair(level, assoc);
3390  }
3391  level++;
3392  }
3393  Instruction instruction;
3394  instruction.type = "precedence";
3395  instruction.data = binOpeInfo;
3396  return instruction;
3397  };
3398  g["PrecedenceInfo"] = [](const SemanticValues &vs) {
3399  return vs.transform<std::string_view>();
3400  };
3401  g["PrecedenceOpe"] = [](const SemanticValues &vs) { return vs.token(); };
3402  g["PrecedenceAssoc"] = [](const SemanticValues &vs) { return vs.token(); };
3403 
3404  g["ErrorMessage"] = [](const SemanticValues &vs) {
3405  Instruction instruction;
3406  instruction.type = "message";
3407  instruction.data = std::any_cast<std::string>(vs[0]);
3408  return instruction;
3409  };
3410 
3411  g["NoAstOpt"] = [](const SemanticValues & /*vs*/) {
3412  Instruction instruction;
3413  instruction.type = "no_ast_opt";
3414  return instruction;
3415  };
3416  }
3417 
3418  bool apply_precedence_instruction(Definition &rule,
3419  const PrecedenceClimbing::BinOpeInfo &info,
3420  const char *s, Log log) {
3421  try {
3422  auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
3423  auto atom = seq.opes_[0];
3424  auto &rep = dynamic_cast<Repetition &>(*seq.opes_[1]);
3425  auto &seq1 = dynamic_cast<Sequence &>(*rep.ope_);
3426  auto binop = seq1.opes_[0];
3427  auto atom1 = seq1.opes_[1];
3428 
3429  auto atom_name = dynamic_cast<Reference &>(*atom).name_;
3430  auto binop_name = dynamic_cast<Reference &>(*binop).name_;
3431  auto atom1_name = dynamic_cast<Reference &>(*atom1).name_;
3432 
3433  if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) {
3434  if (log) {
3435  auto line = line_info(s, rule.s_);
3436  log(line.first, line.second,
3437  "'precedence' instruction cannot be applied to '" + rule.name +
3438  "'.");
3439  }
3440  return false;
3441  }
3442 
3443  rule.holder_->ope_ = pre(atom, binop, info, rule);
3444  rule.disable_action = true;
3445  } catch (...) {
3446  if (log) {
3447  auto line = line_info(s, rule.s_);
3448  log(line.first, line.second,
3449  "'precedence' instruction cannot be applied to '" + rule.name +
3450  "'.");
3451  }
3452  return false;
3453  }
3454  return true;
3455  }
3456 
3457  std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
3458  const Rules &rules, std::string &start,
3459  Log log) {
3460  Data data;
3461  auto &grammar = *data.grammar;
3462 
3463  // Built-in macros
3464  {
3465  // `%recover`
3466  {
3467  auto &rule = grammar[RECOVER_DEFINITION_NAME];
3468  rule <= ref(grammar, "x", "", false, {});
3469  rule.name = RECOVER_DEFINITION_NAME;
3470  rule.s_ = "[native]";
3471  rule.ignoreSemanticValue = true;
3472  rule.is_macro = true;
3473  rule.params = {"x"};
3474  }
3475  }
3476 
3477  std::any dt = &data;
3478  auto r = g["Grammar"].parse(s, n, dt, nullptr, log);
3479 
3480  if (!r.ret) {
3481  if (log) {
3482  if (r.error_info.message_pos) {
3483  auto line = line_info(s, r.error_info.message_pos);
3484  log(line.first, line.second, r.error_info.message);
3485  } else {
3486  auto line = line_info(s, r.error_info.error_pos);
3487  log(line.first, line.second, "syntax error");
3488  }
3489  }
3490  return nullptr;
3491  }
3492 
3493  // User provided rules
3494  for (const auto &x : rules) {
3495  auto name = x.first;
3496  auto ignore = false;
3497  if (!name.empty() && name[0] == '~') {
3498  ignore = true;
3499  name.erase(0, 1);
3500  }
3501  if (!name.empty()) {
3502  auto &rule = grammar[name];
3503  rule <= x.second;
3504  rule.name = name;
3505  rule.ignoreSemanticValue = ignore;
3506  }
3507  }
3508 
3509  // Check duplicated definitions
3510  auto ret = data.duplicates.empty();
3511 
3512  for (const auto &x : data.duplicates) {
3513  if (log) {
3514  const auto &name = x.first;
3515  auto ptr = x.second;
3516  auto line = line_info(s, ptr);
3517  log(line.first, line.second, "'" + name + "' is already defined.");
3518  }
3519  }
3520 
3521  // Check if the start rule has ignore operator
3522  {
3523  auto &rule = grammar[data.start];
3524  if (rule.ignoreSemanticValue) {
3525  if (log) {
3526  auto line = line_info(s, rule.s_);
3527  log(line.first, line.second,
3528  "Ignore operator cannot be applied to '" + rule.name + "'.");
3529  }
3530  ret = false;
3531  }
3532  }
3533 
3534  if (!ret) { return nullptr; }
3535 
3536  // Check missing definitions
3537  for (auto &x : grammar) {
3538  auto &rule = x.second;
3539 
3540  ReferenceChecker vis(grammar, rule.params);
3541  rule.accept(vis);
3542  for (const auto &y : vis.error_s) {
3543  const auto &name = y.first;
3544  const auto ptr = y.second;
3545  if (log) {
3546  auto line = line_info(s, ptr);
3547  log(line.first, line.second, vis.error_message[name]);
3548  }
3549  ret = false;
3550  }
3551  }
3552 
3553  if (!ret) { return nullptr; }
3554 
3555  // Link references
3556  for (auto &x : grammar) {
3557  auto &rule = x.second;
3558  LinkReferences vis(grammar, rule.params);
3559  rule.accept(vis);
3560  }
3561 
3562  // Check left recursion
3563  ret = true;
3564 
3565  for (auto &x : grammar) {
3566  const auto &name = x.first;
3567  auto &rule = x.second;
3568 
3569  DetectLeftRecursion vis(name);
3570  rule.accept(vis);
3571  if (vis.error_s) {
3572  if (log) {
3573  auto line = line_info(s, vis.error_s);
3574  log(line.first, line.second, "'" + name + "' is left recursive.");
3575  }
3576  ret = false;
3577  }
3578  }
3579 
3580  if (!ret) { return nullptr; }
3581 
3582  // Set root definition
3583  auto &start_rule = grammar[data.start];
3584 
3585  // Check infinite loop
3586  for (auto &[name, rule] : grammar) {
3587  DetectInfiniteLoop vis(rule.s_, name);
3588  rule.accept(vis);
3589  if (vis.has_error) {
3590  if (log) {
3591  auto line = line_info(s, vis.error_s);
3592  log(line.first, line.second,
3593  "infinite loop is detected in '" + vis.error_name + "'.");
3594  }
3595  return nullptr;
3596  }
3597  }
3598 
3599  // Automatic whitespace skipping
3600  if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
3601  for (auto &x : grammar) {
3602  auto &rule = x.second;
3603  auto ope = rule.get_core_operator();
3604  if (IsLiteralToken::check(*ope)) { rule <= tok(ope); }
3605  }
3606 
3607  start_rule.whitespaceOpe =
3608  wsp(grammar[WHITESPACE_DEFINITION_NAME].get_core_operator());
3609  }
3610 
3611  // Word expression
3612  if (grammar.count(WORD_DEFINITION_NAME)) {
3613  start_rule.wordOpe = grammar[WORD_DEFINITION_NAME].get_core_operator();
3614  }
3615 
3616  // Apply instructions
3617  for (const auto &item : data.instructions) {
3618  const auto &name = item.first;
3619  const auto &instruction = item.second;
3620  auto &rule = grammar[name];
3621 
3622  if (instruction.type == "precedence") {
3623  const auto &info =
3624  std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
3625 
3626  if (!apply_precedence_instruction(rule, info, s, log)) {
3627  return nullptr;
3628  }
3629  } else if (instruction.type == "message") {
3630  rule.error_message = std::any_cast<std::string>(instruction.data);
3631  } else if (instruction.type == "no_ast_opt") {
3632  rule.no_ast_opt = true;
3633  }
3634  }
3635 
3636  // Set root definition
3637  start = data.start;
3638 
3639  return data.grammar;
3640  }
3641 
3642  Grammar g;
3643 };
3644 
3645 /*-----------------------------------------------------------------------------
3646  * AST
3647  *---------------------------------------------------------------------------*/
3648 
3649 template <typename Annotation> struct AstBase : public Annotation {
3650  AstBase(const char *path, size_t line, size_t column, const char *name,
3651  const std::vector<std::shared_ptr<AstBase>> &nodes,
3652  size_t position = 0, size_t length = 0, size_t choice_count = 0,
3653  size_t choice = 0)
3654  : path(path ? path : ""), line(line), column(column), name(name),
3655  position(position), length(length), choice_count(choice_count),
3656  choice(choice), original_name(name),
3657  original_choice_count(choice_count), original_choice(choice),
3658  tag(str2tag(name)), original_tag(tag), is_token(false), nodes(nodes) {}
3659 
3660  AstBase(const char *path, size_t line, size_t column, const char *name,
3661  const std::string_view &token, size_t position = 0, size_t length = 0,
3662  size_t choice_count = 0, size_t choice = 0)
3663  : path(path ? path : ""), line(line), column(column), name(name),
3664  position(position), length(length), choice_count(choice_count),
3665  choice(choice), original_name(name),
3666  original_choice_count(choice_count), original_choice(choice),
3667  tag(str2tag(name)), original_tag(tag), is_token(true), token(token) {}
3668 
3669  AstBase(const AstBase &ast, const char *original_name, size_t position = 0,
3670  size_t length = 0, size_t original_choice_count = 0,
3671  size_t original_choise = 0)
3672  : path(ast.path), line(ast.line), column(ast.column), name(ast.name),
3673  position(position), length(length), choice_count(ast.choice_count),
3674  choice(ast.choice), original_name(original_name),
3675  original_choice_count(original_choice_count),
3676  original_choice(original_choise), tag(ast.tag),
3677  original_tag(str2tag(original_name)), is_token(ast.is_token),
3678  token(ast.token), nodes(ast.nodes), parent(ast.parent) {}
3679 
3680  const std::string path;
3681  const size_t line = 1;
3682  const size_t column = 1;
3683 
3684  const std::string name;
3685  size_t position;
3686  size_t length;
3687  const size_t choice_count;
3688  const size_t choice;
3689  const std::string original_name;
3690  const size_t original_choice_count;
3691  const size_t original_choice;
3692  const unsigned int tag;
3693  const unsigned int original_tag;
3694 
3695  const bool is_token;
3696  const std::string_view token;
3697 
3698  std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
3699  std::weak_ptr<AstBase<Annotation>> parent;
3700 
3701  std::string token_to_string() const {
3702  assert(is_token);
3703  return std::string(token);
3704  }
3705 
3706  template <typename T> T token_to_number() const {
3707  T n = 0;
3708  if constexpr (std::is_floating_point<T>::value) {
3709  // TODO: The following code should be removed eventually.
3710  std::istringstream ss(token_to_string());
3711  ss >> n;
3712  return n;
3713  } else {
3714  assert(is_token);
3715  std::from_chars(token.data(), token.data() + token.size(), n);
3716  return n;
3717  }
3718  }
3719 };
3720 
3721 template <typename T>
3722 void ast_to_s_core(const std::shared_ptr<T> &ptr, std::string &s, int level,
3723  std::function<std::string(const T &ast, int level)> fn) {
3724  const auto &ast = *ptr;
3725  for (auto i = 0; i < level; i++) {
3726  s += " ";
3727  }
3728  auto name = ast.original_name;
3729  if (ast.original_choice_count > 0) {
3730  name += "/" + std::to_string(ast.original_choice);
3731  }
3732  if (ast.name != ast.original_name) { name += "[" + ast.name + "]"; }
3733  if (ast.is_token) {
3734  s += "- " + name + " (";
3735  s += ast.token;
3736  s += ")\n";
3737  } else {
3738  s += "+ " + name + "\n";
3739  }
3740  if (fn) { s += fn(ast, level + 1); }
3741  for (auto node : ast.nodes) {
3742  ast_to_s_core(node, s, level + 1, fn);
3743  }
3744 }
3745 
3746 template <typename T>
3747 std::string
3748 ast_to_s(const std::shared_ptr<T> &ptr,
3749  std::function<std::string(const T &ast, int level)> fn = nullptr) {
3750  std::string s;
3751  ast_to_s_core(ptr, s, 0, fn);
3752  return s;
3753 }
3754 
3755 struct AstOptimizer {
3756  AstOptimizer(bool mode, const std::vector<std::string> &rules = {})
3757  : mode_(mode), rules_(rules) {}
3758 
3759  template <typename T>
3760  std::shared_ptr<T> optimize(std::shared_ptr<T> original,
3761  std::shared_ptr<T> parent = nullptr) {
3762  auto found =
3763  std::find(rules_.begin(), rules_.end(), original->name) != rules_.end();
3764  bool opt = mode_ ? !found : found;
3765 
3766  if (opt && original->nodes.size() == 1) {
3767  auto child = optimize(original->nodes[0], parent);
3768  return std::make_shared<T>(*child, original->name.data(),
3769  original->choice_count, original->position,
3770  original->length, original->choice);
3771  }
3772 
3773  auto ast = std::make_shared<T>(*original);
3774  ast->parent = parent;
3775  ast->nodes.clear();
3776  for (auto node : original->nodes) {
3777  auto child = optimize(node, ast);
3778  ast->nodes.push_back(child);
3779  }
3780  return ast;
3781  }
3782 
3783 private:
3784  const bool mode_;
3785  const std::vector<std::string> rules_;
3786 };
3787 
3788 struct EmptyType {};
3789 using Ast = AstBase<EmptyType>;
3790 
3791 template <typename T = Ast> void add_ast_action(Definition &rule) {
3792  rule.action = [&](const SemanticValues &vs) {
3793  auto line = vs.line_info();
3794 
3795  if (rule.is_token()) {
3796  return std::make_shared<T>(
3797  vs.path, line.first, line.second, rule.name.data(), vs.token(),
3798  std::distance(vs.ss, vs.sv().data()), vs.sv().length(),
3799  vs.choice_count(), vs.choice());
3800  }
3801 
3802  auto ast =
3803  std::make_shared<T>(vs.path, line.first, line.second, rule.name.data(),
3804  vs.transform<std::shared_ptr<T>>(),
3805  std::distance(vs.ss, vs.sv().data()),
3806  vs.sv().length(), vs.choice_count(), vs.choice());
3807 
3808  for (auto node : ast->nodes) {
3809  node->parent = ast;
3810  }
3811  return ast;
3812  };
3813 }
3814 
3815 #define PEG_EXPAND(...) __VA_ARGS__
3816 #define PEG_CONCAT(a, b) a##b
3817 #define PEG_CONCAT2(a, b) PEG_CONCAT(a, b)
3818 
3819 #define PEG_PICK( \
3820  a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \
3821  a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \
3822  a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \
3823  a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \
3824  a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \
3825  a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \
3826  a92, a93, a94, a95, a96, a97, a98, a99, a100, ...) \
3827  a100
3828 
3829 #define PEG_COUNT(...) \
3830  PEG_EXPAND(PEG_PICK( \
3831  __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, \
3832  86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, \
3833  68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
3834  50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
3835  32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
3836  14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
3837 
3838 #define PEG_DEF_1(r) \
3839  peg::Definition r; \
3840  r.name = #r; \
3841  peg::add_ast_action(r);
3842 
3843 #define PEG_DEF_2(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_1(__VA_ARGS__))
3844 #define PEG_DEF_3(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_2(__VA_ARGS__))
3845 #define PEG_DEF_4(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_3(__VA_ARGS__))
3846 #define PEG_DEF_5(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_4(__VA_ARGS__))
3847 #define PEG_DEF_6(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_5(__VA_ARGS__))
3848 #define PEG_DEF_7(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_6(__VA_ARGS__))
3849 #define PEG_DEF_8(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_7(__VA_ARGS__))
3850 #define PEG_DEF_9(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_8(__VA_ARGS__))
3851 #define PEG_DEF_10(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_9(__VA_ARGS__))
3852 #define PEG_DEF_11(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_10(__VA_ARGS__))
3853 #define PEG_DEF_12(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_11(__VA_ARGS__))
3854 #define PEG_DEF_13(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_12(__VA_ARGS__))
3855 #define PEG_DEF_14(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_13(__VA_ARGS__))
3856 #define PEG_DEF_15(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_14(__VA_ARGS__))
3857 #define PEG_DEF_16(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_15(__VA_ARGS__))
3858 #define PEG_DEF_17(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_16(__VA_ARGS__))
3859 #define PEG_DEF_18(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_17(__VA_ARGS__))
3860 #define PEG_DEF_19(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_18(__VA_ARGS__))
3861 #define PEG_DEF_20(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_19(__VA_ARGS__))
3862 #define PEG_DEF_21(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_20(__VA_ARGS__))
3863 #define PEG_DEF_22(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_21(__VA_ARGS__))
3864 #define PEG_DEF_23(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_22(__VA_ARGS__))
3865 #define PEG_DEF_24(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_23(__VA_ARGS__))
3866 #define PEG_DEF_25(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_24(__VA_ARGS__))
3867 #define PEG_DEF_26(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_25(__VA_ARGS__))
3868 #define PEG_DEF_27(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_26(__VA_ARGS__))
3869 #define PEG_DEF_28(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_27(__VA_ARGS__))
3870 #define PEG_DEF_29(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_28(__VA_ARGS__))
3871 #define PEG_DEF_30(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_29(__VA_ARGS__))
3872 #define PEG_DEF_31(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_30(__VA_ARGS__))
3873 #define PEG_DEF_32(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_31(__VA_ARGS__))
3874 #define PEG_DEF_33(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_32(__VA_ARGS__))
3875 #define PEG_DEF_34(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_33(__VA_ARGS__))
3876 #define PEG_DEF_35(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_34(__VA_ARGS__))
3877 #define PEG_DEF_36(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_35(__VA_ARGS__))
3878 #define PEG_DEF_37(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_36(__VA_ARGS__))
3879 #define PEG_DEF_38(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_37(__VA_ARGS__))
3880 #define PEG_DEF_39(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_38(__VA_ARGS__))
3881 #define PEG_DEF_40(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_39(__VA_ARGS__))
3882 #define PEG_DEF_41(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_40(__VA_ARGS__))
3883 #define PEG_DEF_42(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_41(__VA_ARGS__))
3884 #define PEG_DEF_43(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_42(__VA_ARGS__))
3885 #define PEG_DEF_44(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_43(__VA_ARGS__))
3886 #define PEG_DEF_45(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_44(__VA_ARGS__))
3887 #define PEG_DEF_46(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_45(__VA_ARGS__))
3888 #define PEG_DEF_47(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_46(__VA_ARGS__))
3889 #define PEG_DEF_48(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_47(__VA_ARGS__))
3890 #define PEG_DEF_49(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_48(__VA_ARGS__))
3891 #define PEG_DEF_50(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_49(__VA_ARGS__))
3892 #define PEG_DEF_51(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_50(__VA_ARGS__))
3893 #define PEG_DEF_52(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_51(__VA_ARGS__))
3894 #define PEG_DEF_53(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_52(__VA_ARGS__))
3895 #define PEG_DEF_54(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_53(__VA_ARGS__))
3896 #define PEG_DEF_55(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_54(__VA_ARGS__))
3897 #define PEG_DEF_56(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_55(__VA_ARGS__))
3898 #define PEG_DEF_57(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_56(__VA_ARGS__))
3899 #define PEG_DEF_58(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_57(__VA_ARGS__))
3900 #define PEG_DEF_59(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_58(__VA_ARGS__))
3901 #define PEG_DEF_60(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_59(__VA_ARGS__))
3902 #define PEG_DEF_61(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_60(__VA_ARGS__))
3903 #define PEG_DEF_62(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_61(__VA_ARGS__))
3904 #define PEG_DEF_63(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_62(__VA_ARGS__))
3905 #define PEG_DEF_64(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_63(__VA_ARGS__))
3906 #define PEG_DEF_65(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_64(__VA_ARGS__))
3907 #define PEG_DEF_66(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_65(__VA_ARGS__))
3908 #define PEG_DEF_67(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_66(__VA_ARGS__))
3909 #define PEG_DEF_68(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_67(__VA_ARGS__))
3910 #define PEG_DEF_69(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_68(__VA_ARGS__))
3911 #define PEG_DEF_70(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_69(__VA_ARGS__))
3912 #define PEG_DEF_71(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_70(__VA_ARGS__))
3913 #define PEG_DEF_72(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_71(__VA_ARGS__))
3914 #define PEG_DEF_73(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_72(__VA_ARGS__))
3915 #define PEG_DEF_74(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_73(__VA_ARGS__))
3916 #define PEG_DEF_75(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_74(__VA_ARGS__))
3917 #define PEG_DEF_76(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_75(__VA_ARGS__))
3918 #define PEG_DEF_77(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_76(__VA_ARGS__))
3919 #define PEG_DEF_78(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_77(__VA_ARGS__))
3920 #define PEG_DEF_79(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_78(__VA_ARGS__))
3921 #define PEG_DEF_80(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_79(__VA_ARGS__))
3922 #define PEG_DEF_81(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_80(__VA_ARGS__))
3923 #define PEG_DEF_82(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_81(__VA_ARGS__))
3924 #define PEG_DEF_83(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_82(__VA_ARGS__))
3925 #define PEG_DEF_84(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_83(__VA_ARGS__))
3926 #define PEG_DEF_85(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_84(__VA_ARGS__))
3927 #define PEG_DEF_86(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_85(__VA_ARGS__))
3928 #define PEG_DEF_87(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_86(__VA_ARGS__))
3929 #define PEG_DEF_88(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_87(__VA_ARGS__))
3930 #define PEG_DEF_89(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_88(__VA_ARGS__))
3931 #define PEG_DEF_90(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_89(__VA_ARGS__))
3932 #define PEG_DEF_91(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_90(__VA_ARGS__))
3933 #define PEG_DEF_92(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_91(__VA_ARGS__))
3934 #define PEG_DEF_93(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_92(__VA_ARGS__))
3935 #define PEG_DEF_94(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_93(__VA_ARGS__))
3936 #define PEG_DEF_95(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_94(__VA_ARGS__))
3937 #define PEG_DEF_96(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_95(__VA_ARGS__))
3938 #define PEG_DEF_97(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_96(__VA_ARGS__))
3939 #define PEG_DEF_98(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_97(__VA_ARGS__))
3940 #define PEG_DEF_99(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_98(__VA_ARGS__))
3941 #define PEG_DEF_100(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_99(__VA_ARGS__))
3942 
3943 #define AST_DEFINITIONS(...) \
3944  PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__))
3945 
3946 /*-----------------------------------------------------------------------------
3947  * parser
3948  *---------------------------------------------------------------------------*/
3949 
3950 class parser {
3951 public:
3952  parser() = default;
3953 
3954  parser(const char *s, size_t n, const Rules &rules) {
3955  load_grammar(s, n, rules);
3956  }
3957 
3958  parser(const char *s, const Rules &rules) : parser(s, strlen(s), rules) {}
3959 
3960  parser(const char *s, size_t n) : parser(s, n, Rules()) {}
3961 
3962  parser(const char *s) : parser(s, strlen(s), Rules()) {}
3963 
3964  operator bool() { return grammar_ != nullptr; }
3965 
3966  bool load_grammar(const char *s, size_t n, const Rules &rules) {
3967  grammar_ = ParserGenerator::parse(s, n, rules, start_, log);
3968  return grammar_ != nullptr;
3969  }
3970 
3971  bool load_grammar(const char *s, size_t n) {
3972  return load_grammar(s, n, Rules());
3973  }
3974 
3975  bool load_grammar(const char *s, const Rules &rules) {
3976  auto n = strlen(s);
3977  return load_grammar(s, n, rules);
3978  }
3979 
3980  bool load_grammar(const char *s) {
3981  auto n = strlen(s);
3982  return load_grammar(s, n);
3983  }
3984 
3985  bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
3986  if (grammar_ != nullptr) {
3987  const auto &rule = (*grammar_)[start_];
3988  return post_process(s, n, rule.parse(s, n, path, log));
3989  }
3990  return false;
3991  }
3992 
3993  bool parse(const char *s, const char *path = nullptr) const {
3994  auto n = strlen(s);
3995  return parse_n(s, n, path);
3996  }
3997 
3998  bool parse_n(const char *s, size_t n, std::any &dt,
3999  const char *path = nullptr) const {
4000  if (grammar_ != nullptr) {
4001  const auto &rule = (*grammar_)[start_];
4002  return post_process(s, n, rule.parse(s, n, dt, path, log));
4003  }
4004  return false;
4005  }
4006 
4007  bool parse(const char *s, std::any &dt, const char *path = nullptr) const {
4008  auto n = strlen(s);
4009  return parse_n(s, n, dt, path);
4010  }
4011 
4012  template <typename T>
4013  bool parse_n(const char *s, size_t n, T &val,
4014  const char *path = nullptr) const {
4015  if (grammar_ != nullptr) {
4016  const auto &rule = (*grammar_)[start_];
4017  return post_process(s, n, rule.parse_and_get_value(s, n, val, path, log));
4018  }
4019  return false;
4020  }
4021 
4022  template <typename T>
4023  bool parse(const char *s, T &val, const char *path = nullptr) const {
4024  auto n = strlen(s);
4025  return parse_n(s, n, val, path);
4026  }
4027 
4028  template <typename T>
4029  bool parse_n(const char *s, size_t n, std::any &dt, T &val,
4030  const char *path = nullptr) const {
4031  if (grammar_ != nullptr) {
4032  const auto &rule = (*grammar_)[start_];
4033  return post_process(s, n,
4034  rule.parse_and_get_value(s, n, dt, val, path, log));
4035  }
4036  return false;
4037  }
4038 
4039  template <typename T>
4040  bool parse(const char *s, std::any &dt, T &val,
4041  const char *path = nullptr) const {
4042  auto n = strlen(s);
4043  return parse_n(s, n, dt, val, path);
4044  }
4045 
4046  Definition &operator[](const char *s) { return (*grammar_)[s]; }
4047 
4048  const Definition &operator[](const char *s) const { return (*grammar_)[s]; }
4049 
4050  std::vector<std::string> get_rule_names() const {
4051  std::vector<std::string> rules;
4052  for (auto &[name, _] : *grammar_) {
4053  rules.push_back(name);
4054  }
4055  return rules;
4056  }
4057 
4058  void enable_packrat_parsing() {
4059  if (grammar_ != nullptr) {
4060  auto &rule = (*grammar_)[start_];
4061  rule.enablePackratParsing = true;
4062  }
4063  }
4064 
4065  void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) {
4066  if (grammar_ != nullptr) {
4067  auto &rule = (*grammar_)[start_];
4068  rule.tracer_enter = tracer_enter;
4069  rule.tracer_leave = tracer_leave;
4070  }
4071  }
4072 
4073  template <typename T = Ast> parser &enable_ast() {
4074  for (auto &[_, rule] : *grammar_) {
4075  if (!rule.action) { add_ast_action<T>(rule); }
4076  }
4077  return *this;
4078  }
4079 
4080  template <typename T> std::shared_ptr<T> optimize_ast(std::shared_ptr<T> ast, bool opt_mode = true) const {
4081  return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast);
4082  }
4083 
4084  Log log;
4085 
4086 private:
4087  bool post_process(const char *s, size_t n,
4088  const Definition::Result &r) const {
4089  auto ret = r.ret && r.len == n;
4090  if (log && !ret) { r.error_info.output_log(log, s, n); }
4091  return ret && !r.recovered;
4092  }
4093 
4094  std::vector<std::string> get_no_ast_opt_rules() const {
4095  std::vector<std::string> rules;
4096  for (auto &[name, rule] : *grammar_) {
4097  if (rule.no_ast_opt) { rules.push_back(name); }
4098  }
4099  return rules;
4100  }
4101 
4102  std::shared_ptr<Grammar> grammar_;
4103  std::string start_;
4104 };
4105 
4106 } // namespace peg