00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00018 # define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00019
00020 # include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
00021
00022 # include <vaucanson/algebra/concept/monoid_base.hh>
00023 # include <vaucanson/tools/usual_escaped_characters.hh>
00024
00025 # include <list>
00026 # include <set>
00027
00028 namespace vcsn {
00029
00030 namespace algebra {
00031
00035
00036 namespace krat_exp_lexing {
00037
00046 enum token_e
00047 {
00048 lparen,
00049 rparen,
00050 space,
00051 plus,
00052 e_star,
00053 dot,
00054 zero,
00055 one,
00056 a_word,
00057 a_weight,
00058 eof
00059 };
00060
00061 }
00062
00063 using namespace krat_exp_lexing;
00064
00075 template <class MonoidValue, class SemiringEltValue>
00076 class KRatExpToken
00077 {
00078 public:
00089 struct token
00090 {
00091 token(const token_e& tok_type) :
00092 type(tok_type)
00093 {}
00094
00095 token(const token_e& tok_type, const MonoidValue& word)
00096 {
00097 m = word;
00098 type = tok_type;
00099 }
00100
00101 token(const token_e& tok_type, const SemiringEltValue& weight)
00102 {
00103 w = weight;
00104 type = tok_type;
00105 }
00106
00107 std::string to_string() const
00108 {
00109 switch (type)
00110 {
00111 case a_word : return "word";
00112 case a_weight : return "weight";
00113 case lparen : return "(";
00114 case rparen : return ")";
00115 case one : return "one";
00116 case zero : return "zero";
00117 case krat_exp_lexing::dot : return ".";
00118 case e_star : return "*";
00119 case krat_exp_lexing::plus : return "+";
00120 case space : return " ";
00121 case eof : return "EOF";
00122 }
00123 return "";
00124 }
00125
00126 token_e type;
00127 MonoidValue m;
00128 SemiringEltValue w;
00129 };
00130
00131 KRatExpToken(const token_e& tok)
00132 {
00133 tok_.push_back(tok);
00134 }
00135
00136 KRatExpToken()
00137 {
00138 }
00139
00140 std::string to_string() const
00141 {
00142 if (tok_.size() == 0)
00143 return "no token";
00144 else if (tok_.size() == 1)
00145 return tok_.front().to_string();
00146 else
00147 {
00148 std::string s("schrod(");
00149 typename std::list<token>::const_iterator i = tok_.begin();
00150 while (i != tok_.end())
00151 s += i->to_string() + (++i != tok_.end() ? " ": "");
00152 s += ")";
00153 return s;
00154 }
00155 }
00156
00157 bool is_just_a(const token_e tok)
00158 {
00159 return ((tok_.size() == 1) && (tok_.front().type == tok));
00160 }
00161
00162 bool is_defined()
00163 {
00164 return (tok_.size() > 0);
00165 }
00166
00167 bool is_schrod()
00168 {
00169 return (tok_.size() > 1);
00170 }
00171
00172 bool is_a(const token_e tok)
00173 {
00174 for (typename std::list<token>::const_iterator i = tok_.begin();
00175 i != tok_.end();
00176 ++i)
00177 if (i->type == tok)
00178 return true;
00179 return false;
00180 }
00181
00182 MonoidValue as_word()
00183 {
00184 for (typename std::list<token>::const_iterator i = tok_.begin();
00185 i != tok_.end();
00186 ++i)
00187 if (i->type == a_word)
00188 return i->m;
00189 unreachable("internal error in krat_exp parser");
00190 return MonoidValue();
00191 }
00192
00193 SemiringEltValue as_weight()
00194 {
00195 for (typename std::list<token>::const_iterator i = tok_.begin();
00196 i != tok_.end();
00197 ++i)
00198 if (i->type == a_weight)
00199 return i->w;
00200 unreachable("internal error in krat_exp parser.");
00201 return SemiringEltValue();
00202 }
00203
00204 KRatExpToken& operator=(const token_e tok)
00205 {
00206 tok_.push_back(token(tok));
00207 return *this;
00208 }
00209
00210 KRatExpToken& operator=(const MonoidValue& word_value)
00211 {
00212 tok_.push_back(token(a_word, word_value));
00213 return *this;
00214 }
00215
00216 KRatExpToken& operator=(const SemiringEltValue& weight_value)
00217 {
00218 tok_.push_back(token(a_weight, weight_value));
00219 return *this;
00220 }
00221
00222 void reset()
00223 {
00224 tok_.clear();
00225 }
00226
00227 private:
00228 std::list<token> tok_;
00229 };
00230
00236 template <class S, class T>
00237 struct Lexer
00238 {
00239 typedef KRatExpToken
00240 <
00241 typename Element<S, T>::monoid_elt_value_t,
00242 typename Element<S, T>::semiring_elt_value_t
00243 > krat_exp_token_t;
00244 typedef std::list<krat_exp_token_t> token_stream_t;
00245
00246 Lexer(bool trace = false) :
00247 tokens_ (),
00248 trace_ (trace),
00249 error_msg_ ("No error."),
00250 error_ (false)
00251 {
00252 }
00253
00255 void
00256 lex_error(const std::string& msg = "Lex error.")
00257 {
00258 error_ = true;
00259 error_msg_ = msg;
00260 }
00261
00269 void
00270 lex(const std::string& in, const Element<S, T>& e)
00271 {
00272 typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
00273 typedef typename Element<S, T>::monoid_elt_value_t monoid_elt_value_t;
00274 typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
00275 typedef std::string::const_iterator iterator_t;
00276
00277 iterator_t i = in.begin();
00278 std::set<char> escaped = tools::usual_escaped_characters();
00279 int len = 0;
00280
00281 while (!error_ & (i != in.end()))
00282 {
00283 len = 0;
00284 krat_exp_token_t tok;
00285 switch (*i)
00286 {
00287
00288 case '(' : tok = lparen; len = 1; break;
00289 case ')' : tok = rparen; len = 1; break;
00290 case '+' : tok = krat_exp_lexing::plus; len = 1; break;
00291 case '*' : tok = e_star; len = 1; break;
00292 case '.' : tok = krat_exp_lexing::dot; len = 1; break;
00293 case ' ' : tok = space; len = 1; break;
00294 case '0' : tok = zero; len = 1; break;
00295 case '1' : tok = one; len = 1; break;
00296 }
00297
00298 iterator_t mli = i;
00299 monoid_elt_t w (e.structure().monoid());
00300 if (parse_word(w, in, mli, escaped))
00301 {
00302 if (mli - i > 1)
00303 tok.reset();
00304 tok = w.value();
00305 len = mli - i;
00306 }
00307
00308 iterator_t wli = i;
00309 semiring_elt_t ww (e.structure().semiring());
00310 if (parse_weight(ww, in, wli))
00311 {
00312 if (wli - i > len)
00313 tok.reset();
00314 if (wli - i >= len)
00315 {
00316 tok = ww.value();
00317 len = wli - i;
00318 }
00319 }
00320 if (len == 0)
00321 lex_error(std::string("Invalid token '") + *i + "'.");
00322 else
00323 tokens_.push_back(tok);
00324 i += len;
00325 }
00326 }
00327
00329 bool
00330 error() const
00331 {
00332 return error_;
00333 }
00334
00336 const std::string&
00337 error_msg() const
00338 {
00339 return error_msg_;
00340 }
00341
00343 krat_exp_token_t first() const
00344 {
00345 return (tokens_.size() > 0) ? tokens_.front() :
00346 krat_exp_token_t (eof);
00347 }
00348
00350 krat_exp_token_t second() const
00351 {
00352 return (tokens_.size() >= 2) ?
00353 *++tokens_.begin() : krat_exp_token_t (eof);
00354 }
00355
00362 void eat()
00363 {
00364 if (tokens_.size() > 0)
00365 {
00366 if (trace_)
00367 std::cerr << "LEXER: eating '" << tokens_.front().to_string()
00368 << "'." << std::endl;
00369 tokens_.pop_front();
00370 }
00371 }
00372
00373 void set_trace(bool trace)
00374 {
00375 trace_ = trace;
00376 }
00377
00378 protected:
00379 token_stream_t tokens_;
00380 bool trace_;
00381 std::string error_msg_;
00382 bool error_;
00383 };
00384
00419 template <class S, class T>
00420 struct Parser
00421 {
00422 typedef KRatExpToken
00423 <
00424 typename Element<S, T>::monoid_elt_value_t,
00425 typename Element<S, T>::semiring_elt_value_t
00426 > krat_exp_token_t;
00427 typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
00428 typedef typename Element<S, T>::monoid_elt_value_t monoid_elt_value_t;
00429 typedef typename monoid_elt_t::set_t monoid_t;
00430 typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
00431 typedef typename semiring_elt_t::set_t semiring_t;
00432 typedef typename Element<S, T>::semiring_elt_value_t semiring_elt_value_t;
00433 typedef std::list<krat_exp_token_t> token_stream_t;
00434
00435 Parser(Lexer<S, T>& lexer, bool trace = false) :
00436 lexer_ (lexer),
00437 trace_ (trace),
00438 error_ (lexer.error()),
00439 error_msg_ (lexer.error_msg())
00440 {
00441 }
00442
00444 void parse(Element<S, T>& exp)
00445 {
00446 if (!error_)
00447 {
00448 trace("*** START ***");
00449 try
00450 {
00451 parse_exp(exp);
00452 accept(eof);
00453 }
00454 catch (const std::string& s)
00455 {
00456 error_ = true;
00457 error_msg_ = s;
00458 }
00459 trace("*** END ***");
00460 }
00461 }
00462
00464 bool error() const
00465 {
00466 return error_;
00467 }
00468
00470 const std::string& error_msg() const
00471 {
00472 return error_msg_;
00473 }
00474
00475 void set_trace(bool trace)
00476 {
00477 trace_ = trace;
00478 }
00479
00480 protected:
00481
00483
00484 void
00485 trace(const std::string& msg)
00486 {
00487 if (trace_)
00488 std::cerr << "PARSER: " << msg << '.' << std::endl;
00489 }
00490
00491 template <class Misc>
00492 void
00493 trace(const std::string& msg, const Misc& v)
00494 {
00495 if (trace_)
00496 std::cerr << "PARSER: " << msg << " (" << v << ")." << std::endl;
00497 }
00499
00501 void parse_error(const std::string& msg = "parse_error.")
00502 throw (const std::string&)
00503 {
00504 trace("Stop on error.", msg);
00505 throw msg;
00506 }
00507
00517 void accept(token_e tok)
00518 {
00519 krat_exp_token_t get_token = lexer_.first();
00520 typename krat_exp_token_t::token expected (tok);
00521
00522 trace("accept", expected.to_string());
00523 if (!get_token.is_a(tok))
00524 parse_error("waiting for a '" + expected.to_string() +
00525 "' get a '" + get_token.to_string() + "'.");
00526 else
00527 lexer_.eat();
00528 }
00529
00531 void parse_exp(Element<S, T>& exp)
00532 {
00533 trace("parse_exp: Start");
00534 parse_term (exp);
00535 while (lexer_.first().is_a(krat_exp_lexing::plus))
00536 {
00537 accept(krat_exp_lexing::plus);
00538 Element<S, T> rhs (exp.structure());
00539 parse_term(rhs);
00540 exp = exp + rhs;
00541 }
00542 trace("parse_exp: End", exp);
00543 }
00544
00546 void parse_term(Element<S, T>& exp)
00547 {
00548 trace("parse_term: Start");
00549 parse_right_weighted (exp);
00550 while (lexer_.first().is_a(krat_exp_lexing::dot) ||
00551 lexer_.first().is_a(a_weight) ||
00552 lexer_.first().is_a(lparen) ||
00553 lexer_.first().is_a(one) ||
00554 lexer_.first().is_a(zero) ||
00555 lexer_.first().is_a(a_word))
00556 {
00557 if (lexer_.first().is_a(krat_exp_lexing::dot))
00558 accept(krat_exp_lexing::dot);
00559 Element<S, T> rhs (exp.structure());
00560 parse_right_weighted(rhs);
00561 exp = exp * rhs;
00562 }
00563 trace("parse_term: End", exp);
00564 }
00565
00567 void parse_right_weighted(Element<S, T>& exp)
00568 {
00569 trace("parse_right_weighted: Start");
00570 parse_left_weighted(exp);
00571 while (lexer_.first().is_a(space))
00572 {
00573 accept(space);
00574 krat_exp_token_t tok = lexer_.first();
00575 accept(a_weight);
00576 exp = exp * semiring_elt_t (exp.structure().semiring(), tok.as_weight());
00577 }
00578 trace("parse_right_weighted: End", exp);
00579 }
00580
00582 void parse_left_weighted(Element<S, T>& exp)
00583 {
00584 trace("parse_left_weighted: Start");
00585
00586 if (lexer_.first().is_a(a_weight) &&
00587 (!lexer_.first().is_schrod() || lexer_.second().is_a(space)))
00588 {
00589
00590
00591
00592
00593 semiring_elt_t w (exp.structure().semiring(), lexer_.first().as_weight());
00594 bool just_a_weight = true;
00595 Element<S, T> m (exp.structure());
00596 if (lexer_.first().is_a(zero))
00597 {
00598 just_a_weight = false;
00599 m = zero_as<T>::of(exp.structure());
00600 }
00601 else if (lexer_.first().is_a(one))
00602 {
00603 just_a_weight = false;
00604 m = identity_as<T>::of(exp.structure());
00605 }
00606 else if (lexer_.first().is_a(a_word))
00607 {
00608 just_a_weight = false;
00609 m = Element<S, T> (exp.structure(), lexer_.first().as_word());
00610 }
00611 accept(a_weight);
00612 accept(space);
00613 if (just_a_weight ||
00614 !lexer_.first().is_a(a_weight) ||
00615 lexer_.second().is_a(space))
00616 {
00617
00618 Element<S, T> rhs (exp.structure());
00619 parse_left_weighted(rhs);
00620 exp = w * rhs;
00621 }
00622 else
00623 {
00624
00625 w = lexer_.first().as_weight();
00626 accept(a_weight);
00627 exp = m * w;
00628 }
00629 }
00630 else
00631
00632
00633 parse_stared(exp);
00634 trace("parse_left_weighted: End", exp);
00635 }
00636
00638 void parse_stared(Element<S, T>& exp)
00639 {
00640 trace("parse_stared: Start");
00641 parse_factor(exp);
00642 while (lexer_.first().is_a(e_star))
00643 {
00644 accept(e_star);
00645 exp.star();
00646 }
00647 trace("parse_stared: End", exp);
00648 }
00649
00651 void parse_factor(Element<S, T>& exp)
00652 {
00653 trace("parse_factor: Start");
00654 if (lexer_.first().is_a(one))
00655 {
00656 accept(one);
00657 exp = identity_as<T>::of(exp.structure());
00658 }
00659 else if (lexer_.first().is_a(zero))
00660 {
00661 accept(zero);
00662 exp = zero_as<T>::of(exp.structure());
00663 }
00664 else if (lexer_.first().is_a(a_word))
00665 {
00666 exp = monoid_elt_t (exp.structure().monoid(), lexer_.first().as_word());
00667 accept(a_word);
00668 }
00669 else if (lexer_.first().is_a(lparen))
00670 {
00671 accept(lparen);
00672 parse_exp(exp);
00673 accept(rparen);
00674 }
00675 else
00676 parse_error("waiting for a factor get a " +
00677 lexer_.first().to_string());
00678 trace("parse_factor: End", exp);
00679 }
00680
00681 Lexer<S, T>& lexer_;
00682 bool trace_;
00683 bool error_;
00684 std::string error_msg_;
00685 };
00686
00687 template <class S, class T>
00688 std::pair<bool, std::string>
00689 parse(const std::string& from,
00690 Element<S, T>& exp,
00691 bool lex_trace,
00692 bool parse_trace)
00693 {
00694 Lexer<S, T> lexer(lex_trace);
00695 lexer.lex(from, exp);
00696 Parser<S, T> parser(lexer, parse_trace);
00697 parser.parse(exp);
00698 return std::make_pair(parser.error(), parser.error_msg());
00699 }
00700
00704 }
00705
00706 }
00707
00708 #endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX