00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00018 # define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00019
00020 # include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
00021
00022 # include <vaucanson/algebra/concept/monoid_base.hh>
00023 # include <vaucanson/misc/usual_escaped_characters.hh>
00024
00025 # include <list>
00026 # include <set>
00027 # include <iostream>
00028
00029 namespace vcsn {
00030
00031 namespace algebra {
00032
00036
00037 namespace krat_exp_lexing {
00038
00047 enum token_e
00048 {
00049 lparen,
00050 rparen,
00051 space,
00052 plus,
00053 e_star,
00054 dot,
00055 zero,
00056 one,
00057 a_word,
00058 a_weight,
00059 eof
00060 };
00061
00062 }
00063
00064 using namespace krat_exp_lexing;
00065
00076 template <class MonoidValue, class SemiringEltValue>
00077 class KRatExpToken
00078 {
00079 public:
00090 struct token
00091 {
00092 token(const token_e& tok_type) :
00093 type(tok_type)
00094 {}
00095
00096 token(const token_e& tok_type, const MonoidValue& word)
00097 {
00098 m = word;
00099 type = tok_type;
00100 }
00101
00102 token(const token_e& tok_type, const SemiringEltValue& weight)
00103 {
00104 w = weight;
00105 type = tok_type;
00106 }
00107
00108 std::string to_string() const
00109 {
00110 switch (type)
00111 {
00112 case a_word : return "word";
00113 case a_weight : return "weight";
00114 case lparen : return "(";
00115 case rparen : return ")";
00116 case one : return "one";
00117 case zero : return "zero";
00118 case krat_exp_lexing::dot : return ".";
00119 case e_star : return "*";
00120 case krat_exp_lexing::plus : return "+";
00121 case space : return " ";
00122 case eof : return "EOF";
00123 }
00124 return "";
00125 }
00126
00127 token_e type;
00128 MonoidValue m;
00129 SemiringEltValue w;
00130 };
00131
00132 KRatExpToken(const token_e& tok)
00133 {
00134 tok_.push_back(tok);
00135 }
00136
00137 KRatExpToken()
00138 {
00139 }
00140
00141 std::string to_string() const
00142 {
00143 if (tok_.size() == 0)
00144 return "no token";
00145 else if (tok_.size() == 1)
00146 return tok_.front().to_string();
00147 else
00148 {
00149 std::string s("schrod(");
00150 typename std::list<token>::const_iterator i = tok_.begin();
00151 while (i != tok_.end())
00152 s += i->to_string() + (++i != tok_.end() ? " ": "");
00153 s += ")";
00154 return s;
00155 }
00156 }
00157
00158 bool is_just_a(const token_e tok)
00159 {
00160 return ((tok_.size() == 1) && (tok_.front().type == tok));
00161 }
00162
00163 bool is_defined()
00164 {
00165 return (tok_.size() > 0);
00166 }
00167
00168 bool is_schrod()
00169 {
00170 return (tok_.size() > 1);
00171 }
00172
00173 bool is_a(const token_e tok)
00174 {
00175 for (typename std::list<token>::const_iterator i = tok_.begin();
00176 i != tok_.end();
00177 ++i)
00178 if (i->type == tok)
00179 return true;
00180 return false;
00181 }
00182
00183 MonoidValue as_word()
00184 {
00185 for (typename std::list<token>::const_iterator i = tok_.begin();
00186 i != tok_.end();
00187 ++i)
00188 if (i->type == a_word)
00189 return i->m;
00190 unreachable("internal error in krat_exp parser");
00191 return MonoidValue();
00192 }
00193
00194 SemiringEltValue as_weight()
00195 {
00196 for (typename std::list<token>::const_iterator i = tok_.begin();
00197 i != tok_.end();
00198 ++i)
00199 if (i->type == a_weight)
00200 return i->w;
00201 unreachable("internal error in krat_exp parser.");
00202 return SemiringEltValue();
00203 }
00204
00205 KRatExpToken& operator=(const token_e tok)
00206 {
00207 tok_.push_back(token(tok));
00208 return *this;
00209 }
00210
00211 KRatExpToken& operator=(const MonoidValue& word_value)
00212 {
00213 tok_.push_back(token(a_word, word_value));
00214 return *this;
00215 }
00216
00217 KRatExpToken& operator=(const SemiringEltValue& weight_value)
00218 {
00219 tok_.push_back(token(a_weight, weight_value));
00220 return *this;
00221 }
00222
00223 void reset()
00224 {
00225 tok_.clear();
00226 }
00227
00228 private:
00229 std::list<token> tok_;
00230 };
00231
00237 template <class S, class T>
00238 struct Lexer
00239 {
00240 typedef KRatExpToken
00241 <
00242 typename Element<S, T>::monoid_elt_value_t,
00243 typename Element<S, T>::semiring_elt_value_t
00244 > krat_exp_token_t;
00245 typedef std::list<krat_exp_token_t> token_stream_t;
00246
00247 Lexer(bool trace = false) :
00248 tokens_ (),
00249 trace_ (trace),
00250 error_msg_ ("No error."),
00251 error_ (false)
00252 {
00253 }
00254
00256 void
00257 lex_error(const std::string& msg = "Lex error.")
00258 {
00259 error_ = true;
00260 error_msg_ = msg;
00261 }
00262
00270 void
00271 lex(const std::string& in, const Element<S, T>& e)
00272 {
00273 typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
00274 typedef typename Element<S, T>::monoid_elt_value_t monoid_elt_value_t;
00275 typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
00276 typedef std::string::const_iterator iterator_t;
00277
00278 iterator_t i = in.begin();
00279 std::set<char> escaped = misc::usual_escaped_characters();
00280 int len = 0;
00281
00282 while (!error_ & (i != in.end()))
00283 {
00284 len = 0;
00285 krat_exp_token_t tok;
00286 switch (*i)
00287 {
00288
00289 case '(' : tok = lparen; len = 1; break;
00290 case ')' : tok = rparen; len = 1; break;
00291 case '+' : tok = krat_exp_lexing::plus; len = 1; break;
00292 case '*' : tok = e_star; len = 1; break;
00293 case '.' : tok = krat_exp_lexing::dot; len = 1; break;
00294 case ' ' : tok = space; len = 1; break;
00295 case '0' : tok = zero; len = 1; break;
00296 case '1' : tok = one; len = 1; break;
00297 }
00298
00299 iterator_t mli = i;
00300 monoid_elt_t w (e.structure().monoid());
00301 if (parse_word(w, in, mli, escaped))
00302 {
00303 if (mli - i > 1)
00304 tok.reset();
00305 tok = w.value();
00306 len = mli - i;
00307 }
00308
00309 iterator_t wli = i;
00310 semiring_elt_t ww (e.structure().semiring());
00311 if (parse_weight(ww, in, wli))
00312 {
00313 if (wli - i > len)
00314 tok.reset();
00315 if (wli - i >= len)
00316 {
00317 tok = ww.value();
00318 len = wli - i;
00319 }
00320 }
00321 if (len == 0)
00322 lex_error(std::string("Invalid token '") + *i + "'.");
00323 else
00324 tokens_.push_back(tok);
00325 i += len;
00326 }
00327 }
00328
00330 bool
00331 error() const
00332 {
00333 return error_;
00334 }
00335
00337 const std::string&
00338 error_msg() const
00339 {
00340 return error_msg_;
00341 }
00342
00344 krat_exp_token_t first() const
00345 {
00346 return (tokens_.size() > 0) ? tokens_.front() :
00347 krat_exp_token_t (eof);
00348 }
00349
00351 krat_exp_token_t second() const
00352 {
00353 return (tokens_.size() >= 2) ?
00354 *++tokens_.begin() : krat_exp_token_t (eof);
00355 }
00356
00363 void eat()
00364 {
00365 if (tokens_.size() > 0)
00366 {
00367 if (trace_)
00368 std::cerr << "LEXER: eating '" << tokens_.front().to_string()
00369 << "'." << std::endl;
00370 tokens_.pop_front();
00371 }
00372 }
00373
00374 void set_trace(bool trace)
00375 {
00376 trace_ = trace;
00377 }
00378
00379 protected:
00380 token_stream_t tokens_;
00381 bool trace_;
00382 std::string error_msg_;
00383 bool error_;
00384 };
00385
00420 template <class S, class T>
00421 struct Parser
00422 {
00423 typedef KRatExpToken
00424 <
00425 typename Element<S, T>::monoid_elt_value_t,
00426 typename Element<S, T>::semiring_elt_value_t
00427 > krat_exp_token_t;
00428 typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
00429 typedef typename Element<S, T>::monoid_elt_value_t monoid_elt_value_t;
00430 typedef typename monoid_elt_t::set_t monoid_t;
00431 typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
00432 typedef typename semiring_elt_t::set_t semiring_t;
00433 typedef typename Element<S, T>::semiring_elt_value_t semiring_elt_value_t;
00434 typedef std::list<krat_exp_token_t> token_stream_t;
00435
00436 Parser(Lexer<S, T>& lexer, bool trace = false) :
00437 lexer_ (lexer),
00438 trace_ (trace),
00439 error_ (lexer.error()),
00440 error_msg_ (lexer.error_msg())
00441 {
00442 }
00443
00445 void parse(Element<S, T>& exp)
00446 {
00447 if (!error_)
00448 {
00449 trace("*** START ***");
00450 try
00451 {
00452 parse_exp(exp);
00453 accept(eof);
00454 }
00455 catch (const std::string& s)
00456 {
00457 error_ = true;
00458 error_msg_ = s;
00459 }
00460 trace("*** END ***");
00461 }
00462 }
00463
00465 bool error() const
00466 {
00467 return error_;
00468 }
00469
00471 const std::string& error_msg() const
00472 {
00473 return error_msg_;
00474 }
00475
00476 void set_trace(bool trace)
00477 {
00478 trace_ = trace;
00479 }
00480
00481 protected:
00482
00484
00485 void
00486 trace(const std::string& msg)
00487 {
00488 if (trace_)
00489 std::cerr << "PARSER: " << msg << '.' << std::endl;
00490 }
00491
00492 template <class Misc>
00493 void
00494 trace(const std::string& msg, const Misc& v)
00495 {
00496 if (trace_)
00497 std::cerr << "PARSER: " << msg << " (" << v << ")." << std::endl;
00498 }
00500
00502 void parse_error(const std::string& msg = "parse_error.")
00503 throw (const std::string&)
00504 {
00505 trace("Stop on error.", msg);
00506 throw msg;
00507 }
00508
00518 void accept(token_e tok)
00519 {
00520 krat_exp_token_t get_token = lexer_.first();
00521 typename krat_exp_token_t::token expected (tok);
00522
00523 trace("accept", expected.to_string());
00524 if (!get_token.is_a(tok))
00525 parse_error("waiting for a '" + expected.to_string() +
00526 "' get a '" + get_token.to_string() + "'.");
00527 else
00528 lexer_.eat();
00529 }
00530
00532 void parse_exp(Element<S, T>& exp)
00533 {
00534 trace("parse_exp: Start");
00535 parse_term (exp);
00536 while (lexer_.first().is_a(krat_exp_lexing::plus))
00537 {
00538 accept(krat_exp_lexing::plus);
00539 Element<S, T> rhs (exp.structure());
00540 parse_term(rhs);
00541 exp = exp + rhs;
00542 }
00543 trace("parse_exp: End", exp);
00544 }
00545
00547 void parse_term(Element<S, T>& exp)
00548 {
00549 trace("parse_term: Start");
00550 parse_right_weighted (exp);
00551 while (lexer_.first().is_a(krat_exp_lexing::dot) ||
00552 lexer_.first().is_a(a_weight) ||
00553 lexer_.first().is_a(lparen) ||
00554 lexer_.first().is_a(one) ||
00555 lexer_.first().is_a(zero) ||
00556 lexer_.first().is_a(a_word))
00557 {
00558 if (lexer_.first().is_a(krat_exp_lexing::dot))
00559 accept(krat_exp_lexing::dot);
00560 Element<S, T> rhs (exp.structure());
00561 parse_right_weighted(rhs);
00562 exp = exp * rhs;
00563 }
00564 trace("parse_term: End", exp);
00565 }
00566
00568 void parse_right_weighted(Element<S, T>& exp)
00569 {
00570 trace("parse_right_weighted: Start");
00571 parse_left_weighted(exp);
00572 while (lexer_.first().is_a(space))
00573 {
00574 accept(space);
00575 krat_exp_token_t tok = lexer_.first();
00576 accept(a_weight);
00577 exp = exp * semiring_elt_t (exp.structure().semiring(), tok.as_weight());
00578 }
00579 trace("parse_right_weighted: End", exp);
00580 }
00581
00583 void parse_left_weighted(Element<S, T>& exp)
00584 {
00585 trace("parse_left_weighted: Start");
00586
00587 if (lexer_.first().is_a(a_weight) &&
00588 (!lexer_.first().is_schrod() || lexer_.second().is_a(space)))
00589 {
00590
00591
00592
00593
00594 semiring_elt_t w (exp.structure().semiring(), lexer_.first().as_weight());
00595 bool just_a_weight = true;
00596 Element<S, T> m (exp.structure());
00597 if (lexer_.first().is_a(zero))
00598 {
00599 just_a_weight = false;
00600 m = zero_as<T>::of(exp.structure());
00601 }
00602 else if (lexer_.first().is_a(one))
00603 {
00604 just_a_weight = false;
00605 m = identity_as<T>::of(exp.structure());
00606 }
00607 else if (lexer_.first().is_a(a_word))
00608 {
00609 just_a_weight = false;
00610 m = Element<S, T> (exp.structure(), lexer_.first().as_word());
00611 }
00612 accept(a_weight);
00613 accept(space);
00614 if (just_a_weight ||
00615 !lexer_.first().is_a(a_weight) ||
00616 lexer_.second().is_a(space))
00617 {
00618
00619 Element<S, T> rhs (exp.structure());
00620 parse_left_weighted(rhs);
00621 exp = w * rhs;
00622 }
00623 else
00624 {
00625
00626 w = lexer_.first().as_weight();
00627 accept(a_weight);
00628 exp = m * w;
00629 }
00630 }
00631 else
00632
00633
00634 parse_stared(exp);
00635 trace("parse_left_weighted: End", exp);
00636 }
00637
00639 void parse_stared(Element<S, T>& exp)
00640 {
00641 trace("parse_stared: Start");
00642 parse_factor(exp);
00643 while (lexer_.first().is_a(e_star))
00644 {
00645 accept(e_star);
00646 exp.star();
00647 }
00648 trace("parse_stared: End", exp);
00649 }
00650
00652 void parse_factor(Element<S, T>& exp)
00653 {
00654 trace("parse_factor: Start");
00655 if (lexer_.first().is_a(one))
00656 {
00657 accept(one);
00658 exp = identity_as<T>::of(exp.structure());
00659 }
00660 else if (lexer_.first().is_a(zero))
00661 {
00662 accept(zero);
00663 exp = zero_as<T>::of(exp.structure());
00664 }
00665 else if (lexer_.first().is_a(a_word))
00666 {
00667 exp = monoid_elt_t (exp.structure().monoid(), lexer_.first().as_word());
00668 accept(a_word);
00669 }
00670 else if (lexer_.first().is_a(lparen))
00671 {
00672 accept(lparen);
00673 parse_exp(exp);
00674 accept(rparen);
00675 }
00676 else
00677 parse_error("waiting for a factor get a " +
00678 lexer_.first().to_string());
00679 trace("parse_factor: End", exp);
00680 }
00681
00682 Lexer<S, T>& lexer_;
00683 bool trace_;
00684 bool error_;
00685 std::string error_msg_;
00686 };
00687
00688 template <class S, class T>
00689 std::pair<bool, std::string>
00690 parse(const std::string& from,
00691 Element<S, T>& exp,
00692 bool lex_trace,
00693 bool parse_trace)
00694 {
00695 Lexer<S, T> lexer(lex_trace);
00696 lexer.lex(from, exp);
00697 Parser<S, T> parser(lexer, parse_trace);
00698 parser.parse(exp);
00699 return std::make_pair(parser.error(), parser.error_msg());
00700 }
00701
00705 }
00706
00707 }
00708
00709 #endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX