00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 #ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00019 # define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00020 # include <map>
00021 # include <queue>
00022 # include <set>
00023 # include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
00024 # include <vaucanson/algebra/implementation/series/krat_exp_proxy.hh>
00025 # include <vaucanson/algebra/concept/monoid_base.hh>
00026 
00027 
00028 namespace yy
00029 {
00030   struct token_queue;
00031   struct krat_exp_parser
00032   {
00033     krat_exp_parser();
00034     void insert_word(vcsn::algebra::krat_exp_virtual* rexp);
00035     void insert_weight(vcsn::algebra::semiring_virtual* sem);
00036     void insert_one(vcsn::algebra::krat_exp_virtual* rexp);
00037     void insert_zero(vcsn::algebra::krat_exp_virtual* rexp);
00038     void insert_token(int i, std::string* str);
00039     int parse(vcsn::algebra::krat_exp_virtual& rexp, std::string& error);
00040 
00041     
00042     token_queue* tok_q_;
00043    }; 
00044 } 
00045 
00046 namespace vcsn
00047 {
00048   namespace algebra
00049   {
00050 
00051     template <class S, class T>
00052     struct Lexer
00053     {
00054       typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
00055       typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
00056       Lexer(const std::string& from,
00057             Element<S, T>& e,
00058             yy::krat_exp_parser& parser,
00059             bool lex_trace,
00060             const token_representation<typename S::monoid_t::letter_t> tok_rep,
00061             std::string& error) :
00062         from_(from),
00063         e_(e),
00064         parser_(parser),
00065         lex_trace_(lex_trace),
00066         close_weight_("}"),
00067         token_tab_(9),
00068         error_(error)
00069       {
00070         precondition(!tok_rep.open_par.empty());
00071         precondition(!tok_rep.close_par.empty());
00072         precondition(!tok_rep.plus.empty());
00073         precondition(!tok_rep.times.empty());
00074         precondition(!tok_rep.star.empty());
00075         precondition(!tok_rep.one.empty());
00076         precondition(!tok_rep.zero.empty());
00077         precondition(!tok_rep.open_weight.empty());
00078         precondition(!tok_rep.close_weight.empty());
00079 
00080         token_tab_[0] = tok_rep.open_par;
00081         token_tab_[1] = tok_rep.close_par;
00082         token_tab_[2] = tok_rep.plus;
00083         token_tab_[3] = tok_rep.times;
00084         token_tab_[4] = tok_rep.star;
00085         token_tab_[5] = tok_rep.one;
00086         token_tab_[6] = tok_rep.zero;
00087         token_tab_[7] = tok_rep.open_weight;
00088         close_weight_ = tok_rep.close_weight;
00089         for (unsigned i = 0; i < tok_rep.spaces.size(); i++)
00090         {
00091           assertion(!tok_rep.spaces[i].empty());
00092           token_tab_[8 + i] = tok_rep.spaces[i];
00093         }
00094 
00095         std::string::const_iterator sit;
00096         semiring_elt_t ww(e_.structure().semiring());
00097         sit = close_weight_.begin();
00098         if (parse_weight(ww, close_weight_, sit))
00099           error_ += "Warning : the token '" + close_weight_ +
00100                     + "' is already defined as a weight.\n";
00101         sit = token_tab_[7].begin();
00102         if (parse_weight(ww, token_tab_[7], sit))
00103           error_ += "Warning : the token '" + token_tab_[7]
00104                     + "' is already defined as a weight.\n";
00105         for (unsigned i = 0; i < token_tab_.size(); i++)
00106         {
00107           sit = token_tab_[i].begin();
00108           monoid_elt_t w(e_.structure().monoid());
00109           if (parse_word(w, token_tab_[i], sit, std::set<char>()))
00110             error_ +=  "Warning : the token '" + token_tab_[i]
00111                        + "' is already defined as a word.\n";
00112         }
00113 
00114       }
00115 
00116       bool
00117       lex()
00118       {
00119         size_t curr = 0;
00120         size_t size = from_.size();
00121         size_t it = curr;
00122         while (it < size)
00123         {
00124           for (size_t i = 0; i < token_tab_.size(); i++)
00125           {
00126             if (!from_.compare(it, token_tab_[i].size(), token_tab_[i]))
00127             {
00128               if (curr != it)
00129                 if (!insert_word(curr, it))
00130                   return false;
00131               if (i == 7)
00132               {
00133                 if (!insert_weight(it))
00134                   return false;
00135               }
00136               else
00137               {
00138                 if (i < 7)
00139                   insert_token(i);
00140                 it += token_tab_[i].size();
00141               }
00142               curr = it--;
00143               break;
00144             }
00145           }
00146           it++;
00147         }
00148         if (curr != it)
00149           if (!insert_word(curr, it))
00150             return false;
00151         return true;
00152       }
00153 
00154       private:
00155       bool
00156       insert_word(size_t curr, size_t it)
00157       {
00158         monoid_elt_t w(e_.structure().monoid());
00159         std::string s = from_.substr(curr, it - curr);
00160         std::string::const_iterator sit = s.begin();
00161         if (parse_word(w, s, sit, std::set<char>()))
00162         {
00163           Element<S, T> ww = Element<S, T>(e_.structure(), w.value());
00164           krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(ww);
00165           parser_.insert_word(rexp);
00166         }
00167         else
00168         {
00169           error_ += "Lexer error : " + s
00170                     + " some characters are not part of the alphabet\n";
00171           return false;
00172         }
00173         return true;
00174       }
00175 
00176       bool
00177       insert_weight(size_t& it)
00178       {
00179         it += token_tab_[7].size();
00180         size_t bg = it;
00181         size_t size = from_.size();
00182         unsigned cpt = 1;
00183         for (; it < size; it++)
00184         {
00185           if (!from_.compare(it, token_tab_[7].size(), token_tab_[7]))
00186             cpt++;
00187           else
00188             if (!from_.compare(it, close_weight_.size(), close_weight_))
00189             {
00190               if (cpt == 1)
00191               {
00192                 semiring_elt_t w(e_.structure().semiring());
00193                 std::string s = from_.substr(bg, it - bg);
00194                 std::string::const_iterator sit = s.begin();
00195                 if (parse_weight(w, s, sit))
00196                 {
00197                   semiring_proxy<S, T>* sem = new semiring_proxy<S, T>(w);
00198                   parser_.insert_weight(sem);
00199                 }
00200                 else
00201                 {
00202                   error_ += "Lexer error : " + s + " is not a weight\n";
00203                   return false;
00204                 }
00205                 it += close_weight_.size();
00206                 return true;
00207               }
00208               else
00209                 cpt--;
00210             }
00211         }
00212         error_ += "Lexer error : Expected " + close_weight_
00213                   + "instead of END\n";
00214         return false;
00215       }
00216 
00217       void
00218       insert_token(int i)
00219       {
00220         if (i == 5)
00221         {
00222           Element<S, T> w = identity_as<T>::of(e_.structure());
00223           krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(w);
00224           parser_.insert_one(rexp);
00225         }
00226         else
00227           if (i == 6)
00228           {
00229             Element<S, T> w = zero_as<T>::of(e_.structure());
00230             krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(w);
00231             parser_.insert_zero(rexp);
00232           }
00233           else
00234           {
00235             std::string* str = new std::string(token_tab_[i]);
00236             parser_.insert_token(i, str);
00237           }
00238       }
00239 
00240       const std::string& from_;
00241       Element<S, T>& e_;
00242       yy::krat_exp_parser& parser_;
00243       bool lex_trace_;
00244       std::string close_weight_;
00245       std::vector<std::string> token_tab_;
00246       std::string& error_;
00247     }; 
00248 
00249     template <class S, class T>
00250     std::pair<bool, std::string>
00251     parse(const std::string& from,
00252         Element<S, T>& exp,
00253         const token_representation<typename S::monoid_t::letter_t> tok_rep
00254         = token_representation<typename S::monoid_t::letter_t>(),
00255         bool lex_trace = false,
00256         bool parse_trace = false)
00257     {
00258       parse_trace = parse_trace;
00259       std::string error;
00260       yy::krat_exp_parser parser;
00261       Lexer<S, T> lex(from, exp, parser, lex_trace, tok_rep, error);
00262       if (!lex.lex())
00263         return std::make_pair(true, error);
00264       krat_exp_proxy<S, T> rexp(exp);
00265       if (parser.parse(rexp, error))
00266         return std::make_pair(true, error);
00267       exp = rexp.self;
00268       return std::make_pair(false, error);
00269     }
00270 
00271   } 
00272 
00273 } 
00274 
00275 #endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX