Vaucanson 1.4
// krat_exp_parser.hxx: this file is part of the Vaucanson project.
//
// Vaucanson, a generic library for finite state machines.
//
// Copyright (C) 2008, 2009, 2011 The Vaucanson Group.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//
// The Vaucanson Group consists of people listed in the `AUTHORS' file.

#ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
# define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX

# include <map>
# include <queue>
# include <set>
# include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
# include <vaucanson/algebra/concept/monoid_base.hh>
# include <vaucanson/algebra/implementation/series/krat_exp_parser_private.hh>

namespace vcsn
{
  namespace algebra
  {

    template <class S, class T>
    struct Lexer
    {
      // Type helpers.
      typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
      typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
      typedef typename Element<S, T>::set_t::shared_series_rep_t
        shared_series_rep_t;

      Lexer(const std::string& from,
            Element<S, T>& e,
            vcsnyy::krat_exp_parser& parser,
            bool lex_trace,
            std::string& error) :
        from_(from),
        e_(e),
        parser_(parser),
        lex_trace_(lex_trace),
        close_weight_("}"),
        // Reserve seven elements for the "visible" tokens.
        token_tab_(7),
        error_(error)
      {
        // Get the series representation.
        const shared_series_rep_t& rep = e.structure().representation();

        token_tab_[0] = rep->open_par;
        token_tab_[1] = rep->close_par;
        token_tab_[2] = rep->plus;
        token_tab_[3] = rep->times;
        token_tab_[4] = rep->star;
        token_tab_[5] = rep->zero;
        token_tab_[6] = rep->open_weight;
        close_weight_ = rep->close_weight;

        // Concatenate the spaces representation vector.
        token_tab_.insert(token_tab_.end(),
                          rep->spaces.begin(), rep->spaces.end());

        // Warn about tokens that are ambiguous with weights or words.
        std::string::const_iterator sit;
        semiring_elt_t ww(e_.structure().semiring());
        sit = close_weight_.begin();
        if (parse_weight(ww, close_weight_, sit))
          error_ += "Warning: the token '" + close_weight_
            + "' is already defined as a weight.\n";
        sit = token_tab_[6].begin();
        if (parse_weight(ww, token_tab_[6], sit))
          error_ += "Warning: the token '" + token_tab_[6]
            + "' is already defined as a weight.\n";
        for (unsigned i = 0; i < token_tab_.size(); i++)
        {
          monoid_elt_t w(e_.structure().monoid());
          if (parse_word(w, token_tab_[i]).first)
            error_ += "Warning: the token '" + token_tab_[i]
              + "' is already defined as a word.\n";
        }
      }

      bool
      lex()
      {
        size_t size = from_.size();
        size_t it = 0;
        while (it < size)
        {
          // Try to recognize a word.
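          // As used below, `res.first' signals that the whole remaining
          // input was consumed as a single word, while `res.second' is the
          // number of characters covered by the recognized prefix.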
          monoid_elt_t w(e_.structure().monoid());
          std::pair<bool, int> res = parse_word(w, from_.substr(it));
          if (res.second > 0)
          {
            insert_word(w);
            // If the entire input was a word there is nothing else to lex.
            if (res.first)
            {
              assertion(it + res.second == size);
              break;
            }
            it += res.second;
          }

          // Try to recognize a regex token.
          size_t i;
          for (i = 0; i < token_tab_.size(); i++)
          {
            if (!from_.compare(it, token_tab_[i].size(), token_tab_[i]))
            {
              if (i == 6)
              {
                // Token 6 opens a weight: scan up to the matching
                // close_weight_.
                if (!insert_weight(it))
                  return false;
              }
              else
              {
                // Tokens past the "visible" ones are spaces: just skip them.
                if (i < 6)
                  insert_token(i);
                it += token_tab_[i].size();
              }
              break;
            }
          }
          if (i >= token_tab_.size())
          {
            // Nothing matched at the current position.
            error_ += "Lexer error, unrecognized characters: "
              + from_.substr(it) + "\n";
            return false;
          }
        }
        return true;
      }

    private:
      void
      insert_word(monoid_elt_t& w)
      {
        Element<S, T> ww = (w.value().empty() ?
                            identity_as<T>::of(e_.structure()) :
                            Element<S, T>(e_.structure(), w.value()));

        krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(ww);
        parser_.insert_word(rexp);
      }

      bool
      insert_weight(size_t& it)
      {
        it += token_tab_[6].size();
        size_t bg = it;
        size_t size = from_.size();
        // cpt counts nested open_weight tokens so that the matching
        // close_weight_ is found.
        unsigned cpt = 1;
        for (; it < size; it++)
        {
          if (!from_.compare(it, token_tab_[6].size(), token_tab_[6]))
            cpt++;
          else if (!from_.compare(it, close_weight_.size(), close_weight_))
          {
            if (cpt == 1)
            {
              semiring_elt_t w(e_.structure().semiring());
              std::string s = from_.substr(bg, it - bg);
              std::string::const_iterator sit = s.begin();
              if (parse_weight(w, s, sit))
              {
                semiring_proxy<S, T>* sem = new semiring_proxy<S, T>(w);
                parser_.insert_weight(sem);
              }
              else
              {
                error_ += "Lexer error: " + s + " is not a weight\n";
                return false;
              }
              it += close_weight_.size();
              return true;
            }
            else
              cpt--;
          }
        }
        error_ += "Lexer error: expected " + close_weight_
          + " instead of END\n";
        return false;
      }

      void
      insert_token(int i)
      {
        if (i == 5)
        {
          Element<S, T> w = zero_as<T>::of(e_.structure());
          krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(w);
          parser_.insert_zero(rexp);
        }
        else
        {
          std::string* str = new std::string(token_tab_[i]);
          parser_.insert_token(i, str);
        }
      }

      const std::string& from_;
      Element<S, T>& e_;
      vcsnyy::krat_exp_parser& parser_;
      bool lex_trace_;
      std::string close_weight_;
      std::vector<std::string> token_tab_;
      std::string& error_;
    }; // Lexer

    template <class S, class T>
    std::pair<bool, std::string>
    parse(const std::string& from, Element<S, T>& exp,
          bool lex_trace, bool parse_trace)
    {
      // parse_trace is currently unused; the self-assignment silences the
      // unused-parameter warning.
      parse_trace = parse_trace;
      std::string error;
      vcsnyy::krat_exp_parser parser;
      Lexer<S, T> lex(from, exp, parser, lex_trace, error);
      if (!lex.lex())
        return std::make_pair(true, error);
      krat_exp_proxy<S, T> rexp(exp);
      if (parser.parse(rexp, error))
        return std::make_pair(true, error);
      exp = rexp.self;
      return std::make_pair(false, error);
    }
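
    // Example (sketch): assuming `series_set_t' and `exp_t' are a series
    // structure type and a rational expression implementation over it, and
    // `s' an instance of the former (all three names are placeholders),
    // parse() can be called as follows:
    //
    //   Element<series_set_t, exp_t> exp(s);
    //   std::pair<bool, std::string> res = parse("(a+b)*", exp, false, false);
    //   if (res.first)
    //     std::cerr << res.second;   // first == true: lexing or parsing failed
    //   else
    //     std::cout << exp << '\n';  // exp now holds the parsed expression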

  } // algebra

} // vcsn

#endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX