00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00019 # define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00020 # include <map>
00021 # include <queue>
00022 # include <set>
00023 # include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
00024 # include <vaucanson/algebra/implementation/series/krat_exp_proxy.hh>
00025 # include <vaucanson/algebra/concept/monoid_base.hh>
00026
00027
00028 namespace yy
00029 {
00030 struct token_queue;
00031 struct krat_exp_parser
00032 {
00033 krat_exp_parser();
00034 void insert_word(vcsn::algebra::krat_exp_virtual* rexp);
00035 void insert_weight(vcsn::algebra::semiring_virtual* sem);
00036 void insert_one(vcsn::algebra::krat_exp_virtual* rexp);
00037 void insert_zero(vcsn::algebra::krat_exp_virtual* rexp);
00038 void insert_token(int i, std::string* str);
00039 int parse(vcsn::algebra::krat_exp_virtual& rexp, std::string& error);
00040
00041
00042 token_queue* tok_q_;
00043 };
00044 }
00045
00046 namespace vcsn
00047 {
00048 namespace algebra
00049 {
00050
00051 template <class S, class T>
00052 struct Lexer
00053 {
00054 typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
00055 typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
00056 Lexer(const std::string& from,
00057 Element<S, T>& e,
00058 yy::krat_exp_parser& parser,
00059 bool lex_trace,
00060 const token_representation<typename S::monoid_t::letter_t> tok_rep,
00061 std::string& error) :
00062 from_(from),
00063 e_(e),
00064 parser_(parser),
00065 lex_trace_(lex_trace),
00066 close_weight_("}"),
00067 token_tab_(9),
00068 error_(error)
00069 {
00070 precondition(!tok_rep.open_par.empty() &&
00071 !tok_rep.close_par.empty() &&
00072 !tok_rep.plus.empty() &&
00073 !tok_rep.times.empty() &&
00074 !tok_rep.star.empty() &&
00075 !tok_rep.one.empty() &&
00076 !tok_rep.zero.empty() &&
00077 !tok_rep.open_weight.empty() &&
00078 !tok_rep.close_weight.empty());
00079
00080 token_tab_[0] = tok_rep.open_par;
00081 token_tab_[1] = tok_rep.close_par;
00082 token_tab_[2] = tok_rep.plus;
00083 token_tab_[3] = tok_rep.times;
00084 token_tab_[4] = tok_rep.star;
00085 token_tab_[5] = tok_rep.one;
00086 token_tab_[6] = tok_rep.zero;
00087 token_tab_[7] = tok_rep.open_weight;
00088 close_weight_ = tok_rep.close_weight;
00089 for (unsigned i = 0; i < tok_rep.spaces.size(); i++)
00090 {
00091 assertion(!tok_rep.spaces[i].empty());
00092 token_tab_[8 + i] = tok_rep.spaces[i];
00093 }
00094
00095 std::string::const_iterator sit;
00096 semiring_elt_t ww(e_.structure().semiring());
00097 sit = close_weight_.begin();
00098 if (parse_weight(ww, close_weight_, sit))
00099 error_ += "Warning : the token '" + close_weight_ +
00100 + "' is already defined as a weight.\n";
00101 sit = token_tab_[7].begin();
00102 if (parse_weight(ww, token_tab_[7], sit))
00103 error_ += "Warning : the token '" + token_tab_[7]
00104 + "' is already defined as a weight.\n";
00105 for (unsigned i = 0; i < token_tab_.size(); i++)
00106 {
00107 sit = token_tab_[i].begin();
00108 monoid_elt_t w(e_.structure().monoid());
00109 if (parse_word(w, token_tab_[i], sit, std::set<char>()))
00110 error_ += "Warning : the token '" + token_tab_[i]
00111 + "' is already defined as a word.\n";
00112 }
00113
00114 }
00115
00116 bool
00117 lex()
00118 {
00119 size_t curr = 0;
00120 size_t size = from_.size();
00121 size_t it = curr;
00122 while (it < size)
00123 {
00124 for (size_t i = 0; i < token_tab_.size(); i++)
00125 {
00126 if (!from_.compare(it, token_tab_[i].size(), token_tab_[i]))
00127 {
00128 if (curr != it)
00129 if (!insert_word(curr, it))
00130 return false;
00131 if (i == 7)
00132 {
00133 if (!insert_weight(it))
00134 return false;
00135 }
00136 else
00137 {
00138 if (i < 7)
00139 insert_token(i);
00140 it += token_tab_[i].size();
00141 }
00142 curr = it--;
00143 break;
00144 }
00145 }
00146 it++;
00147 }
00148 if (curr != it)
00149 if (!insert_word(curr, it))
00150 return false;
00151 return true;
00152 }
00153
00154 private:
00155 bool
00156 insert_word(size_t curr, size_t it)
00157 {
00158 monoid_elt_t w(e_.structure().monoid());
00159 std::string s = from_.substr(curr, it - curr);
00160 std::string::const_iterator sit = s.begin();
00161 if (parse_word(w, s, sit, std::set<char>()))
00162 {
00163 Element<S, T> ww = Element<S, T>(e_.structure(), w.value());
00164 krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(ww);
00165 parser_.insert_word(rexp);
00166 }
00167 else
00168 {
00169 error_ += "Lexer error : " + s
00170 + " some characters are not part of the alphabet\n";
00171 return false;
00172 }
00173 return true;
00174 }
00175
00176 bool
00177 insert_weight(size_t& it)
00178 {
00179 it += token_tab_[7].size();
00180 size_t bg = it;
00181 size_t size = from_.size();
00182 unsigned cpt = 1;
00183 for (; it < size; it++)
00184 {
00185 if (!from_.compare(it, token_tab_[7].size(), token_tab_[7]))
00186 cpt++;
00187 else
00188 if (!from_.compare(it, close_weight_.size(), close_weight_))
00189 {
00190 if (cpt == 1)
00191 {
00192 semiring_elt_t w(e_.structure().semiring());
00193 std::string s = from_.substr(bg, it - bg);
00194 std::string::const_iterator sit = s.begin();
00195 if (parse_weight(w, s, sit))
00196 {
00197 semiring_proxy<S, T>* sem = new semiring_proxy<S, T>(w);
00198 parser_.insert_weight(sem);
00199 }
00200 else
00201 {
00202 error_ += "Lexer error : " + s + " is not a weight\n";
00203 return false;
00204 }
00205 it += close_weight_.size();
00206 return true;
00207 }
00208 else
00209 cpt--;
00210 }
00211 }
00212 error_ += "Lexer error : Expected " + close_weight_
00213 + "instead of END\n";
00214 return false;
00215 }
00216
00217 void
00218 insert_token(int i)
00219 {
00220 if (i == 5)
00221 {
00222 Element<S, T> w = identity_as<T>::of(e_.structure());
00223 krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(w);
00224 parser_.insert_one(rexp);
00225 }
00226 else
00227 if (i == 6)
00228 {
00229 Element<S, T> w = zero_as<T>::of(e_.structure());
00230 krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(w);
00231 parser_.insert_zero(rexp);
00232 }
00233 else
00234 {
00235 std::string* str = new std::string(token_tab_[i]);
00236 parser_.insert_token(i, str);
00237 }
00238 }
00239
00240 const std::string& from_;
00241 Element<S, T>& e_;
00242 yy::krat_exp_parser& parser_;
00243 bool lex_trace_;
00244 std::string close_weight_;
00245 std::vector<std::string> token_tab_;
00246 std::string& error_;
00247 };
00248
00249 template <class S, class T>
00250 std::pair<bool, std::string>
00251 parse(const std::string& from,
00252 Element<S, T>& exp,
00253 const token_representation<typename S::monoid_t::letter_t> tok_rep
00254 = token_representation<typename S::monoid_t::letter_t>(),
00255 bool lex_trace = false,
00256 bool parse_trace = false)
00257 {
00258 parse_trace = parse_trace;
00259 std::string error;
00260 yy::krat_exp_parser parser;
00261 Lexer<S, T> lex(from, exp, parser, lex_trace, tok_rep, error);
00262 if (!lex.lex())
00263 return std::make_pair(true, error);
00264 krat_exp_proxy<S, T> rexp(exp);
00265 if (parser.parse(rexp, error))
00266 return std::make_pair(true, error);
00267 exp = rexp.self;
00268 return std::make_pair(false, error);
00269 }
00270
00271 }
00272
00273 }
00274
00275 #endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX