Vaucanson 1.4
|
00001 // search.hxx: this file is part of the Vaucanson project. 00002 // 00003 // Vaucanson, a generic library for finite state machines. 00004 // 00005 // Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2008 The Vaucanson Group. 00006 // 00007 // This program is free software; you can redistribute it and/or 00008 // modify it under the terms of the GNU General Public License 00009 // as published by the Free Software Foundation; either version 2 00010 // of the License, or (at your option) any later version. 00011 // 00012 // The complete GNU General Public Licence Notice can be found as the 00013 // `COPYING' file in the root directory. 00014 // 00015 // The Vaucanson Group consists of people listed in the `AUTHORS' file. 00016 // 00017 #ifndef VCSN_ALGORITHMS_SEARCH_HXX 00018 # define VCSN_ALGORITHMS_SEARCH_HXX 00019 00020 # include <vaucanson/algorithms/search.hh> 00021 00022 # include <vaucanson/misc/window.hh> 00023 # include <vaucanson/misc/bitset.hh> 00024 00025 # include <vector> 00026 00027 namespace vcsn { 00028 00029 template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T> 00030 void 00031 search(const Element<Automata<Series, Kind>, T>& a, 00032 const InputIterator& begin, 00033 const InputIterator& end, 00034 typename Element<Automata<Series, Kind>, T>::letter_t eol, 00035 FoundFunctor& f) 00036 { 00037 BENCH_TASK_SCOPED("search"); 00038 FindBestSearch::search(a, begin, end, eol, f); 00039 } 00040 00041 template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T> 00042 void 00043 FindBestSearch::search(const Element<Automata<Series, Kind>, T>& a, 00044 const InputIterator& begin, 00045 const InputIterator& end, 00046 typename Element<Automata<Series, Kind>, T>:: 00047 letter_t eol, 00048 FoundFunctor& f) 00049 { 00050 WindowedBackSearch::search(a, begin, end, eol, f); 00051 } 00052 00063 template <typename Series, typename Kind, typename T, typename StatesSet> 00064 static 00065 unsigned int 00066 compute_distances(const Element<Automata<Series, Kind>, T>& a, 00067 std::vector<StatesSet>& distances) 00068 { 00069 precondition(a.initial().size() > 0); 00070 precondition(a.final().size() > 0); 00071 precondition(distances.size() == 0); 00072 00073 // Typedefs. 00074 typedef typename vcsn::Element<Automata<Series, Kind>, T> automaton_t; 00075 AUTOMATON_TYPES(automaton_t); 00076 00077 // Code. 00078 StatesSet s_new (a.states().back() + 1); 00079 StatesSet s_old (a.states().back() + 1); 00080 00081 s_old.insert(a.initial().begin(), a.initial().end()); 00082 for (unsigned int i = 0; true; ++i) 00083 { 00084 s_new.clear(); 00085 distances.push_back(s_old); 00086 if (i > 0) 00087 distances[i].insert(distances[i - 1].begin(), 00088 distances[i - 1].end()); 00089 for (typename StatesSet::const_iterator s = s_old.begin(); 00090 s != s_old.end(); 00091 ++s) 00092 { 00093 if (a.is_final(*s)) 00094 { 00095 postcondition(i == distances.size()); 00096 return i; 00097 } 00098 for (delta_iterator i(a.value(), *s); 00099 ! i.done(); 00100 i.next()) 00101 s_new.insert(a.dst_of(*i)); 00102 s_old = s_new; 00103 } 00104 } 00105 } 00106 00121 template <typename InputIterator, typename Series, typename Kind, typename T, typename StatesSet> 00122 static 00123 std::pair<bool, unsigned int> 00124 window_backsearch(const misc::Window<InputIterator, 00125 typename Element<Automata<Series, Kind>, T>::letter_t>& w, 00126 const Element<Automata<Series, Kind>, T>& a, 00127 const std::vector<StatesSet>& distances) 00128 { 00129 precondition(w.size() > 0); 00130 00131 // Typedefs. 00132 typedef typename vcsn::Element<Automata<Series, Kind>, T> automaton_t; 00133 AUTOMATON_TYPES(automaton_t); 00134 AUTOMATON_FREEMONOID_TYPES(automaton_t); 00135 typedef typename misc::Window<InputIterator, letter_t> window_t; 00136 typedef typename window_t::length_t length_t; 00137 typedef misc::Bitset bitset_t; 00138 00139 // Code. 00140 bitset_t s_new (a.states().back() + 1); 00141 bitset_t s_old (a.states().back() + 1); 00142 int pos = w.size(); 00143 length_t critpos = pos; 00144 00145 s_old.insert(distances[pos].begin(), distances[pos].end()); 00146 while ((--pos >= 0) && !s_old.empty()) 00147 { 00148 s_new.clear(); 00149 for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s) 00150 if (distances[pos + 1].find(*s) != distances[pos + 1].end()) 00151 { 00152 if (a.is_initial(*s)) 00153 critpos = pos + 1; 00154 std::insert_iterator<bitset_t> i(s_new, s_new.begin()); 00155 for (delta_iterator t(a.value(), *s); ! t.done(); t.next()) 00156 { 00157 monoid_elt_t w(a.series_of(*t).structure().monoid(), w[pos]); 00158 if (a.series_of(*t).get(w) != a.series().semiring().wzero_) 00159 *i++ = a.src_of(*t); 00160 } 00161 } 00162 std::swap(s_new, s_old); 00163 } 00164 if (pos < 0) 00165 for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s) 00166 if (a.is_initial(*s)) 00167 return std::make_pair(true, critpos); 00168 return std::make_pair(false, critpos); 00169 } 00170 00172 template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T> 00173 static 00174 InputIterator 00175 confirm_and_report_match(const misc::Window<InputIterator, 00176 typename Element<Automata<Series, Kind>, T>::letter_t>& w, 00177 const Element<Automata<Series, Kind>, T>& a, 00178 FoundFunctor& f) 00179 { 00180 // Typedefs. 00181 typedef typename vcsn::Element<Automata<Series, Kind>, T> automaton_t; 00182 AUTOMATON_TYPES(automaton_t); 00183 AUTOMATON_FREEMONOID_TYPES(automaton_t); 00184 typedef typename misc::Window<InputIterator, letter_t> window_t; 00185 typedef typename window_t::length_t length_t; 00186 typedef typename window_t::iterator_t iterator_t; 00187 typedef misc::Bitset bitset_t; 00188 00189 // Code. 00190 bitset_t s_old (a.states().back() + 1); 00191 bitset_t s_new (a.states().back() + 1); 00192 iterator_t pos = w.stream(); 00193 iterator_t last = pos; 00194 00195 s_old.insert(a.initial().begin(), a.initial().end()); 00196 while (!s_old.empty() && (*pos != w.eol_value())) 00197 { 00198 s_new.clear(); 00199 for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s) 00200 { 00201 if (a.is_final(*s)) 00202 last = pos - 1; 00203 std::insert_iterator<bitset_t> i(s_new, s_new.begin()); 00204 for (delta_iterator t(a.value(), *s); ! t.done(); t.next()) 00205 { 00206 monoid_elt_t w(a.series_of(*t).structure().monoid(), *pos); 00207 if (a.series_of(*t).get(w) != a.series().semiring().wzero_) 00208 *i++ = a.dst_of(*t); 00209 } 00210 } 00211 std::swap(s_old, s_new); 00212 ++pos; 00213 } 00214 for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s) 00215 if (a.is_final(*s)) 00216 last = pos - 1; 00217 if (last != pos) 00218 return f(w.begin(), w.stream(), last); 00219 else 00220 return w.stream(); 00221 } 00222 00225 template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T> 00226 void 00227 WindowedBackSearch::search(const Element<Automata<Series, Kind>, T>& a, 00228 const InputIterator& begin, 00229 const InputIterator& end, 00230 typename Element<Automata<Series, Kind>, T>:: 00231 letter_t eol, 00232 FoundFunctor& f) 00233 { 00234 // Typedefs. 00235 typedef typename vcsn::Element<Automata<Series, Kind>, T> automaton_t; 00236 AUTOMATON_TYPES(automaton_t); 00237 AUTOMATON_FREEMONOID_TYPES(automaton_t); 00238 typedef typename misc::Window<InputIterator, letter_t> window_t; 00239 typedef typename window_t::length_t length_t; 00240 typedef misc::Bitset bitset_t; 00241 00242 // Code. 00243 std::vector<bitset_t> distances; 00244 length_t wl = compute_distances(a, distances); 00245 00246 if (wl == 0) 00247 WARNING("Search match every position in the stream, ignored."); 00248 else 00249 { 00250 window_t w (begin, end, eol, wl); 00251 InputIterator it; 00252 00253 while (!w.eof()) 00254 { 00255 if (w.size() != wl) 00256 w.shift(); 00257 else 00258 { 00259 std::pair<bool, length_t> p = 00260 window_backsearch(w, a, distances); 00261 if (p.first && 00262 (it = confirm_and_report_match(w, a, f)) != w.stream()) 00263 w.moveto(it); 00264 else 00265 w.shift(p.second); 00266 } 00267 } 00268 } 00269 } 00270 00271 } // vcsn 00272 00273 #endif // ! VCSN_ALGORITHMS_SEARCH_HXX