Vaucanson 1.4
search.hxx
00001 // search.hxx: this file is part of the Vaucanson project.
00002 //
00003 // Vaucanson, a generic library for finite state machines.
00004 //
00005 // Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2008 The Vaucanson Group.
00006 //
00007 // This program is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU General Public License
00009 // as published by the Free Software Foundation; either version 2
00010 // of the License, or (at your option) any later version.
00011 //
00012 // The complete GNU General Public Licence Notice can be found as the
00013 // `COPYING' file in the root directory.
00014 //
00015 // The Vaucanson Group consists of people listed in the `AUTHORS' file.
00016 //
00017 #ifndef VCSN_ALGORITHMS_SEARCH_HXX
00018 # define VCSN_ALGORITHMS_SEARCH_HXX
00019 
00020 # include <vaucanson/algorithms/search.hh>
00021 
00022 # include <vaucanson/misc/window.hh>
00023 # include <vaucanson/misc/bitset.hh>
00024 
00025 # include <vector>
00026 
00027 namespace vcsn {
00028 
00029   template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T>
00030   void
00031   search(const Element<Automata<Series, Kind>, T>& a,
00032          const InputIterator& begin,
00033          const InputIterator& end,
00034          typename Element<Automata<Series, Kind>, T>::letter_t eol,
00035          FoundFunctor& f)
00036   {
00037     BENCH_TASK_SCOPED("search");
00038     FindBestSearch::search(a, begin, end, eol, f);
00039   }
00040 
00041   template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T>
00042   void
00043   FindBestSearch::search(const Element<Automata<Series, Kind>, T>& a,
00044                          const InputIterator& begin,
00045                          const InputIterator& end,
00046                          typename Element<Automata<Series, Kind>, T>::
00047                          letter_t eol,
00048                          FoundFunctor& f)
00049   {
00050     WindowedBackSearch::search(a, begin, end, eol, f);
00051   }
00052 
00063   template <typename Series, typename Kind, typename T, typename StatesSet>
00064   static
00065   unsigned int
00066   compute_distances(const Element<Automata<Series, Kind>, T>& a,
00067                     std::vector<StatesSet>& distances)
00068   {
00069     precondition(a.initial().size() > 0);
00070     precondition(a.final().size() > 0);
00071     precondition(distances.size() == 0);
00072 
00073     // Typedefs.
00074     typedef typename vcsn::Element<Automata<Series, Kind>, T>   automaton_t;
00075     AUTOMATON_TYPES(automaton_t);
00076 
00077     // Code.
00078     StatesSet           s_new (a.states().back() + 1);
00079     StatesSet           s_old (a.states().back() + 1);
00080 
00081     s_old.insert(a.initial().begin(), a.initial().end());
00082     for (unsigned int i = 0; true; ++i)
00083     {
00084       s_new.clear();
00085       distances.push_back(s_old);
00086       if (i > 0)
00087         distances[i].insert(distances[i - 1].begin(),
00088                             distances[i - 1].end());
00089       for (typename StatesSet::const_iterator s = s_old.begin();
00090            s != s_old.end();
00091            ++s)
00092       {
00093         if (a.is_final(*s))
00094         {
00095           postcondition(i == distances.size());
00096           return i;
00097         }
00098         for (delta_iterator i(a.value(), *s);
00099              ! i.done();
00100              i.next())
00101           s_new.insert(a.dst_of(*i));
00102         s_old = s_new;
00103       }
00104     }
00105   }
00106 
00121   template <typename InputIterator, typename Series, typename Kind, typename T, typename StatesSet>
00122   static
00123   std::pair<bool, unsigned int>
00124   window_backsearch(const misc::Window<InputIterator,
00125                     typename Element<Automata<Series, Kind>, T>::letter_t>& w,
00126                     const Element<Automata<Series, Kind>, T>& a,
00127                     const std::vector<StatesSet>& distances)
00128   {
00129     precondition(w.size() > 0);
00130 
00131     // Typedefs.
00132     typedef typename vcsn::Element<Automata<Series, Kind>, T>           automaton_t;
00133     AUTOMATON_TYPES(automaton_t);
00134     AUTOMATON_FREEMONOID_TYPES(automaton_t);
00135     typedef typename misc::Window<InputIterator, letter_t>      window_t;
00136     typedef typename window_t::length_t                         length_t;
00137     typedef misc::Bitset                                        bitset_t;
00138 
00139     // Code.
00140     bitset_t    s_new (a.states().back() + 1);
00141     bitset_t    s_old (a.states().back() + 1);
00142     int         pos = w.size();
00143     length_t    critpos = pos;
00144 
00145     s_old.insert(distances[pos].begin(), distances[pos].end());
00146     while ((--pos >= 0) && !s_old.empty())
00147       {
00148         s_new.clear();
00149         for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s)
00150           if (distances[pos + 1].find(*s) != distances[pos + 1].end())
00151             {
00152               if (a.is_initial(*s))
00153                 critpos = pos + 1;
00154               std::insert_iterator<bitset_t> i(s_new, s_new.begin());
00155               for (delta_iterator t(a.value(), *s); ! t.done(); t.next())
00156               {
00157                 monoid_elt_t w(a.series_of(*t).structure().monoid(), w[pos]);
00158                 if (a.series_of(*t).get(w) != a.series().semiring().wzero_)
00159                   *i++ = a.src_of(*t);
00160               }
00161             }
00162         std::swap(s_new, s_old);
00163       }
00164     if (pos < 0)
00165       for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s)
00166         if (a.is_initial(*s))
00167           return std::make_pair(true, critpos);
00168     return std::make_pair(false, critpos);
00169   }
00170 
00172   template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T>
00173   static
00174   InputIterator
00175   confirm_and_report_match(const misc::Window<InputIterator,
00176                            typename Element<Automata<Series, Kind>, T>::letter_t>& w,
00177                            const Element<Automata<Series, Kind>, T>& a,
00178                            FoundFunctor& f)
00179   {
00180     // Typedefs.
00181     typedef typename vcsn::Element<Automata<Series, Kind>, T>           automaton_t;
00182     AUTOMATON_TYPES(automaton_t);
00183     AUTOMATON_FREEMONOID_TYPES(automaton_t);
00184     typedef typename misc::Window<InputIterator, letter_t>      window_t;
00185     typedef typename window_t::length_t                         length_t;
00186     typedef typename window_t::iterator_t                       iterator_t;
00187     typedef misc::Bitset                                        bitset_t;
00188 
00189     // Code.
00190     bitset_t    s_old (a.states().back() + 1);
00191     bitset_t    s_new (a.states().back() + 1);
00192     iterator_t  pos = w.stream();
00193     iterator_t  last = pos;
00194 
00195     s_old.insert(a.initial().begin(), a.initial().end());
00196     while (!s_old.empty() && (*pos != w.eol_value()))
00197       {
00198         s_new.clear();
00199         for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s)
00200           {
00201             if (a.is_final(*s))
00202               last = pos - 1;
00203             std::insert_iterator<bitset_t> i(s_new, s_new.begin());
00204             for (delta_iterator t(a.value(), *s); ! t.done(); t.next())
00205             {
00206               monoid_elt_t w(a.series_of(*t).structure().monoid(), *pos);
00207               if (a.series_of(*t).get(w) != a.series().semiring().wzero_)
00208                 *i++ = a.dst_of(*t);
00209             }
00210           }
00211         std::swap(s_old, s_new);
00212         ++pos;
00213       }
00214     for (bitset_t::const_iterator s = s_old.begin(); s != s_old.end(); ++s)
00215       if (a.is_final(*s))
00216         last = pos - 1;
00217     if (last != pos)
00218       return f(w.begin(), w.stream(), last);
00219     else
00220       return w.stream();
00221   }
00222 
00225   template <typename InputIterator, typename FoundFunctor, typename Series, typename Kind, typename T>
00226   void
00227   WindowedBackSearch::search(const Element<Automata<Series, Kind>, T>& a,
00228                              const InputIterator& begin,
00229                              const InputIterator& end,
00230                              typename Element<Automata<Series, Kind>, T>::
00231                              letter_t eol,
00232                              FoundFunctor& f)
00233   {
00234     // Typedefs.
00235     typedef typename vcsn::Element<Automata<Series, Kind>, T>           automaton_t;
00236     AUTOMATON_TYPES(automaton_t);
00237     AUTOMATON_FREEMONOID_TYPES(automaton_t);
00238     typedef typename misc::Window<InputIterator, letter_t>      window_t;
00239     typedef typename window_t::length_t                         length_t;
00240     typedef misc::Bitset                                        bitset_t;
00241 
00242     // Code.
00243     std::vector<bitset_t>       distances;
00244     length_t                    wl = compute_distances(a, distances);
00245 
00246     if (wl == 0)
00247       WARNING("Search match every position in the stream, ignored.");
00248     else
00249       {
00250         window_t                w (begin, end, eol, wl);
00251         InputIterator           it;
00252 
00253         while (!w.eof())
00254           {
00255             if (w.size() != wl)
00256               w.shift();
00257             else
00258               {
00259                 std::pair<bool, length_t> p =
00260                   window_backsearch(w, a, distances);
00261                 if (p.first &&
00262                     (it = confirm_and_report_match(w, a, f)) != w.stream())
00263                   w.moveto(it);
00264                 else
00265                   w.shift(p.second);
00266               }
00267           }
00268       }
00269   }
00270 
00271 } // vcsn
00272 
00273 #endif // ! VCSN_ALGORITHMS_SEARCH_HXX