Vcsn  2.3
Be Rational
efsm.cc
Go to the documentation of this file.
1 #include <fstream>
2 #include <set>
3 #include <string>
4 
5 #include <boost/algorithm/string/erase.hpp>
6 #include <boost/algorithm/string/predicate.hpp> // starts_with
7 #include <boost/algorithm/string/replace.hpp> // replace_all_copy
8 
9 #include <lib/vcsn/algos/fwd.hh>
12 #include <vcsn/dyn/registries.hh>
13 #include <vcsn/dyn/algos.hh>
14 #include <vcsn/dyn/automaton.hh>
15 #include <vcsn/misc/getargs.hh>
16 #include <vcsn/misc/symbol.hh>
17 #include <vcsn/misc/regex.hh>
18 
19 namespace vcsn
20 {
21  namespace dyn
22  {
23  namespace
24  {
27  std::string
28  read_here_doc(std::istream& is)
29  {
30  static std::regex re("cat >\\$medir/([a-z]+)\\.[a-z]* <<\\\\EOFSM",
31  std::regex::extended);
32  std::string line;
33  std::smatch res;
34  while (is.good())
35  {
36  std::getline(is, line, '\n');
37  if (std::regex_match(line, res, re))
38  return res[1];
39  }
40  raise("invalid efsm file: missing \"cat\" symbol");
41  }
42 
46  std::string
47  read_symbol_table(std::istream& is)
48  {
49  std::string res;
50  std::string line;
51  std::string val;
52  while (is.good())
53  {
54  std::getline(is, line, '\n');
55  std::istringstream ss{line};
56  ss >> res;
57  if (ss.fail())
58  continue;
59  ss >> val;
60  if (ss.fail())
61  raise("invalid efsm file");
62  if (val == "0" || res == "EOFSM")
63  break;
64  }
65 
66  while (line != "EOFSM" && is.good())
67  std::getline(is, line, '\n');
68 
69  require(line == "EOFSM",
70  "invalid efsm file: missing closing EOFSM");
71  return res;
72  }
73 
76  read_weightset_type(std::istream& is)
77  {
78  using weightset_type = lazy_automaton_editor::weightset_type;
79  std::string line;
80  while (is.good())
81  {
82  std::getline(is, line, '\n');
83  if (boost::starts_with(line, "arc_type="))
84  {
85  boost::algorithm::erase_first(line, "arc_type=");
86  static auto map = getarg<weightset_type>
87  {
88  "arc type",
89  {
90  {"log", weightset_type::logarithmic},
91  {"log64", weightset_type::logarithmic},
92  {"standard", weightset_type::tropical},
93  }
94  };
95  return map[line];
96  }
97  }
98  raise("invalid efsm file: missing \"arc_type=\"");
99  }
100  }
101 
102  automaton
103  read_efsm(std::istream& is)
104  {
105  std::string file = "file.efsm";
106  using string_t = symbol;
107 
108  // Whether has both isysmbols and osymbols.
109  bool is_transducer = false;
110 
111  // Look for the arc type, which describes the weightset.
112  auto weightset = read_weightset_type(is);
113 
114  // Look for the symbol table.
115  auto isyms = read_here_doc(is);
116  // The single piece of information we need from the symbol
117  // table: the representation of the empty word.
118  std::string ione = read_symbol_table(is);
119 
120  // If we had "isymbols", we now expect "osymbols".
121  std::string oone = ione;
122  if (isyms == "isymbols")
123  {
124  is_transducer = true;
125  auto osyms = read_here_doc(is);
126  require(osyms == "osymbols",
127  "invalid efsm file: expected osymbols: ", osyms);
128  oone = read_symbol_table(is);
129  }
130 
131  auto edit = vcsn::lazy_automaton_editor{};
132  edit.open(true);
133  edit.weightset(weightset);
134 
135  // The first transition also provides the initial state.
136  bool first = true;
137  auto trans = read_here_doc(is);
138  require(trans == "transitions",
139  "invalid efsm file: expected transitions: ", trans);
140  // Line: Source Dest ILabel [OLabel] [Weight].
141  // Line: FinalState [Weight].
142  std::string line;
143  while (is.good())
144  {
145  std::getline(is, line, '\n');
146  if (line == "EOFSM")
147  break;
148  std::istringstream ss{line};
149  string_t s, d, l1, l2, w;
150  ss >> s >> d >> l1 >> l2 >> w;
151  if (first)
152  edit.add_initial(s);
153  if (l1.get().empty())
154  // FinalState [Weight]
155  edit.add_final(s, d);
156  else
157  {
158  if (l1 == ione)
159  l1 = "\\e";
160  if (is_transducer)
161  {
162  if (l2 == oone)
163  l2 = "\\e";
164  edit.add_transition(s, d, l1, l2, w);
165  }
166  else
167  {
168  // l2 is actually the weight.
169  edit.add_transition(s, d, l1, l2);
170  }
171  }
172  first = false;
173  }
174 
175  require(line == "EOFSM",
176  file, ": bad input format, missing EOFSM");
177  // Flush till EOF.
178  while (is.get() != EOF)
179  continue;
180 
181  // We don't want to read it as a `law<char>` automaton, as for
182  // OpenFST, these "words" are insecable. The proper
183  // interpretation is lal<string> (or lan<string>).
184  using boost::algorithm::replace_all_copy;
185  auto ctx = replace_all_copy(edit.result_context(),
186  "law<char>", "lan<string>");
187  return edit.result(ctx);
188  }
189  }
190 }
weightset_type
Weightset types.
boost::flyweight< std::string, boost::flyweights::no_tracking, boost::flyweights::intermodule_holder > symbol
An internalized string.
Definition: symbol.hh:23
void require(Bool b, Args &&...args)
If b is not verified, raise an error with args as message.
Definition: raise.hh:91
return res
Definition: multiply.hh:398
bool open(bool o)
Whether unknown letters should be added, or rejected.
Definition: a-star.hh:8
auto map(const std::tuple< Ts... > &ts, Fun f) -> decltype(map_tuple_(f, ts, make_index_sequence< sizeof...(Ts)>()))
Map a function on a tuple, return tuple of the results.
Definition: tuple.hh:177
automaton read_efsm(std::istream &is)
Definition: efsm.cc:103
symbol string_t
Definition: parse.hh:66
Build an automaton with unknown context.