dees/main.cpp

1324 lines
39 KiB
C++

/***************************************************************************
main.C - description
-------------------
begin : mer oct 9 19:14:09 CEST 2002
copyright : (C) 2002 by Yann Esposito
email : esposito@cmi.univ-mrs.fr
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#include "interface.H"
#include "test.H"
#include "main.H"
// #define MAXFLOAT 3.40282347e+38F
/// ########################################################################## ///
// --- Global input variables that parameterize the experiment
// Files
string temporary_directory= "/tmp/dees/"; // temporary directory
string ficffaA = temporary_directory + "A"; // file of the ffa A
string ficffaB = temporary_directory + "B"; // file of the ffa B
string ficdotA = temporary_directory + "A.dot"; // dot file associated with A
string ficdotB = temporary_directory + "B.dot"; // dot file associated with B
string ficech = temporary_directory + "A.ech"; // sample file generated by A
string ficechpre = temporary_directory + "A.ecp"; // prefix sample file generated by A
string ficpsA = temporary_directory + "A.ps"; // visualisation file of A
string ficpsB = temporary_directory + "B.ps"; // visualisation file of B
// Samples
Sample S; // main sample
Sample Stest; // test sample
int num_ech = 0; // number of the sample
int num_ech_test = 1; // number of the test sample
int taille_ech = 1000; // size of the sample
int taille_ech_test = 1000; // size of the test sample
// Random PFA
int numPFA = 0; // number of the generated PFA
int nb_etats = 5; // number of states of the generated PFA
int nb_lettres = 2; // number of letters of the generated PFA
// Precisions
double precision = 0.5; // precision (0 -> prefix tree, 1 -> single state)
double precpow = -1; // precision in exponential mode
double epsprime = 0; // transition deletion value (0 delete nothing, 1 delete all)
unsigned int seuil = 0; // threshold below which a prefix is no longer taken into account during learning
double seuilbm = .0001 ; // threshold for the Baum Welch convergence test
unsigned int maxmots = INT_MAX; // maximal number of words to add to the test set at each step
// Display
string nom_A = "A"; // name of the PFA A
string nom_B = "B"; // name of the PFA B
bool affiche_A = true; // whether to display the source automaton
bool affiche_B = true; // display the destination automaton
bool verbose = false; // verbose mode for the test
bool affiche_etats = false; // show the states in the .dot
int viewprec=3; // precision of the parameters when displayed
bool viewstates=false; // display the state numbers
bool viewmultipletransitions=false; // true to stop grouping transitions
bool interactive=false; // interactive mode
// Learning
T_ModeReturn moderet = ::end; // edge return mode (end or begin)
T_ModeEpsilon modeeps = variable; // learning mode with fixed or variable epsilon (epsfixed or variable)
float test_proportion = 0.2; // proportion of the sample allocated to the test
int nb_tours = 100; // number of iterations for Baum Welch
int nb_essais = 10; // number of trials for the best learning
int max_states = INT_MAX; // maximal number of states for DEES
bool stepssave = false; // true to record every step of the learning
// File export
string format = "ffa"; // format name; overridden by the --format option
// Distances
bool smooth = false; // set by --smooth; when true, --dist smooths B before measuring
string cmd;
PPRFA A; // automaton most commands load, learn into and save
PPRFA B; // second automaton, used by --dist
stringstream convert; // stream used to convert text into integers or floats
/// ########################################################################## ///
// Render a word as text: every letter is looked up in the alphabet and its
// label is appended, followed by a single space.
string
affiche (const Word & w, const Dictionnaire & alph)
{
  string texte;
  Word::const_iterator lettre = w.begin ();
  while (lettre != w.end ())
    {
      texte += alph.find (*lettre)->second;
      texte += ' ';
      ++lettre;
    }
  return texte;
}
// Write the automaton as a dot file, convert it with "dot -Tepdf" and hand
// the resulting file to show_ps.
void show(MA A) {
  // Common base name: temporary directory + "pfa" + current time in seconds.
  stringstream base_builder;
  base_builder << temporary_directory << "pfa" << int(time(NULL));
  string base;
  base_builder >> base;

  const string dot_path = base + ".dot";
  string pdf_path = base + ".pdf";
  A.save_dot(dot_path, viewprec, viewstates, viewmultipletransitions);

  const string conversion = "dot -Tepdf " + dot_path + ">" + pdf_path;
  system(conversion.c_str());
  show_ps(pdf_path);
}
// Seed the C random number generator.
// Four bytes are read from /dev/urandom and packed into the seed; when the
// device cannot be opened or does not deliver four bytes, the global
// num_ech is used as the seed instead.
// Returns the value that was given to srand, converted to int.
int
randomize (void)
{
  ifstream dev_rnd ("/dev/urandom");
  if (dev_rnd)
    {
      unsigned int seed = 0;
      bool complete = true;
      char c;
      for (int k = 0; k < 4; k++)
        {
          if (!dev_rnd.get (c))
            {
              complete = false;
              break;
            }
          // Go through unsigned char: a negative char would otherwise be
          // sign-extended and overwrite the bytes already stored in seed.
          seed = (seed << 8) | static_cast<unsigned char> (c);
        }
      if (complete)
        {
          srand (seed);
          return seed;
        }
    }
  srand (num_ech);
  return num_ech;
}
// Print the general help text stored in ./help/help.txt on standard output.
// When the file cannot be opened a warning naming it is printed instead.
// Always returns 0.
int
affiche_utilisation (void)
{
  string fichelp = "./help/";
  fichelp += "help.txt";
  ifstream fh (fichelp.c_str ());
  // Test the stream state directly: comparing a stream with NULL relies on
  // a conversion that C++11 streams no longer provide.
  if (!fh)
    {
      cout << "DEES : ";
      cout << "WARNING: " << fichelp << " not found." << endl;
    }
  else
    {
      cout << fh.rdbuf ();
      fh.close ();
      cout << endl;
    }
  return 0;
}
int
affiche_options (void)
{
string fichelp = "/Users/esposito/dees/help/";
fichelp += "/help_options.txt";
ifstream fh (fichelp.c_str ());
if (fh == NULL)
{
cout << "DEES : ";
cout << "WARNING: " << fichelp << " not found." << endl;
}
else
{
cout << fh.rdbuf ();
fh.close ();
cout << endl;
}
return 0;
}
// Export an automaton through Graphviz: the automaton is saved as a dot
// file in fic_tmp, then "dot -Tepdf" converts it into fic_pdf.
// The outcome of the external command is not inspected; returns 0.
RESULT
export_to_pdf (const MA & A,
               const string & fic_pdf,
               const string & fic_tmp,
               const int precision,
               const bool noms_etats,
               const bool multiples_trans)
{
  // Intermediate dot description of the automaton.
  A.save_dot (fic_tmp, precision, noms_etats, multiples_trans);
  // Shell command performing the conversion.
  const string conversion = "dot -Tepdf " + fic_tmp + " > " + fic_pdf;
  system (conversion.c_str ());
  return 0;
}
// Launch the "open" command on the given file, in the background.
void
show_ps (string & fic_pdf)
{
  const string lancement = "open " + fic_pdf + "&";
  system (lancement.c_str ());
}
// Print all the distances between A and B.
// A sample of taille_ech_test words is generated from A into the global S,
// the column titles go to standard error and the tab-separated values
// (followed by flag) go to standard output.
void
affiche_distance (PFA & A, const PFA & B, const int taille_ech_test,
                  const string & flag)
{
  A.genere_echantillon (taille_ech_test, S, num_ech_test);

  // Column titles.
  cerr << "Test_Sample_size\t" << "precision\t" << "perplex A\t";
  cerr << "perplex B\t" << "KL div\t" << "d_2\t";
  cerr << "L_1\t" << "d_2(nlog)\t" << "L_1(nlog)\t";

  // Values, computed and printed one at a time in column order.
  cout << taille_ech_test << "\t" << precision << "\t";
  const double perplex_A = A.perplexite (S);
  cout << perplex_A << "\t";
  const double perplex_B = B.perplexite (S);
  cout << perplex_B << "\t";
  cout << perplex_B - perplex_A << "\t";
  cout << A.d_2 (B, S) << "\t";
  cout << A.L_1 (B, S) << "\t";
  cout << A.d_2nlog (B, S) << "\t";
  cout << A.L_1nlog (B, S) << "\t";
  cout << flag << endl;
}
// Convenience overload taking the flag as a C string; forwards to the
// std::string version.
void
affiche_distance (PFA & A, const PFA & B, const int taille_ech,
                  const char *flag)
{
  affiche_distance (A, B, taille_ech, string (flag));
}
// Print the name of the most specific class the automaton belongs to.
// The predicates are tried from the most general to the most specific and
// the first one that fails decides the answer.
void
affiche_classe (PFA & A)
{
  if (!A.coherent ())
    {
      cout << "uncoherent MA" << endl;
      return;
    }
  if (!A.isSPFA ())
    {
      cout << "MA" << endl;
      return;
    }
  if (!A.isPFA ())
    {
      cout << "SPFA" << endl;
      return;
    }
  if (!A.isPRFA ())
    {
      cout << "PFA" << endl;
      return;
    }
  if (!A.isPDFA ())
    {
      cout << "PRFA" << endl;
      return;
    }
  cout << "PDFA" << endl;
}
// Learn an automaton with the external "alergia" program.
// The sample stored in ficsample is saved again in alergia format under the
// temporary directory, the program is run with parameter prec and output
// file ficffaA, a "%Alergia" line is put in front of its output and the
// resulting file is loaded into A.
void Alergia(PFA &A, float prec, string ficsample, bool verbose) {
  // Convert the sample to the format the tool reads.
  string sample_path = temporary_directory + "tmpsample.alergia";
  Sample echantillon;
  echantillon.load(ficsample);
  echantillon.save(sample_path, alergia);

  // Run the learner.
  stringstream learn_cmd;
  learn_cmd << "alergia" << " -a " << prec << " -o " << ficffaA
            << " -f " << sample_path;
  if (verbose) {
    cout << ":: " << learn_cmd.str() << endl;
  }
  system(learn_cmd.str().c_str());

  // Prefix the produced file with the %Alergia marker line.
  string tagged_path = temporary_directory + "alergia.tmp";
  stringstream tag_cmd;
  tag_cmd << "echo %Alergia > " << tagged_path
          << " && cat " << ficffaA << " >> " << tagged_path;
  if (verbose) {
    cout << "transformation de l'automate avec :\n" << tag_cmd.str() << endl;
  }
  system(tag_cmd.str().c_str());

  if (verbose) {
    cout << "on charge le fichier : " << tagged_path << endl;
  }
  A.load(tagged_path);
}
// Learn an automaton with the external "mdi" program.
// The sample stored in ficsample is saved again in mdi format under the
// temporary directory, the program is run with -c set to prec divided by
// the global taille_ech and output file ficffaA, a "%MDI" line is put in
// front of its output and the resulting file is loaded into A.
void MDI(PFA &A, float prec, string ficsample, bool verbose) {
  // Convert the sample to the format the tool reads.
  string sample_path = temporary_directory + "tmpsample.mdi";
  Sample echantillon;
  echantillon.load(ficsample);
  echantillon.save(sample_path, mdi);

  // Run the learner.
  stringstream learn_cmd;
  learn_cmd << "mdi" << " -b";
  if (verbose) {
    learn_cmd << " -v";
  }
  learn_cmd << " -c " << prec / double(taille_ech)
            << " -o " << ficffaA
            << " -i " << sample_path;
  if (verbose) {
    cout << ":: " << learn_cmd.str() << endl;
  }
  system(learn_cmd.str().c_str());

  // Prefix the produced file with the %MDI marker line.
  string tagged_path = temporary_directory + "mdi.tmp";
  stringstream tag_cmd;
  tag_cmd << "echo %MDI > " << tagged_path
          << " && cat " << ficffaA << " >> " << tagged_path;
  if (verbose) {
    cout << "transformation de l'automate avec :\n" << tag_cmd.str() << endl;
  }
  system(tag_cmd.str().c_str());

  if (verbose) {
    cout << "on charge le fichier : " << tagged_path << endl;
  }
  A.load(tagged_path);
}
// --- Debugging helper ---
// Evaluates (1 / (ecart * sqrt(2 * 3.1415))) * exp(-(val - moy)^2 / ecart).
// NOTE(review): this looks like a Gaussian density, but the squared
// deviation is divided by ecart and not by 2 * ecart^2 -- confirm the
// intended formula before relying on it.
float fonction (float val, float moy, float ecart)
{
  const float facteur = 1/(ecart * sqrt(2 * 3.1415));
  const float ecart_carre = (val - moy)*(val - moy);
  return facteur * exp(- ecart_carre / ecart);
}
// Run nb_turns Baum Welch iterations on a copy of the automaton A with the
// sample S, remove the bad states and save the result in the file output.
// With the global verbose flag the likelihood is printed after each turn.
void BaumWelch(PFA A, Sample S, int nb_turns, string output) {
  TransitionFunction T;
  SFunc Iota, Tau;
  A.allTransitions(T);
  A.allStates(Iota);
  A.allStates(Tau);

  double previous = A.Likelihood(S);
  double current;
  int turn = 0;
  while (turn < nb_turns) {
    A.BaumWelch(S,T,Iota,Tau,3,true);
    current = A.Likelihood(S);
    // An early stop on a small likelihood change is disabled in this code:
    //if (abs(current-previous)<.1) {
    //  break;
    //}
    previous = current;
    if (verbose) {
      cout << "Likelihood: " << A.Likelihood(S) << "\n";
    }
    ++turn;
  }
  cout << endl;

  A.erase_bad_states();
  A.save(output);
}
// Generate a complete random probabilistic automaton.
// Reads three positional arguments starting at argv[i] (number of states,
// number of letters, output file), makes the global automaton A random
// using the global numPFA as graph number and saves it.
// Returns -1 after printing the usage text when arguments are missing,
// otherwise the result of saving A.
RESULT generate_complete_random_pfa(int argc, char **argv, int i) {
  // Three arguments are consumed below, so all three must be present.
  if (argc < i + 3) {
    cerr << "dees --gcrp nbstates nbletters file" << endl;
    cerr << "generate a complete random automaton with 'nbstates' states\n";
    cerr << "'nbletters' letters and save it in file\n";
    cerr << "usefull option:\n";
    cerr << "--numPFA : the number of the ma by default random number is choosen.\n";
    return -1;
  }
  int nbstates=atoi(argv[i++]);
  int nbletters=atoi(argv[i++]);
  string output=argv[i++];
  // Fixed generation parameters of the complete automaton.
  float min_trans=0.01;
  float max_trans=1;
  float prob_init=1;
  float prob_trans=1;
  float prob_term=1;
  if (verbose) {
    // Report the graph number that is really passed to becomeRandom.
    cout << "num graphe: " << numPFA << endl;
    cout << "prob_trans: " << prob_trans << endl;
    cout << "prob_init: " << prob_init << endl;
    cout << "prob_term: " << prob_term << endl;
    cout << "min_trans: " << min_trans << endl;
    cout << "max_trans: " << max_trans << endl;
  }
  A.becomeRandom (nbstates, nbletters, numPFA, prob_trans, prob_init, prob_term, min_trans, max_trans);
  return A.save (output);
}
RESULT generate_random_pfa(int argc, char **argv, int i) {
if (argc < i+3) {
cerr << "dees --random nbstates nbletters file\n";
cerr << "generate a random automaton of class 'class' with 'nbstates' states\n";
cerr << "'nbletters' letters and save it in file\n";
cerr << "usefull option:\n";
cerr << "--numPFA : the number of the ma by default random number is choosen.\n";
cerr << "--interactive : ask you to enter values\n";
return -1;
}
int nbstates=atoi(argv[i++]);
int nbletters=atoi(argv[i++]);
string output=argv[i++];
int num_graphe=0;
int max_nb_succ=nbstates;
int max_nb_init=nbstates;
int max_nb_term=nbstates;
float min_trans=0;
float max_trans=1;
float min_iota=0;
float max_iota=1;
float min_tau=0;
float max_tau=1;
float normalization=1;
float prob_init=.3;
float prob_trans=.3;
float prob_term=.3;
if (interactive) {
cout << "num graphe (0=random): "; cin >> num_graphe;
cout << "max_nb_succ (width of the graph): "; cin >> max_nb_succ;
cout << "max_nb_init: "; cin >> max_nb_init;
cout << "max_nb_term: "; cin >> max_nb_term;
cout << "min_trans (minimal value of a transition): "; cin >> min_trans;
cout << "max_trans (maximal value of a transition): "; cin >> max_trans;
cout << "min_iota (minimal initial value): "; cin >> min_iota;
cout << "max_iota (maximal initial value): "; cin >> max_iota;
cout << "min_tau (minimal terminaison value): "; cin >> min_tau;
cout << "max_tau (maximal terminaison value): "; cin >> max_tau;
cout << "normalization (value of normalisation mostly 1): "; cin >> normalization;
cout << "prob_init (probability for a state to be initial: "; cin >> prob_init;
cout << "prob_trans (probability for a terminaison to araise): "; cin >> prob_trans;
cout << "prob_term (probability for a state to be terminal): "; cin >> prob_term;
}
A.becomeRandomControl (nbstates, nbletters, num_graphe, max_nb_succ, max_nb_init, max_nb_term, min_trans, max_trans, min_iota, max_iota, min_tau, max_tau, normalization, prob_init, prob_trans, prob_term);
return A.save(output);
}
// Handle "--forward PFA sample".
// Loads the automaton argv[i] into the global A and the sample argv[i+1]
// into the global S, then prints, for every word of the sample, its p_bar
// value, its plog_bar value, the exponential of the latter and its
// p_bar_directe value, followed by the two running totals.
// Throws 2 when arguments are missing or when the two alphabets differ
// (both dictionaries are printed first in that case).
RESULT show_sample_forward(int argc, char**argv, int i) {
  if (argc < i+2) {
    cerr << "usage : --forward PFA sample\n";
    cerr << "return a list of couple word and its probability for the PFA\n";
    throw 2;
  }
  A.load(argv[i]);
  S.load(argv[i + 1]);

  if (A.dictionnaire() != S.dictionnaire()) {
    cerr << "Alphabets of the PFA and of the sample differ !" << endl;
    Dictionnaire dico;
    Dictionnaire::const_iterator entree;

    cerr << "dictionnaire of the PFA\n";
    dico = A.dictionnaire();
    entree = dico.begin();
    while (entree != dico.end()) {
      cout << entree->first << ":" << entree->second << "\n";
      entree++;
    }

    cerr << "dictionnaire of the Sample\n";
    dico = S.dictionnaire();
    entree = dico.begin();
    while (entree != dico.end()) {
      cout << entree->first << ":" << entree->second << "\n";
      entree++;
    }
    throw 2;
  }

  double total = 0;
  double total_log = 0;
  Sample::const_iterator mot = S.begin();
  while (mot != S.end()) {
    cout << A.affiche(mot->first) << " : ";
    cout.flush();

    const double forward = A.p_bar(mot->first);
    total += forward;
    cout << forward ;

    const double log_forward = A.plog_bar(mot->first);
    total_log += log_forward;
    cout << ", log forward : " << log_forward;
    cout << ", exp(log forward) :" << exp(log_forward);
    cout << ", pBarDirect : " << A.p_bar_directe(mot->first);
    cout << endl;
    mot++;
  }
  cout << "sum = " << total << ", somme de exp(log(forward)) : " << total_log << endl;
  return VAL(0);
}
// Handle "--proba PFA sample".
// Loads the automaton argv[i] into the global A and the sample argv[i+1]
// into the global S, then prints, for every word of the sample, its
// p_directe value, its plog value and the exponential of the latter,
// followed by the two running totals.
// Throws 2 when arguments are missing or when the two alphabets differ
// (both dictionaries are printed first in that case).
RESULT show_sample_proba(int argc, char **argv, int i){
  if (argc < i+2) {
    cerr << "usage : --proba PFA sample\n";
    cerr << "return a list of couple word and its probability for the PFA\n";
    throw 2;
  }
  A.load(argv[i]);
  S.load(argv[i + 1]);

  if (A.dictionnaire() != S.dictionnaire()) {
    cerr << "Alphabets of the PFA and of the sample differ !" << endl;
    Dictionnaire dico;
    Dictionnaire::const_iterator entree;

    cerr << "dictionnaire of the PFA\n";
    dico = A.dictionnaire();
    entree = dico.begin();
    while (entree != dico.end()) {
      cout << entree->first << ":" << entree->second << "\n";
      entree++;
    }

    cerr << "dictionnaire of the Sample\n";
    dico = S.dictionnaire();
    entree = dico.begin();
    while (entree != dico.end()) {
      cout << entree->first << ":" << entree->second << "\n";
      entree++;
    }
    throw 2;
  }

  double total = 0;
  double total_log = 0;
  Sample::const_iterator mot = S.begin();
  while (mot != S.end()) {
    cout << A.affiche(mot->first) << " : ";
    cout.flush();

    const double proba = A.p_directe(mot->first);
    total += proba;
    cout << proba ;

    const double log_proba = A.plog(mot->first);
    total_log += log_proba;
    cout << ", log p : " << log_proba;
    cout << ", exp(log p) :" << exp(log_proba);
    cout << endl;
    mot++;
  }
  cout << "sum = " << total << ", somme de exp(log(p)) : " << total_log << endl;
  return VAL(0);
}
void affiche_PSe(void) {
SFunc PSe;
SFunc::const_iterator q;
int res;
res = A.val_PSe(PSe);
cout << "res = " << res << endl;
for (q=PSe.begin() ; q != PSe.end() ; q++) {
cout << "P_" << q->first << "(\\Se) = " << q-> second << endl;
}
}
/// ############################################################################# ///
/// ### INIT COMMAND LINE + MAIN #### ///
/// ############################################################################# ///
// Return the value that follows the option at argv[i], moving i onto it.
// Throws 2 (the code used in this file for missing arguments) when the
// command line ends before the value, instead of reading past argv.
static const char *
option_value (int argc, char *argv[], int & i)
{
  if (i + 1 >= argc)
    {
      cerr << "missing value for option " << argv[i] << endl;
      throw 2;
    }
  return argv[++i];
}

// Parse the leading options of the command line into the global settings.
// Every argument starting with '-' is examined, from argv[1] up to the
// first argument that does not start with '-'. Command names are accepted
// silently (they are dispatched by the caller); unknown options only
// produce a warning on standard error.
// Returns the index of the first argument that was not consumed.
// Throws 1 when help is requested and 2 when an option lacks its value.
int
initialise_arguments (int argc, char *argv[])
{
  string arg;
  int i = 1;
  while ((i < argc) && (argv[i][0] == '-'))
    {
      arg = argv[i];
      if ((arg == "-h") || (arg == "--help"))
        { // show the help
          throw 1;
        }
      // command names: nothing to record here
      else if (
        (arg == "--export") || // export to other automaton formats
        (arg == "--convert") || // convert samples
        (arg == "--dist") || // show the distance between two ffa
        (arg == "--class") || // show the class of a pfa
        (arg == "--sample") || // generate a sample
        (arg == "--grp") || // generate a probabilistic automaton
        (arg == "--generate_random_pfa") || // long form of --grp
        (arg == "--gcrp") || // generate a complete probabilistic automaton
        (arg == "--generate_complete_random_pfa") || // long form of --gcrp
        (arg == "--mdi") || // mdi algorithm
        (arg == "--alergia") || // alergia algorithm
        (arg == "--bm") || // Baum Welch algorithm
        (arg == "--affiche") || // print an automaton on standard output
        (arg == "-H") || // extended help (options)
        (arg == "-Y" || arg == "-I") || // call the function Y (interactive mode)
        (arg == "--test") || // run a test from the automaton
        (arg == "--random") || // generate a random automaton
        (arg == "--proba") || // show probabilities of words of a sample
        (arg == "--forward") || // show forward values of words of a sample
        (arg == "--deletetransitions") || // delete transitions of some value
        ((arg == "-P") || (arg == "--showps")) || // postscript display
        (arg == "--PSe") // show the values of P(\Se) for each state
        )
        {
        }
      else if (
        (arg == "--dees") || // dees algorithm (target PRFA)
        (arg == "--deesha") || // deesha algorithm (target MA)
        (arg == "--deesdet") ) // dees restricted to deterministic solutions
        {
          precision=1;
        }
      else if (arg == "--deesbm") // dees Baum Welch algorithm
        {
          precision=2;
        }
      else if ((arg == "--DEBUG")
               || (arg == "-d") || (arg == "--debug"))
        {
          // PFA_DEBUG=true;
        }
      else if (arg == "--interactive")
        {
          // Plain flag: it takes no value, so the index is only advanced
          // by the common increment at the end of the loop.
          interactive=true;
        }
      else if (arg == "--format")
        {
          format = option_value (argc, argv, i);
        }
      else if ((arg == "--VERBOSE") || (arg == "-v") || (arg == "--verbose"))
        {
          verbose = true;
          // PFA_VERBOSE=true;
        }
      else if ((arg == "--MUTE") || (arg == "-m") || (arg == "--mute"))
        {
          // PFA_VERBOSE=false;
          verbose = false;
        }
      else if ((arg == "--SAFE") || (arg == "--safe"))
        {
          // PFA_SAFE=true;
        }
      else if ((arg == "--UNSAFE") || (arg == "--unsafe"))
        {
          // PFA_SAFE=false;
        }
      else if ((arg == "--taille_ech") || (arg == "-te"))
        {
          // The shared stream keeps its end-of-file state and old content
          // after an extraction: reset both before each conversion.
          convert.clear ();
          convert.str ("");
          convert << option_value (argc, argv, i);
          convert >> taille_ech;
        }
      else if (arg == "--maxmots")
        {
          convert.clear ();
          convert.str ("");
          convert << option_value (argc, argv, i);
          convert >> maxmots;
        }
      else if (arg == "--num_ech")
        {
          num_ech = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--num_ech_test")
        {
          num_ech_test = atoi (option_value (argc, argv, i));
        }
      else if ((arg == "--ficffaA") || (arg == "-i"))
        {
          ficffaA = option_value (argc, argv, i);
        }
      else if (arg == "--ficffaB")
        {
          ficffaB = option_value (argc, argv, i);
        }
      else if (arg == "-o")
        {
          // A single output name is used for every produced file.
          ficffaA = ficech = ficffaB = ficdotB = ficpsB = option_value (argc, argv, i);
        }
      else if (arg == "--moderet")
        {
          arg = option_value (argc, argv, i);
          if (arg == "begin")
            moderet = ::begin;
          else
            moderet = ::end;
        }
      else if (arg == "--modeeps")
        {
          arg = option_value (argc, argv, i);
          if (arg == "fixed")
            modeeps = epsfixed;
          else if (arg == "variable")
            modeeps = variable;
          else
            cerr << "option for --modeeps can be 'fixed' or 'variable', here it is '" << arg << "'" << endl;
        }
      else if (arg == "--ficdotA")
        {
          ficdotA = option_value (argc, argv, i);
        }
      else if (arg == "--ficdotB")
        {
          ficdotB = option_value (argc, argv, i);
        }
      else if (arg == "--ficech")
        {
          ficech = option_value (argc, argv, i);
        }
      else if (arg == "--ficechpre")
        {
          ficechpre = option_value (argc, argv, i);
        }
      else if ((arg == "--precision") || (arg == "-p"))
        {
          precision = atof (option_value (argc, argv, i));
        }
      else if (arg == "--epsprime")
        {
          epsprime = atof (option_value (argc, argv, i));
        }
      else if (arg == "--seuil")
        {
          seuil = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--numPFA")
        {
          numPFA = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--seuilbm")
        {
          seuilbm = atof (option_value (argc, argv, i));
        }
      else if (arg == "--nb_tours")
        {
          nb_tours = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--ficps")
        {
          ficpsA = option_value (argc, argv, i);
        }
      else if (arg == "--ficpsB")
        {
          ficpsB = option_value (argc, argv, i);
        }
      else if (arg == "--stepssave")
        {
          stepssave=true ;
        }
      else if (arg == "--nb_etats")
        {
          nb_etats = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--nb_essais")
        {
          nb_essais = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--max_states")
        {
          max_states = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--nb_lettres")
        {
          nb_lettres = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--precpow")
        {
          precpow = atof (option_value (argc, argv, i));
        }
      else if (arg == "--affiche_A")
        {
          affiche_A = true;
        }
      else if (arg == "--affiche_B")
        {
          affiche_B = true;
        }
      else if (arg == "--blind")
        {
          affiche_A = affiche_B = false;
        }
      else if ((arg == "--taille_ech_test") || (arg == "-tt"))
        {
          taille_ech_test = atoi (option_value (argc, argv, i));
        }
      else if (arg == "--name")
        {
          nom_A = option_value (argc, argv, i);
        }
      else if (arg == "--nom_B")
        {
          nom_B = option_value (argc, argv, i);
        }
      else if (arg == "--test_proportion")
        {
          test_proportion = atof (option_value (argc, argv, i));
        }
      else if (arg == "--smooth")
        {
          smooth=true;
        }
      else if (arg == "--rand")
        {
          srand( atoi(option_value (argc, argv, i)) );
        }
      else if (arg == "--viewprec")
        {
          viewprec=atoi(option_value (argc, argv, i));
        }
      else if (arg == "--viewstates")
        {
          viewstates=true;
        }
      else if (arg == "--viewmultipletransitions")
        {
          viewmultipletransitions=true;
        }
      else
        {
          cerr << "unknow option : " << arg << endl;
        }
      i++;
    }
  return i;
}
// La procedure principale
int
main (int argc, char *argv[])
{
int i = 0;
string commande;
commande = "mkdir " + temporary_directory + ">/dev/null 2>&1";
system(commande.c_str());
try
{
if (argc < 2)
throw 1;
randomize ();
i = initialise_arguments (argc, argv);
string arg;
arg = argv[1];
if (arg == "-h" || arg == "--help")
{ // ---------- help ---------------
throw 1;
}
else if (arg == "-H")
{ // ----------------- HELP with options -------
affiche_utilisation ();
affiche_options ();
cout << "Report bugs or suggestions to Yann Esposito <esposito@cmi.univ-mrs.fr>." << endl;
}
else if ((arg == "-Y") || (arg == "-I"))
{ // -------------- interactive mode -------------
Y ();
}
else if (arg == "--test")
{
test();
}
else if (arg == "--PSe")
{
ficffaA = argv[i];
if (!OK(A.load(ficffaA))) {
throw string ("Erreur d'ouverture du fichier ") + ficffaA;
}
affiche_PSe();
}
else if (arg == "--export")
{ // ---------------- exportation----------------
if (argc < i + 3) {
cerr << "export command export some automaton file to another format" << endl;
cerr << "available format are dot, pdf and ffa." << endl;
cerr << "usage: dees --export [OPTIONS] format input output" << endl;
throw 2;
}
format = argv[i];
ficffaA = argv[i+1];
ficffaB = argv[i + 2];
if (!OK(A.load (ficffaA))) {
throw string ("Erreur d'ouverture du fichier ") + ficffaA;
}
A.name = nom_A;
if (format == "dot")
{ // exportation to dot format
A.save_dot (ficffaB);
}
else if (format == "pdf")
{ // exportation to ps format
export_to_pdf (A, ficffaB, temporary_directory + "tmp.dot");
}
else if (format == "ffa")
{ // exportation ffa
A.save (ficffaB);
}
else
{
cerr << "export command export some automaton file to another format" << endl;
cerr << "available format are dot, ps and ffa." << endl;
cerr << "usage: dees --export [OPTIONS] format input output" << endl;
throw 4;
}
}
else if (arg == "--affiche")
{ // ------------------------ affichage -------------------
if (argc < i + 1) {
cerr << "--affiche command show the automaton on standart output" << endl;
cerr << "usage: dees --affiche [OPTIONS] automaton" << endl;
throw 2;
}
A.load (argv[i]);
A.affiche ();
}
else if ((arg == "--showps") || (arg == "-P"))
{ // --- affichage du postscript ------
if (argc < i + 1) {
cerr << "showps or -P command show the automaton in gv" << endl;
cerr << "you must have dot (part of Graphviz project) and a script named open" << endl;
cerr << "which is a pdf viewer installed" << endl;
cerr << "usage: dees <--showps or -P> [OPTIONS] automaton" << endl;
cerr << "usefull option: --name name" << endl;
throw 2; // pas assez d'arguments
}
A.load (argv[i]);
A.name = nom_A;
show(A);
}
else if (arg == "--dist")
{ // ----------------------- calcul des distances ------------
if (argc < i + 2) {
cerr << "dist command show some distances between two automata." << endl;
cerr << "usage: dees --dist [OPTIONS] automaton_A automaton_B" << endl;
cerr << "usefull option: --taille_ech_test size" << endl;
cerr << " --smooth or --no_smooth" << endl;
throw 2;
}
A.load (argv[i]);
B.load (argv[i + 1]);
if (smooth == true) {
B.lisse ();
}
affiche_distance (A, B, taille_ech_test, "");
}
else if (arg == "--deletetransitions") {
if (argc < i + 3) {
cerr << "dees --deletetransitions input_pfa output_pfa min max\n";
cerr << "delete transitions of the input_pfa which values are between min and max\n";
cerr << "renormalise and save it in output_pfa file.\n";
return -1;
}
PFA A;
string input=argv[i++];
string output=argv[i++];
double min=atof(argv[i++]);
double max=atof(argv[i++]);
A.load(input);
if (max < min) {
double tmp = max;
max=min;
min=tmp;
cerr << "Warning max < min !" << endl;
}
A.erase_transitions(max,min);
A.rend_PFA();
A.save(output);
}
else if (arg == "--sample")
{ // --------------------- generation d'un echantillon ---
if (argc < i + 3) {
cerr << "sample command generate a sample from an automaton." << endl;
cerr << "usage: dees --sample [OPTIONS] automaton size sample" << endl;
cerr << "usefull options:" << endl;
cerr << "--format <ffa, alergia or mdi>" << endl;
cerr << "--num_ech num_sample" << endl;
throw 2;
}
ficffaA = argv[i];
taille_ech = atoi (argv[i + 1]);
ficech = argv[i + 2];
A.load (ficffaA);
A.genere_echantillon (taille_ech, S, num_ech);
if (format == "ffa")
S.save (ficech, ffa);
else if (format == "alergia")
S.save (ficech, alergia);
else if (format == "mdi")
S.save (ficech, mdi);
else
throw 4;
}
else if (arg == "--mdi") {
if (argc > i)
{
ficech = argv[i];
}
else
{
throw 1;
}
if (verbose)
cout << "loading " << ficech << "..." << endl;
MDI(A,precision,ficech,verbose);
if (verbose)
cout << "saving A in " << ficffaA << endl;
A.save (ficffaA);
// affichage de l'automate
if (affiche_A)
{
show(A);
}
}
else if (arg == "--alergia") {
if (argc <= i)
throw 1;
ficech = argv[i];
if (verbose)
cout << "loading " << ficech << "..." << endl;
Alergia(A,precision,ficech,verbose);
if (verbose)
cout << "saving A in " << ficffaA << endl;
A.save (ficffaA);
// affichage de l'automate
if (affiche_A)
{
show(A);
}
}
else if (arg == "--deesdet")
{
if (argc > i)
{
ficech = argv[i];
}
else
{
cerr << "--dees command learn an automaton using dees." << endl;
cerr << "usage: dees --dees [OPTIONS] sample" << endl;
cerr << "usefull options:" << endl;
cerr << "-p or --precision float\tthe precision parameter" << endl;
cerr << "--epsprime float\tnumber under which a transition is deleted" << endl;
cerr << "-v or --verbose" << endl;
cerr << "--moderet [begin or end]" << endl;
cerr << "--max_states number" << endl;
cerr << "--seuil seuil" << endl;
cerr << "--stepssave\tsave steps of the algorithm" << endl;
throw 2;
}
if (verbose)
cout << "loading " << ficech << "..." << endl;
if (!OK (S.load (ficech.c_str ())))
{
cerr << "impossible d'ouvrir " << ficech << endl;
throw 5;
}
epsprime = precision*pow(double(S.size()),-0.25)/5;
epsprime = min(sqrt(precision),0.1);
A.DEES (determinist, S, precision, epsprime, verbose, moderet, modeeps, max_states, seuil,10,0,true,stepssave);
if (verbose)
cout << "saving A in " << ficffaA << endl;
A.save (ficffaA);
// affichage de l'automate
if (affiche_A)
{
show(A);
}
}
else if (arg == "--dees")
{
if (argc > i)
{
ficech = argv[i];
}
else
{
cerr << "--dees command learn an automaton using dees." << endl;
cerr << "usage: dees --dees [OPTIONS] sample" << endl;
cerr << "usefull options:" << endl;
cerr << "-p or --precision float\tthe precision parameter" << endl;
cerr << "--epsprime float\tnumber under which a transition is deleted" << endl;
cerr << "-v or --verbose" << endl;
cerr << "--moderet [begin or end]" << endl;
cerr << "--max_states number" << endl;
cerr << "--seuil seuil" << endl;
cerr << "--stepssave\tsave steps of the algorithm" << endl;
throw 2;
}
if (verbose)
cout << "loading " << ficech << "..." << endl;
if (!OK (S.load (ficech.c_str ())))
{
cerr << "impossible d'ouvrir " << ficech << endl;
throw 5;
}
epsprime = precision*pow(double(S.size()),-0.25)/5;
epsprime = min(sqrt(precision),0.1);
A.DEES (positive, S, precision, epsprime, verbose, moderet, modeeps, max_states, seuil,10,0,true,stepssave);
if (verbose)
cout << "saving A in " << ficffaA << endl;
A.save (ficffaA);
// affichage de l'automate
if (affiche_A)
{
show(A);
}
}
else if (arg == "--deesbm") {
// --- DEES version Baum Welch ---
if (argc > i)
{
ficech = argv[i];
}
else
{
cerr << "--deesbm command learn an automaton using deesbm." << endl;
cerr << "usage: dees --deesbm [OPTIONS] sample" << endl;
cerr << "usefull options:" << endl;
cerr << "-p or --precision number" << endl;
cerr << "-v or --verbose" << endl;
cerr << "--max_states number\tmaximal number of states" << endl;
cerr << "--seuil number\tminimal number of suffix do use residual" << endl;
cerr << "--nb_tours number\tmax number of turns for Baum Welch" << endl;
cerr << "--seuilbm double\tprecision under which Baum Welch is considered to found the Max Likelihood Model" << endl;
cerr << "-o or --ficffaA file\ttarget file" << endl;
throw 2;
}
if (verbose)
cout << "loading " << ficech << "..." << endl;
if (!OK (S.load (ficech.c_str ())))
{
cerr << "impossible d'ouvrir " << ficech <<
endl;
throw 5;
}
epsprime = precision*pow(double(S.size()),-0.25)/5;
A.DEESBM (S, precision, epsprime, verbose, max_states, seuil, seuilbm, nb_tours);
if (verbose)
cout << "saving A in " << ficffaA << endl;
// A.emmonde();
A.save (ficffaA);
// affichage de l'automate
if (affiche_A)
{
show(A);
}
}
else if (arg == "--deesha")
{
if (argc > i)
{
ficech = argv[i];
}
else
{
throw 1;
}
if (verbose)
cout << "loading " << ficech << "..." << endl;
if (!OK (S.load (ficech.c_str ())))
{
cerr << "impossible d'ouvrir " << ficech <<
endl;
throw 5;
}
// epsprime = precision/(2*sqrt(sqrt(n)))
// epsprime est donc un o(precision)
epsprime = precision*pow(double(S.size()),-0.25)/5;
A.DEES (nonconstrained, S, precision, epsprime, verbose, moderet,modeeps, max_states, seuil, maxmots);
if (verbose)
cout << "saving A in " << ficffaA << endl;
// A.emmonde();
A.save (ficffaA);
// affichage de l'automate
if (affiche_A)
{
show(A);
}
}
else if ((arg == "--generate_complete_random_pfa") || (arg == "--gcrp"))
{ // --- generation d'un pfa aléatoire complet ---
generate_complete_random_pfa(argc, argv, i);
}
else if ((arg == "--generate_random_pfa") || (arg == "--grp"))
{
generate_random_pfa(argc, argv, i);
}
else if (arg == "--proba") { // show values of words for an MA
show_sample_proba(argc, argv, i);
}
else if (arg == "--forward") { // show prefix values of words for an MA
show_sample_forward(argc, argv, i);
}
else if (arg == "--class")
{ // --- show the class of the automaton
if (argc < 3)
throw 2;
A.load (argv[2]);
affiche_classe (A);
}
else if (arg == "--bm") { // Baum Welch Algorithm
if (argc < i+4) {
cerr << "dees --bm pfa sample number_of_turns output_pfa\n";
cerr << "work baum welch using the pfa for initialisation (structure + parameters)\n";
cerr << "remember Baum Welch never change a 0 valued parameter\n";
return -1;
}
string pfa=argv[i++];
string sample=argv[i++];
int nb_turns=atoi(argv[i++]);
string output=argv[i++];
A.load(pfa);
S.load(sample);
BaumWelch(A,S,nb_turns,output);
}
else if (arg == "--convert") {
if (argc < i+3) {
cerr << "dees --convert format sample output\n";
cerr << "Change the format of the sample to output.\n";
cerr << "format could be dees for internal, alergia or mdi\n";
return -1;
}
string format=argv[++i];
string input=argv[++i];
string output=argv[++i];
S.load(input);
if (format == "alergia") {
S.save(output, alergia);
}
else if (format == "mdi") {
S.save(output, mdi);
}
else {
S.save(output);
}
}
else
{ // ---------- choix non traitÈ -------------
throw 1;
}
return EXIT_SUCCESS;
}
catch (int err)
{
switch (err)
{
case 1:
affiche_utilisation ();
cout << "Report bugs or suggestions to Yann Esposito <esposito@cmi.univ-mrs.fr>." << endl;
break;
case 2:
cerr << "\nNo enougth arguments" << endl;
break;
case 3:
cerr << "Input error, I cannot read the file." <<
argv[i] << endl;
break;
case 4:
cerr << format << "is an unknown format." << endl;
break;
case 5:
cerr << "I/O Error !!!" << endl;
break;
default:
cerr << "unknown error occured !" << endl;
}
if (err != 1)
{
cerr << "use -h, --help or -H option to display help" << endl;
}
}
catch (string erreur) {
cerr << erreur << endl;
}
catch (...)
{
cerr << "Unknown error !!!" << endl;
affiche_utilisation ();
return -1;
}
}