src/tinygettext/tinygettext.cpp

   1 //  $Id$
   2 //
   3 //  TinyGetText
   4 //  Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
   5 //
   6 //  This program is free software; you can redistribute it and/or
   7 //  modify it under the terms of the GNU General Public License
   8 //  as published by the Free Software Foundation; either version 2
   9 //  of the License, or (at your option) any later version.
  10 //
  11 //  This program is distributed in the hope that it will be useful,
  12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 //  GNU General Public License for more details.
  15 //
  16 //  You should have received a copy of the GNU General Public License
  17 //  along with this program; if not, write to the Free Software
  18 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
#include <config.h>

#include <sys/types.h>
#include <fstream>
#include <iostream>
#include <algorithm>
#include <vector>
#include <ctype.h>
#include <errno.h>

#include <SDL.h>

#ifndef GP2X
#include <SDL_stdinc.h>
#endif

#include "tinygettext.hpp"
#include "log.hpp"
#include "physfs/physfs_stream.hpp"
#include "findlocale.hpp"
  39
  40 //#define TRANSLATION_DEBUG
  41
  42 namespace TinyGetText {
  43
  44 /** Convert \a which is in \a from_charset to \a to_charset and return it */
  45 std::string convert(const std::string& text,
  46                     const std::string& from_charset,
  47                     const std::string& to_charset)
  48 {
  49 #ifndef GP2X
  50   if (from_charset == to_charset)
  51     return text;
  52
  53   char *in = new char[text.length() + 1];
  54   strcpy(in, text.c_str());
  55   char *out = SDL_iconv_string(to_charset.c_str(), from_charset.c_str(), in, text.length() + 1);
  56   delete[] in;
  57   if(out == 0)
  58   {
  59     log_warning << "Error: conversion from " << from_charset << " to " << to_charset << " failed" << std::endl;
  60     return "";
  61   }
  62   std::string ret(out);
  63   SDL_free(out);
  64   return ret;
  65 #else
  66   log_warning << "FIXME: Char conversion not supported on GP2X!" << std::endl;
  67   return "";
  68 #endif
  69 #if 0
  70   iconv_t cd = SDL_iconv_open(to_charset.c_str(), from_charset.c_str());
  71
  72   size_t in_len = text.length();
  73   size_t out_len = text.length()*3; // FIXME: cross fingers that this is enough
  74
  75   char*  out_orig = new char[out_len];
  76   char*  in_orig  = new char[in_len+1];
  77   strcpy(in_orig, text.c_str());
  78
  79   char* out = out_orig;
  80   ICONV_CONST char* in  = in_orig;
  81   size_t out_len_temp = out_len; // iconv is counting down the bytes it has
  82                                  // written from this...
  83
  84   size_t retval = SDL_iconv(cd, &in, &in_len, &out, &out_len_temp);
  85   out_len -= out_len_temp; // see above
  86   if (retval == (size_t) -1)
  87     {
  88       log_warning << strerror(errno) << std::endl;
  89       log_warning << "Error: conversion from " << from_charset << " to " << to_charset << " went wrong: " << retval << std::endl;
  90       return "";
  91     }
  92   SDL_iconv_close(cd);
  93
  94   std::string ret(out_orig, out_len);
  95   delete[] out_orig;
  96   delete[] in_orig;
  97   return ret;
  98 #endif
  99 }
 100
 101 bool has_suffix(const std::string& lhs, const std::string rhs)
 102 {
 103   if (lhs.length() < rhs.length())
 104     return false;
 105   else
 106     return lhs.compare(lhs.length() - rhs.length(), rhs.length(), rhs) == 0;
 107 }
 108
 109 bool has_prefix(const std::string& lhs, const std::string rhs)
 110 {
 111   if (lhs.length() < rhs.length())
 112     return false;
 113   else
 114     return lhs.compare(0, rhs.length(), rhs) == 0;
 115 }
 116
 117 int plural1(int )     { return 0; }
 118 int plural2_1(int n)  { return (n != 1); }
 119 int plural2_2(int n)  { return (n > 1); }
 120 int plural3_lv(int n) { return (n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2); }
 121 int plural3_ga(int n) { return n==1 ? 0 : n==2 ? 1 : 2; }
 122 int plural3_lt(int n) { return (n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2); }
 123 int plural3_1(int n)  { return (n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }
 124 int plural3_sk(int n) { return (n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2; }
 125 int plural3_pl(int n) { return (n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }
 126 int plural3_sl(int n) { return (n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3); }
 127
 128 /** Language Definitions */
 129 //*{
 130 LanguageDef lang_hu("hu", "Hungarian",         1, plural1); // "nplurals=1; plural=0;"
 131 LanguageDef lang_ja("ja", "Japanese",          1, plural1); // "nplurals=1; plural=0;"
 132 LanguageDef lang_ko("ko", "Korean",            1, plural1); // "nplurals=1; plural=0;"
 133 LanguageDef lang_tr("tr", "Turkish",           1, plural1); // "nplurals=1; plural=0;"
 134 LanguageDef lang_da("da", "Danish",            2, plural2_1); // "nplurals=2; plural=(n != 1);"
 135 LanguageDef lang_nl("nl", "Dutch",             2, plural2_1); // "nplurals=2; plural=(n != 1);"
 136 LanguageDef lang_en("en", "English",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
 137 LanguageDef lang_fo("fo", "Faroese",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
 138 LanguageDef lang_de("de", "German",            2, plural2_1); // "nplurals=2; plural=(n != 1);"
 139 LanguageDef lang_nb("nb", "Norwegian Bokmal",  2, plural2_1); // "nplurals=2; plural=(n != 1);"
 140 LanguageDef lang_no("no", "Norwegian",         2, plural2_1); // "nplurals=2; plural=(n != 1);"
 141 LanguageDef lang_nn("nn", "Norwegian Nynorsk", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
 142 LanguageDef lang_sv("sv", "Swedish",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
 143 LanguageDef lang_et("et", "Estonian",          2, plural2_1); // "nplurals=2; plural=(n != 1);"
 144 LanguageDef lang_fi("fi", "Finnish",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
 145 LanguageDef lang_el("el", "Greek",             2, plural2_1); // "nplurals=2; plural=(n != 1);"
 146 LanguageDef lang_he("he", "Hebrew",            2, plural2_1); // "nplurals=2; plural=(n != 1);"
 147 LanguageDef lang_it("it", "Italian",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
 148 LanguageDef lang_pt("pt", "Portuguese",        2, plural2_1); // "nplurals=2; plural=(n != 1);"
 149 LanguageDef lang_es("es", "Spanish",           2, plural2_1); // "nplurals=2; plural=(n != 1);"
 150 LanguageDef lang_eo("eo", "Esperanto",         2, plural2_1); // "nplurals=2; plural=(n != 1);"
 151 LanguageDef lang_fr("fr", "French",            2, plural2_2); // "nplurals=2; plural=(n > 1);"
 152 LanguageDef lang_pt_BR("pt_BR", "Brazilian",   2, plural2_2); // "nplurals=2; plural=(n > 1);"
 153 LanguageDef lang_lv("lv", "Latvian",           3, plural3_lv); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2);"
 154 LanguageDef lang_ga("ga", "Irish",             3, plural3_ga); // "nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;"
 155 LanguageDef lang_lt("lt", "Lithuanian",        3, plural3_lt); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2);"
 156 LanguageDef lang_hr("hr", "Croatian",          3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
 157 LanguageDef lang_cs("cs", "Czech",             3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
 158 LanguageDef lang_ru("ru", "Russian",           3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
 159 LanguageDef lang_uk("uk", "Ukrainian",         3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
 160 LanguageDef lang_sk("sk", "Slovak",            3, plural3_sk); // "nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;"
 161 LanguageDef lang_pl("pl", "Polish",            3, plural3_pl); // "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);
 162 LanguageDef lang_sl("sl", "Slovenian",         3, plural3_sl); // "nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3);"
 163 //*}
 164
 165 LanguageDef&
 166 get_language_def(const std::string& name)
 167 {
 168   if (name == "hu") return lang_hu;
 169   else if (name == "ja") return lang_ja;
 170   else if (name == "ko") return lang_ko;
 171   else if (name == "tr") return lang_tr;
 172   else if (name == "da") return lang_da;
 173   else if (name == "nl") return lang_nl;
 174   else if (name == "en") return lang_en;
 175   else if (name == "fo") return lang_fo;
 176   else if (name == "de") return lang_de;
 177   else if (name == "nb") return lang_nb;
 178   else if (name == "no") return lang_no;
 179   else if (name == "nn") return lang_nn;
 180   else if (name == "sv") return lang_sv;
 181   else if (name == "et") return lang_et;
 182   else if (name == "fi") return lang_fi;
 183   else if (name == "el") return lang_el;
 184   else if (name == "he") return lang_he;
 185   else if (name == "it") return lang_it;
 186   else if (name == "pt") return lang_pt;
 187   else if (name == "es") return lang_es;
 188   else if (name == "eo") return lang_eo;
 189   else if (name == "fr") return lang_fr;
 190   else if (name == "pt_BR") return lang_pt_BR;
 191   else if (name == "lv") return lang_lv;
 192   else if (name == "ga") return lang_ga;
 193   else if (name == "lt") return lang_lt;
 194   else if (name == "hr") return lang_hr;
 195   else if (name == "cs") return lang_cs;
 196   else if (name == "ru") return lang_ru;
 197   else if (name == "uk") return lang_uk;
 198   else if (name == "sk") return lang_sk;
 199   else if (name == "pl") return lang_pl;
 200   else if (name == "sl") return lang_sl;
 201   else return lang_en;
 202 }
 203
 204 DictionaryManager::DictionaryManager()
 205   : current_dict(&empty_dict)
 206 {
 207   parseLocaleAliases();
 208   // Environment variable SUPERTUX_LANG overrides language settings.
 209   const char* lang = getenv( "SUPERTUX_LANG" );
 210   if( lang ){
 211     set_language( lang );
 212     return;
 213   }
 214   // use findlocale to setup language
 215   FL_Locale *locale;
 216   FL_FindLocale( &locale, FL_MESSAGES );
 217   if(locale->lang) {
 218     if (locale->country) {
 219       set_language( std::string(locale->lang)+"_"+std::string(locale->country) );
 220     } else {
 221       set_language( std::string(locale->lang) );
 222     }
 223   }
 224   FL_FreeLocale( &locale );
 225 }
 226
 227 void
 228 DictionaryManager::parseLocaleAliases()
 229 {
 230   // try to parse language alias list
 231   std::ifstream in("/usr/share/locale/locale.alias");
 232
 233   char c = ' ';
 234   while(in.good() && !in.eof()) {
 235     while(isspace(static_cast<unsigned char>(c)) && !in.eof())
 236       in.get(c);
 237
 238     if(c == '#') { // skip comments
 239       while(c != '\n' && !in.eof())
 240         in.get(c);
 241       continue;
 242     }
 243
 244     std::string alias;
 245     while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
 246       alias += c;
 247       in.get(c);
 248     }
 249     while(isspace(static_cast<unsigned char>(c)) && !in.eof())
 250       in.get(c);
 251     std::string language;
 252     while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
 253       language += c;
 254       in.get(c);
 255     }
 256
 257     if(in.eof())
 258       break;
 259     set_language_alias(alias, language);
 260   }
 261 }
 262
 263 Dictionary&
 264 DictionaryManager::get_dictionary(const std::string& spec)
 265 {
 266
 267   //log_debug << "Dictionary for language \"" << spec << "\" requested" << std::endl;
 268
 269   std::string lang = get_language_from_spec(spec);
 270
 271   //log_debug << "...normalized as \"" << lang << "\"" << std::endl;
 272
 273   Dictionaries::iterator i = dictionaries.find(get_language_from_spec(lang));
 274   if (i != dictionaries.end())
 275     {
 276       return i->second;
 277     }
 278   else // Dictionary for languages lang isn't loaded, so we load it
 279     {
 280       //log_debug << "get_dictionary: " << lang << std::endl;
 281       Dictionary& dict = dictionaries[lang];
 282
 283       dict.set_language(get_language_def(lang));
 284       if(charset != "")
 285         dict.set_charset(charset);
 286
 287       for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
 288         {
 289           char** files = PHYSFS_enumerateFiles(p->c_str());
 290           if(!files)
 291             {
 292               log_warning << "Error: enumerateFiles() failed on " << *p << std::endl;
 293             }
 294           else
 295             {
 296               for(const char* const* filename = files;
 297                       *filename != 0; filename++) {
 298
 299                 // check if filename matches requested language
 300                 std::string fname = std::string(*filename);
 301                 std::string load_from_file = "";
 302                 if(fname == lang + ".po") {
 303                   load_from_file = fname;
 304                 } else {
 305                   std::string::size_type s = lang.find("_");
 306                   if(s != std::string::npos) {
 307                     std::string lang_short = std::string(lang, 0, s);
 308                     if (fname == lang_short + ".po") {
 309                       load_from_file = lang_short;
 310                     }
 311                   }
 312                 }
 313
 314                 // if it matched, load dictionary
 315                 if (load_from_file != "") {
 316                   //log_debug << "Loading dictionary for language \"" << lang << "\" from \"" << filename << "\"" << std::endl;
 317                   std::string pofile = *p + "/" + *filename;
 318                   try {
 319                       IFileStream in(pofile);
 320                       read_po_file(dict, in);
 321                   } catch(std::exception& e) {
 322                       log_warning << "Error: Failure file opening: " << pofile << std::endl;
 323                       log_warning << e.what() << "" << std::endl;
 324                   }
 325                 }
 326
 327               }
 328               PHYSFS_freeList(files);
 329             }
 330         }
 331
 332       return dict;
 333     }
 334 }
 335
 336 std::set<std::string>
 337 DictionaryManager::get_languages()
 338 {
 339   std::set<std::string> languages;
 340
 341   for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
 342     {
 343       char** files = PHYSFS_enumerateFiles(p->c_str());
 344       if (!files)
 345         {
 346           log_warning << "Error: opendir() failed on " << *p << std::endl;
 347         }
 348       else
 349         {
 350           for(const char* const* file = files; *file != 0; file++) {
 351               if(has_suffix(*file, ".po")) {
 352                   std::string filename = *file;
 353                   languages.insert(filename.substr(0, filename.length()-3));
 354               }
 355           }
 356           PHYSFS_freeList(files);
 357         }
 358     }
 359   return languages;
 360 }
 361
 362 void
 363 DictionaryManager::set_language(const std::string& lang)
 364 {
 365   //log_debug << "set_language \"" << lang << "\"" << std::endl;
 366   language = get_language_from_spec(lang);
 367   //log_debug << "==> \"" << language << "\"" << std::endl;
 368   current_dict = & (get_dictionary(language));
 369 }
 370
 371 const std::string&
 372 DictionaryManager::get_language() const
 373 {
 374   return language;
 375 }
 376
 377 void
 378 DictionaryManager::set_charset(const std::string& charset)
 379 {
 380   dictionaries.clear(); // changing charset invalidates cache
 381   this->charset = charset;
 382   set_language(language);
 383 }
 384
 385 void
 386 DictionaryManager::set_language_alias(const std::string& alias,
 387     const std::string& language)
 388 {
 389   language_aliases.insert(std::make_pair(alias, language));
 390 }
 391
 392 std::string
 393 DictionaryManager::get_language_from_spec(const std::string& spec)
 394 {
 395   std::string lang = spec;
 396   Aliases::iterator i = language_aliases.find(lang);
 397   if(i != language_aliases.end()) {
 398     lang = i->second;
 399   }
 400
 401   std::string::size_type s = lang.find(".");
 402   if(s != std::string::npos) {
 403     lang = std::string(lang, 0, s);
 404   }
 405
 406   s = lang.find("_");
 407   if(s == std::string::npos) {
 408     std::string lang_big = lang;
 409     std::transform (lang_big.begin(), lang_big.end(), lang_big.begin(), toupper);
 410     lang += "_" + lang_big;
 411   }
 412
 413   return lang;
 414
 415 }
 416
 417 void
 418 DictionaryManager::add_directory(const std::string& pathname)
 419 {
 420   dictionaries.clear(); // adding directories invalidates cache
 421   search_path.push_back(pathname);
 422   set_language(language);
 423 }
 424
 425 //---------------------------------------------------------------------------
 426
 427 Dictionary::Dictionary(const LanguageDef& language_, const std::string& charset_)
 428   : language(language_), charset(charset_)
 429 {
 430 }
 431
 432 Dictionary::Dictionary()
 433   : language(lang_en)
 434 {
 435 }
 436
 437 std::string
 438 Dictionary::get_charset() const
 439 {
 440   return charset;
 441 }
 442
 443 void
 444 Dictionary::set_charset(const std::string& charset_)
 445 {
 446   charset = charset_;
 447 }
 448
 449 void
 450 Dictionary::set_language(const LanguageDef& lang)
 451 {
 452   language = lang;
 453 }
 454
 455 std::string
 456 Dictionary::translate(const std::string& msgid, const std::string& msgid2, int num)
 457 {
 458   PluralEntries::iterator i = plural_entries.find(msgid);
 459   std::map<int, std::string>& msgstrs = i->second;
 460
 461   if (i != plural_entries.end() && !msgstrs.empty())
 462     {
 463       int g = language.plural(num);
 464       std::map<int, std::string>::iterator j = msgstrs.find(g);
 465       if (j != msgstrs.end())
 466         {
 467           return j->second;
 468         }
 469       else
 470         {
 471           // Return the first translation, in case we can't translate the specific number
 472           return msgstrs.begin()->second;
 473         }
 474     }
 475   else
 476     {
 477 #ifdef TRANSLATION_DEBUG
 478       log_warning << "Couldn't translate: " << msgid << std::endl;
 479       log_warning << "Candidates: " << std::endl;
 480       for (PluralEntries::iterator i = plural_entries.begin(); i != plural_entries.end(); ++i)
 481         log_debug << "'" << i->first << "'" << std::endl;
 482 #endif
 483
 484       if (plural2_1(num)) // default to english rules
 485         return msgid2;
 486       else
 487         return msgid;
 488     }
 489 }
 490
 491 const char*
 492 Dictionary::translate(const char* msgid)
 493 {
 494   Entries::iterator i = entries.find(msgid);
 495   if (i != entries.end() && !i->second.empty())
 496     {
 497       return i->second.c_str();
 498     }
 499   else
 500     {
 501 #ifdef TRANSLATION_DEBUG
 502       log_warning << "Couldn't translate: " << msgid << std::endl;
 503 #endif
 504       return msgid;
 505     }
 506 }
 507
 508 std::string
 509 Dictionary::translate(const std::string& msgid)
 510 {
 511   Entries::iterator i = entries.find(msgid);
 512   if (i != entries.end() && !i->second.empty())
 513     {
 514       return i->second;
 515     }
 516   else
 517     {
 518 #ifdef TRANSLATION_DEBUG
 519       log_warning << "Couldn't translate: " << msgid << std::endl;
 520 #endif
 521       return msgid;
 522     }
 523 }
 524
 525 void
 526 Dictionary::add_translation(const std::string& msgid, const std::string& ,
 527                             const std::map<int, std::string>& msgstrs)
 528 {
 529   // Do we need msgid2 for anything? its after all supplied to the
 530   // translate call, so we just throw it away
 531   plural_entries[msgid] = msgstrs;
 532 }
 533
 534 void
 535 Dictionary::add_translation(const std::string& msgid, const std::string& msgstr)
 536 {
 537   entries[msgid] = msgstr;
 538 }
 539
 540 class POFileReader
 541 {
 542 private:
 543   Dictionary& dict;
 544   std::istream& in;
 545
 546   std::string from_charset;
 547   std::string to_charset;
 548
 549   int line_num;
 550   int c; //TODO: char c? unsigned char c?
 551   enum Token {
 552       TOKEN_KEYWORD, //msgstr, msgid, etc.
 553       TOKEN_CONTENT, //string literals, concatenated ("" "foo\n" "bar\n" -> "foo\nbar\n")
 554       TOKEN_EOF      //ran out of tokens
 555   };
 556   Token token;
 557   std::string tokenContent; //current contents of the keyword or string literal(s)
 558
 559 public:
 560   POFileReader(std::istream& in_, Dictionary& dict_)
 561     : in(in_), dict(dict_)
 562   {
 563     line_num = 0;
 564     nextChar();
 565     if(c == 0xef) { // skip UTF-8 intro that some text editors produce
 566         nextChar();
 567         nextChar();
 568         nextChar();
 569     }
 570     tokenize_po();
 571   }
 572
 573   void parse_header(const std::string& header)
 574   {
 575     // Separate the header in lines
 576     typedef std::vector<std::string> Lines;
 577     Lines lines;
 578
 579     std::string::size_type start = 0;
 580     for(std::string::size_type i = 0; i < header.length(); ++i)
 581       {
 582         if (header[i] == '\n')
 583           {
 584             lines.push_back(header.substr(start, i - start));
 585             start = i+1;
 586           }
 587       }
 588
 589     for(Lines::iterator i = lines.begin(); i != lines.end(); ++i)
 590       {
 591         if (has_prefix(*i, "Content-Type: text/plain; charset=")) {
 592           from_charset = i->substr(strlen("Content-Type: text/plain; charset="));
 593         }
 594       }
 595
 596     if (from_charset.empty() || from_charset == "CHARSET")
 597       {
 598         log_warning << "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl;
 599         from_charset = "ISO-8859-1";
 600       }
 601
 602     to_charset = dict.get_charset();
 603     if (to_charset.empty())
 604       { // No charset requested from the dict, use utf-8
 605         to_charset = "utf-8";
 606         dict.set_charset(from_charset);
 607       }
 608   }
 609
 610   inline void nextChar()
 611   {
 612     c = in.get();
 613     if (c == '\n')
 614       line_num++;
 615   }
 616
 617   inline void skipSpace()
 618   {
 619     if(c == EOF)
 620       return;
 621
 622     while(isspace(static_cast<unsigned char>(c))) nextChar();
 623
 624     // Comments are whitespace too (remove if we ever parse comments)
 625     if (c == '#')
 626       {
 627         do {
 628             nextChar();
 629         } while(c != '\n' && c != EOF);
 630       }
 631   }
 632
 633   inline bool expectToken(std::string type, Token wanted) {
 634      if(token != wanted) {
 635         log_warning << "Expected " << type << ", got ";
 636         if(token == TOKEN_EOF)
 637           log_warning << "EOF";
 638         else if(token == TOKEN_KEYWORD)
 639           log_warning << "keyword '" << tokenContent << "'";
 640         else
 641           log_warning << "string \"" << tokenContent << '"';
 642
 643         log_warning << " at line " << line_num << std::endl;
 644         return false;
 645      }
 646      return true;
 647   }
 648
 649   inline bool expectContent(std::string type, std::string wanted) {
 650      if(tokenContent != wanted) {
 651         log_warning << "Expected " << type << ", got ";
 652         if(token == TOKEN_EOF)
 653           log_warning << "EOF";
 654         else if(token == TOKEN_KEYWORD)
 655           log_warning << "keyword '" << tokenContent << "'";
 656         else
 657           log_warning << "string \"" << tokenContent << '"';
 658
 659         log_warning << " at line " << line_num << std::endl;
 660         return false;
 661      }
 662      return true;
 663   }
 664
 665   void tokenize_po()
 666     {
 667       while((token = nextToken()) != TOKEN_EOF)
 668         {
 669           if(!expectToken("'msgid' keyword", TOKEN_KEYWORD) || !expectContent("'msgid' keyword", "msgid")) break;
 670
 671           token = nextToken();
 672           if(!expectToken("name after msgid", TOKEN_CONTENT)) break;
 673           std::string current_msgid = tokenContent;
 674
 675           token = nextToken();
 676           if(!expectToken("msgstr or msgid_plural", TOKEN_KEYWORD)) break;
 677           if(tokenContent == "msgid_plural")
 678             {
 679               //Plural form
 680               token = nextToken();
 681               if(!expectToken("msgid_plural content", TOKEN_CONTENT)) break;
 682               std::string current_msgid_plural = tokenContent;
 683
 684               std::map<int, std::string> msgstr_plural;
 685               while((token = nextToken()) == TOKEN_KEYWORD && has_prefix(tokenContent, "msgstr["))
 686                 {
 687                   int num;
 688                   if (sscanf(tokenContent.c_str(), "msgstr[%d]", &num) != 1)
 689                     {
 690                       log_warning << "Error: Couldn't parse: " << tokenContent << std::endl;
 691                     }
 692
 693                   token = nextToken();
 694                   if(!expectToken("msgstr[x] content", TOKEN_CONTENT)) break;
 695                   msgstr_plural[num] = convert(tokenContent, from_charset, to_charset);
 696                 }
 697               dict.add_translation(current_msgid, current_msgid_plural, msgstr_plural);
 698             }
 699           else
 700             {
 701               // "Ordinary" translation
 702               if(!expectContent("'msgstr' keyword", "msgstr")) break;
 703
 704               token = nextToken();
 705               if(!expectToken("translation in msgstr", TOKEN_CONTENT)) break;
 706
 707               if (current_msgid == "")
 708                 { // .po Header is hidden in the msgid with the empty string
 709                   parse_header(tokenContent);
 710                 }
 711               else
 712                 {
 713                   dict.add_translation(current_msgid, convert(tokenContent, from_charset, to_charset));
 714                 }
 715             }
 716         }
 717     }
 718
 719   Token nextToken()
 720   {
 721     if(c == EOF)
 722       return TOKEN_EOF;
 723
 724     //Clear token contents
 725     tokenContent = "";
 726
 727     skipSpace();
 728
 729     if(c != '"')
 730       {
 731         // Read a keyword
 732         do {
 733           tokenContent += c;
 734           nextChar();
 735         } while(c != EOF && !isspace(static_cast<unsigned char>(c)));
 736         return TOKEN_KEYWORD;
 737       }
 738     else
 739       {
 740         do {
 741           nextChar();
 742           // Read content
 743           while(c != EOF && c != '"') {
 744             if (c == '\\') {
 745               nextChar();
 746               if (c == 'n') c = '\n';
 747               else if (c == 't') c = '\t';
 748               else if (c == 'r') c = '\r';
 749               else if (c == '"') c = '"';
 750               else if (c == '\\') c = '\\';
 751               else
 752                 {
 753                   log_warning << "Unhandled escape character: " << char(c) << std::endl;
 754                   c = ' ';
 755                 }
 756             }
 757             tokenContent += c;
 758             nextChar();
 759           }
 760           if(c == EOF) {
 761             log_warning << "Unclosed string literal: " << tokenContent << std::endl;
 762             return TOKEN_CONTENT;
 763           }
 764
 765           // Read more strings?
 766           skipSpace();
 767         } while(c == '"');
 768         return TOKEN_CONTENT;
 769       }
 770   }
 771 };
 772
 773 void read_po_file(Dictionary& dict_, std::istream& in)
 774 {
 775   POFileReader reader(in, dict_);
 776 }
 777
 778 } // namespace TinyGetText
 779
 780 /* EOF */