4 // Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 #include <sys/types.h>
32 #include <SDL_stdinc.h>
35 #include "tinygettext.hpp"
37 #include "physfs/physfs_stream.hpp"
38 #include "findlocale.hpp"
40 //#define TRANSLATION_DEBUG
42 namespace TinyGetText {
44 /** Convert \a text, which is in \a from_charset, to \a to_charset and return it */
// NOTE(review): several short lines of this function (braces, preprocessor
// guards, returns and buffer cleanup) are not visible in this view; the
// comments below describe only the statements that are.
45 std::string convert(const std::string& text,
46 const std::string& from_charset,
47 const std::string& to_charset)
// Identical source and target charset: presumably the text is returned
// unchanged (the guarded statement is not visible -- confirm).
50 if (from_charset == to_charset)
// SDL_iconv_string path: copy the input into a mutable, NUL-terminated buffer.
// The byte count handed to SDL_iconv_string is length() + 1, i.e. it includes
// the terminator.
53 char *in = new char[text.length() + 1];
54 strcpy(in, text.c_str());
55 char *out = SDL_iconv_string(to_charset.c_str(), from_charset.c_str(), in, text.length() + 1);
// Reported when the conversion did not succeed (the guarding test is not visible).
59 log_warning << "Error: conversion from " << from_charset << " to " << to_charset << " failed" << std::endl;
// GP2X variant: no conversion is performed, only this warning is logged.
66 log_warning << "FIXME: Char conversion not supported on GP2X!" << std::endl;
// Descriptor-based path: open a conversion descriptor for the charset pair.
70 iconv_t cd = SDL_iconv_open(to_charset.c_str(), from_charset.c_str());
72 size_t in_len = text.length();
73 size_t out_len = text.length()*3; // FIXME: cross fingers that this is enough
// Output buffer of out_len bytes, plus a mutable NUL-terminated copy of the input.
75 char* out_orig = new char[out_len];
76 char* in_orig = new char[in_len+1];
77 strcpy(in_orig, text.c_str());
// SDL_iconv receives the addresses of its cursor pointers and counters, so
// separate cursors are used and out_orig/in_orig keep pointing at the buffer
// starts.
80 ICONV_CONST char* in = in_orig;
81 size_t out_len_temp = out_len; // iconv is counting down the bytes it has
82 // written from this...
84 size_t retval = SDL_iconv(cd, &in, &in_len, &out, &out_len_temp);
// Bytes actually produced = initial capacity - remaining capacity.
85 out_len -= out_len_temp; // see above
// (size_t) -1 is the value tested for to detect a failed conversion.
86 if (retval == (size_t) -1)
88 log_warning << strerror(errno) << std::endl;
89 log_warning << "Error: conversion from " << from_charset << " to " << to_charset << " went wrong: " << retval << std::endl;
// Build the result from exactly out_len converted bytes; the buffer is not
// NUL-terminated by the visible code, hence the explicit length.
94 std::string ret(out_orig, out_len);
/**
 * Tell whether \a lhs ends with \a rhs.
 *
 * @param lhs  string to inspect
 * @param rhs  candidate suffix (an empty suffix always matches)
 * @return true when the last rhs.length() characters of \a lhs equal \a rhs;
 *         false otherwise, including when \a lhs is shorter than \a rhs
 */
bool has_suffix(const std::string& lhs, const std::string rhs)
{
  const std::string::size_type suffix_len = rhs.length();

  // Guard first: the offset below would underflow if lhs were shorter.
  if (lhs.length() < suffix_len)
    return false;

  return lhs.compare(lhs.length() - suffix_len, suffix_len, rhs) == 0;
}
/**
 * Tell whether \a lhs starts with \a rhs.
 *
 * @param lhs  string to inspect
 * @param rhs  candidate prefix (an empty prefix always matches)
 * @return true when the first rhs.length() characters of \a lhs equal \a rhs;
 *         false otherwise, including when \a lhs is shorter than \a rhs
 */
bool has_prefix(const std::string& lhs, const std::string rhs)
{
  const std::string::size_type prefix_len = rhs.length();

  // A string shorter than the prefix can never start with it.
  if (lhs.length() < prefix_len)
    return false;

  return lhs.compare(0, prefix_len, rhs) == 0;
}
/** Single plural form: every count selects form 0. */
int plural1(int)
{
  return 0;
}

/** Two forms: form 0 for exactly one, form 1 for everything else. */
int plural2_1(int n)
{
  if (n == 1)
    return 0;
  return 1;
}

/** Two forms: form 1 only for counts greater than one (zero and one share form 0). */
int plural2_2(int n)
{
  if (n > 1)
    return 1;
  return 0;
}

/** Latvian rule: 0 for counts ending in 1 (but not 11), 1 for other non-zero counts, 2 for zero. */
int plural3_lv(int n)
{
  if (n % 10 == 1 && n % 100 != 11)
    return 0;
  if (n != 0)
    return 1;
  return 2;
}

/** Irish rule: 0 for one, 1 for two, 2 otherwise. */
int plural3_ga(int n)
{
  if (n == 1)
    return 0;
  if (n == 2)
    return 1;
  return 2;
}

/** Lithuanian rule: 0 for counts ending in 1 (not 11), 1 for endings >= 2 outside 10..19, 2 otherwise. */
int plural3_lt(int n)
{
  const int last_digit = n % 10;
  const int last_two = n % 100;

  if (last_digit == 1 && last_two != 11)
    return 0;
  if (last_digit >= 2 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Shared three-form rule: 0 for counts ending in 1 (not 11), 1 for endings 2..4 outside 10..19, 2 otherwise. */
int plural3_1(int n)
{
  const int last_digit = n % 10;
  const int last_two = n % 100;

  if (last_digit == 1 && last_two != 11)
    return 0;
  if (last_digit >= 2 && last_digit <= 4 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Slovak rule: 0 for one, 1 for two to four, 2 otherwise. */
int plural3_sk(int n)
{
  if (n == 1)
    return 0;
  if (n >= 2 && n <= 4)
    return 1;
  return 2;
}

/** Polish rule: 0 for exactly one, 1 for endings 2..4 outside 10..19, 2 otherwise. */
int plural3_pl(int n)
{
  if (n == 1)
    return 0;

  const int last_digit = n % 10;
  const int last_two = n % 100;
  if (last_digit >= 2 && last_digit <= 4 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Slovenian rule, decided on n % 100: 1 -> 0, 2 -> 1, 3 or 4 -> 2, anything else -> 3 (four forms). */
int plural3_sl(int n)
{
  const int last_two = n % 100;

  if (last_two == 1)
    return 0;
  if (last_two == 2)
    return 1;
  if (last_two == 3 || last_two == 4)
    return 2;
  return 3;
}
128 /** Language Definitions */
130 LanguageDef lang_hu("hu", "Hungarian", 1, plural1); // "nplurals=1; plural=0;"
131 LanguageDef lang_ja("ja", "Japanese", 1, plural1); // "nplurals=1; plural=0;"
132 LanguageDef lang_ko("ko", "Korean", 1, plural1); // "nplurals=1; plural=0;"
133 LanguageDef lang_tr("tr", "Turkish", 1, plural1); // "nplurals=1; plural=0;"
134 LanguageDef lang_da("da", "Danish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
135 LanguageDef lang_nl("nl", "Dutch", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
136 LanguageDef lang_en("en", "English", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
137 LanguageDef lang_fo("fo", "Faroese", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
138 LanguageDef lang_de("de", "German", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
139 LanguageDef lang_nb("nb", "Norwegian Bokmal", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
140 LanguageDef lang_no("no", "Norwegian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
141 LanguageDef lang_nn("nn", "Norwegian Nynorsk", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
142 LanguageDef lang_sv("sv", "Swedish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
143 LanguageDef lang_et("et", "Estonian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
144 LanguageDef lang_fi("fi", "Finnish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
145 LanguageDef lang_el("el", "Greek", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
146 LanguageDef lang_he("he", "Hebrew", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
147 LanguageDef lang_it("it", "Italian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
148 LanguageDef lang_pt("pt", "Portuguese", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
149 LanguageDef lang_es("es", "Spanish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
150 LanguageDef lang_eo("eo", "Esperanto", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
151 LanguageDef lang_fr("fr", "French", 2, plural2_2); // "nplurals=2; plural=(n > 1);"
152 LanguageDef lang_pt_BR("pt_BR", "Brazilian", 2, plural2_2); // "nplurals=2; plural=(n > 1);"
153 LanguageDef lang_lv("lv", "Latvian", 3, plural3_lv); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2);"
154 LanguageDef lang_ga("ga", "Irish", 3, plural3_ga); // "nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;"
155 LanguageDef lang_lt("lt", "Lithuanian", 3, plural3_lt); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2);"
156 LanguageDef lang_hr("hr", "Croatian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
157 LanguageDef lang_cs("cs", "Czech", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
158 LanguageDef lang_ru("ru", "Russian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
159 LanguageDef lang_uk("uk", "Ukrainian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
160 LanguageDef lang_sk("sk", "Slovak", 3, plural3_sk); // "nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;"
161 LanguageDef lang_pl("pl", "Polish", 3, plural3_pl); // "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);
162 LanguageDef lang_sl("sl", "Slovenian", 3, plural3_sl); // "nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3);"
// Look up the language definition for a language code by exact,
// case-sensitive string comparison against the codes listed below.
// NOTE(review): the return-type line and the fallback taken when no code
// matches are not visible here -- confirm what unknown names map to.
166 get_language_def(const std::string& name)
168 if (name == "hu") return lang_hu;
169 else if (name == "ja") return lang_ja;
170 else if (name == "ko") return lang_ko;
171 else if (name == "tr") return lang_tr;
172 else if (name == "da") return lang_da;
173 else if (name == "nl") return lang_nl;
174 else if (name == "en") return lang_en;
175 else if (name == "fo") return lang_fo;
176 else if (name == "de") return lang_de;
177 else if (name == "nb") return lang_nb;
178 else if (name == "no") return lang_no;
179 else if (name == "nn") return lang_nn;
180 else if (name == "sv") return lang_sv;
181 else if (name == "et") return lang_et;
182 else if (name == "fi") return lang_fi;
183 else if (name == "el") return lang_el;
184 else if (name == "he") return lang_he;
185 else if (name == "it") return lang_it;
186 else if (name == "pt") return lang_pt;
187 else if (name == "es") return lang_es;
188 else if (name == "eo") return lang_eo;
189 else if (name == "fr") return lang_fr;
// The only entry keyed on a code with a country part.
190 else if (name == "pt_BR") return lang_pt_BR;
191 else if (name == "lv") return lang_lv;
192 else if (name == "ga") return lang_ga;
193 else if (name == "lt") return lang_lt;
194 else if (name == "hr") return lang_hr;
195 else if (name == "cs") return lang_cs;
196 else if (name == "ru") return lang_ru;
197 else if (name == "uk") return lang_uk;
198 else if (name == "sk") return lang_sk;
199 else if (name == "pl") return lang_pl;
200 else if (name == "sl") return lang_sl;
// Construct the manager: start on the empty dictionary, load locale aliases,
// then choose the initial language.
// NOTE(review): the conditions that select between the branches below are not
// visible here.
204 DictionaryManager::DictionaryManager()
205 : current_dict(&empty_dict)
207 parseLocaleAliases();
208 // Environment variable SUPERTUX_LANG overrides language settings.
209 const char* lang = getenv( "SUPERTUX_LANG" );
// Used when the environment override applies (presumably when lang is
// non-null -- the guard is not visible; confirm).
211 set_language( lang );
214 // use findlocale to setup language
216 FL_FindLocale( &locale, FL_MESSAGES );
// With a country available the spec is "<lang>_<country>"; the other call
// passes the bare language.
218 if (locale->country) {
219 set_language( std::string(locale->lang)+"_"+std::string(locale->country) );
221 set_language( std::string(locale->lang) );
// Hand the locale obtained from FL_FindLocale back to findlocale.
224 FL_FreeLocale( &locale );
// Read "<alias> <language>" pairs from the system locale.alias file and
// register each one through set_language_alias().
// NOTE(review): the statements that read characters and build `alias` are not
// visible here; the comments describe the visible loop structure only.
228 DictionaryManager::parseLocaleAliases()
230 // try to parse language alias list
231 std::ifstream in("/usr/share/locale/locale.alias");
// One pass per entry while the stream is still usable.
234 while(in.good() && !in.eof()) {
// Skip whitespace in front of the entry; the unsigned char cast keeps the
// isspace() argument in its valid range.
235 while(isspace(static_cast<unsigned char>(c)) && !in.eof())
238 if(c == '#') { // skip comments
// Consume the rest of the comment line.
239 while(c != '\n' && !in.eof())
// First word of the entry, up to the next whitespace (presumably collected
// into `alias` -- confirm).
245 while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
// Whitespace separating the two words.
249 while(isspace(static_cast<unsigned char>(c)) && !in.eof())
251 std::string language;
// Second word of the entry, up to the next whitespace.
252 while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
259 set_language_alias(alias, language);
// Return the dictionary for the language described by `spec`, loading it
// from the .po files found on the search path the first time it is requested.
// NOTE(review): braces, the cache-hit return and the final return are not
// visible here.
264 DictionaryManager::get_dictionary(const std::string& spec)
267 //log_debug << "Dictionary for language \"" << spec << "\" requested" << std::endl;
// Normalise the spec into the key used for the cache.
269 std::string lang = get_language_from_spec(spec);
271 //log_debug << "...normalized as \"" << lang << "\"" << std::endl;
// NOTE(review): lang was already normalised above, so this second
// get_language_from_spec() call looks redundant -- confirm.
273 Dictionaries::iterator i = dictionaries.find(get_language_from_spec(lang));
274 if (i != dictionaries.end())
278 else // Dictionary for languages lang isn't loaded, so we load it
280 //log_debug << "get_dictionary: " << lang << std::endl;
// operator[] creates the cache entry, which is then filled in place.
281 Dictionary& dict = dictionaries[lang];
// NOTE(review): get_language_def() compares against codes such as "de"; if
// lang carries a country suffix at this point (get_language_from_spec appears
// to append one) the lookup will not match -- verify.
283 dict.set_language(get_language_def(lang));
285 dict.set_charset(charset);
// Scan every registered directory for matching .po files.
287 for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
289 char** files = PHYSFS_enumerateFiles(p->c_str());
292 log_warning << "Error: enumerateFiles() failed on " << *p << std::endl;
// The list is walked until its terminating null entry.
296 for(const char* const* filename = files;
297 *filename != 0; filename++) {
299 // check if filename matches requested language
300 std::string fname = std::string(*filename);
// In the visible code load_from_file only serves as a "matched" flag (tested
// for non-empty below); the path actually opened is built from *filename.
301 std::string load_from_file = "";
// Exact match: "<lang>.po".
302 if(fname == lang + ".po") {
303 load_from_file = fname;
// Otherwise try the part of lang in front of the first '_'.
305 std::string::size_type s = lang.find("_");
306 if(s != std::string::npos) {
307 std::string lang_short = std::string(lang, 0, s);
308 if (fname == lang_short + ".po") {
309 load_from_file = lang_short;
314 // if it matched, load dictionary
315 if (load_from_file != "") {
316 //log_debug << "Loading dictionary for language \"" << lang << "\" from \"" << filename << "\"" << std::endl;
317 std::string pofile = *p + "/" + *filename;
// Parse the file into dict; exceptions derived from std::exception are logged
// by the handler below.
319 IFileStream in(pofile);
320 read_po_file(dict, in);
321 } catch(std::exception& e) {
322 log_warning << "Error: Failure file opening: " << pofile << std::endl;
323 log_warning << e.what() << "" << std::endl;
// Release the list obtained from PHYSFS_enumerateFiles.
328 PHYSFS_freeList(files);
336 std::set<std::string>
337 DictionaryManager::get_languages()
339 std::set<std::string> languages;
341 for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
343 char** files = PHYSFS_enumerateFiles(p->c_str());
346 log_warning << "Error: opendir() failed on " << *p << std::endl;
350 for(const char* const* file = files; *file != 0; file++) {
351 if(has_suffix(*file, ".po")) {
352 std::string filename = *file;
353 languages.insert(filename.substr(0, filename.length()-3));
356 PHYSFS_freeList(files);
// Make `lang` the active language: store its normalised form and point
// current_dict at the dictionary returned for it by get_dictionary().
363 DictionaryManager::set_language(const std::string& lang)
365 //log_debug << "set_language \"" << lang << "\"" << std::endl;
366 language = get_language_from_spec(lang);
367 //log_debug << "==> \"" << language << "\"" << std::endl;
368 current_dict = & (get_dictionary(language));
// Const accessor for the active language.
// NOTE(review): body not visible here; presumably returns the `language`
// member stored by set_language() -- confirm.
372 DictionaryManager::get_language() const
// Change the charset requested for dictionaries. Cached dictionaries are
// dropped first, then the active language is set again so that current_dict
// refers to a dictionary obtained after the change.
378 DictionaryManager::set_charset(const std::string& charset)
380 dictionaries.clear(); // changing charset invalidates cache
// this-> is needed because the parameter shadows the member.
381 this->charset = charset;
382 set_language(language);
// Register `alias` as another name for `language`. insert() is used, so an
// alias that is already present keeps its existing mapping (assuming Aliases
// is a unique-key map -- confirm against its typedef).
386 DictionaryManager::set_language_alias(const std::string& alias,
387 const std::string& language)
389 language_aliases.insert(std::make_pair(alias, language));
// Normalise a language spec into the form used as dictionary key.
// NOTE(review): some statements (the alias substitution, a second search and
// the return) are not visible here; hedged notes mark what is assumed.
393 DictionaryManager::get_language_from_spec(const std::string& spec)
395 std::string lang = spec;
// Alias lookup (presumably lang is replaced by the alias target on a hit --
// confirm).
396 Aliases::iterator i = language_aliases.find(lang);
397 if(i != language_aliases.end()) {
// Cut the spec at the first '.', dropping whatever follows it.
401 std::string::size_type s = lang.find(".");
402 if(s != std::string::npos) {
403 lang = std::string(lang, 0, s);
// Reached when a preceding search (not visible; presumably for "_") found
// nothing: append "_" plus the upper-cased language, so "xx" becomes "xx_XX".
407 if(s == std::string::npos) {
408 std::string lang_big = lang;
409 std::transform (lang_big.begin(), lang_big.end(), lang_big.begin(), toupper);
410 lang += "_" + lang_big;
// Append `pathname` to the search path. Cached dictionaries are dropped and
// the active language is set again, so the current dictionary is obtained
// anew with the extended search path.
418 DictionaryManager::add_directory(const std::string& pathname)
420 dictionaries.clear(); // adding directories invalidates cache
421 search_path.push_back(pathname);
422 set_language(language);
425 //---------------------------------------------------------------------------
// Construct a dictionary for the given language definition and charset; both
// are copied into the corresponding members.
427 Dictionary::Dictionary(const LanguageDef& language_, const std::string& charset_)
428 : language(language_), charset(charset_)
// Default constructor.
// NOTE(review): initialiser list and body are not visible here -- confirm
// which language/charset a default-constructed dictionary starts with.
432 Dictionary::Dictionary()
// Const accessor for the dictionary's charset. POFileReader::parse_header
// treats an empty result as "no charset requested".
// NOTE(review): body not visible here.
438 Dictionary::get_charset() const
// Setter for the dictionary's charset.
// NOTE(review): body not visible here; presumably assigns charset_ to the
// `charset` member -- confirm.
444 Dictionary::set_charset(const std::string& charset_)
// Setter for the dictionary's language definition.
// NOTE(review): body not visible here; presumably assigns lang to the
// `language` member -- confirm.
450 Dictionary::set_language(const LanguageDef& lang)
456 Dictionary::translate(const std::string& msgid, const std::string& msgid2, int num)
458 PluralEntries::iterator i = plural_entries.find(msgid);
459 std::map<int, std::string>& msgstrs = i->second;
461 if (i != plural_entries.end() && !msgstrs.empty())
463 int g = language.plural(num);
464 std::map<int, std::string>::iterator j = msgstrs.find(g);
465 if (j != msgstrs.end())
471 // Return the first translation, in case we can't translate the specific number
472 return msgstrs.begin()->second;
477 #ifdef TRANSLATION_DEBUG
478 log_warning << "Couldn't translate: " << msgid << std::endl;
479 log_warning << "Candidates: " << std::endl;
480 for (PluralEntries::iterator i = plural_entries.begin(); i != plural_entries.end(); ++i)
481 log_debug << "'" << i->first << "'" << std::endl;
484 if (plural2_1(num)) // default to english rules
// Translate a C string. On a hit with a non-empty translation the result is
// the c_str() of the stored entry, i.e. a pointer into the dictionary's own
// storage rather than a copy.
// NOTE(review): the fallback for a missing or empty entry is not visible
// here; presumably msgid itself is returned -- confirm.
492 Dictionary::translate(const char* msgid)
494 Entries::iterator i = entries.find(msgid);
// An empty stored translation counts as "not translated".
495 if (i != entries.end() && !i->second.empty())
497 return i->second.c_str();
// Diagnostics compiled in only when TRANSLATION_DEBUG is defined.
501 #ifdef TRANSLATION_DEBUG
502 log_warning << "Couldn't translate: " << msgid << std::endl;
// Translate a std::string: looks msgid up in `entries` and accepts only a
// non-empty stored translation.
// NOTE(review): both return statements are not visible here; presumably the
// stored translation on a hit and msgid otherwise -- confirm.
509 Dictionary::translate(const std::string& msgid)
511 Entries::iterator i = entries.find(msgid);
512 if (i != entries.end() && !i->second.empty())
// Diagnostics compiled in only when TRANSLATION_DEBUG is defined.
518 #ifdef TRANSLATION_DEBUG
519 log_warning << "Couldn't translate: " << msgid << std::endl;
// Store the plural translations for msgid, replacing any set stored earlier
// under the same key. The second parameter (the plural source string) is
// unnamed and ignored. msgstrs maps a plural-form index to its translation.
526 Dictionary::add_translation(const std::string& msgid, const std::string& ,
527 const std::map<int, std::string>& msgstrs)
529 // Do we need msgid2 for anything? its after all supplied to the
530 // translate call, so we just throw it away
531 plural_entries[msgid] = msgstrs;
// Store the translation msgstr for msgid, overwriting an earlier entry with
// the same msgid.
535 Dictionary::add_translation(const std::string& msgid, const std::string& msgstr)
537 entries[msgid] = msgstr;
// Charset the .po file declares in its header (filled in by parse_header).
546 std::string from_charset;
// Charset translations are converted to (chosen in parse_header).
547 std::string to_charset;
// Current input character; an int so that it can be compared against EOF.
550 int c; //TODO: char c? unsigned char c?
// Kinds of token the tokenizer distinguishes.
552 TOKEN_KEYWORD, //msgstr, msgid, etc.
553 TOKEN_CONTENT, //string literals, concatenated ("" "foo\n" "bar\n" -> "foo\nbar\n")
554 TOKEN_EOF //ran out of tokens
557 std::string tokenContent; //current contents of the keyword or string literal(s)
// Bind the reader to its input stream and to the dictionary it fills.
// NOTE(review): the remaining constructor statements are not visible here.
560 POFileReader(std::istream& in_, Dictionary& dict_)
561 : in(in_), dict(dict_)
// 0xef is the first byte of the UTF-8 byte-order mark (EF BB BF).
565 if(c == 0xef) { // skip UTF-8 intro that some text editors produce
// Extract the source charset from the .po header text and decide which
// charset translations are converted to.
573 void parse_header(const std::string& header)
575 // Separate the header in lines
576 typedef std::vector<std::string> Lines;
// start marks the beginning of the line currently being collected
// (NOTE(review): the statement advancing it is not visible here).
579 std::string::size_type start = 0;
580 for(std::string::size_type i = 0; i < header.length(); ++i)
582 if (header[i] == '\n')
// The newline itself is not part of the stored line.
584 lines.push_back(header.substr(start, i - start));
// Find the Content-Type line; everything after "charset=" is the charset.
589 for(Lines::iterator i = lines.begin(); i != lines.end(); ++i)
591 if (has_prefix(*i, "Content-Type: text/plain; charset=")) {
592 from_charset = i->substr(strlen("Content-Type: text/plain; charset="));
// "CHARSET" is handled like a missing charset: warn and fall back to
// ISO-8859-1.
596 if (from_charset.empty() || from_charset == "CHARSET")
598 log_warning << "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl;
599 from_charset = "ISO-8859-1";
602 to_charset = dict.get_charset();
603 if (to_charset.empty())
604 { // No charset requested from the dict, use utf-8
605 to_charset = "utf-8";
// NOTE(review): the dictionary is given from_charset here although
// translations are converted to to_charset ("utf-8") -- confirm that this is
// intended.
606 dict.set_charset(from_charset);
// Advance to the next input character.
// NOTE(review): body not visible here; presumably updates `c` from the stream
// and maintains line_num -- confirm.
610 inline void nextChar()
// Skip whitespace and '#' comments in front of the next token.
// NOTE(review): parts of the body are not visible here.
617 inline void skipSpace()
// Keep going while the current character starts a comment or is whitespace.
622 while(c == '#' || isspace(static_cast<unsigned char>(c))) {
// Skip up to the end of the line or the end of the input.
624 while(c != '\n' && c != EOF) nextChar();
// Check that the current token has the kind `wanted`. On a mismatch a warning
// is logged that names what was expected (`type`), what was found and the
// current line number.
// NOTE(review): the return statements are not visible here; the callers'
// `if(!expectToken(...)) break;` implies false on mismatch.
630 inline bool expectToken(std::string type, Token wanted) {
631 if(token != wanted) {
632 log_warning << "Expected " << type << ", got ";
633 if(token == TOKEN_EOF)
634 log_warning << "EOF";
635 else if(token == TOKEN_KEYWORD)
636 log_warning << "keyword '" << tokenContent << "'";
// Neither EOF nor keyword: report the token as a string.
638 log_warning << "string \"" << tokenContent << '"';
640 log_warning << " at line " << line_num << std::endl;
// Check that the text of the current token equals `wanted`. On a mismatch a
// warning is logged that names what was expected (`type`), what was found and
// the current line number.
// NOTE(review): the return statements are not visible here; the callers'
// `if(!expectContent(...)) break;` implies false on mismatch.
646 inline bool expectContent(std::string type, std::string wanted) {
647 if(tokenContent != wanted) {
648 log_warning << "Expected " << type << ", got ";
649 if(token == TOKEN_EOF)
650 log_warning << "EOF";
651 else if(token == TOKEN_KEYWORD)
652 log_warning << "keyword '" << tokenContent << "'";
// Neither EOF nor keyword: report the token as a string.
654 log_warning << "string \"" << tokenContent << '"';
656 log_warning << " at line " << line_num << std::endl;
// Main parse loop: each pass consumes one entry of the form
//   msgid "..." msgstr "..."
// or
//   msgid "..." msgid_plural "..." msgstr[N] "..." ...
// and stores it in dict. Any unexpected token ends the loop.
// NOTE(review): the enclosing signature and the nextToken() calls between the
// checks are not visible here.
664 while((token = nextToken()) != TOKEN_EOF)
666 if(!expectToken("'msgid' keyword", TOKEN_KEYWORD) || !expectContent("'msgid' keyword", "msgid")) break;
669 if(!expectToken("name after msgid", TOKEN_CONTENT)) break;
670 std::string current_msgid = tokenContent;
673 if(!expectToken("msgstr or msgid_plural", TOKEN_KEYWORD)) break;
// Plural entry.
674 if(tokenContent == "msgid_plural")
678 if(!expectToken("msgid_plural content", TOKEN_CONTENT)) break;
679 std::string current_msgid_plural = tokenContent;
// Collect every following "msgstr[N]" keyword together with its string.
681 std::map<int, std::string> msgstr_plural;
682 while((token = nextToken()) == TOKEN_KEYWORD && has_prefix(tokenContent, "msgstr["))
// N is parsed out of the keyword text.
685 if (sscanf(tokenContent.c_str(), "msgstr[%d]", &num) != 1)
687 log_warning << "Error: Couldn't parse: " << tokenContent << std::endl;
691 if(!expectToken("msgstr[x] content", TOKEN_CONTENT)) break;
// Translations are converted from the file's charset to the target charset
// before being stored.
692 msgstr_plural[num] = convert(tokenContent, from_charset, to_charset);
694 dict.add_translation(current_msgid, current_msgid_plural, msgstr_plural);
698 // "Ordinary" translation
699 if(!expectContent("'msgstr' keyword", "msgstr")) break;
702 if(!expectToken("translation in msgstr", TOKEN_CONTENT)) break;
704 if (current_msgid == "")
705 { // .po Header is hidden in the msgid with the empty string
706 parse_header(tokenContent);
710 dict.add_translation(current_msgid, convert(tokenContent, from_charset, to_charset));
// Tokenizer body: produces either a keyword (a run of non-whitespace
// characters) or a string literal with its backslash escapes decoded.
// NOTE(review): the enclosing signature and many statements (character
// reads, appends to tokenContent, branch heads) are not visible here.
721 //Clear token contents
// Keyword: read until whitespace or the end of the input.
732 } while(c != EOF && !isspace(static_cast<unsigned char>(c)));
733 return TOKEN_KEYWORD;
// String literal: read until the closing quote or the end of the input.
740 while(c != EOF && c != '"') {
// Map the character after a backslash to the character it stands for.
743 if (c == 'n') c = '\n';
744 else if (c == 't') c = '\t';
745 else if (c == 'r') c = '\r';
746 else if (c == '"') c = '"';
747 else if (c == '\\') c = '\\';
// Any other escape is reported.
750 log_warning << "Unhandled escape character: " << char(c) << std::endl;
// Input ended before the closing quote; the text read so far is still
// returned as content.
758 log_warning << "Unclosed string literal: " << tokenContent << std::endl;
759 return TOKEN_CONTENT;
762 // Read more strings?
765 return TOKEN_CONTENT;
// Entry point for loading a .po file: builds a POFileReader on the stream
// `in` with dict_ as the dictionary to fill.
// NOTE(review): no further call on `reader` is visible, so the parsing is
// presumably started from the POFileReader constructor -- confirm.
770 void read_po_file(Dictionary& dict_, std::istream& in)
772 POFileReader reader(in, dict_);
775 } // namespace TinyGetText