3 // TinyGetText - A small flexible gettext() replacement
4 // Copyright (C) 2004 Ingo Ruhnke <grumbel@gmx.de>
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 #include <sys/types.h>
28 #include "tinygettext.hpp"
29 #include "physfs/physfs_stream.hpp"
31 //#define TRANSLATION_DEBUG
33 namespace TinyGetText {
35 /** Convert \a text, which is encoded in \a from_charset, to \a to_charset and return it */
36 std::string convert(const std::string& text,
37 const std::string& from_charset,
38 const std::string& to_charset)
// Identical charset names need no conversion. The statement taken in that
// case is not visible in this view -- presumably the input is returned
// unchanged; TODO confirm.
40 if (from_charset == to_charset)
// NOTE(review): no check of `cd` against (iconv_t)-1 is visible here, so an
// unknown charset name would reach iconv() with an invalid descriptor.
43 iconv_t cd = iconv_open(to_charset.c_str(), from_charset.c_str());
// The output buffer is sized at three times the input length.
45 size_t in_len = text.length();
46 size_t out_len = text.length()*3; // FIXME: cross fingers that this is enough
// NOTE(review): both buffers come from new[]; no matching delete[] and no
// iconv_close() are visible in this view -- confirm they are released on every
// path, including the error path below.
48 char* out_orig = new char[out_len];
49 char* in_orig = new char[in_len+1];
// iconv() advances the input pointer, so it is handed a private copy of text.
50 strcpy(in_orig, text.c_str());
// `out`, passed to iconv() below, is declared on a line not visible here.
53 ICONV_CONST char* in = in_orig;
54 size_t out_len_temp = out_len; // iconv is counting down the bytes it has
55 // written from this...
57 size_t retval = iconv(cd, &in, &in_len, &out, &out_len_temp);
// After this subtraction out_len holds the number of bytes actually produced.
58 out_len -= out_len_temp; // see above
// (size_t)-1 is iconv()'s failure value; the reason is reported via errno.
59 if (retval == (size_t) -1)
61 std::cerr << strerror(errno) << std::endl;
62 std::cerr << "Error: conversion from " << from_charset
63 << " to " << to_charset << " went wrong: " << retval << std::endl;
// Build the result from exactly the bytes written; the buffer is not
// NUL-terminated, hence the explicit length.
68 std::string ret(out_orig, out_len);
/** Return true if \a lhs ends with \a rhs.
 *
 *  An empty \a rhs is a suffix of every string, including the empty one.
 */
bool has_suffix(const std::string& lhs, const std::string rhs)
{
  // A string cannot end with something longer than itself. Checking this
  // first also keeps the start offset computed below from wrapping around.
  if (lhs.length() < rhs.length())
    return false;

  return lhs.compare(lhs.length() - rhs.length(), rhs.length(), rhs) == 0;
}
/** Return true if \a lhs starts with \a rhs.
 *
 *  An empty \a rhs is a prefix of every string, including the empty one.
 */
bool has_prefix(const std::string& lhs, const std::string rhs)
{
  // A string cannot start with something longer than itself.
  if (lhs.length() < rhs.length())
    return false;

  return lhs.compare(0, rhs.length(), rhs) == 0;
}
// Plural-form selectors. Each one maps a count n to the index of the plural
// form to use for a family of languages, written as the same expression that
// appears in the "plural=" part of a gettext Plural-Forms header.

/** One form only: the index is always 0. */
int plural1(int ) { return 0; }
/** Two forms: index 0 for exactly one, index 1 for everything else (including 0). */
int plural2_1(int n) { return (n != 1); }
/** Two forms: index 0 for zero and one, index 1 above one. */
int plural2_2(int n) { return (n > 1); }
/** Three forms: 0 if n ends in 1 (but not 11), 2 for zero, 1 otherwise. */
int plural3_lv(int n) { return (n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2); }
/** Three forms: 0 for one, 1 for two, 2 otherwise. */
int plural3_ga(int n) { return n==1 ? 0 : n==2 ? 1 : 2; }
/** Three forms: 0 if n ends in 1 (but not 11), 1 if n ends in 2-9 outside 10-19, 2 otherwise. */
int plural3_lt(int n) { return (n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2); }
/** Three forms: 0 if n ends in 1 (but not 11), 1 if n ends in 2-4 outside 12-14, 2 otherwise. */
int plural3_1(int n) { return (n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }
/** Three forms: 0 for one, 1 for two to four, 2 otherwise. */
int plural3_sk(int n) { return (n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2; }
/** Three forms: 0 for exactly one, 1 if n ends in 2-4 outside 12-14, 2 otherwise. */
int plural3_pl(int n) { return (n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }
/** Four forms (indices 0..3), chosen from n%100: 1 -> 0, 2 -> 1, 3 or 4 -> 2, otherwise 3. */
int plural3_sl(int n) { return (n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3); }
101 /** Language Definitions */
103 LanguageDef lang_hu("hu", "Hungarian", 1, plural1); // "nplurals=1; plural=0;"
104 LanguageDef lang_ja("ja", "Japanese", 1, plural1); // "nplurals=1; plural=0;"
105 LanguageDef lang_ko("ko", "Korean", 1, plural1); // "nplurals=1; plural=0;"
106 LanguageDef lang_tr("tr", "Turkish", 1, plural1); // "nplurals=1; plural=0;"
107 LanguageDef lang_da("da", "Danish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
108 LanguageDef lang_nl("nl", "Dutch", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
109 LanguageDef lang_en("en", "English", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
110 LanguageDef lang_fo("fo", "Faroese", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
111 LanguageDef lang_de("de", "German", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
112 LanguageDef lang_nb("nb", "Norwegian Bokmal", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
113 LanguageDef lang_no("no", "Norwegian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
114 LanguageDef lang_nn("nn", "Norwegian Nynorsk", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
115 LanguageDef lang_sv("sv", "Swedish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
116 LanguageDef lang_et("et", "Estonian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
117 LanguageDef lang_fi("fi", "Finnish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
118 LanguageDef lang_el("el", "Greek", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
119 LanguageDef lang_he("he", "Hebrew", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
120 LanguageDef lang_it("it", "Italian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
121 LanguageDef lang_pt("pt", "Portuguese", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
122 LanguageDef lang_es("es", "Spanish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
123 LanguageDef lang_eo("eo", "Esperanto", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
124 LanguageDef lang_fr("fr", "French", 2, plural2_2); // "nplurals=2; plural=(n > 1);"
125 LanguageDef lang_pt_BR("pt_BR", "Brazilian", 2, plural2_2); // "nplurals=2; plural=(n > 1);"
126 LanguageDef lang_lv("lv", "Latvian", 3, plural3_lv); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2);"
127 LanguageDef lang_ga("ga", "Irish", 3, plural3_ga); // "nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;"
128 LanguageDef lang_lt("lt", "Lithuanian", 3, plural3_lt); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2);"
129 LanguageDef lang_hr("hr", "Croatian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
130 LanguageDef lang_cs("cs", "Czech", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
131 LanguageDef lang_ru("ru", "Russian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
132 LanguageDef lang_uk("uk", "Ukrainian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
133 LanguageDef lang_sk("sk", "Slovak", 3, plural3_sk); // "nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;"
134 LanguageDef lang_pl("pl", "Polish", 3, plural3_pl); // "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);
135 LanguageDef lang_sl("sl", "Slovenian", 3, plural3_sl); // "nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3);"
// Look up the LanguageDef object for a language code.
//
// \a name is compared for exact equality against each known code in turn, in
// the same order as the lang_* objects are defined, and the first match is
// returned. The return type and the result for a code that matches nothing
// are on lines not visible in this view.
139 get_language_def(const std::string& name)
141 if (name == "hu") return lang_hu;
142 else if (name == "ja") return lang_ja;
143 else if (name == "ko") return lang_ko;
144 else if (name == "tr") return lang_tr;
145 else if (name == "da") return lang_da;
146 else if (name == "nl") return lang_nl;
147 else if (name == "en") return lang_en;
148 else if (name == "fo") return lang_fo;
149 else if (name == "de") return lang_de;
150 else if (name == "nb") return lang_nb;
151 else if (name == "no") return lang_no;
152 else if (name == "nn") return lang_nn;
153 else if (name == "sv") return lang_sv;
154 else if (name == "et") return lang_et;
155 else if (name == "fi") return lang_fi;
156 else if (name == "el") return lang_el;
157 else if (name == "he") return lang_he;
158 else if (name == "it") return lang_it;
159 else if (name == "pt") return lang_pt;
160 else if (name == "es") return lang_es;
161 else if (name == "eo") return lang_eo;
162 else if (name == "fr") return lang_fr;
163 else if (name == "pt_BR") return lang_pt_BR;
164 else if (name == "lv") return lang_lv;
165 else if (name == "ga") return lang_ga;
166 else if (name == "lt") return lang_lt;
167 else if (name == "hr") return lang_hr;
168 else if (name == "cs") return lang_cs;
169 else if (name == "ru") return lang_ru;
170 else if (name == "uk") return lang_uk;
171 else if (name == "sk") return lang_sk;
172 else if (name == "pl") return lang_pl;
173 else if (name == "sl") return lang_sl;
// Construct the manager: the empty dictionary is current until a language is
// chosen, the system locale aliases are loaded, and the environment is then
// consulted for a language.
177 DictionaryManager::DictionaryManager()
178 : current_dict(&empty_dict)
180 parseLocaleAliases();
181 // setup language from environment vars
// The variables are read in the order LC_ALL, LC_MESSAGES, LANG. The
// conditions guarding the two fallbacks, and what is done with the result,
// are on lines not visible here -- presumably each variable is only consulted
// when the previous one was unset; TODO confirm.
182 const char* lang = getenv("LC_ALL");
184 lang = getenv("LC_MESSAGES");
186 lang = getenv("LANG");
// Read the system locale alias file and register each pair it contains via
// set_language_alias(). The variable `c`, the reads that advance it and the
// declaration of `alias` are on lines not visible in this view.
193 DictionaryManager::parseLocaleAliases()
195 // try to parse language alias list
196 std::ifstream in("/usr/share/locale/locale.alias");
// One pass of the outer loop handles one entry (or one comment).
199 while(in.good() && !in.eof()) {
// Skip whitespace in front of the entry.
200 while(isspace(c) && !in.eof())
203 if(c == '#') { // skip comments
// A comment extends to the end of the line.
204 while(c != '\n' && !in.eof())
// First whitespace-delimited word of the entry.
210 while(!isspace(c) && !in.eof()) {
// Whitespace separating the two words.
214 while(isspace(c) && !in.eof())
// Second whitespace-delimited word: the language the alias stands for.
216 std::string language;
217 while(!isspace(c) && !in.eof()) {
224 set_language_alias(alias, language);
// Return the dictionary for the language named by \a spec, loading it from
// the search path on first use.
229 DictionaryManager::get_dictionary(const std::string& spec)
231 std::string lang = get_language_from_spec(spec);
// NOTE(review): `lang` is already the result of get_language_from_spec(), so
// the lookup key below has that function applied twice, while the insertion
// further down uses `lang` directly -- confirm the two keys always agree.
232 Dictionaries::iterator i = dictionaries.find(get_language_from_spec(lang));
233 if (i != dictionaries.end())
237 else // Dictionary for languages lang isn't loaded, so we load it
239 //std::cout << "get_dictionary: " << lang << std::endl;
// operator[] creates the cache entry; it is then given its language rules and
// the manager's current charset before any file is read into it.
240 Dictionary& dict = dictionaries[lang];
242 dict.set_language(get_language_def(lang));
244 dict.set_charset(charset);
// Every directory on the search path is scanned for a file named "<lang>.po".
246 for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
248 char** files = PHYSFS_enumerateFiles(p->c_str());
251 std::cerr << "Error: enumerateFiles() failed on " << *p << std::endl;
// `files` is walked until its terminating null entry.
255 for(const char* const* filename = files;
256 *filename != 0; filename++) {
257 if(std::string(*filename) == lang + ".po") {
258 std::string pofile = *p + "/" + *filename;
260 IFileStream in(pofile);
261 read_po_file(dict, in);
// A failure to open or parse the file is reported and otherwise ignored.
262 } catch(std::exception& e) {
263 std::cerr << "Error: Failure file opening: " << pofile << std::endl;
264 std::cerr << e.what() << "\n";
// The list returned by PHYSFS_enumerateFiles() is released here.
268 PHYSFS_freeList(files);
// Collect the names of all languages for which a ".po" file exists in any
// directory on the search path. Each name is the file name with its ".po"
// suffix removed; the set removes duplicates across directories.
276 std::set<std::string>
277 DictionaryManager::get_languages()
279 std::set<std::string> languages;
281 for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
283 char** files = PHYSFS_enumerateFiles(p->c_str());
// NOTE(review): the message names opendir(), but the call that is made above
// is PHYSFS_enumerateFiles().
286 std::cerr << "Error: opendir() failed on " << *p << std::endl;
// `files` is walked until its terminating null entry.
290 for(const char* const* file = files; *file != 0; file++) {
291 if(has_suffix(*file, ".po")) {
292 std::string filename = *file;
// Drop the three characters of ".po".
293 languages.insert(filename.substr(0, filename.length()-3));
296 PHYSFS_freeList(files);
// Make \a lang the current language. The spec is first reduced with
// get_language_from_spec(); the matching dictionary is then fetched with
// get_dictionary() and becomes the current dictionary.
303 DictionaryManager::set_language(const std::string& lang)
305 language = get_language_from_spec(lang);
306 current_dict = & (get_dictionary(language));
310 DictionaryManager::get_language() const
// Change the charset used for dictionaries. All cached dictionaries are
// dropped, the new charset is stored, and the current language is selected
// again so that current_dict refers to a freshly fetched dictionary.
316 DictionaryManager::set_charset(const std::string& charset)
318 dictionaries.clear(); // changing charset invalidates cache
// The parameter shadows the member of the same name, hence this->.
319 this->charset = charset;
320 set_language(language);
// Register \a alias as another name for \a language. insert() is used, so an
// alias that is already present keeps its existing target.
324 DictionaryManager::set_language_alias(const std::string& alias,
325 const std::string& language)
327 language_aliases.insert(std::make_pair(alias, language));
// Reduce a locale specification to a language name.
331 DictionaryManager::get_language_from_spec(const std::string& spec)
333 std::string lang = spec;
// The spec is looked up in the alias table first. What is done on a hit is on
// lines not visible here -- presumably `lang` is replaced by the alias target;
// TODO confirm.
334 Aliases::iterator i = language_aliases.find(lang);
335 if(i != language_aliases.end()) {
// Everything from the first '_' or '.' onwards is cut off. The branch taken
// when neither character occurs is not visible in this view.
339 std::string::size_type s = lang.find_first_of("_.");
340 if(s == std::string::npos)
343 return std::string(lang, 0, s);
// Append \a pathname to the search path. All cached dictionaries are dropped
// and the current language is selected again so that the new directory is
// taken into account.
347 DictionaryManager::add_directory(const std::string& pathname)
349 dictionaries.clear(); // adding directories invalidates cache
350 search_path.push_back(pathname);
351 set_language(language);
354 //---------------------------------------------------------------------------
// Construct a dictionary with the given language rules and charset; both
// arguments are copied into the members of the same name.
356 Dictionary::Dictionary(const LanguageDef& language_, const std::string& charset_)
357 : language(language_), charset(charset_)
361 Dictionary::Dictionary()
367 Dictionary::get_charset() const
373 Dictionary::set_charset(const std::string& charset_)
379 Dictionary::set_language(const LanguageDef& lang)
// Translate a message that has a plural form. \a msgid is the lookup key and
// \a num selects the form through the language's plural function.
385 Dictionary::translate(const std::string& msgid, const std::string& msgid2, int num)
387 PluralEntries::iterator i = plural_entries.find(msgid);
// NOTE(review): i->second is bound here, before `i` is compared with end()
// on the next line. For an unknown msgid this dereferences the end iterator.
388 std::map<int, std::string>& msgstrs = i->second;
390 if (i != plural_entries.end() && !msgstrs.empty())
// g is the index of the plural form wanted for num.
392 int g = language.plural(num);
393 std::map<int, std::string>::iterator j = msgstrs.find(g);
394 if (j != msgstrs.end())
400 // Return the first translation, in case we can't translate the specific number
401 return msgstrs.begin()->second;
// Debug-only diagnostics. NOTE(review): the warning goes to std::cerr but the
// list of candidates goes to std::cout.
406 #ifdef TRANSLATION_DEBUG
407 std::cerr << "Warning: Couldn't translate: " << msgid << std::endl;
408 std::cerr << "Candidates: " << std::endl;
409 for (PluralEntries::iterator i = plural_entries.begin(); i != plural_entries.end(); ++i)
410 std::cout << "'" << i->first << "'" << std::endl;
// The two results chosen by this test are on lines not visible here --
// presumably msgid2 when it is true and msgid otherwise; TODO confirm.
413 if (plural2_1(num)) // default to english rules
// Translate \a msgid (C string overload). A stored entry is used only when it
// is non-empty. The pointer returned below points into the stored std::string.
421 Dictionary::translate(const char* msgid)
423 Entries::iterator i = entries.find(msgid);
424 if (i != entries.end() && !i->second.empty())
426 return i->second.c_str();
// NOTE(review): the macro tested here is spelled TRANSLATION_DBEUG, while the
// rest of the file uses TRANSLATION_DEBUG, so this diagnostic is not enabled
// by defining TRANSLATION_DEBUG.
430 #ifdef TRANSLATION_DBEUG
431 std::cout << "Error: Couldn't translate: " << msgid << std::endl;
// Translate \a msgid (std::string overload). A stored entry is used only when
// it is non-empty; the returns themselves are on lines not visible here.
438 Dictionary::translate(const std::string& msgid)
440 Entries::iterator i = entries.find(msgid);
441 if (i != entries.end() && !i->second.empty())
// NOTE(review): the macro tested here is spelled TRANSLATION_DBEUG, while the
// rest of the file uses TRANSLATION_DEBUG, so this diagnostic is not enabled
// by defining TRANSLATION_DEBUG.
447 #ifdef TRANSLATION_DBEUG
448 std::cout << "Error: Couldn't translate: " << msgid << std::endl;
// Store the plural translations of \a msgid. \a msgstrs maps a plural-form
// index to its text. The second parameter (the plural msgid) is unnamed and
// ignored. An existing entry for the same msgid is overwritten.
455 Dictionary::add_translation(const std::string& msgid, const std::string& ,
456 const std::map<int, std::string>& msgstrs)
458 // Do we need msgid2 for anything? its after all supplied to the
459 // translate call, so we just throw it away
460 plural_entries[msgid] = msgstrs;
// Store \a msgstr as the translation of \a msgid, overwriting any entry that
// already exists for it.
464 Dictionary::add_translation(const std::string& msgid, const std::string& msgstr)
466 entries[msgid] = msgstr;
// Parser state used by parse_header() and add_token() below. The enclosing
// class header is on lines not visible in this view.
//
// Charset the .po file is written in, and the charset its strings are
// converted to; both are assigned in parse_header().
480 std::string from_charset;
481 std::string to_charset;
// The entry being assembled: its msgid, its msgid_plural and the plural
// translations collected so far, keyed by plural-form index.
483 std::string current_msgid;
484 std::string current_msgid_plural;
485 std::map<int, std::string> msgstr_plural;
// Which keyword add_token() expects next.
489 enum { WANT_MSGID, WANT_MSGSTR, WANT_MSGSTR_PLURAL, WANT_MSGID_PLURAL } state;
// Construct a reader for the stream \a in that fills the dictionary \a dict_.
// Most of the body is on lines not visible in this view.
492 POFileReader(std::istream& in, Dictionary& dict_)
// 0xef is the first byte of the UTF-8 byte order mark.
498 if(c == (char) 0xef) { // skip UTF-8 intro that some texteditors produce
// Parse the .po header text and set from_charset and to_charset from it.
// \a header is the msgstr that belongs to the empty msgid.
507 void parse_header(const std::string& header)
509 // Separate the header in lines
510 typedef std::vector<std::string> Lines;
// `start` is the offset where the line being collected begins. Its update
// after each newline is on a line not visible here.
513 std::string::size_type start = 0;
514 for(std::string::size_type i = 0; i < header.length(); ++i)
516 if (header[i] == '\n')
518 lines.push_back(header.substr(start, i - start));
// The charset is whatever follows the fixed Content-Type prefix.
523 for(Lines::iterator i = lines.begin(); i != lines.end(); ++i)
525 if (has_prefix(*i, "Content-Type: text/plain; charset=")) {
526 from_charset = i->substr(strlen("Content-Type: text/plain; charset="));
// "CHARSET" is treated the same as a missing charset.
530 if (from_charset.empty() || from_charset == "CHARSET")
532 std::cerr << "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl;
533 from_charset = "ISO-8859-1";
536 to_charset = dict.get_charset();
537 if (to_charset.empty())
538 { // No charset requested from the dict, use utf-8
539 to_charset = "utf-8";
// NOTE(review): the strings are converted to "utf-8" in this branch, yet the
// dictionary is told its charset is from_charset -- confirm this is intended
// and should not be to_charset.
540 dict.set_charset(from_charset);
// Consume one keyword/content token from the tokenizer and advance the state
// machine held in `state`. Several case labels, braces and state changes are
// on lines not visible in this view.
544 void add_token(const Token& token)
// An entry starts with "msgid".
549 if (token.keyword == "msgid")
551 current_msgid = token.content;
552 state = WANT_MSGID_PLURAL;
// A token with an empty keyword is accepted silently here.
554 else if (token.keyword.empty())
556 //std::cerr << "Got EOF, everything looks ok." << std::endl;
560 std::cerr << "tinygettext: expected 'msgid' keyword, got " << token.keyword
561 << " at line " << line_num << std::endl;
// After a msgid, an optional "msgid_plural" switches to plural handling.
565 case WANT_MSGID_PLURAL:
566 if (token.keyword == "msgid_plural")
568 current_msgid_plural = token.content;
569 state = WANT_MSGSTR_PLURAL;
// Singular translation.
579 if (token.keyword == "msgstr")
581 if (current_msgid == "")
582 { // .po Header is hidden in the msgid with the empty string
583 parse_header(token.content);
// The text is converted to the target charset before it is stored.
587 dict.add_translation(current_msgid, convert(token.content, from_charset, to_charset));
593 std::cerr << "tinygettext: expected 'msgstr' keyword, got " << token.keyword
594 << " at line " << line_num << std::endl;
// Plural translations arrive as "msgstr[N]" tokens.
598 case WANT_MSGSTR_PLURAL:
599 if (has_prefix(token.keyword, "msgstr["))
// NOTE(review): no range check of the parsed index is visible here, so any
// integer inside the brackets becomes a key in msgstr_plural.
602 if (sscanf(token.keyword.c_str(), "msgstr[%d]", &num) != 1)
604 std::cerr << "Error: Couldn't parse: " << token.keyword << std::endl;
608 msgstr_plural[num] = convert(token.content, from_charset, to_charset);
// The collected plural forms are handed to the dictionary as one entry.
613 dict.add_translation(current_msgid, current_msgid_plural, msgstr_plural);
622 inline int getchar(std::istream& in)
// Split the .po stream into keyword/content tokens with a small
// character-driven state machine. The switch and its case labels, some State
// enumerators and the places where tokens are handed on are on lines not
// visible in this view.
630 void tokenize_po(std::istream& in)
632 enum State { READ_KEYWORD,
634 READ_CONTENT_IN_STRING,
637 State state = READ_KEYWORD;
// The stream is consumed one character at a time until EOF.
641 while((c = getchar(in)) != EOF)
643 //std::cout << "Lexing char: " << char(c) << " " << state << std::endl;
649 state = SKIP_COMMENT;
// A keyword runs up to the next whitespace character.
658 } while((c = getchar(in)) != EOF && !isspace(c));
661 state = READ_CONTENT;
// Between strings: look for the next string, skip whitespace, or notice that
// something that may be a keyword has begun.
666 while((c = getchar(in)) != EOF)
669 // Found start of content
670 state = READ_CONTENT_IN_STRING;
672 } else if (isspace(c)) {
674 } else { // Read something that may be a keyword
676 state = READ_KEYWORD;
// Inside a quoted string: the escapes n, t, r and " are translated below.
683 case READ_CONTENT_IN_STRING:
688 if (c == 'n') token.content += '\n';
689 else if (c == 't') token.content += '\t';
690 else if (c == 'r') token.content += '\r';
691 else if (c == '"') token.content += '"';
694 std::cout << "Unhandled escape character: " << char(c) << std::endl;
699 std::cout << "Unterminated string" << std::endl;
701 } else if (c == '"') { // Content string is terminated
702 state = READ_CONTENT;
710 state = READ_KEYWORD;
// Read the .po data available on \a in into \a dict_. All the work is done by
// a POFileReader constructed from the two arguments.
718 void read_po_file(Dictionary& dict_, std::istream& in)
720 POFileReader reader(in, dict_);
723 } // namespace TinyGetText