1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """This module stores information and functionality that relates to plurals."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
# Plural data per language code.  Each plural equation is a C-style expression
# over ``n`` (the form used in gettext Plural-Forms headers) and is stored
# WITHOUT a trailing semicolon, so that code composing a header can append its
# own terminator.
languages = {
    'af': (u'Afrikaans', 2, '(n != 1)'),
    'ak': (u'Akan', 2, 'n > 1'),
    'am': (u'Amharic', 2, 'n > 1'),
    'an': (u'Aragonese', 2, '(n != 1)'),
    'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'),
    'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'),
    'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, '(n != 1)'),
    'az': (u'Azerbaijani', 2, '(n != 1)'),
    'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': (u'Bulgarian', 2, '(n != 1)'),
    'bn': (u'Bengali', 2, '(n != 1)'),
    'bn_IN': (u'Bengali (India)', 2, '(n != 1)'),
    'bo': (u'Tibetan', 1, '0'),
    'br': (u'Breton', 2, 'n > 1'),
    'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': (u'Catalan; Valencian', 2, '(n != 1)'),
    'ca@valencia': (u'Catalan; Valencian (Valencia)', 2, '(n != 1)'),
    'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'),
    'da': (u'Danish', 2, '(n != 1)'),
    'de': (u'German', 2, '(n != 1)'),
    'dz': (u'Dzongkha', 1, '0'),
    'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'),
    'en': (u'English', 2, '(n != 1)'),
    'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': (u'English (South Africa)', 2, '(n != 1)'),
    'eo': (u'Esperanto', 2, '(n != 1)'),
    'es': (u'Spanish; Castilian', 2, '(n != 1)'),
    'et': (u'Estonian', 2, '(n != 1)'),
    'eu': (u'Basque', 2, '(n != 1)'),
    'fa': (u'Persian', 1, '0'),
    'fi': (u'Finnish', 2, '(n != 1)'),
    'fil': (u'Filipino; Pilipino', 2, '(n > 1)'),
    'fo': (u'Faroese', 2, '(n != 1)'),
    'fr': (u'French', 2, '(n > 1)'),
    'fur': (u'Friulian', 2, '(n != 1)'),
    'fy': (u'Frisian', 2, '(n != 1)'),
    'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': (u'Galician', 2, '(n != 1)'),
    'gu': (u'Gujarati', 2, '(n != 1)'),
    'gun': (u'Gun', 2, '(n > 1)'),
    'ha': (u'Hausa', 2, '(n != 1)'),
    'he': (u'Hebrew', 2, '(n != 1)'),
    'hi': (u'Hindi', 2, '(n != 1)'),
    'hy': (u'Armenian', 1, '0'),
    'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': (u'Hungarian', 2, '(n != 1)'),
    'id': (u'Indonesian', 1, '0'),
    'is': (u'Icelandic', 2, '(n != 1)'),
    'it': (u'Italian', 2, '(n != 1)'),
    'ja': (u'Japanese', 1, '0'),
    'jv': (u'Javanese', 2, '(n != 1)'),
    'ka': (u'Georgian', 1, '0'),
    'kk': (u'Kazakh', 1, '0'),
    'km': (u'Central Khmer', 1, '0'),
    'kn': (u'Kannada', 2, '(n != 1)'),
    'ko': (u'Korean', 1, '0'),
    'ku': (u'Kurdish', 2, '(n != 1)'),
    'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': (u'Kirghiz; Kyrgyz', 1, '0'),
    'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': (u'Lingala', 2, '(n > 1)'),
    'lo': (u'Lao', 1, '0'),
    'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': (u'Malagasy', 2, '(n > 1)'),
    'mi': (u'Maori', 2, '(n > 1)'),
    'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': (u'Malayalam', 2, '(n != 1)'),
    'mn': (u'Mongolian', 2, '(n != 1)'),
    'mr': (u'Marathi', 2, '(n != 1)'),
    'ms': (u'Malay', 1, '0'),
    'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': (u'Nahuatl languages', 2, '(n != 1)'),
    'nap': (u'Neapolitan', 2, '(n != 1)'),
    'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'),
    'ne': (u'Nepali', 2, '(n != 1)'),
    'nl': (u'Dutch; Flemish', 2, '(n != 1)'),
    'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'),
    'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
    'oc': (u'Occitan (post 1500)', 2, '(n > 1)'),
    'or': (u'Oriya', 2, '(n != 1)'),
    'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': (u'Papiamento', 2, '(n != 1)'),
    'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': (u'Piemontese', 2, '(n != 1)'),
    'ps': (u'Pushto; Pashto', 2, '(n != 1)'),
    'pt': (u'Portuguese', 2, '(n != 1)'),
    'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'),
    'rm': (u'Romansh', 2, '(n != 1)'),
    'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': (u'Scots', 2, '(n != 1)'),
    'si': (u'Sinhala; Sinhalese', 2, '(n != 1)'),
    'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': (u'Somali', 2, '(n != 1)'),
    'sq': (u'Albanian', 2, '(n != 1)'),
    'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'st': (u'Sotho, Southern', 2, '(n != 1)'),
    'su': (u'Sundanese', 1, '0'),
    'sv': (u'Swedish', 2, '(n != 1)'),
    'sw': (u'Swahili', 2, '(n != 1)'),
    'ta': (u'Tamil', 2, '(n != 1)'),
    'te': (u'Telugu', 2, '(n != 1)'),
    'tg': (u'Tajik', 2, '(n != 1)'),
    'ti': (u'Tigrinya', 2, '(n > 1)'),
    'th': (u'Thai', 1, '0'),
    'tk': (u'Turkmen', 2, '(n != 1)'),
    'tr': (u'Turkish', 1, '0'),
    'tt': (u'Tatar', 1, '0'),
    'ug': (u'Uighur; Uyghur', 1, '0'),
    'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': (u'Vietnamese', 1, '0'),
    'wa': (u'Walloon', 2, '(n > 1)'),

    'zh_CN': (u'Chinese (China)', 1, '0'),
    'zh_HK': (u'Chinese (Hong Kong)', 1, '0'),
    'zh_TW': (u'Chinese (Taiwan)', 1, '0'),
    'zu': (u'Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English from iso-codes, nplurals, plural equation).
The plural equation never ends with a semicolon.

Note that the English names should not be used in user facing places - it
should always be passed through the function returned from tr_lang(), or at
least passed through _fix_language_name()."""
161
162 _fixed_names = {
163 u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
164 u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
165 u"Catalan; Valencian": u"Catalan",
166 u"Central Khmer": u"Khmer",
167 u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
168 u"Divehi; Dhivehi; Maldivian": u"Divehi",
169 u"Dutch; Flemish": u"Dutch",
170 u"Filipino; Pilipino": u"Filipino",
171 u"Greek, Modern (1453-)": u"Greek",
172 u"Kirghiz; Kyrgyz": u"Kirghiz",
173 u"Klingon; tlhIngan-Hol": u"Klingon",
174 u"Limburgan; Limburger; Limburgish": u"Limburgish",
175 u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
176 u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
177 u"Ndebele, South; South Ndebele": u"Southern Ndebele",
178 u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
179 u"Occitan (post 1500)": u"Occitan",
180 u"Panjabi; Punjabi": u"Punjabi",
181 u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
182 u"Pushto; Pashto": u"Pashto",
183 u"Sinhala; Sinhalese": u"Sinhala",
184 u"Sotho, Southern": u"Sotho",
185 u"Spanish; Castilian": u"Spanish",
186 u"Uighur; Uyghur": u"Uighur",
187 }
188
def simplercode(code):
    """Return the given language code with its last component removed.

    This attempts to simplify the code by ignoring country codes, for
    example.  The position of the last separator is looked up in the
    normalized form of the code, but the result is cut from the original
    C{code}, so the caller's own spelling is kept.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: A language code; may be empty or None
    @return: C{code} unchanged when it is empty or None, the part before the
        last separator when there is one, otherwise the empty string
    """
    # NOTE(review): the ``def`` line is not visible in the reviewed listing;
    # the signature is reconstructed from the call simplercode(language_code)
    # and from the body - confirm.
    if not code:
        return code

    # Assumes normalize_code() keeps the length of the code and turns every
    # separator into '-', so the index is valid for ``code`` too - confirm.
    last_dash = normalize_code(code).rfind('-')
    return code[:last_dash] if last_dash >= 0 else ""
208
209
# Keyed by language code.  A negative factor presumably means that text in
# that language tends to be shorter than its source - confirm with callers.
expansion_factors = dict(
    af=0.1,
    ar=-0.09,
    es=0.21,
    fr=0.28,
    it=0.2,
)
"""Source to target string length expansion factors."""
218
219 import gettext
220 import locale
221 import re
222 import os
223
# Caches filled on demand by gettext_lang() and gettext_country(): each maps a
# language code ("" for the system language) to a gettext translation function.
iso639 = {}
"""Cache of gettext functions for ISO 639 language names, by language code"""
iso3166 = {}
"""Cache of gettext functions for ISO 3166 country names, by language code"""

# A whole language code: 2-3 lower-case letters, then an optional "_XX"/"-XX"
# region of 2-3 upper-case letters, then an optional "@modifier".
langcode_re = re.compile(r"^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the tail of a code: a mandatory region followed by an optional modifier.
variant_re = re.compile(r"^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
231
233 """matches a languagecode to another, ignoring regions in the second"""
234 if languagecode is None:
235 return langcode_re.match(otherlanguagecode)
236 return languagecode == otherlanguagecode or \
237 (otherlanguagecode.startswith(languagecode) and variant_re.match(otherlanguagecode[len(languagecode):]))
238
# Splits a name of the form "Language (Country)" into its two parts.  The
# bracketed part may not contain digits, so a name such as
# "Greek, Modern (1453-)" is not mistaken for a language/country pair.
dialect_name_re = re.compile(r"(.+)\s\(([^)\d]+)\)$")


def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form
    C{"language (country)"}, into the language with iso code langcode, or the
    system language if no language is specified.

    @param langcode: Code of the language to translate the names into
    @return: A function taking an English language name and returning its
        translated, tidied-up form
    """
    # NOTE(review): the ``def`` line is not visible in the reviewed listing;
    # name and parameter are reconstructed from the docstrings and the body,
    # with the default mirroring gettext_lang(langcode=None) - confirm.
    translate_language = gettext_lang(langcode)
    translate_country = gettext_country(langcode)

    def handlelanguage(name):
        dialect = dialect_name_re.match(name)
        if dialect is None:
            # A plain language name without a country part.
            return _fix_language_name(translate_language(name))
        # Language and country are translated from separate catalogues and
        # put back together in the same "language (country)" layout.
        language, country = dialect.groups()
        return u"%s (%s)" % (_fix_language_name(translate_language(language)),
                             translate_country(country))

    return handlelanguage
256
def _fix_language_name(name):
    """Identify and replace some unsightly names present in iso-codes.

    A name listed in _fixed_names is assumed to be untranslated and is
    replaced by the friendlier rendering stored there.  Any other name that
    is long and contains a semi-colon is cut off just before that semi-colon
    to keep things neat.

    @param name: A language name
    @return: The tidied-up name, or C{name} itself when nothing applies
    """
    # NOTE(review): the ``def`` line is not visible in the reviewed listing;
    # the signature is reconstructed from the calls _fix_language_name(...)
    # and from the body - confirm.
    replacement = _fixed_names.get(name)
    if replacement is not None:
        return replacement

    # Only names longer than 11 characters are shortened, and a semi-colon
    # within the first 5 characters is never used as the cut-off point.
    if len(name) > 11:
        offset = name[5:].find(u';')
        if offset >= 0:
            return name[:5 + offset]
    return name
273
274
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and then served from the
    module-level C{iso639} cache.

    @param langcode: Code of the target language; None or empty selects the
        system language
    @return: A callable taking a language name and returning its translation
        (the name itself when no catalogue is found, because of
        C{fallback=True})
    """
    # The system language is cached under "".  Map None to that key BEFORE
    # looking in the cache, otherwise every default call misses the cache and
    # builds a new translation object.
    if not langcode:
        langcode = ""
    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is handed to gettext explicitly
            # (presumably because the language environment variables are
            # normally unset there).  It can be None, in which case gettext
            # is left to choose for itself.
            default_locale = locale.getdefaultlocale()[0]
            preferred = [default_locale] if default_locale else None
            t = gettext.translation('iso_639', languages=preferred, fallback=True)
        else:
            t = gettext.translation('iso_639', fallback=True)
        # ugettext() only exists on Python 2; on Python 3 gettext() already
        # returns text.
        iso639[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso639[langcode]
290
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and then served from the
    module-level C{iso3166} cache.

    @param langcode: Code of the target language; None or empty selects the
        system language
    @return: A callable taking a country name and returning its translation
        (the name itself when no catalogue is found, because of
        C{fallback=True})
    """
    # The system language is cached under "".  Map None to that key BEFORE
    # looking in the cache, otherwise every default call misses the cache and
    # builds a new translation object.
    if not langcode:
        langcode = ""
    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is handed to gettext explicitly
            # (presumably because the language environment variables are
            # normally unset there).  It can be None, in which case gettext
            # is left to choose for itself.
            default_locale = locale.getdefaultlocale()[0]
            preferred = [default_locale] if default_locale else None
            t = gettext.translation('iso_3166', languages=preferred, fallback=True)
        else:
            t = gettext.translation('iso_3166', fallback=True)
        # ugettext() only exists on Python 2; on Python 3 gettext() already
        # returns text.
        iso3166[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso3166[langcode]
306
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form.

    @param string: The string to be normalized; None is passed through
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or None when C{string} is None
    """
    # NOTE(review): the ``def`` line is not visible in the reviewed listing;
    # the signature is reconstructed from the call normalize(...) and from
    # the docstring, which names NFC as the default - confirm.
    if string is not None:
        return unicodedata.normalize(normal_form, string)
    return None
318
def forceunicode(string):
    """Ensures that the string is in unicode.

    Byte strings are decoded with the encoding named by their C{encoding}
    attribute when they have one, and with UTF-8 otherwise.  StringElem
    objects are converted to their text form.  Anything else, including text
    that is already unicode, is returned unchanged.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode and normalized as needed.
    @rtype: Unicode
    """
    # NOTE(review): the ``def`` line is not visible in the reviewed listing;
    # the signature is reconstructed from the call forceunicode(string) and
    # from the body - confirm.
    if string is None:
        return None
    # ``bytes`` is the same type as ``str`` on Python 2, so nothing changes
    # there; on Python 3 it keeps text strings (which have no decode()) away
    # from the decoding branch.
    if isinstance(string, bytes):
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
        string = type(u"")(string)
    return string
335
337 """Forces the string to unicode and does normalization."""
338 return normalize(forceunicode(string))
339
344
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries.

    Trailing components are removed one at a time with simplercode() until
    the code is one of the known codes or cannot be shortened any further.

    @param language_code: The language code to simplify; an empty or None
        value is returned unchanged
    @param languages: Mapping whose keys are the known language codes;
        defaults to the module-level table
    @return: The first of C{language_code} and its successively shortened
        forms that is a known code, or else the last non-reducible form
    """
    # NOTE(review): the ``def`` line is not visible in the reviewed listing;
    # the name comes from the recursive call in the original body, and the
    # optional ``languages`` parameter is a backward-compatible reading of
    # the body's use of that name - confirm against the real signature.
    if not language_code:
        return language_code

    # Normalise the known codes once, instead of on every simplification step.
    known_codes = set(normalize_code(key) for key in languages)
    while True:
        simpler = simplercode(language_code)
        if normalize_code(language_code) in known_codes or simpler == "":
            return language_code
        language_code = simpler
354