1 __doc__ = """GNUmed client internationalization/localization.
2
3 All i18n/l10n issues should be handled through this module.
4
5 Theory of operation:
6
7 To activate proper locale settings and translation services you need to
8
9 - import this module
10 - call activate_locale()
11 - call install_domain()
12
13 The translating method gettext.gettext() will then be
14 installed into the global (!) namespace as _(). Your own
15 modules thus need not do _anything_ (not even import gmI18N)
16 to have _() available to them for translating strings. You
17 need to make sure, however, that gmI18N is imported in your
18 main module before any of the modules using it. In order to
19 resolve circular references involving modules that
20 absolutely _have_ to be imported before this module you can
21 explicitly import gmI18N into them at the very beginning.
22
23 The text domain (i.e. the name of the message catalog file)
24 is derived from the name of the main executing script unless
25 explicitly passed to install_domain(). The language you
26 want to translate to is derived from environment variables
27 by the locale system unless explicitly passed to
28 install_domain().
29
30 This module searches for message catalog files in 3 main locations:
31
32 - standard POSIX places (/usr/share/locale/ ...)
33 - below "${YOURAPPNAME_DIR}/po/"
34 - below "<directory of binary of your app>/../po/"
35
36 For DOS/Windows I don't know of standard places so probably
37 only the last option will work. I don't know a thing about
38 classic Mac behaviour. New Macs are POSIX, of course.
39
40 It will then try to install candidates and *verify* whether
41 the translation works by checking for the translation of a
42 tag within itself (this is similar to the self-compiling
43 compiler inserting a backdoor into its self-compiled
44 copies).
45
46 If none of this works it will fall back to making _() a noop.
47
48 @copyright: authors
49 """
50
51 __author__ = "H. Herb <hherb@gnumed.net>, I. Haywood <i.haywood@ugrad.unimelb.edu.au>, K. Hilbert <Karsten.Hilbert@gmx.net>"
52 __license__ = "GPL v2 or later (details at http://www.gnu.org)"
53
54
55
56 import sys
57 import os.path
58 import os
59 import locale
60 import gettext
61 import logging
62 import codecs
63 import builtins
64 import re as regex
65
66
# Logger shared by everything in this module.
_log = logging.getLogger('gm.i18n')

# Make _() available in the builtin namespace right away; until a
# message catalog has been installed it hands its argument back.
builtins._ = lambda x: x

# The translator wrapped by _translate_protected(); identity until
# __install_domain() swaps in the real one.
_translate_original = lambda x: x

# Matches named placeholders such as "%(name)s".
_substitutes_regex = regex.compile(r'%\(.+?\)s')

# Set by activate_locale(); "levels" is presumably filled in by
# __split_locale_into_levels() -- verify, that helper is not shown here.
system_locale = ''
system_locale_level = {}

# Probe string: a candidate message catalog is accepted only if it
# translates this tag into something different from the tag itself.
__orig_tag__ = 'Translate this or i18n into <en_EN> will not work properly !'
83
84
85
86
106
107
def __log_locale_settings(message=None):
    """Write the current locale configuration to the debug log.

    Purely diagnostic: nothing is changed, the locale is only queried.

    Args:
        message: optional headline logged before the dump.
    """
    # Collect the LC_* constants this platform actually defines.
    setlocale_categories = {}
    for category in 'LC_ALL LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            setlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not have locale.%s', category)

    # LC_ALL is deliberately absent from this list.
    getlocale_categories = {}
    for category in 'LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            getlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            pass

    if message is not None:
        _log.debug(message)

    _log.debug('current locale settings:')
    _log.debug('locale.getlocale(): %s', str(locale.getlocale()))
    for category, constant in getlocale_categories.items():
        _log.debug('locale.getlocale(%s): %s', category, locale.getlocale(constant))

    # setlocale() with only a category queries the setting without changing it.
    for category, constant in setlocale_categories.items():
        _log.debug('(locale.setlocale(%s): %s)', category, locale.setlocale(constant))

    try:
        _log.debug('locale.getdefaultlocale() - default (user) locale: %s', str(locale.getdefaultlocale()))
    except ValueError:
        _log.exception('the OS locale setup seems faulty')

    _log.debug('encoding sanity check (also check "locale.nl_langinfo(CODESET)" below):')
    pref_loc_enc = locale.getpreferredencoding(do_setlocale=False)
    loc_enc = locale.getlocale()[1]
    py_str_enc = sys.getdefaultencoding()
    sys_fs_enc = sys.getfilesystemencoding()
    _log.debug('sys.getdefaultencoding(): [%s]', py_str_enc)
    _log.debug('locale.getpreferredencoding(): [%s]', pref_loc_enc)
    _log.debug('locale.getlocale()[1]: [%s]', loc_enc)
    _log.debug('sys.getfilesystemencoding(): [%s]', sys_fs_enc)

    # Compare case-insensitively and without dashes so that
    # "utf-8" and "UTF8" count as the same encoding.
    if loc_enc is not None:
        loc_enc = loc_enc.upper()
        loc_enc_compare = loc_enc.replace('-', '')
    else:
        loc_enc_compare = loc_enc
    if pref_loc_enc.upper().replace('-', '') != loc_enc_compare:
        _log.warning('encoding suggested by locale (%s) does not match encoding currently set in locale (%s)', pref_loc_enc, loc_enc)
        _log.warning('this might lead to encoding errors')

    for enc in [pref_loc_enc, loc_enc, py_str_enc, sys_fs_enc]:
        if enc is None:
            continue
        try:
            codecs.lookup(enc)
            _log.debug('<codecs> module CAN handle encoding [%s]', enc)
        except LookupError:
            _log.warning('<codecs> module can NOT handle encoding [%s]', enc)
    _log.debug('on Linux you can determine a likely candidate for the encoding by running "locale charmap"')

    _log.debug('locale related environment variables (${LANG} is typically used):')
    for var in 'LANGUAGE LC_ALL LC_CTYPE LANG'.split():
        try:
            _log.debug('${%s}=%s', var, os.environ[var])
        except KeyError:
            _log.debug('${%s} not set', var)

    # The values are plain str/int/list objects on Python 3, so they can
    # be logged directly; no decoding with the locale encoding is needed.
    _log.debug('database of locale conventions:')
    for key, value in locale.localeconv().items():
        _log.debug('locale.localeconv(%s): %s', key, value)

    nl_langinfo_categories = {}
    for category in 'CODESET D_T_FMT D_FMT T_FMT T_FMT_AMPM RADIXCHAR THOUSEP YESEXPR NOEXPR CRNCYSTR ERA ERA_D_T_FMT ERA_D_FMT ALT_DIGITS'.split():
        try:
            nl_langinfo_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not support nl_langinfo category locale.%s', category)
    try:
        for category, constant in nl_langinfo_categories.items():
            _log.debug('locale.nl_langinfo(%s): %s', category, locale.nl_langinfo(constant))
    except Exception:
        # best effort only: diagnostics must not break the caller
        _log.exception('this OS does not support nl_langinfo')

    _log.debug('gmI18N.get_encoding(): %s', get_encoding())
201
202
def _translate_protected(term):
    """This wraps _().

    It protects against translation errors such as a different number of "%s".

    Args:
        term: the original (untranslated) string.

    Returns:
        The translation produced by _translate_original(), or <term>
        itself if the translation
        - contains a different number of "%s" than <term>, or
        - uses "%(key)s" placeholders that do not occur in <term>.
    """
    translation = _translate_original(term)

    # positional placeholders must match in number
    if translation.count('%s') != term.count('%s'):
        _log.error('count("%s") mismatch, returning untranslated string')
        _log.error('original : %s', term)
        _log.error('translation: %s', translation)
        return term

    # named placeholders: the translation may drop keys but not invent any
    substitution_keys_in_original = set(_substitutes_regex.findall(term))
    substitution_keys_in_translation = set(_substitutes_regex.findall(translation))
    if not substitution_keys_in_translation.issubset(substitution_keys_in_original):
        _log.error('"%(...)s" keys in translation not a subset of keys in original, returning untranslated string')
        _log.error('original : %s', term)
        _log.error('translation: %s', translation)
        return term

    return translation
227
228
229
230
def activate_locale():
    """Get system locale from environment.

    Activates the user-default locale unless one is already active,
    stores the resulting locale name in the module global
    <system_locale> (falling back to "en_EN" if it is still unset or
    "C"), and then calls __split_locale_into_levels().

    Returns:
        Always True; failures while activating are logged, not raised.
    """
    global system_locale

    __log_locale_settings('unmodified startup locale settings (should be [C])')

    loc = None
    try:
        # check whether a locale has already been set
        loc = locale.getlocale()[0]
        if loc is None:
            loc = locale.setlocale(locale.LC_ALL, '')
            _log.debug("activating user-default locale with <locale.setlocale(locale.LC_ALL, '')> returns: [%s]", loc)
        else:
            _log.info('user-default locale already activated')
        # re-read: what is wanted is the parsed language code
        loc = locale.getlocale()[0]
    except AttributeError:
        _log.exception('Windows does not support locale.LC_ALL')
    except Exception:
        _log.exception('error activating user-default locale')

    __log_locale_settings('locale settings after activating user-default locale')

    if loc in [None, 'C']:
        _log.error('the current system locale is still [None] or [C], assuming [en_EN]')
        system_locale = "en_EN"
    else:
        system_locale = loc

    __split_locale_into_levels()

    return True
266
267
def install_domain(domain=None, language=None, prefer_local_catalog=False):
    """Install a text domain suitable for the main script.

    Candidate languages are tried in this order until one yields a
    working message catalog:

    1. <language>, if given
    2. the implicit system default (${LANG} as found at call time)
    3. locale.getlocale()[0]
    4. locale.getdefaultlocale()[0]

    A candidate is selected by temporarily pointing ${LANG} at it.

    Args:
        domain: name of the message catalog; derived from the name of
            the running script (sys.argv[0] without directory and
            extension) if None.
        language: explicit target language, or None.
        prefer_local_catalog: passed on to __install_domain().

    Returns:
        Always True; if no catalog works, gettext.NullTranslations()
        is installed instead.
    """
    if domain is None:
        _log.info('domain not specified, deriving from script name')
        domain = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    _log.info('text domain is [%s]', domain)

    _log.debug('searching message catalog file for system locale [%s]', system_locale)

    _log.debug('checking process environment:')
    for env_var in ['LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG']:
        value = os.getenv(env_var)
        # test the *value*: the variable name itself is never None
        if value is None:
            _log.debug(' ${%s} not set', env_var)
        else:
            _log.debug(' ${%s} = [%s]', env_var, value)

    # 1) explicit language, or the implicit system default if there is none
    lang_candidates = [language]
    if language is not None:
        _log.info('explicit request for target language [%s]', language)
        # 2) the implicit system default as a fallback
        lang_candidates.append(None)
    # 3) language of the currently active locale
    current_lang = locale.getlocale()[0]
    if current_lang not in lang_candidates:
        lang_candidates.append(current_lang)
    # 4) language of the user's default locale
    default_lang = locale.getdefaultlocale()[0]
    if default_lang not in lang_candidates:
        lang_candidates.append(default_lang)

    _log.debug('languages to try for translation: %s (None: implicit system default)', lang_candidates)
    initial_lang = os.getenv('LANG')
    _log.info('initial ${LANG} setting: %s', initial_lang)

    for lang_candidate in lang_candidates:
        # start every attempt from the user's original ${LANG}
        _log.debug('resetting ${LANG} to initial user default [%s]', initial_lang)
        if initial_lang is None:
            # pop() rather than del: ${LANG} may not exist at all
            os.environ.pop('LANG', None)
            lang2log = '$LANG=<>'
        else:
            os.environ['LANG'] = initial_lang
            lang2log = '$LANG(default)=%s' % initial_lang

        if lang_candidate is not None:
            _log.info('explicitely overriding system locale language [%s] by setting ${LANG} to [%s]', initial_lang, lang_candidate)
            os.environ['LANG'] = lang_candidate
            lang2log = '$LANG(explicit)=%s' % lang_candidate

        if __install_domain(domain = domain, prefer_local_catalog = prefer_local_catalog, language = lang2log):
            return True

    # nothing worked: install a translator that returns strings unchanged
    _log.warning("falling back to NullTranslations() class")
    dummy = gettext.NullTranslations()
    dummy.install()
    return True
337
338
def __install_domain(domain, prefer_local_catalog, language='?'):
    """Try to install <domain> from each candidate catalog directory.

    Candidate directories, in order:

    - "<dir of sys.argv[0]>/../po" and "<dir of sys.argv[0]>/po"
      (tried first if <prefer_local_catalog> is true, last otherwise)
    - on POSIX, the directory reported by gettext.bindtextdomain()
    - "${<SCRIPTNAME>_DIR}/po", if that environment variable is set

    A directory is accepted only if, after gettext.install(), _()
    translates __orig_tag__ into something else. On success the
    installed _() is remembered in _translate_original and the builtin
    _() is replaced by _translate_protected.

    Args:
        domain: name of the message catalog.
        prefer_local_catalog: try directories near the script first.
        language: used for logging only.

    Returns:
        True if a working catalog was installed, False otherwise.
    """
    global _translate_original

    binary_dir = os.path.dirname(sys.argv[0])
    above_binary_dir = os.path.abspath(os.path.join(binary_dir, '..', 'po'))
    in_binary_dir = os.path.abspath(os.path.join(binary_dir, 'po'))

    candidate_PO_dirs = []

    if prefer_local_catalog:
        _log.debug('prioritizing local message catalog')
        _log.debug('looking one level above binary install directory: %s', above_binary_dir)
        candidate_PO_dirs.append(above_binary_dir)
        _log.debug('looking in binary install directory: %s', in_binary_dir)
        candidate_PO_dirs.append(in_binary_dir)

    # standard system location
    if os.name == 'posix':
        _log.debug('system is POSIX, looking in standard locations (see Python Manual)')
        # called without a directory this only reports the bound directory
        candidate_PO_dirs.append(gettext.bindtextdomain(domain))
    else:
        _log.debug('No use looking in standard POSIX locations - not a POSIX system.')

    # ${<SCRIPTNAME>_DIR}/po
    env_key = "%s_DIR" % os.path.splitext(os.path.basename(sys.argv[0]))[0].upper()
    _log.debug('looking at ${%s}', env_key)
    if env_key in os.environ:
        loc_dir = os.path.abspath(os.path.join(os.environ[env_key], 'po'))
        _log.debug('${%s} = "%s" -> [%s]', env_key, os.environ[env_key], loc_dir)
        candidate_PO_dirs.append(loc_dir)
    else:
        _log.info("${%s} not set", env_key)

    if not prefer_local_catalog:
        _log.debug('looking above binary install directory [%s]', above_binary_dir)
        candidate_PO_dirs.append(above_binary_dir)
        _log.debug('looking in binary install directory [%s]', in_binary_dir)
        candidate_PO_dirs.append(in_binary_dir)

    for candidate_PO_dir in candidate_PO_dirs:
        _log.debug('trying with (base=%s, %s, domain=%s)', candidate_PO_dir, language, domain)
        _log.debug(' -> %s.mo', os.path.join(candidate_PO_dir, language, domain))
        if not os.path.exists(candidate_PO_dir):
            continue
        try:
            gettext.install(domain, candidate_PO_dir)
        except Exception:
            _log.exception('installing text domain [%s] failed from [%s]', domain, candidate_PO_dir)
            continue
        # gettext.install() has put _() into builtins; check that it
        # really translates by probing with the self-test tag
        translated_tag = builtins._(__orig_tag__)
        if translated_tag == __orig_tag__:
            _log.debug('does not translate: [%s] => [%s]', __orig_tag__, translated_tag)
            continue
        _log.debug('found msg catalog: [%s] => [%s]', __orig_tag__, translated_tag)
        # route all further _() calls through the sanity-checking wrapper
        _translate_original = builtins._
        builtins._ = _translate_protected
        return True

    return False
417
418
# Ensures the "falling back to preferred encoding" note is logged only once.
_encoding_mismatch_already_logged = False
# Cache for the encoding determined by get_encoding().
_current_encoding = None


def get_encoding():
    """Try to get a sane encoding.

    On MacOSX locale.setlocale(locale.LC_ALL, '') does not
    have the desired effect, so that locale.getlocale()[1]
    still returns None. So in that case try to fallback to
    locale.getpreferredencoding().

    <sys.getdefaultencoding()>
        - what Python itself uses to convert bytes <-> str
          when no other encoding was specified
        - "utf-8" on Python 3, so this normally decides the result
    <locale.getlocale()[1]>
        - what the current locale is *actually* using
          as the encoding for text conversion
    <locale.getpreferredencoding()>
        - what the current locale would *recommend* using
          as the encoding for text conversion

    Returns:
        The name of the encoding. The first two sources are cached in
        <_current_encoding>; the preferred-encoding fallback is not.
    """
    global _current_encoding
    global _encoding_mismatch_already_logged

    if _current_encoding is not None:
        return _current_encoding

    enc = sys.getdefaultencoding()
    if enc != 'ascii':
        _current_encoding = enc
        return _current_encoding

    enc = locale.getlocale()[1]
    if enc is not None:
        _current_encoding = enc
        return _current_encoding

    if not _encoding_mismatch_already_logged:
        _log.debug('*actual* encoding of locale is None, using encoding *recommended* by locale')
        _encoding_mismatch_already_logged = True

    return locale.getpreferredencoding(do_setlocale=False)
462
463
464
465
# Manual self-test, run as: <this script> test [<text domain>]
if __name__ == "__main__":

    # only run when explicitly asked to
    if len(sys.argv) < 2 or sys.argv[1] != 'test':
        sys.exit()

    logging.basicConfig(level = logging.DEBUG)

    def test_strcoll():
        """Print locale.strcoll() results for a few sample string pairs."""
        candidates = [
            ('\u270d', '\u270d'),
            ('4', '\u270d' + '4'),
            ('4.4', '\u270d' + '4.4'),
            ('44', '\u270d' + '44'),
            ('4', '\u270d' + '9'),
            ('4', '\u270d' + '2'),
        ]
        for cands in candidates:
            print(cands[0], '<vs>', cands[1], '=', locale.strcoll(cands[0], cands[1]))

    print("======================================================================")
    print("GNUmed i18n")
    print("")
    print("authors:", __author__)
    print("license:", __license__)
    print("======================================================================")

    activate_locale()
    print("system locale: ", system_locale, "; levels:", system_locale_level)
    print("likely encoding:", get_encoding())

    # an optional second argument names the text domain to install
    if len(sys.argv) > 2:
        install_domain(domain = sys.argv[2])
    else:
        install_domain()

    test_strcoll()

    # NOTE(review): keep this line -- presumably it exists so that the
    # self-test tag is picked up as a translatable string and ends up in
    # the message catalogs (see __orig_tag__); confirm before removing.
    tmp = _('Translate this or i18n into <en_EN> will not work properly !')
523
524
525