/*
 * Copyright (C) 2001-2006 Bastien Nocera <hadess@hadess.net>
 *
 * encoding list copied from gnome-terminal/encoding.c
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * The Totem project hereby grant permission for non-gpl compatible GStreamer
 * plugins to be used and distributed together with GStreamer and Totem. This
 * permission are above and beyond the permissions granted by the GPL license
 * Totem is covered by.
 *
 * Monday 7th February 2005: Christian Schaller: Add exception clause.
 * See license_change file for details.
 *
 */
#include <string.h>

#include <glib/gi18n-lib.h>
#include "totem-subtitle-encoding.h"
/* Index of every row of the encodings[] table.
 *
 * The enumerators must be listed in exactly the same order as the table
 * rows: each row stores its own index, and subtitle_encoding_init() asserts
 * both that the row index matches its position and that the table holds
 * SUBTITLE_ENCODING_LAST rows. */
typedef enum
{
  SUBTITLE_ENCODING_CURRENT_LOCALE,

  SUBTITLE_ENCODING_ISO_8859_6,
  SUBTITLE_ENCODING_IBM_864,
  SUBTITLE_ENCODING_MAC_ARABIC,
  SUBTITLE_ENCODING_WINDOWS_1256,

  SUBTITLE_ENCODING_ARMSCII_8,

  SUBTITLE_ENCODING_ISO_8859_4,
  SUBTITLE_ENCODING_ISO_8859_13,
  SUBTITLE_ENCODING_WINDOWS_1257,

  SUBTITLE_ENCODING_ISO_8859_14,

  SUBTITLE_ENCODING_ISO_8859_2,
  SUBTITLE_ENCODING_IBM_852,
  SUBTITLE_ENCODING_MAC_CE,
  SUBTITLE_ENCODING_WINDOWS_1250,

  SUBTITLE_ENCODING_GB18030,
  SUBTITLE_ENCODING_GB2312,
  SUBTITLE_ENCODING_GBK,
  SUBTITLE_ENCODING_HZ,

  SUBTITLE_ENCODING_BIG5,
  SUBTITLE_ENCODING_BIG5_HKSCS,
  SUBTITLE_ENCODING_EUC_TW,

  SUBTITLE_ENCODING_MAC_CROATIAN,

  SUBTITLE_ENCODING_ISO_8859_5,
  SUBTITLE_ENCODING_IBM_855,
  SUBTITLE_ENCODING_ISO_IR_111,
  SUBTITLE_ENCODING_KOI8_R,
  SUBTITLE_ENCODING_MAC_CYRILLIC,
  SUBTITLE_ENCODING_WINDOWS_1251,

  SUBTITLE_ENCODING_CP_866,

  SUBTITLE_ENCODING_MAC_UKRAINIAN,
  SUBTITLE_ENCODING_KOI8_U,

  SUBTITLE_ENCODING_GEOSTD8,

  SUBTITLE_ENCODING_ISO_8859_7,
  SUBTITLE_ENCODING_MAC_GREEK,
  SUBTITLE_ENCODING_WINDOWS_1253,

  SUBTITLE_ENCODING_MAC_GUJARATI,

  SUBTITLE_ENCODING_MAC_GURMUKHI,

  SUBTITLE_ENCODING_ISO_8859_8_I,
  SUBTITLE_ENCODING_IBM_862,
  SUBTITLE_ENCODING_MAC_HEBREW,
  SUBTITLE_ENCODING_WINDOWS_1255,

  SUBTITLE_ENCODING_ISO_8859_8,

  SUBTITLE_ENCODING_MAC_DEVANAGARI,

  SUBTITLE_ENCODING_MAC_ICELANDIC,

  SUBTITLE_ENCODING_EUC_JP,
  SUBTITLE_ENCODING_ISO_2022_JP,
  SUBTITLE_ENCODING_SHIFT_JIS,

  SUBTITLE_ENCODING_EUC_KR,
  SUBTITLE_ENCODING_ISO_2022_KR,
  SUBTITLE_ENCODING_JOHAB,
  SUBTITLE_ENCODING_UHC,

  SUBTITLE_ENCODING_ISO_8859_10,

  SUBTITLE_ENCODING_MAC_FARSI,

  SUBTITLE_ENCODING_ISO_8859_16,
  SUBTITLE_ENCODING_MAC_ROMANIAN,

  SUBTITLE_ENCODING_ISO_8859_3,

  SUBTITLE_ENCODING_TIS_620,

  SUBTITLE_ENCODING_ISO_8859_9,
  SUBTITLE_ENCODING_IBM_857,
  SUBTITLE_ENCODING_MAC_TURKISH,
  SUBTITLE_ENCODING_WINDOWS_1254,

  SUBTITLE_ENCODING_UTF_7,
  SUBTITLE_ENCODING_UTF_8,
  SUBTITLE_ENCODING_UTF_16,
  SUBTITLE_ENCODING_UCS_2,
  SUBTITLE_ENCODING_UCS_4,

  SUBTITLE_ENCODING_ISO_8859_1,
  SUBTITLE_ENCODING_ISO_8859_15,
  SUBTITLE_ENCODING_IBM_850,
  SUBTITLE_ENCODING_MAC_ROMAN,
  SUBTITLE_ENCODING_WINDOWS_1252,

  SUBTITLE_ENCODING_TCVN,
  SUBTITLE_ENCODING_VISCII,
  SUBTITLE_ENCODING_WINDOWS_1258,

  /* Not an encoding: the number of real entries above. */
  SUBTITLE_ENCODING_LAST
} SubtitleEncodingIndex;
154 static SubtitleEncoding encodings[] = {
156 {SUBTITLE_ENCODING_CURRENT_LOCALE, TRUE,
157 NULL, N_("Current Locale")},
159 {SUBTITLE_ENCODING_ISO_8859_6, FALSE,
160 "ISO-8859-6", N_("Arabic")},
161 {SUBTITLE_ENCODING_IBM_864, FALSE,
162 "IBM864", N_("Arabic")},
163 {SUBTITLE_ENCODING_MAC_ARABIC, FALSE,
164 "MAC_ARABIC", N_("Arabic")},
165 {SUBTITLE_ENCODING_WINDOWS_1256, FALSE,
166 "WINDOWS-1256", N_("Arabic")},
168 {SUBTITLE_ENCODING_ARMSCII_8, FALSE,
169 "ARMSCII-8", N_("Armenian")},
171 {SUBTITLE_ENCODING_ISO_8859_4, FALSE,
172 "ISO-8859-4", N_("Baltic")},
173 {SUBTITLE_ENCODING_ISO_8859_13, FALSE,
174 "ISO-8859-13", N_("Baltic")},
175 {SUBTITLE_ENCODING_WINDOWS_1257, FALSE,
176 "WINDOWS-1257", N_("Baltic")},
178 {SUBTITLE_ENCODING_ISO_8859_14, FALSE,
179 "ISO-8859-14", N_("Celtic")},
181 {SUBTITLE_ENCODING_ISO_8859_2, FALSE,
182 "ISO-8859-2", N_("Central European")},
183 {SUBTITLE_ENCODING_IBM_852, FALSE,
184 "IBM852", N_("Central European")},
185 {SUBTITLE_ENCODING_MAC_CE, FALSE,
186 "MAC_CE", N_("Central European")},
187 {SUBTITLE_ENCODING_WINDOWS_1250, FALSE,
188 "WINDOWS-1250", N_("Central European")},
190 {SUBTITLE_ENCODING_GB18030, FALSE,
191 "GB18030", N_("Chinese Simplified")},
192 {SUBTITLE_ENCODING_GB2312, FALSE,
193 "GB2312", N_("Chinese Simplified")},
194 {SUBTITLE_ENCODING_GBK, FALSE,
195 "GBK", N_("Chinese Simplified")},
196 {SUBTITLE_ENCODING_HZ, FALSE,
197 "HZ", N_("Chinese Simplified")},
199 {SUBTITLE_ENCODING_BIG5, FALSE,
200 "BIG5", N_("Chinese Traditional")},
201 {SUBTITLE_ENCODING_BIG5_HKSCS, FALSE,
202 "BIG5-HKSCS", N_("Chinese Traditional")},
203 {SUBTITLE_ENCODING_EUC_TW, FALSE,
204 "EUC-TW", N_("Chinese Traditional")},
206 {SUBTITLE_ENCODING_MAC_CROATIAN, FALSE,
207 "MAC_CROATIAN", N_("Croatian")},
209 {SUBTITLE_ENCODING_ISO_8859_5, FALSE,
210 "ISO-8859-5", N_("Cyrillic")},
211 {SUBTITLE_ENCODING_IBM_855, FALSE,
212 "IBM855", N_("Cyrillic")},
213 {SUBTITLE_ENCODING_ISO_IR_111, FALSE,
214 "ISO-IR-111", N_("Cyrillic")},
215 {SUBTITLE_ENCODING_KOI8_R, FALSE,
216 "KOI8-R", N_("Cyrillic")},
217 {SUBTITLE_ENCODING_MAC_CYRILLIC, FALSE,
218 "MAC-CYRILLIC", N_("Cyrillic")},
219 {SUBTITLE_ENCODING_WINDOWS_1251, FALSE,
220 "WINDOWS-1251", N_("Cyrillic")},
222 {SUBTITLE_ENCODING_CP_866, FALSE,
223 "CP866", N_("Cyrillic/Russian")},
225 {SUBTITLE_ENCODING_MAC_UKRAINIAN, FALSE,
226 "MAC_UKRAINIAN", N_("Cyrillic/Ukrainian")},
227 {SUBTITLE_ENCODING_KOI8_U, FALSE,
228 "KOI8-U", N_("Cyrillic/Ukrainian")},
230 {SUBTITLE_ENCODING_GEOSTD8, FALSE,
231 "GEORGIAN-PS", N_("Georgian")},
233 {SUBTITLE_ENCODING_ISO_8859_7, FALSE,
234 "ISO-8859-7", N_("Greek")},
235 {SUBTITLE_ENCODING_MAC_GREEK, FALSE,
236 "MAC_GREEK", N_("Greek")},
237 {SUBTITLE_ENCODING_WINDOWS_1253, FALSE,
238 "WINDOWS-1253", N_("Greek")},
240 {SUBTITLE_ENCODING_MAC_GUJARATI, FALSE,
241 "MAC_GUJARATI", N_("Gujarati")},
243 {SUBTITLE_ENCODING_MAC_GURMUKHI, FALSE,
244 "MAC_GURMUKHI", N_("Gurmukhi")},
246 {SUBTITLE_ENCODING_ISO_8859_8_I, FALSE,
247 "ISO-8859-8-I", N_("Hebrew")},
248 {SUBTITLE_ENCODING_IBM_862, FALSE,
249 "IBM862", N_("Hebrew")},
250 {SUBTITLE_ENCODING_MAC_HEBREW, FALSE,
251 "MAC_HEBREW", N_("Hebrew")},
252 {SUBTITLE_ENCODING_WINDOWS_1255, FALSE,
253 "WINDOWS-1255", N_("Hebrew")},
255 {SUBTITLE_ENCODING_ISO_8859_8, FALSE,
256 "ISO-8859-8", N_("Hebrew Visual")},
258 {SUBTITLE_ENCODING_MAC_DEVANAGARI, FALSE,
259 "MAC_DEVANAGARI", N_("Hindi")},
261 {SUBTITLE_ENCODING_MAC_ICELANDIC, FALSE,
262 "MAC_ICELANDIC", N_("Icelandic")},
264 {SUBTITLE_ENCODING_EUC_JP, FALSE,
265 "EUC-JP", N_("Japanese")},
266 {SUBTITLE_ENCODING_ISO_2022_JP, FALSE,
267 "ISO2022JP", N_("Japanese")},
268 {SUBTITLE_ENCODING_SHIFT_JIS, FALSE,
269 "SHIFT-JIS", N_("Japanese")},
271 {SUBTITLE_ENCODING_EUC_KR, FALSE,
272 "EUC-KR", N_("Korean")},
273 {SUBTITLE_ENCODING_ISO_2022_KR, FALSE,
274 "ISO2022KR", N_("Korean")},
275 {SUBTITLE_ENCODING_JOHAB, FALSE,
276 "JOHAB", N_("Korean")},
277 {SUBTITLE_ENCODING_UHC, FALSE,
278 "UHC", N_("Korean")},
280 {SUBTITLE_ENCODING_ISO_8859_10, FALSE,
281 "ISO-8859-10", N_("Nordic")},
283 {SUBTITLE_ENCODING_MAC_FARSI, FALSE,
284 "MAC_FARSI", N_("Persian")},
286 {SUBTITLE_ENCODING_ISO_8859_16, FALSE,
287 "ISO-8859-16", N_("Romanian")},
288 {SUBTITLE_ENCODING_MAC_ROMANIAN, FALSE,
289 "MAC_ROMANIAN", N_("Romanian")},
291 {SUBTITLE_ENCODING_ISO_8859_3, FALSE,
292 "ISO-8859-3", N_("South European")},
294 {SUBTITLE_ENCODING_TIS_620, FALSE,
295 "TIS-620", N_("Thai")},
297 {SUBTITLE_ENCODING_ISO_8859_9, FALSE,
298 "ISO-8859-9", N_("Turkish")},
299 {SUBTITLE_ENCODING_IBM_857, FALSE,
300 "IBM857", N_("Turkish")},
301 {SUBTITLE_ENCODING_MAC_TURKISH, FALSE,
302 "MAC_TURKISH", N_("Turkish")},
303 {SUBTITLE_ENCODING_WINDOWS_1254, FALSE,
304 "WINDOWS-1254", N_("Turkish")},
306 {SUBTITLE_ENCODING_UTF_7, FALSE,
307 "UTF-7", N_("Unicode")},
308 {SUBTITLE_ENCODING_UTF_8, FALSE,
309 "UTF-8", N_("Unicode")},
310 {SUBTITLE_ENCODING_UTF_16, FALSE,
311 "UTF-16", N_("Unicode")},
312 {SUBTITLE_ENCODING_UCS_2, FALSE,
313 "UCS-2", N_("Unicode")},
314 {SUBTITLE_ENCODING_UCS_4, FALSE,
315 "UCS-4", N_("Unicode")},
317 {SUBTITLE_ENCODING_ISO_8859_1, FALSE,
318 "ISO-8859-1", N_("Western")},
319 {SUBTITLE_ENCODING_ISO_8859_15, FALSE,
320 "ISO-8859-15", N_("Western")},
321 {SUBTITLE_ENCODING_IBM_850, FALSE,
322 "IBM850", N_("Western")},
323 {SUBTITLE_ENCODING_MAC_ROMAN, FALSE,
324 "MAC_ROMAN", N_("Western")},
325 {SUBTITLE_ENCODING_WINDOWS_1252, FALSE,
326 "WINDOWS-1252", N_("Western")},
328 {SUBTITLE_ENCODING_TCVN, FALSE,
329 "TCVN", N_("Vietnamese")},
330 {SUBTITLE_ENCODING_VISCII, FALSE,
331 "VISCII", N_("Vietnamese")},
332 {SUBTITLE_ENCODING_WINDOWS_1258, FALSE,
333 "WINDOWS-1258", N_("Vietnamese")}
336 static const SubtitleEncoding *
337 find_encoding_by_charset (const char *charset)
341 i = 1; /* skip current locale */
342 while (i < SUBTITLE_ENCODING_LAST) {
343 if (strcasecmp (charset, encodings[i].charset) == 0)
344 return &encodings[i];
349 if (strcasecmp (charset,
350 encodings[SUBTITLE_ENCODING_CURRENT_LOCALE].charset) == 0)
351 return &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
357 subtitle_encoding_init (void)
360 gsize bytes_read, bytes_written;
362 gchar ascii_sample[96];
364 g_get_charset ((const char **)
365 &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE].charset);
367 g_assert (G_N_ELEMENTS (encodings) == SUBTITLE_ENCODING_LAST);
369 /* Initialize the sample text with all of the printing ASCII characters
370 * from space (32) to the tilde (126), 95 in all. */
371 for (i = 0; i < (int) sizeof (ascii_sample); i++)
372 ascii_sample[i] = i + 32;
374 ascii_sample[sizeof (ascii_sample) - 1] = '\0';
377 while (i < SUBTITLE_ENCODING_LAST) {
381 g_assert (encodings[i].index == i);
383 /* Translate the names */
384 encodings[i].name = _(encodings[i].name);
386 /* Test that the encoding is a proper superset of ASCII (which naive
387 * apps are going to use anyway) by attempting to validate the text
388 * using the current encoding. This also flushes out any encodings
389 * which the underlying GIConv implementation can't support.
391 converted = g_convert (ascii_sample, sizeof (ascii_sample) - 1,
392 encodings[i].charset, encodings[i].charset,
393 &bytes_read, &bytes_written, NULL);
395 /* The encoding is only valid if ASCII passes through cleanly. */
396 if (i == SUBTITLE_ENCODING_CURRENT_LOCALE)
397 encodings[i].valid = TRUE;
400 (bytes_read == (sizeof (ascii_sample) - 1)) &&
401 (converted != NULL) && (strcmp (converted, ascii_sample) == 0);
403 #ifdef DEBUG_ENCODINGS
404 if (!encodings[i].valid) {
405 g_print ("Rejecting encoding %s as invalid:\n", encodings[i].charset);
406 g_print (" input \"%s\"\n", ascii_sample);
407 g_print (" output \"%s\"\n\n", converted ? converted : "(null)");
411 /* Discard the converted string. */
419 subtitle_encoding_get_index (const char *charset)
421 const SubtitleEncoding *e;
423 e = find_encoding_by_charset (charset);
427 return SUBTITLE_ENCODING_CURRENT_LOCALE;
431 subtitle_encoding_get_charset (int index_)
433 const SubtitleEncoding *e;
435 if (index_ >= SUBTITLE_ENCODING_LAST)
436 e = &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
437 else if (index_ < SUBTITLE_ENCODING_CURRENT_LOCALE)
438 e = &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
439 else if (!encodings[index_].valid)
440 e = &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
442 e = &encodings[index_];
453 compare (GtkTreeModel * model, GtkTreeIter * a, GtkTreeIter * b, gpointer data)
455 gchar *str_a, *str_b;
458 gtk_tree_model_get (model, a, NAME_COL, &str_a, -1);
459 gtk_tree_model_get (model, b, NAME_COL, &str_b, -1);
461 result = strcmp (str_a, str_b);
470 is_encoding_sensitive (GtkCellLayout * cell_layout,
471 GtkCellRenderer * cell,
472 GtkTreeModel * tree_model, GtkTreeIter * iter, gpointer data)
477 sensitive = !gtk_tree_model_iter_has_child (tree_model, iter);
478 g_object_set (cell, "sensitive", sensitive, NULL);
481 static GtkTreeModel *
482 subtitle_encoding_create_store (void)
485 const gchar *lastlang = "";
486 GtkTreeIter iter, iter2;
490 store = gtk_tree_store_new (2, G_TYPE_INT, G_TYPE_STRING);
492 for (i = 0; i < SUBTITLE_ENCODING_LAST; i++) {
493 if (encodings[i].valid) {
494 if (strcmp (lastlang, encodings[i].name)) {
495 lastlang = encodings[i].name;
496 gtk_tree_store_append (store, &iter, NULL);
497 gtk_tree_store_set (store, &iter, INDEX_COL,
498 -1, NAME_COL, lastlang, -1);
500 label = g_strdup_printf("%s (%s)", lastlang, encodings[i].charset);
501 gtk_tree_store_append (store, &iter2, &iter);
502 gtk_tree_store_set (store, &iter2, INDEX_COL,
503 encodings[i].index, NAME_COL, label, -1);
507 gtk_tree_sortable_set_default_sort_func (GTK_TREE_SORTABLE (store),
508 compare, NULL, NULL);
509 gtk_tree_sortable_set_sort_column_id (GTK_TREE_SORTABLE (store),
510 NAME_COL, GTK_SORT_ASCENDING);
511 return GTK_TREE_MODEL (store);
515 subtitle_encoding_combo_render (GtkComboBox * combo)
517 GtkCellRenderer *renderer;
519 renderer = gtk_cell_renderer_text_new ();
520 gtk_cell_layout_pack_start (GTK_CELL_LAYOUT (combo), renderer, TRUE);
521 gtk_cell_layout_set_attributes (GTK_CELL_LAYOUT (combo), renderer,
522 "text", NAME_COL, NULL);
523 gtk_cell_layout_set_cell_data_func (GTK_CELL_LAYOUT (combo),
524 renderer, is_encoding_sensitive, NULL, NULL);
528 totem_subtitle_encoding_get_selected (GtkComboBox * combo)
534 model = gtk_combo_box_get_model (combo);
535 if (gtk_combo_box_get_active_iter (combo, &iter)) {
536 gtk_tree_model_get (model, &iter, INDEX_COL, &index_, -1);
540 return subtitle_encoding_get_charset (index_);
544 totem_subtitle_encoding_set (GtkComboBox * combo, const char *encoding)
547 GtkTreeIter iter, iter2;
550 g_return_if_fail (encoding != NULL);
552 model = gtk_combo_box_get_model (combo);
553 index_ = subtitle_encoding_get_index (encoding);
554 gtk_tree_model_get_iter_first (model, &iter);
556 if (!gtk_tree_model_iter_has_child (model, &iter))
558 if (!gtk_tree_model_iter_children (model, &iter2, &iter))
561 gtk_tree_model_get (model, &iter2, INDEX_COL, &i, -1);
564 } while (gtk_tree_model_iter_next (model, &iter2));
567 } while (gtk_tree_model_iter_next (model, &iter));
568 gtk_combo_box_set_active_iter (combo, &iter2);
572 totem_subtitle_encoding_init (GtkComboBox *combo)
575 subtitle_encoding_init ();
576 model = subtitle_encoding_create_store ();
577 gtk_combo_box_set_model (combo, model);
578 g_object_unref (model);
579 subtitle_encoding_combo_render (combo);
/*
 * vim: sw=2 ts=8 cindent noai bs=2
 */