xed/xedit/xedit-encodings.c

474 lines
11 KiB
C
Raw Permalink Normal View History

2011-11-07 13:46:58 -06:00
/*
 * xedit-encodings.c
 * This file is part of xedit
 *
 * Copyright (C) 2002-2005 Paolo Maggi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
/*
 * Modified by the xedit Team, 2002-2005. See the AUTHORS file for a
 * list of people on the xedit Team.
 * See the ChangeLog files for a list of changes.
 *
 * $Id$
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <glib/gi18n.h>
2016-01-25 08:13:49 -06:00
#include "xedit-encodings.h"
2011-11-07 13:46:58 -06:00
2016-01-25 08:13:49 -06:00
struct _XeditEncoding
2011-11-07 13:46:58 -06:00
{
gint index;
const gchar *charset;
const gchar *name;
};
/*
* The original versions of the following tables are taken from profterm
*
* Copyright (C) 2002 Red Hat, Inc.
*/
/*
 * Numeric identifiers for the known encodings.
 *
 * Every enumerator before XEDIT_ENCODING_LAST is used as a direct index
 * into the encodings[] table, so the two must list the same entries in the
 * same order.  XEDIT_ENCODING_UTF_8 and XEDIT_ENCODING_UNKNOWN come after
 * XEDIT_ENCODING_LAST because they have dedicated descriptors
 * (utf8_encoding and unknown_encoding) outside that table.
 */
typedef enum
{
	XEDIT_ENCODING_ISO_8859_1,
	XEDIT_ENCODING_ISO_8859_2,
	XEDIT_ENCODING_ISO_8859_3,
	XEDIT_ENCODING_ISO_8859_4,
	XEDIT_ENCODING_ISO_8859_5,
	XEDIT_ENCODING_ISO_8859_6,
	XEDIT_ENCODING_ISO_8859_7,
	XEDIT_ENCODING_ISO_8859_8,
	XEDIT_ENCODING_ISO_8859_9,
	XEDIT_ENCODING_ISO_8859_10,
	XEDIT_ENCODING_ISO_8859_13,
	XEDIT_ENCODING_ISO_8859_14,
	XEDIT_ENCODING_ISO_8859_15,
	XEDIT_ENCODING_ISO_8859_16,

	XEDIT_ENCODING_UTF_7,
	XEDIT_ENCODING_UTF_16,
	XEDIT_ENCODING_UTF_16_BE,
	XEDIT_ENCODING_UTF_16_LE,
	XEDIT_ENCODING_UTF_32,
	XEDIT_ENCODING_UCS_2,
	XEDIT_ENCODING_UCS_4,

	XEDIT_ENCODING_ARMSCII_8,
	XEDIT_ENCODING_BIG5,
	XEDIT_ENCODING_BIG5_HKSCS,
	XEDIT_ENCODING_CP_866,

	XEDIT_ENCODING_EUC_JP,
	XEDIT_ENCODING_EUC_JP_MS,
	XEDIT_ENCODING_CP932,
	XEDIT_ENCODING_EUC_KR,
	XEDIT_ENCODING_EUC_TW,

	XEDIT_ENCODING_GB18030,
	XEDIT_ENCODING_GB2312,
	XEDIT_ENCODING_GBK,
	XEDIT_ENCODING_GEOSTD8,

	XEDIT_ENCODING_IBM_850,
	XEDIT_ENCODING_IBM_852,
	XEDIT_ENCODING_IBM_855,
	XEDIT_ENCODING_IBM_857,
	XEDIT_ENCODING_IBM_862,
	XEDIT_ENCODING_IBM_864,

	XEDIT_ENCODING_ISO_2022_JP,
	XEDIT_ENCODING_ISO_2022_KR,
	XEDIT_ENCODING_ISO_IR_111,
	XEDIT_ENCODING_JOHAB,
	XEDIT_ENCODING_KOI8_R,
	XEDIT_ENCODING_KOI8__R,
	XEDIT_ENCODING_KOI8_U,

	XEDIT_ENCODING_SHIFT_JIS,
	XEDIT_ENCODING_TCVN,
	XEDIT_ENCODING_TIS_620,
	XEDIT_ENCODING_UHC,
	XEDIT_ENCODING_VISCII,

	XEDIT_ENCODING_WINDOWS_1250,
	XEDIT_ENCODING_WINDOWS_1251,
	XEDIT_ENCODING_WINDOWS_1252,
	XEDIT_ENCODING_WINDOWS_1253,
	XEDIT_ENCODING_WINDOWS_1254,
	XEDIT_ENCODING_WINDOWS_1255,
	XEDIT_ENCODING_WINDOWS_1256,
	XEDIT_ENCODING_WINDOWS_1257,
	XEDIT_ENCODING_WINDOWS_1258,

	/* number of entries in encodings[]; not an encoding itself */
	XEDIT_ENCODING_LAST,

	XEDIT_ENCODING_UTF_8,
	XEDIT_ENCODING_UNKNOWN

} XeditEncodingIndex;
2011-11-07 13:46:58 -06:00
2016-01-25 08:13:49 -06:00
/*
 * Descriptor for UTF-8.  It is kept outside the encodings[] table and is
 * the value handed out by xedit_encoding_get_utf8().
 */
static const XeditEncoding utf8_encoding = {
	XEDIT_ENCODING_UTF_8,
	"UTF-8",
	N_("Unicode")
};
2016-01-25 08:13:49 -06:00
/* initialized in xedit_encoding_lazy_init() */
static XeditEncoding unknown_encoding = {
XEDIT_ENCODING_UNKNOWN,
2011-11-07 13:46:58 -06:00
NULL,
NULL
};
2016-01-25 08:13:49 -06:00
static const XeditEncoding encodings [] = {
2011-11-07 13:46:58 -06:00
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_1,
2011-11-07 13:46:58 -06:00
"ISO-8859-1", N_("Western") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_2,
2011-11-07 13:46:58 -06:00
"ISO-8859-2", N_("Central European") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_3,
2011-11-07 13:46:58 -06:00
"ISO-8859-3", N_("South European") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_4,
2011-11-07 13:46:58 -06:00
"ISO-8859-4", N_("Baltic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_5,
2011-11-07 13:46:58 -06:00
"ISO-8859-5", N_("Cyrillic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_6,
2011-11-07 13:46:58 -06:00
"ISO-8859-6", N_("Arabic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_7,
2011-11-07 13:46:58 -06:00
"ISO-8859-7", N_("Greek") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_8,
2011-11-07 13:46:58 -06:00
"ISO-8859-8", N_("Hebrew Visual") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_9,
2011-11-07 13:46:58 -06:00
"ISO-8859-9", N_("Turkish") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_10,
2011-11-07 13:46:58 -06:00
"ISO-8859-10", N_("Nordic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_13,
2011-11-07 13:46:58 -06:00
"ISO-8859-13", N_("Baltic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_14,
2011-11-07 13:46:58 -06:00
"ISO-8859-14", N_("Celtic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_15,
2011-11-07 13:46:58 -06:00
"ISO-8859-15", N_("Western") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_8859_16,
2011-11-07 13:46:58 -06:00
"ISO-8859-16", N_("Romanian") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UTF_7,
2011-11-07 13:46:58 -06:00
"UTF-7", N_("Unicode") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UTF_16,
2011-11-07 13:46:58 -06:00
"UTF-16", N_("Unicode") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UTF_16_BE,
2011-11-07 13:46:58 -06:00
"UTF-16BE", N_("Unicode") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UTF_16_LE,
2011-11-07 13:46:58 -06:00
"UTF-16LE", N_("Unicode") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UTF_32,
2011-11-07 13:46:58 -06:00
"UTF-32", N_("Unicode") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UCS_2,
2011-11-07 13:46:58 -06:00
"UCS-2", N_("Unicode") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UCS_4,
2011-11-07 13:46:58 -06:00
"UCS-4", N_("Unicode") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ARMSCII_8,
2011-11-07 13:46:58 -06:00
"ARMSCII-8", N_("Armenian") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_BIG5,
2011-11-07 13:46:58 -06:00
"BIG5", N_("Chinese Traditional") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_BIG5_HKSCS,
2011-11-07 13:46:58 -06:00
"BIG5-HKSCS", N_("Chinese Traditional") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_CP_866,
2011-11-07 13:46:58 -06:00
"CP866", N_("Cyrillic/Russian") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_EUC_JP,
2011-11-07 13:46:58 -06:00
"EUC-JP", N_("Japanese") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_EUC_JP_MS,
2011-11-07 13:46:58 -06:00
"EUC-JP-MS", N_("Japanese") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_CP932,
2011-11-07 13:46:58 -06:00
"CP932", N_("Japanese") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_EUC_KR,
2011-11-07 13:46:58 -06:00
"EUC-KR", N_("Korean") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_EUC_TW,
2011-11-07 13:46:58 -06:00
"EUC-TW", N_("Chinese Traditional") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_GB18030,
2011-11-07 13:46:58 -06:00
"GB18030", N_("Chinese Simplified") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_GB2312,
2011-11-07 13:46:58 -06:00
"GB2312", N_("Chinese Simplified") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_GBK,
2011-11-07 13:46:58 -06:00
"GBK", N_("Chinese Simplified") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_GEOSTD8,
2011-11-07 13:46:58 -06:00
"GEORGIAN-ACADEMY", N_("Georgian") }, /* FIXME GEOSTD8 ? */
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_IBM_850,
2011-11-07 13:46:58 -06:00
"IBM850", N_("Western") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_IBM_852,
2011-11-07 13:46:58 -06:00
"IBM852", N_("Central European") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_IBM_855,
2011-11-07 13:46:58 -06:00
"IBM855", N_("Cyrillic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_IBM_857,
2011-11-07 13:46:58 -06:00
"IBM857", N_("Turkish") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_IBM_862,
2011-11-07 13:46:58 -06:00
"IBM862", N_("Hebrew") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_IBM_864,
2011-11-07 13:46:58 -06:00
"IBM864", N_("Arabic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_2022_JP,
2011-11-07 13:46:58 -06:00
"ISO-2022-JP", N_("Japanese") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_2022_KR,
2011-11-07 13:46:58 -06:00
"ISO-2022-KR", N_("Korean") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_ISO_IR_111,
2011-11-07 13:46:58 -06:00
"ISO-IR-111", N_("Cyrillic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_JOHAB,
2011-11-07 13:46:58 -06:00
"JOHAB", N_("Korean") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_KOI8_R,
2011-11-07 13:46:58 -06:00
"KOI8R", N_("Cyrillic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_KOI8__R,
2011-11-07 13:46:58 -06:00
"KOI8-R", N_("Cyrillic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_KOI8_U,
2011-11-07 13:46:58 -06:00
"KOI8U", N_("Cyrillic/Ukrainian") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_SHIFT_JIS,
2011-11-07 13:46:58 -06:00
"SHIFT_JIS", N_("Japanese") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_TCVN,
2011-11-07 13:46:58 -06:00
"TCVN", N_("Vietnamese") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_TIS_620,
2011-11-07 13:46:58 -06:00
"TIS-620", N_("Thai") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_UHC,
2011-11-07 13:46:58 -06:00
"UHC", N_("Korean") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_VISCII,
2011-11-07 13:46:58 -06:00
"VISCII", N_("Vietnamese") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1250,
2011-11-07 13:46:58 -06:00
"WINDOWS-1250", N_("Central European") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1251,
2011-11-07 13:46:58 -06:00
"WINDOWS-1251", N_("Cyrillic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1252,
2011-11-07 13:46:58 -06:00
"WINDOWS-1252", N_("Western") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1253,
2011-11-07 13:46:58 -06:00
"WINDOWS-1253", N_("Greek") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1254,
2011-11-07 13:46:58 -06:00
"WINDOWS-1254", N_("Turkish") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1255,
2011-11-07 13:46:58 -06:00
"WINDOWS-1255", N_("Hebrew") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1256,
2011-11-07 13:46:58 -06:00
"WINDOWS-1256", N_("Arabic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1257,
2011-11-07 13:46:58 -06:00
"WINDOWS-1257", N_("Baltic") },
2016-01-25 08:13:49 -06:00
{ XEDIT_ENCODING_WINDOWS_1258,
2011-11-07 13:46:58 -06:00
"WINDOWS-1258", N_("Vietnamese") }
};
static void
2016-01-25 08:13:49 -06:00
xedit_encoding_lazy_init (void)
2011-11-07 13:46:58 -06:00
{
static gboolean initialized = FALSE;
const gchar *locale_charset;
if (initialized)
return;
if (g_get_charset (&locale_charset) == FALSE)
{
unknown_encoding.charset = g_strdup (locale_charset);
}
initialized = TRUE;
}
2016-01-25 08:13:49 -06:00
const XeditEncoding *
xedit_encoding_get_from_charset (const gchar *charset)
2011-11-07 13:46:58 -06:00
{
gint i;
g_return_val_if_fail (charset != NULL, NULL);
2016-01-25 08:13:49 -06:00
xedit_encoding_lazy_init ();
2011-11-07 13:46:58 -06:00
if (charset == NULL)
return NULL;
if (g_ascii_strcasecmp (charset, "UTF-8") == 0)
2016-01-25 08:13:49 -06:00
return xedit_encoding_get_utf8 ();
2011-11-07 13:46:58 -06:00
i = 0;
2016-01-25 08:13:49 -06:00
while (i < XEDIT_ENCODING_LAST)
2011-11-07 13:46:58 -06:00
{
if (g_ascii_strcasecmp (charset, encodings[i].charset) == 0)
return &encodings[i];
++i;
}
if (unknown_encoding.charset != NULL)
{
if (g_ascii_strcasecmp (charset, unknown_encoding.charset) == 0)
return &unknown_encoding;
}
return NULL;
}
2016-01-25 08:13:49 -06:00
/**
 * xedit_encoding_get_from_index:
 * @idx: position in the encodings[] table, i.e. an XeditEncodingIndex
 *       value below XEDIT_ENCODING_LAST.
 *
 * Returns the table entry at @idx.
 *
 * Return value: the descriptor at @idx, or %NULL if @idx is negative or
 * not below XEDIT_ENCODING_LAST.  A negative @idx additionally triggers
 * the g_return_val_if_fail() warning.  The descriptor is static data and
 * must not be freed.
 **/
const XeditEncoding *
xedit_encoding_get_from_index (gint idx)
{
	g_return_val_if_fail (idx >= 0, NULL);

	/* The lower bound is re-checked here: the assertion above disappears
	 * when GLib is built with G_DISABLE_CHECKS, and a negative index
	 * would otherwise read before the start of encodings[]. */
	if (idx < 0 || idx >= XEDIT_ENCODING_LAST)
		return NULL;

	xedit_encoding_lazy_init ();

	return &encodings[idx];
}
2016-01-25 08:13:49 -06:00
const XeditEncoding *
xedit_encoding_get_utf8 (void)
2011-11-07 13:46:58 -06:00
{
2016-01-25 08:13:49 -06:00
xedit_encoding_lazy_init ();
2011-11-07 13:46:58 -06:00
return &utf8_encoding;
}
2016-01-25 08:13:49 -06:00
const XeditEncoding *
xedit_encoding_get_current (void)
2011-11-07 13:46:58 -06:00
{
static gboolean initialized = FALSE;
2016-01-25 08:13:49 -06:00
static const XeditEncoding *locale_encoding = NULL;
2011-11-07 13:46:58 -06:00
const gchar *locale_charset;
2016-01-25 08:13:49 -06:00
xedit_encoding_lazy_init ();
2011-11-07 13:46:58 -06:00
if (initialized != FALSE)
return locale_encoding;
if (g_get_charset (&locale_charset) == FALSE)
{
g_return_val_if_fail (locale_charset != NULL, &utf8_encoding);
2016-01-25 08:13:49 -06:00
locale_encoding = xedit_encoding_get_from_charset (locale_charset);
2011-11-07 13:46:58 -06:00
}
else
{
locale_encoding = &utf8_encoding;
}
if (locale_encoding == NULL)
{
locale_encoding = &unknown_encoding;
}
g_return_val_if_fail (locale_encoding != NULL, NULL);
initialized = TRUE;
return locale_encoding;
}
gchar *
2016-01-25 08:13:49 -06:00
xedit_encoding_to_string (const XeditEncoding* enc)
2011-11-07 13:46:58 -06:00
{
g_return_val_if_fail (enc != NULL, NULL);
2016-01-25 08:13:49 -06:00
xedit_encoding_lazy_init ();
2011-11-07 13:46:58 -06:00
g_return_val_if_fail (enc->charset != NULL, NULL);
if (enc->name != NULL)
{
return g_strdup_printf ("%s (%s)", _(enc->name), enc->charset);
}
else
{
if (g_ascii_strcasecmp (enc->charset, "ANSI_X3.4-1968") == 0)
return g_strdup_printf ("US-ASCII (%s)", enc->charset);
else
return g_strdup (enc->charset);
}
}
const gchar *
2016-01-25 08:13:49 -06:00
xedit_encoding_get_charset (const XeditEncoding* enc)
2011-11-07 13:46:58 -06:00
{
g_return_val_if_fail (enc != NULL, NULL);
2016-01-25 08:13:49 -06:00
xedit_encoding_lazy_init ();
2011-11-07 13:46:58 -06:00
g_return_val_if_fail (enc->charset != NULL, NULL);
return enc->charset;
}
const gchar *
2016-01-25 08:13:49 -06:00
xedit_encoding_get_name (const XeditEncoding* enc)
2011-11-07 13:46:58 -06:00
{
g_return_val_if_fail (enc != NULL, NULL);
2016-01-25 08:13:49 -06:00
xedit_encoding_lazy_init ();
2011-11-07 13:46:58 -06:00
return (enc->name == NULL) ? _("Unknown") : _(enc->name);
}
/* These are to make language bindings happy. Since Encodings are
 * const, copy() just returns the same pointer and free() does
 * nothing. */
2016-01-25 08:13:49 -06:00
XeditEncoding *
xedit_encoding_copy (const XeditEncoding *enc)
2011-11-07 13:46:58 -06:00
{
g_return_val_if_fail (enc != NULL, NULL);
2016-01-25 08:13:49 -06:00
return (XeditEncoding *) enc;
2011-11-07 13:46:58 -06:00
}
void
2016-01-25 08:13:49 -06:00
xedit_encoding_free (XeditEncoding *enc)
2011-11-07 13:46:58 -06:00
{
g_return_if_fail (enc != NULL);
}
/**
2016-01-25 08:13:49 -06:00
* xedit_encoding_get_type:
2011-11-07 13:46:58 -06:00
*
* Retrieves the GType object which is associated with the
2016-01-25 08:13:49 -06:00
* #XeditEncoding class.
2011-11-07 13:46:58 -06:00
*
2016-01-25 08:13:49 -06:00
* Return value: the GType associated with #XeditEncoding.
2011-11-07 13:46:58 -06:00
**/
GType
2016-01-25 08:13:49 -06:00
xedit_encoding_get_type (void)
2011-11-07 13:46:58 -06:00
{
static GType our_type = 0;
if (!our_type)
our_type = g_boxed_type_register_static (
2016-01-25 08:13:49 -06:00
"XeditEncoding",
(GBoxedCopyFunc) xedit_encoding_copy,
(GBoxedFreeFunc) xedit_encoding_free);
2011-11-07 13:46:58 -06:00
return our_type;
}