xed/pluma/pluma-encodings.c

474 lines
11 KiB
C

/*
* pluma-encodings.c
* This file is part of pluma
*
* Copyright (C) 2002-2005 Paolo Maggi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/*
* Modified by the pluma Team, 2002-2005. See the AUTHORS file for a
* list of people on the pluma Team.
* See the ChangeLog files for a list of changes.
*
* $Id$
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <glib/gi18n.h>
#include "pluma-encodings.h"
struct _PlumaEncoding
{
gint index;
const gchar *charset;
const gchar *name;
};
/*
* The original versions of the following tables are taken from profterm
*
* Copyright (C) 2002 Red Hat, Inc.
*/
typedef enum
{
PLUMA_ENCODING_ISO_8859_1,
PLUMA_ENCODING_ISO_8859_2,
PLUMA_ENCODING_ISO_8859_3,
PLUMA_ENCODING_ISO_8859_4,
PLUMA_ENCODING_ISO_8859_5,
PLUMA_ENCODING_ISO_8859_6,
PLUMA_ENCODING_ISO_8859_7,
PLUMA_ENCODING_ISO_8859_8,
PLUMA_ENCODING_ISO_8859_9,
PLUMA_ENCODING_ISO_8859_10,
PLUMA_ENCODING_ISO_8859_13,
PLUMA_ENCODING_ISO_8859_14,
PLUMA_ENCODING_ISO_8859_15,
PLUMA_ENCODING_ISO_8859_16,
PLUMA_ENCODING_UTF_7,
PLUMA_ENCODING_UTF_16,
PLUMA_ENCODING_UTF_16_BE,
PLUMA_ENCODING_UTF_16_LE,
PLUMA_ENCODING_UTF_32,
PLUMA_ENCODING_UCS_2,
PLUMA_ENCODING_UCS_4,
PLUMA_ENCODING_ARMSCII_8,
PLUMA_ENCODING_BIG5,
PLUMA_ENCODING_BIG5_HKSCS,
PLUMA_ENCODING_CP_866,
PLUMA_ENCODING_EUC_JP,
PLUMA_ENCODING_EUC_JP_MS,
PLUMA_ENCODING_CP932,
PLUMA_ENCODING_EUC_KR,
PLUMA_ENCODING_EUC_TW,
PLUMA_ENCODING_GB18030,
PLUMA_ENCODING_GB2312,
PLUMA_ENCODING_GBK,
PLUMA_ENCODING_GEOSTD8,
PLUMA_ENCODING_IBM_850,
PLUMA_ENCODING_IBM_852,
PLUMA_ENCODING_IBM_855,
PLUMA_ENCODING_IBM_857,
PLUMA_ENCODING_IBM_862,
PLUMA_ENCODING_IBM_864,
PLUMA_ENCODING_ISO_2022_JP,
PLUMA_ENCODING_ISO_2022_KR,
PLUMA_ENCODING_ISO_IR_111,
PLUMA_ENCODING_JOHAB,
PLUMA_ENCODING_KOI8_R,
PLUMA_ENCODING_KOI8__R,
PLUMA_ENCODING_KOI8_U,
PLUMA_ENCODING_SHIFT_JIS,
PLUMA_ENCODING_TCVN,
PLUMA_ENCODING_TIS_620,
PLUMA_ENCODING_UHC,
PLUMA_ENCODING_VISCII,
PLUMA_ENCODING_WINDOWS_1250,
PLUMA_ENCODING_WINDOWS_1251,
PLUMA_ENCODING_WINDOWS_1252,
PLUMA_ENCODING_WINDOWS_1253,
PLUMA_ENCODING_WINDOWS_1254,
PLUMA_ENCODING_WINDOWS_1255,
PLUMA_ENCODING_WINDOWS_1256,
PLUMA_ENCODING_WINDOWS_1257,
PLUMA_ENCODING_WINDOWS_1258,
PLUMA_ENCODING_LAST,
PLUMA_ENCODING_UTF_8,
PLUMA_ENCODING_UNKNOWN
} PlumaEncodingIndex;
static const PlumaEncoding utf8_encoding = {
PLUMA_ENCODING_UTF_8,
"UTF-8",
N_("Unicode")
};
/* initialized in pluma_encoding_lazy_init() */
static PlumaEncoding unknown_encoding = {
PLUMA_ENCODING_UNKNOWN,
NULL,
NULL
};
static const PlumaEncoding encodings [] = {
{ PLUMA_ENCODING_ISO_8859_1,
"ISO-8859-1", N_("Western") },
{ PLUMA_ENCODING_ISO_8859_2,
"ISO-8859-2", N_("Central European") },
{ PLUMA_ENCODING_ISO_8859_3,
"ISO-8859-3", N_("South European") },
{ PLUMA_ENCODING_ISO_8859_4,
"ISO-8859-4", N_("Baltic") },
{ PLUMA_ENCODING_ISO_8859_5,
"ISO-8859-5", N_("Cyrillic") },
{ PLUMA_ENCODING_ISO_8859_6,
"ISO-8859-6", N_("Arabic") },
{ PLUMA_ENCODING_ISO_8859_7,
"ISO-8859-7", N_("Greek") },
{ PLUMA_ENCODING_ISO_8859_8,
"ISO-8859-8", N_("Hebrew Visual") },
{ PLUMA_ENCODING_ISO_8859_9,
"ISO-8859-9", N_("Turkish") },
{ PLUMA_ENCODING_ISO_8859_10,
"ISO-8859-10", N_("Nordic") },
{ PLUMA_ENCODING_ISO_8859_13,
"ISO-8859-13", N_("Baltic") },
{ PLUMA_ENCODING_ISO_8859_14,
"ISO-8859-14", N_("Celtic") },
{ PLUMA_ENCODING_ISO_8859_15,
"ISO-8859-15", N_("Western") },
{ PLUMA_ENCODING_ISO_8859_16,
"ISO-8859-16", N_("Romanian") },
{ PLUMA_ENCODING_UTF_7,
"UTF-7", N_("Unicode") },
{ PLUMA_ENCODING_UTF_16,
"UTF-16", N_("Unicode") },
{ PLUMA_ENCODING_UTF_16_BE,
"UTF-16BE", N_("Unicode") },
{ PLUMA_ENCODING_UTF_16_LE,
"UTF-16LE", N_("Unicode") },
{ PLUMA_ENCODING_UTF_32,
"UTF-32", N_("Unicode") },
{ PLUMA_ENCODING_UCS_2,
"UCS-2", N_("Unicode") },
{ PLUMA_ENCODING_UCS_4,
"UCS-4", N_("Unicode") },
{ PLUMA_ENCODING_ARMSCII_8,
"ARMSCII-8", N_("Armenian") },
{ PLUMA_ENCODING_BIG5,
"BIG5", N_("Chinese Traditional") },
{ PLUMA_ENCODING_BIG5_HKSCS,
"BIG5-HKSCS", N_("Chinese Traditional") },
{ PLUMA_ENCODING_CP_866,
"CP866", N_("Cyrillic/Russian") },
{ PLUMA_ENCODING_EUC_JP,
"EUC-JP", N_("Japanese") },
{ PLUMA_ENCODING_EUC_JP_MS,
"EUC-JP-MS", N_("Japanese") },
{ PLUMA_ENCODING_CP932,
"CP932", N_("Japanese") },
{ PLUMA_ENCODING_EUC_KR,
"EUC-KR", N_("Korean") },
{ PLUMA_ENCODING_EUC_TW,
"EUC-TW", N_("Chinese Traditional") },
{ PLUMA_ENCODING_GB18030,
"GB18030", N_("Chinese Simplified") },
{ PLUMA_ENCODING_GB2312,
"GB2312", N_("Chinese Simplified") },
{ PLUMA_ENCODING_GBK,
"GBK", N_("Chinese Simplified") },
{ PLUMA_ENCODING_GEOSTD8,
"GEORGIAN-ACADEMY", N_("Georgian") }, /* FIXME GEOSTD8 ? */
{ PLUMA_ENCODING_IBM_850,
"IBM850", N_("Western") },
{ PLUMA_ENCODING_IBM_852,
"IBM852", N_("Central European") },
{ PLUMA_ENCODING_IBM_855,
"IBM855", N_("Cyrillic") },
{ PLUMA_ENCODING_IBM_857,
"IBM857", N_("Turkish") },
{ PLUMA_ENCODING_IBM_862,
"IBM862", N_("Hebrew") },
{ PLUMA_ENCODING_IBM_864,
"IBM864", N_("Arabic") },
{ PLUMA_ENCODING_ISO_2022_JP,
"ISO-2022-JP", N_("Japanese") },
{ PLUMA_ENCODING_ISO_2022_KR,
"ISO-2022-KR", N_("Korean") },
{ PLUMA_ENCODING_ISO_IR_111,
"ISO-IR-111", N_("Cyrillic") },
{ PLUMA_ENCODING_JOHAB,
"JOHAB", N_("Korean") },
{ PLUMA_ENCODING_KOI8_R,
"KOI8R", N_("Cyrillic") },
{ PLUMA_ENCODING_KOI8__R,
"KOI8-R", N_("Cyrillic") },
{ PLUMA_ENCODING_KOI8_U,
"KOI8U", N_("Cyrillic/Ukrainian") },
{ PLUMA_ENCODING_SHIFT_JIS,
"SHIFT_JIS", N_("Japanese") },
{ PLUMA_ENCODING_TCVN,
"TCVN", N_("Vietnamese") },
{ PLUMA_ENCODING_TIS_620,
"TIS-620", N_("Thai") },
{ PLUMA_ENCODING_UHC,
"UHC", N_("Korean") },
{ PLUMA_ENCODING_VISCII,
"VISCII", N_("Vietnamese") },
{ PLUMA_ENCODING_WINDOWS_1250,
"WINDOWS-1250", N_("Central European") },
{ PLUMA_ENCODING_WINDOWS_1251,
"WINDOWS-1251", N_("Cyrillic") },
{ PLUMA_ENCODING_WINDOWS_1252,
"WINDOWS-1252", N_("Western") },
{ PLUMA_ENCODING_WINDOWS_1253,
"WINDOWS-1253", N_("Greek") },
{ PLUMA_ENCODING_WINDOWS_1254,
"WINDOWS-1254", N_("Turkish") },
{ PLUMA_ENCODING_WINDOWS_1255,
"WINDOWS-1255", N_("Hebrew") },
{ PLUMA_ENCODING_WINDOWS_1256,
"WINDOWS-1256", N_("Arabic") },
{ PLUMA_ENCODING_WINDOWS_1257,
"WINDOWS-1257", N_("Baltic") },
{ PLUMA_ENCODING_WINDOWS_1258,
"WINDOWS-1258", N_("Vietnamese") }
};
static void
pluma_encoding_lazy_init (void)
{
static gboolean initialized = FALSE;
const gchar *locale_charset;
if (initialized)
return;
if (g_get_charset (&locale_charset) == FALSE)
{
unknown_encoding.charset = g_strdup (locale_charset);
}
initialized = TRUE;
}
const PlumaEncoding *
pluma_encoding_get_from_charset (const gchar *charset)
{
gint i;
g_return_val_if_fail (charset != NULL, NULL);
pluma_encoding_lazy_init ();
if (charset == NULL)
return NULL;
if (g_ascii_strcasecmp (charset, "UTF-8") == 0)
return pluma_encoding_get_utf8 ();
i = 0;
while (i < PLUMA_ENCODING_LAST)
{
if (g_ascii_strcasecmp (charset, encodings[i].charset) == 0)
return &encodings[i];
++i;
}
if (unknown_encoding.charset != NULL)
{
if (g_ascii_strcasecmp (charset, unknown_encoding.charset) == 0)
return &unknown_encoding;
}
return NULL;
}
const PlumaEncoding *
pluma_encoding_get_from_index (gint idx)
{
g_return_val_if_fail (idx >= 0, NULL);
if (idx >= PLUMA_ENCODING_LAST)
return NULL;
pluma_encoding_lazy_init ();
return &encodings[idx];
}
const PlumaEncoding *
pluma_encoding_get_utf8 (void)
{
pluma_encoding_lazy_init ();
return &utf8_encoding;
}
const PlumaEncoding *
pluma_encoding_get_current (void)
{
static gboolean initialized = FALSE;
static const PlumaEncoding *locale_encoding = NULL;
const gchar *locale_charset;
pluma_encoding_lazy_init ();
if (initialized != FALSE)
return locale_encoding;
if (g_get_charset (&locale_charset) == FALSE)
{
g_return_val_if_fail (locale_charset != NULL, &utf8_encoding);
locale_encoding = pluma_encoding_get_from_charset (locale_charset);
}
else
{
locale_encoding = &utf8_encoding;
}
if (locale_encoding == NULL)
{
locale_encoding = &unknown_encoding;
}
g_return_val_if_fail (locale_encoding != NULL, NULL);
initialized = TRUE;
return locale_encoding;
}
gchar *
pluma_encoding_to_string (const PlumaEncoding* enc)
{
g_return_val_if_fail (enc != NULL, NULL);
pluma_encoding_lazy_init ();
g_return_val_if_fail (enc->charset != NULL, NULL);
if (enc->name != NULL)
{
return g_strdup_printf ("%s (%s)", _(enc->name), enc->charset);
}
else
{
if (g_ascii_strcasecmp (enc->charset, "ANSI_X3.4-1968") == 0)
return g_strdup_printf ("US-ASCII (%s)", enc->charset);
else
return g_strdup (enc->charset);
}
}
const gchar *
pluma_encoding_get_charset (const PlumaEncoding* enc)
{
g_return_val_if_fail (enc != NULL, NULL);
pluma_encoding_lazy_init ();
g_return_val_if_fail (enc->charset != NULL, NULL);
return enc->charset;
}
const gchar *
pluma_encoding_get_name (const PlumaEncoding* enc)
{
g_return_val_if_fail (enc != NULL, NULL);
pluma_encoding_lazy_init ();
return (enc->name == NULL) ? _("Unknown") : _(enc->name);
}
/* These are to make language bindings happy. Since Encodings are
* const, copy() just returns the same pointer and fres() doesn't
* do nothing */
PlumaEncoding *
pluma_encoding_copy (const PlumaEncoding *enc)
{
g_return_val_if_fail (enc != NULL, NULL);
return (PlumaEncoding *) enc;
}
void
pluma_encoding_free (PlumaEncoding *enc)
{
g_return_if_fail (enc != NULL);
}
/**
* pluma_encoding_get_type:
*
* Retrieves the GType object which is associated with the
* #PlumaEncoding class.
*
* Return value: the GType associated with #PlumaEncoding.
**/
GType
pluma_encoding_get_type (void)
{
static GType our_type = 0;
if (!our_type)
our_type = g_boxed_type_register_static (
"PlumaEncoding",
(GBoxedCopyFunc) pluma_encoding_copy,
(GBoxedFreeFunc) pluma_encoding_free);
return our_type;
}