Merge the smart converter into the document output stream

Based on 85279adad6 (diff-c243a4d5a7789c35057282ee780df3d5)
This commit is contained in:
JosephMcc
2017-01-19 03:29:38 -08:00
parent 2fb3813793
commit e3e9566570
9 changed files with 635 additions and 874 deletions

View File

@@ -40,9 +40,11 @@ test_consecutive_write (const gchar *inbuf,
GError *err = NULL;
gchar *b;
XedDocumentNewlineType type;
GSList *encodings = NULL;
doc = xed_document_new ();
out = xed_document_output_stream_new (doc);
encodings = g_slist_prepend (encodings, (gpointer)xed_encoding_get_utf8 ());
out = xed_document_output_stream_new (doc, encodings);
n = 0;
@@ -119,6 +121,237 @@ test_big_char ()
XED_DOCUMENT_NEWLINE_TYPE_LF);
}
/* SMART CONVERSION */
/* ASCII-only sample fed through the stream by the UTF-8 -> UTF-8 test. */
#define TEXT_TO_CONVERT "this is some text to make the tests"
/* Sample with a non-ASCII character in the middle: the bytes e6 96 87 are the
 * UTF-8 encoding of U+6587. Used as the input of the encoding-guess test. */
#define TEXT_TO_GUESS "hello \xe6\x96\x87 world"
/*
 * Debug helper: prints the first `len` bytes at `ptr` as a run of \xNN
 * escapes (two lowercase hex digits per byte) followed by a newline.
 * Nothing but the newline is printed when `len` is zero or negative.
 */
static void
print_hex (gchar *ptr, gint len)
{
    const gchar *cursor = ptr;
    gint remaining = len;

    while (remaining > 0)
    {
        /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
        g_printf ("\\x%02x", (unsigned char) *cursor);
        cursor++;
        remaining--;
    }

    g_printf ("\n");
}
/*
 * Converts `nread` bytes of `text` from the charset of `from` into the
 * charset of `to` using a GCharsetConverter.
 *
 * text:              input bytes.
 * nread:             number of input bytes, or (gsize) -1 to use strlen (text).
 * to / from:         target and source encodings.
 * bytes_written_aux: out; receives the number of converted bytes collected
 *                    (not counting the trailing NUL).
 * care_about_error:  TRUE  -> any conversion error aborts the test through
 *                             g_assert_no_error(); the result is NOT checked
 *                             for UTF-8 validity.
 *                    FALSE -> a conversion error, or a result that is not
 *                             valid UTF-8, makes the function return NULL.
 *
 * Returns a newly allocated, NUL-terminated buffer that the caller must
 * release with g_free(), or NULL as described above.
 */
static gchar *
get_encoded_text (const gchar       *text,
                  gsize              nread,
                  const XedEncoding *to,
                  const XedEncoding *from,
                  gsize             *bytes_written_aux,
                  gboolean           care_about_error)
{
    GCharsetConverter *converter;
    GString *collected;
    gchar chunk[200];
    gsize consumed = 0;
    GConverterResult res;
    GError *err = NULL;

    *bytes_written_aux = 0;

    if (nread == (gsize) -1)
    {
        nread = strlen (text);
    }

    /* Report construction failures through `err` instead of ignoring them. */
    converter = g_charset_converter_new (xed_encoding_get_charset (to),
                                         xed_encoding_get_charset (from),
                                         &err);

    /* A GString grows on demand and stays NUL-terminated even when the
     * converted data contains embedded NUL bytes (e.g. UTF-16 output). */
    collected = g_string_new (NULL);

    if (converter != NULL)
    {
        do
        {
            gsize bytes_read = 0;
            gsize bytes_written = 0;

            res = g_converter_convert (G_CONVERTER (converter),
                                       text + consumed,
                                       nread - consumed,
                                       chunk,
                                       sizeof (chunk),
                                       G_CONVERTER_INPUT_AT_END,
                                       &bytes_read,
                                       &bytes_written,
                                       &err);

            /* On error the byte counters carry no meaning: stop before
             * copying anything out of the scratch buffer. */
            if (res == G_CONVERTER_ERROR)
            {
                break;
            }

            g_string_append_len (collected, chunk, bytes_written);
            consumed += bytes_read;
        } while (res != G_CONVERTER_FINISHED);

        g_object_unref (converter);
    }

    *bytes_written_aux = collected->len;

    if (care_about_error)
    {
        g_assert_no_error (err);
    }
    else if (err)
    {
        g_printf ("** You don't care, but there was an error: %s", err->message);
        g_error_free (err);
        g_string_free (collected, TRUE);
        return NULL;
    }

    /* Validation only applies when errors are tolerated. Callers that pass
     * care_about_error == TRUE may legitimately produce non-UTF-8 output
     * (test_guessed converts to UTF-16), so their result is returned as is. */
    if (!care_about_error && !g_utf8_validate (collected->str, collected->len, NULL))
    {
        g_string_free (collected, TRUE);
        return NULL;
    }

    /* Hand the character data over to the caller. */
    return g_string_free (collected, FALSE);
}
static GSList *
get_all_encodings ()
{
GSList *encs = NULL;
gint i = 0;
while (TRUE)
{
const XedEncoding *enc;
enc = xed_encoding_get_from_index (i);
if (enc == NULL)
break;
encs = g_slist_prepend (encs, (gpointer)enc);
i++;
}
return encs;
}
/*
 * Writes `nread` bytes of `test_in` into a new document through a document
 * output stream and returns the text the document holds afterwards.
 *
 * test_in:   raw bytes to write.
 * enc:       if non-NULL, a charset name; the candidate list then consists of
 *            that single encoding and the `encodings` argument is ignored.
 * encodings: candidate encodings, used only when `enc` is NULL.
 * nread:     number of bytes of `test_in` to write.
 * guessed:   if non-NULL, receives the encoding reported by
 *            xed_document_output_stream_get_guessed() after the stream is
 *            closed.
 *
 * UTF-8 is always prepended to the candidate list before the stream is
 * created. Any write, flush or close error aborts via g_assert_no_error().
 *
 * Returns the string produced by gtk_text_buffer_get_text().
 */
static gchar *
do_test (const gchar *test_in,
         const gchar *enc,
         GSList *encodings,
         gsize nread,
         const XedEncoding **guessed)
{
    XedDocument *document;
    GOutputStream *stream;
    GtkTextBuffer *buffer;
    GtkTextIter first, last;
    GError *error = NULL;
    gchar *contents;

    /* An explicit charset replaces whatever list the caller passed in. */
    if (enc != NULL)
    {
        encodings = g_slist_prepend (NULL, (gpointer) xed_encoding_get_from_charset (enc));
    }

    document = xed_document_new ();
    encodings = g_slist_prepend (encodings, (gpointer) xed_encoding_get_utf8 ());
    stream = xed_document_output_stream_new (document, encodings);

    g_output_stream_write (stream, test_in, nread, NULL, &error);
    g_assert_no_error (error);

    g_output_stream_flush (stream, NULL, &error);
    g_assert_no_error (error);

    g_output_stream_close (stream, NULL, &error);
    g_assert_no_error (error);

    if (guessed)
    {
        *guessed = xed_document_output_stream_get_guessed (XED_DOCUMENT_OUTPUT_STREAM (stream));
    }

    buffer = GTK_TEXT_BUFFER (document);
    gtk_text_buffer_get_bounds (buffer, &first, &last);
    contents = gtk_text_buffer_get_text (buffer, &first, &last, FALSE);

    g_object_unref (document);
    g_object_unref (stream);

    return contents;
}
/*
 * UTF-8 input with UTF-8 as the explicit encoding must reach the document
 * unchanged: a plain ASCII string, a string with two-byte characters, and a
 * prefix of that string that ends exactly on a character boundary.
 */
static void
test_utf8_utf8 ()
{
    gchar *aux;

    aux = do_test (TEXT_TO_CONVERT, "UTF-8", NULL, strlen (TEXT_TO_CONVERT), NULL);
    g_assert_cmpstr (aux, ==, TEXT_TO_CONVERT);
    /* do_test() returns the string from gtk_text_buffer_get_text(), which
     * the caller owns. */
    g_free (aux);

    /* 6 ASCII bytes + 3 two-byte characters + 6 ASCII bytes = 18 bytes. */
    aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 18, NULL);
    g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
    g_free (aux);

    /* Only the first 12 bytes are written: the trailing "zzzzzz" is cut off. */
    aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 12, NULL);
    g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3\xa8\xc3\xa8");
    g_free (aux);

    /* FIXME: Use the utf8 stream for a fallback? */
    //do_test_with_error ("\xef\xbf\xbezzzzzz", encs, G_IO_ERROR_FAILED);
}
/*
 * Empty input: the document must stay empty and the guessed encoding must be
 * UTF-8.
 */
static void
test_empty_conversion ()
{
    const XedEncoding *guessed;
    gchar *out;
    GSList *encodings = NULL;

    /* testing the case of an empty file and a list of encodings built here
       without utf-8 (note that do_test() itself prepends utf-8 before
       creating the stream). In this case, the smart converter cannot
       determine the right encoding (because there is no input), but should
       still default to utf-8 for the detection */
    encodings = g_slist_prepend (encodings, (gpointer)xed_encoding_get_from_charset ("UTF-16"));
    encodings = g_slist_prepend (encodings, (gpointer)xed_encoding_get_from_charset ("ISO-8859-15"));

    out = do_test ("", NULL, encodings, 0, &guessed);

    g_assert_cmpstr (out, ==, "");
    g_assert (guessed == xed_encoding_get_utf8 ());

    /* do_test() returns the string from gtk_text_buffer_get_text(), which
     * the caller owns. */
    g_free (out);

    /* NOTE(review): `encodings` is not freed here because do_test() prepends
     * to it and hands it to the stream; whether the stream keeps or copies
     * the list is not visible in this file -- confirm before freeing. */
}
/*
 * Encodes TEXT_TO_GUESS as UTF-16, checks that get_encoded_text() rejects an
 * ISO-8859-15 reading of those bytes, then feeds them to the stream with the
 * candidates ISO-8859-15 and UTF-16 and expects UTF-16 to be guessed.
 */
static void
test_guessed ()
{
    GSList *encs = NULL;
    gchar *aux, *aux2, *fail;
    gsize aux_len, fail_len;
    const XedEncoding *guessed;

    /* UTF-8 -> UTF-16; any conversion error aborts the test. */
    aux = get_encoded_text (TEXT_TO_GUESS, -1,
                            xed_encoding_get_from_charset ("UTF-16"),
                            xed_encoding_get_from_charset ("UTF-8"),
                            &aux_len,
                            TRUE);

    /* ISO-8859-15 should fail */
    fail = get_encoded_text (aux, aux_len,
                             xed_encoding_get_from_charset ("UTF-8"),
                             xed_encoding_get_from_charset ("ISO-8859-15"),
                             &fail_len,
                             FALSE);
    g_assert (fail == NULL);

    encs = g_slist_append (encs, (gpointer)xed_encoding_get_from_charset ("ISO-8859-15"));
    encs = g_slist_append (encs, (gpointer)xed_encoding_get_from_charset ("UTF-16"));

    aux2 = do_test (aux, NULL, encs, aux_len, &guessed);

    g_assert (guessed == xed_encoding_get_from_charset ("UTF-16"));

    /* Both buffers are owned by this function: aux2 comes from
     * gtk_text_buffer_get_text() via do_test(), aux is the allocation
     * returned by get_encoded_text(). */
    g_free (aux2);
    g_free (aux);

    /* NOTE(review): `encs` is not freed here because do_test() prepends to
     * it and hands it to the stream; whether the stream keeps or copies the
     * list is not visible in this file -- confirm before freeing. */
}
int main (int argc,
char *argv[])
{
@@ -130,5 +363,9 @@ int main (int argc,
g_test_add_func ("/document-output-stream/consecutive_tnewline", test_consecutive_tnewline);
g_test_add_func ("/document-output-stream/big-char", test_big_char);
g_test_add_func ("/document-output-stream/smart conversion: utf8-utf8", test_utf8_utf8);
g_test_add_func ("/document-output-stream/smart conversion: guessed", test_guessed);
g_test_add_func ("/document-output-stream/smart conversion: empty", test_empty_conversion);
return g_test_run ();
}