You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
150 lines
4.4 KiB
150 lines
4.4 KiB
/* Copyright (C) 1999-2001, 2003, 2011 Bruno Haible.
   This file is not part of the GNU LIBICONV Library.
   This file is put into the public domain.  */
|
|
|
#include "iconv_string.h" |
|
#include <iconv.h> |
|
#include <errno.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
|
|
/* Size of the scratch buffer used while measuring the output length. */
#define tmpbufsize 4096

/* Candidate source encodings for the "autodetect_*" pseudo-encodings, listed
   in the order in which they are tried.  Every list is NULL-terminated.  */

/* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
   be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1.  */
static const char *const iconv_string_utf8_candidates[] = {
  "UTF-8", "ISO-8859-1", NULL
};

/* Try the 7-bit encoding first. If the input contains bytes >= 0x80,
   it will fail.
   Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
   is unavoidable. People will condemn SHIFT_JIS.
   If we tried SHIFT_JIS first, then some short EUC-JP inputs would
   come out wrong, and people would condemn EUC-JP and Unix, which
   would not be good.
   Finally try SHIFT_JIS.  */
static const char *const iconv_string_jp_candidates[] = {
  "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", NULL
};

/* Try the 7-bit encoding first. If the input contains bytes >= 0x80,
   it will fail.  Finally try EUC-KR.  */
static const char *const iconv_string_kr_candidates[] = {
  "ISO-2022-KR", "EUC-KR", NULL
};

/* Maps an "autodetect_*" pseudo-encoding name to its candidate list, or
   returns NULL when FROMCODE is not one of the recognized names.  */
static const char *const *
iconv_string_autodetect_candidates (const char *fromcode)
{
  if (strcmp(fromcode, "autodetect_utf8") == 0)
    return iconv_string_utf8_candidates;
  if (strcmp(fromcode, "autodetect_jp") == 0)
    return iconv_string_jp_candidates;
  if (strcmp(fromcode, "autodetect_kr") == 0)
    return iconv_string_kr_candidates;
  return NULL;
}

/* Closes CD, then sets errno to ERRNO_VALUE (iconv_close may clobber errno).
   Always returns -1 so callers can write "return iconv_string_fail(...)".  */
static int
iconv_string_fail (iconv_t cd, int errno_value)
{
  iconv_close(cd);
  errno = errno_value;
  return -1;
}

/* Converts the bytes in [START, END) from encoding FROMCODE to encoding
   TOCODE.

   FROMCODE may also be "autodetect_utf8", "autodetect_jp" or "autodetect_kr"
   when iconv_open() itself rejects that name with EINVAL; the candidate
   encodings are then tried in order, moving to the next one only when the
   previous attempt failed with EILSEQ.

   LENGTHP, if not NULL, receives the number of output bytes.
   RESULTP, if not NULL, must point to either NULL or a malloc'ed block.  On
   success the block is (re)allocated to hold the output and *RESULTP is
   updated; the block is always at least one byte so that a successful call
   yields a non-NULL pointer even for empty output.  If the allocation fails,
   *RESULTP is left untouched, so the caller's block stays valid and owned by
   the caller.  If RESULTP is NULL only the length is computed.

   Returns 0 on success, or -1 with errno set.  An input that ends in the
   middle of a multibyte sequence is reported as EILSEQ.  */
int iconv_string (const char* tocode, const char* fromcode,
                  const char* start, const char* end,
                  char** resultp, size_t* lengthp)
{
  iconv_t cd = iconv_open(tocode, fromcode);
  size_t length;
  char *result;

  if (cd == (iconv_t)(-1)) {
    const char *const *candidate;

    if (errno != EINVAL)
      return -1;
    /* Unsupported fromcode or tocode. Check whether the caller requested
       autodetection.  */
    candidate = iconv_string_autodetect_candidates(fromcode);
    if (candidate == NULL) {
      errno = EINVAL;
      return -1;
    }
    for (;;) {
      int ret = iconv_string(tocode, *candidate, start, end, resultp, lengthp);
      candidate++;
      /* The last candidate's outcome is final; an earlier candidate is only
         abandoned when the input was not valid in that encoding.  */
      if (*candidate == NULL || !(ret < 0 && errno == EILSEQ))
        return ret;
    }
  }

  /* Determine the length we need, by converting chunk-wise into a scratch
     buffer and summing up the bytes produced.  */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    /* POSIX declares iconv()'s input argument as 'char **'.  iconv() never
       writes through it, so dropping the qualifier here is safe.  */
    char *inptr = (char *) start;
    size_t insize = (size_t) (end - start);

    while (insize > 0) {
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv(cd, &inptr, &insize, &outptr, &outsize);
      /* E2BIG only means the scratch buffer filled up; keep going.  A
         truncated trailing sequence (EINVAL) is treated as invalid input.  */
      if (res == (size_t)(-1) && errno != E2BIG)
        return iconv_string_fail(cd, errno == EINVAL ? EILSEQ : errno);
      count += (size_t) (outptr - tmpbuf);
    }
    {
      /* Account for the bytes needed to return to the initial shift state. */
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      if (iconv(cd, NULL, NULL, &outptr, &outsize) == (size_t)(-1))
        return iconv_string_fail(cd, errno);
      count += (size_t) (outptr - tmpbuf);
    }
    length = count;
  }

  if (lengthp != NULL)
    *lengthp = length;
  if (resultp == NULL) {
    iconv_close(cd);
    return 0;
  }

  /* realloc(NULL, n) behaves like malloc(n).  Never request zero bytes: the
     result of a zero-sized (re)allocation is implementation-defined.  Only
     publish the new pointer on success so the caller's block is not lost.  */
  result = realloc(*resultp, length > 0 ? length : 1);
  if (result == NULL)
    return iconv_string_fail(cd, ENOMEM);
  *resultp = result;
  if (length == 0) {
    iconv_close(cd);
    return 0;
  }

  iconv(cd, NULL, NULL, NULL, NULL); /* return to the initial state */

  /* Do the conversion for real. */
  {
    char *inptr = (char *) start;
    size_t insize = (size_t) (end - start);
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0) {
      size_t res = iconv(cd, &inptr, &insize, &outptr, &outsize);
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          break;
        return iconv_string_fail(cd, errno);
      }
    }
    if (iconv(cd, NULL, NULL, &outptr, &outsize) == (size_t)(-1))
      return iconv_string_fail(cd, errno);
    /* Both passes must produce exactly the same number of bytes. */
    if (outsize != 0) abort();
  }

  iconv_close(cd);
  return 0;
}
|
|
|