Go to the documentation of this file.
25 #define VLC_CHARSET_H 1
67 while ((n =
vlc_towc(str, &cp)) != 0)
68 if (
likely(n != (
size_t)-1))
90 for (
const char *
p = str; (c = *
p) !=
'\0';
p++)
120 while ((n =
vlc_towc(str, &cp)) != 0)
121 if (
likely(n != (
size_t)-1))
138 #define VLC_ICONV_ERR ((size_t) -1)
156 # include <CoreFoundation/CoreFoundation.h>
161 VLC_USED static inline char *FromCFString(
const CFStringRef cfString,
162 const CFStringEncoding cfStringEncoding)
165 const char *tmpBuffer = CFStringGetCStringPtr(cfString, cfStringEncoding);
167 if (tmpBuffer != NULL) {
172 CFIndex length = CFStringGetLength(cfString);
174 CFStringGetMaximumSizeForEncoding(length, cfStringEncoding);
177 if (
unlikely(maxSize == kCFNotFound)) {
184 char *buffer = (
char *)malloc(maxSize);
191 Boolean success = CFStringGetCString(cfString, buffer, maxSize, cfStringEncoding);
201 static inline char *FromWide (
const wchar_t *wide)
203 size_t len = WideCharToMultiByte (CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
207 char *out = (
char *)malloc (len);
210 WideCharToMultiByte (CP_UTF8, 0, wide, -1, out, len, NULL, NULL);
215 static inline wchar_t *ToWide (
const char *utf8)
217 int len = MultiByteToWideChar (CP_UTF8, 0, utf8, -1, NULL, 0);
221 wchar_t *out = (
wchar_t *)malloc (len *
sizeof (
wchar_t));
224 MultiByteToWideChar (CP_UTF8, 0, utf8, -1, out, len);
229 static inline char *ToCodePage (
unsigned cp,
const char *utf8)
231 wchar_t *wide = ToWide (utf8);
235 size_t len = WideCharToMultiByte (cp, 0, wide, -1, NULL, 0, NULL, NULL);
241 char *out = (
char *)malloc (len);
243 WideCharToMultiByte (cp, 0, wide, -1, out, len, NULL, NULL);
249 static inline char *FromCodePage (
unsigned cp,
const char *mb)
251 int len = MultiByteToWideChar (cp, 0, mb, -1, NULL, 0);
255 wchar_t *wide = (
wchar_t *)malloc (len *
sizeof (
wchar_t));
258 MultiByteToWideChar (cp, 0, mb, -1, wide, len);
260 char *utf8 = FromWide (wide);
266 static inline char *FromANSI (
const char *ansi)
268 return FromCodePage (GetACP (), ansi);
272 static inline char *ToANSI (
const char *utf8)
274 return ToCodePage (GetACP (), utf8);
277 # define FromLocale FromANSI
278 # define ToLocale ToANSI
279 # define LocaleFree(s) free((char *)(s))
280 # define FromLocaleDup FromANSI
281 # define ToLocaleDup ToANSI
283 #elif defined(__OS2__)
287 return locale ?
FromCharset ((
char *)
"", locale, strlen(locale)) : NULL;
293 return utf8 ? (
char *)
ToCharset (
"", utf8, &outsize) : NULL;
309 return (
char *)
ToCharset (
"", utf8, &outsize);
314 # define FromLocale(l) (l)
315 # define ToLocale(u) (u)
316 # define LocaleFree(s) ((void)(s))
317 # define FromLocaleDup strdup
318 # define ToLocaleDup strdup
324 static inline char *
FromLatin1 (
const char *latin)
326 char *str = (
char *)malloc (2 * strlen (latin) + 1), *utf8 = str;
332 while ((c = *(latin++)) !=
'\0')
336 *(utf8++) = 0xC0 | (c >> 6);
337 *(utf8++) = 0x80 | (c & 0x3F);
344 utf8 = (
char *)realloc (str, utf8 - str);
345 return utf8 ? utf8 : str;
void * vlc_iconv_t
Definition: vlc_charset.h:140
VLC_EXPORT size_t vlc_towc(const char *str, uint32_t *restrict pwc)
Decodes a code point from UTF-8.
Definition: unicode.c:113
#define VLC_API
Definition: fourcc_gen.c:31
VLC_EXPORT int us_asprintf(char **, const char *,...)
us_asprintf() has the same prototype as asprintf(), but doesn't use the system locale.
Definition: charset.c:119
VLC_EXPORT double us_strtod(const char *, char **)
us_strtod() has the same prototype as ANSI C strtod() but it uses the POSIX/C decimal format,...
Definition: charset.c:50
#define unlikely(p)
Predicted false condition.
Definition: vlc_common.h:227
static const char * IsASCII(const char *str)
Checks ASCII validity.
Definition: vlc_charset.h:87
VLC_EXPORT int vlc_iconv_close(vlc_iconv_t)
static char * FromLatin1(const char *latin)
Converts a nul-terminated string from ISO-8859-1 to UTF-8.
Definition: vlc_charset.h:325
#define VLC_MALLOC
Heap allocated result function annotation.
Definition: vlc_common.h:167
VLC_EXPORT char * vlc_strcasestr(const char *, const char *)
Looks for a UTF-8 string within another one in a case-insensitive fashion.
Definition: unicode.c:198
#define FromLocaleDup
Definition: vlc_charset.h:318
static char * EnsureUTF8(char *str)
Removes non-UTF-8 sequences.
Definition: vlc_charset.h:115
#define LocaleFree(s)
Definition: vlc_charset.h:317
VLC_EXPORT size_t vlc_iconv(vlc_iconv_t, const char **, size_t *, char **, size_t *)
#define FromLocale(l)
Definition: vlc_charset.h:315
VLC_EXPORT double us_atof(const char *)
us_atof() has the same prototype as ANSI C atof() but it expects a dot as decimal separator,...
Definition: charset.c:88
VLC_EXPORT int us_vasprintf(char **, const char *, va_list)
us_vasprintf() has the same prototype as vasprintf(), but doesn't use the system locale.
Definition: charset.c:98
VLC_EXPORT vlc_iconv_t vlc_iconv_open(const char *, const char *)
VLC_EXPORT char * FromCharset(const char *charset, const void *data, size_t data_size)
Converts a string from the given character encoding to utf-8.
Definition: unicode.c:237
#define VLC_FORMAT(x, y)
String format function annotation.
Definition: vlc_common.h:141
#define ToLocaleDup
Definition: vlc_charset.h:319
#define FREENULL(a)
Definition: vlc_common.h:961
char * strdup(const char *)
#define VLC_USED
Definition: fourcc_gen.c:32
static const char * IsUTF8(const char *str)
Checks UTF-8 validity.
Definition: vlc_charset.h:63
VLC_EXPORT void * ToCharset(const char *charset, const char *in, size_t *outsize)
Converts a nul-terminated UTF-8 string to a given character encoding.
Definition: unicode.c:279
#define ToLocale(u)
Definition: vlc_charset.h:316
VLC_EXPORT int utf8_vfprintf(FILE *stream, const char *fmt, va_list ap)
Formats a UTF-8 string as vfprintf() does, then prints it, with appropriate conversion to the local encoding.
Definition: unicode.c:52
#define likely(p)
Predicted true condition.
Definition: vlc_common.h:218
VLC_EXPORT int utf8_fprintf(FILE *, const char *,...)
Formats a UTF-8 string as fprintf() does, then prints it, with appropriate conversion to the local encoding.
Definition: unicode.c:102
VLC_EXPORT float us_strtof(const char *, char **)
us_strtof() has the same prototype as ANSI C strtof() but it uses the POSIX/C decimal format,...
Definition: charset.c:69