aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: corvid <corvid@lavabit.com> 2009-05-26 20:25:00 -0400
committer: corvid <corvid@lavabit.com> 2009-05-26 20:25:00 -0400
commit: a11a9dbd6db6632073ec5560ec473b76ed9c5536 (patch)
tree: 4ecec941ca9fdfc3a777a7f37650d959b95ce999
parent: 905f51d5f73d99b45d8fd7d1bd1b1ce28796d981 (diff)
Minor utf8 refactor
-rw-r--r-- src/decode.c 12
-rw-r--r-- src/utf8.hh 7
2 files changed, 10 insertions, 9 deletions
diff --git a/src/decode.c b/src/decode.c
index b4dd5214..5a1b7bb8 100644
--- a/src/decode.c
+++ b/src/decode.c
@@ -15,6 +15,7 @@
#include <stdlib.h> /* strtol */
#include "decode.h"
+#include "utf8.hh"
#include "msg.h"
static const int bufsize = 8*1024;
@@ -164,15 +165,8 @@ static Dstr *Decode_charset(Decode *dc, const char *instr, int inlen)
if (rc == EILSEQ){
inPtr++;
inLeft--;
- /*
- * U+FFFD: "used to replace an incoming character whose value is
- * unknown or unrepresentable in Unicode."
- */
- //dStr_append(output, "\ufffd");
- // \uxxxx is C99. UTF-8-specific:
- dStr_append_c(output, 0xEF);
- dStr_append_c(output, 0xBF);
- dStr_append_c(output, 0xBD);
+ dStr_append_l(output, utf8_replacement_char,
+ sizeof(utf8_replacement_char) - 1);
}
}
dStr_erase(dc->leftover, 0, dc->leftover->len - inLeft);
diff --git a/src/utf8.hh b/src/utf8.hh
index cdf6b50a..eaf82b6f 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -8,6 +8,13 @@ extern "C" {
#include "d_size.h"
+/*
+ * Unicode replacement character U+FFFD
+ * "used to replace an incoming character whose value is unknown or otherwise
+ * unrepresentable in Unicode"
+ */
+const char utf8_replacement_char[] = "\xEF\xBF\xBD";
+
uint_t a_Utf8_end_of_char(const char *str, uint_t i);
uint_t a_Utf8_decode(const char*, const char* end, int* len);
int a_Utf8_encode(unsigned int ucs, char *buf);