aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Johannes Hofmann <Johannes.Hofmann@gmx.de> 2009-05-25 18:42:24 +0200
committer Johannes Hofmann <Johannes.Hofmann@gmx.de> 2009-05-25 18:42:24 +0200
commit 50260728b2e2d2c9e61a13b54b6b973bdc48fae0 (patch)
tree 63b462a3d4cbbf445665714812331b2f7e968204 /src
parent 6d62e8cf2ed9fe4eda942a59ba140b151b82b228 (diff)
make tab expansion for plain text utf8 aware
In discussion with corvid <corvid@lavabit.com>.
Diffstat (limited to 'src')
-rw-r--r-- src/misc.c 32
-rw-r--r-- src/utf8.cc 10
-rw-r--r-- src/utf8.hh 1
3 files changed, 29 insertions, 14 deletions
diff --git a/src/misc.c b/src/misc.c
index 8cfb7003..d1a5352e 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -16,6 +16,7 @@
#include <string.h>
#include <ctype.h>
+#include "utf8.hh"
#include "msg.h"
#include "misc.h"
@@ -47,7 +48,6 @@ char *a_Misc_escape_chars(const char *str, const char *esc_set)
return p;
}
-
#define TAB_SIZE 8
/*
* Takes a string and converts any tabs to spaces.
@@ -55,23 +55,27 @@ char *a_Misc_escape_chars(const char *str, const char *esc_set)
char *a_Misc_expand_tabs(const char *str, int len)
{
Dstr *New = dStr_new("");
- int i, j, pos, old_pos;
+ int i = 0, j, pos = 0, old_pos, char_len;
+ uint_t code;
char *val;
- if (len) {
- for (pos = 0, i = 0; i < len; i++) {
- if (str[i] == '\t') {
- /* Fill with whitespaces until the next tab. */
- old_pos = pos;
- pos += TAB_SIZE - (pos % TAB_SIZE);
- for (j = old_pos; j < pos; j++)
- dStr_append_c(New, ' ');
- } else {
- dStr_append_c(New, str[i]);
- pos++;
- }
+ while (i < len) {
+ code = a_Utf8_decode(&str[i], str + len, &char_len);
+
+ if (code == '\t') {
+ /* Fill with spaces up to the next tab stop. */
+ old_pos = pos;
+ pos += TAB_SIZE - (pos % TAB_SIZE);
+ for (j = old_pos; j < pos; j++)
+ dStr_append_c(New, ' ');
+ } else {
+ dStr_append_l(New, &str[i], char_len);
+ pos++;
}
+
+ i += char_len;
}
+
val = New->str;
dStr_free(New, FALSE);
return val;
diff --git a/src/utf8.cc b/src/utf8.cc
index 261024fb..42f7e676 100644
--- a/src/utf8.cc
+++ b/src/utf8.cc
@@ -36,6 +36,16 @@ uint_t a_Utf8_end_of_char(const char *str, uint_t i)
}
/*
+ * Decode a single UTF-8-encoded character starting at str.
+ * The resulting Unicode value (in the range 0-0x10ffff) is returned,
+ * and len is set to the number of bytes in the UTF-8 encoding.
+ */
+uint_t a_Utf8_decode(const char* str, const char* end, int* len)
+{
+ return utf8decode(str, end, len);
+}
+
+/*
* Write UTF-8 encoding of ucs into buf and return number of bytes written.
*/
int a_Utf8_encode(unsigned int ucs, char *buf)
diff --git a/src/utf8.hh b/src/utf8.hh
index e861d600..cdf6b50a 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -9,6 +9,7 @@ extern "C" {
#include "d_size.h"
uint_t a_Utf8_end_of_char(const char *str, uint_t i);
+uint_t a_Utf8_decode(const char*, const char* end, int* len);
int a_Utf8_encode(unsigned int ucs, char *buf);
int a_Utf8_test(const char* src, unsigned int srclen);