blob: 9748059c225b26bb295a8ae8a42c8ff685046f09 [file] [log] [blame]
// Copyright 2014 The Android Open Source Project
//
// This software is licensed under the terms of the GNU General Public
// License version 2, as published by the Free Software Foundation, and
// may be copied, distributed, and modified under those terms.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
#include "android/base/misc/Utf8Utils.h"
namespace android {
namespace base {
bool utf8IsValid(const char* text, size_t textLen) {
const char* end = text + textLen;
while (text < end) {
size_t n = 0;
int ch = *text;
if (!(ch & 0x80)) {
n = 0;
} else if ((ch & 0xe0) == 0xc0) {
n = 1;
} else if ((ch & 0xf0) == 0xe0) {
n = 2;
} else if ((ch & 0xf8) == 0xf0) {
n = 3;
} else if ((ch & 0xfc) == 0xf8) {
n = 4;
} else if ((ch & 0xfe) == 0xfc) {
n = 5;
} else {
return false;
}
if ((size_t)(end - text) < n) {
return false;
}
for (text++; n > 0; text++, n--) {
if ((*text & 0xc0) != 0x80) {
return false;
}
}
}
return true;
}
int utf8Decode(const uint8_t* text, size_t textLen, uint32_t* codepoint) {
if (!textLen) {
// No input!
*codepoint = 0;
return 0;
}
int n = 0;
int ch = *text;
if (!(ch & 0x80)) {
n = 0;
} else if ((ch & 0xe0) == 0xc0) {
n = 1;
} else if ((ch & 0xf0) == 0xe0) {
n = 2;
} else if ((ch & 0xf8) == 0xf0) {
n = 3;
} else if ((ch & 0xfc) == 0xf8) {
n = 4;
} else if ((ch & 0xfe) == 0xfc) {
n = 5;
} else {
// Invalid input.
return -1;
}
if ((size_t)n + 1U > textLen) {
// Input is too short.
return -1;
}
uint32_t value = 0;
if (n == 0) {
value = text[0];
} else {
value = text[0] & (0x7f >> n);
for (int i = 0; i < n; i++) {
value = (value << 6U) | (uint32_t)(text[i + 1] & 0x3f);
}
}
*codepoint = value;
return n + 1;
}
int utf8Encode(uint32_t codepoint, uint8_t* buffer, size_t bufferLen) {
int len = 0;
if (codepoint <= 0x7f) {
len = 1;
} else if (codepoint <= 0x7ff) {
len = 2;
} else if (codepoint <= 0xffff) {
len = 3;
} else if (codepoint <= 0x1fffff) {
len = 4;
} else if (codepoint <= 0x3ffffff) {
len = 5;
} else if (codepoint <= 0x7fffffff) {
len = 6;
} else {
// Invalid value.
return -1;
}
if (!buffer) {
return len;
}
if (bufferLen < (size_t)len) {
// buffer too small.
return -1;
}
for (int n = len - 1; n > 0; --n) {
buffer[n] = (uint8_t)(codepoint & 0x3f) | 0x80;
codepoint >>= 6;
}
if (len == 1) {
buffer[0] = (uint8_t)codepoint;
} else {
buffer[0] = (uint8_t)(0xff00 >> len) | codepoint;
}
return len;
}
} // namespace base
} // namespace android