1 /* Convert multibyte character to wide character. 2 Copyright (C) 1999-2002, 2005-2021 Free Software Foundation, Inc. 3 4 This file is free software: you can redistribute it and/or modify 5 it under the terms of the GNU Lesser General Public License as 6 published by the Free Software Foundation; either version 2.1 of the 7 License, or (at your option) any later version. 8 9 This file is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU Lesser General Public License for more details. 13 14 You should have received a copy of the GNU Lesser General Public License 15 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 16 17 /* Written by Bruno Haible <bruno@clisp.org>, 2008. */ 18 19 /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions 20 that handles the special case of the UTF-8 encoding. */ 21 22 /* Cf. unistr/u8-mbtouc.c. */ 23 unsigned char c = (unsigned char) p[0]; 24 25 if (c < 0x80) 26 { 27 if (pwc != NULL) 28 *pwc = c; 29 res = (c == 0 ? 0 : 1); 30 goto success; 31 } 32 if (c >= 0xc2) 33 { 34 if (c < 0xe0) 35 { 36 if (m == 1) 37 goto incomplete; 38 else /* m >= 2 */ 39 { 40 unsigned char c2 = (unsigned char) p[1]; 41 42 if ((c2 ^ 0x80) < 0x40) 43 { 44 if (pwc != NULL) 45 *pwc = ((unsigned int) (c & 0x1f) << 6) 46 | (unsigned int) (c2 ^ 0x80); 47 res = 2; 48 goto success; 49 } 50 } 51 } 52 else if (c < 0xf0) 53 { 54 if (m == 1) 55 goto incomplete; 56 else 57 { 58 unsigned char c2 = (unsigned char) p[1]; 59 60 if ((c2 ^ 0x80) < 0x40 61 && (c >= 0xe1 || c2 >= 0xa0) 62 && (c != 0xed || c2 < 0xa0)) 63 { 64 if (m == 2) 65 goto incomplete; 66 else /* m >= 3 */ 67 { 68 unsigned char c3 = (unsigned char) p[2]; 69 70 if ((c3 ^ 0x80) < 0x40) 71 { 72 unsigned int wc = 73 (((unsigned int) (c & 0x0f) << 12) 74 | ((unsigned int) (c2 ^ 0x80) << 6) 75 | (unsigned int) (c3 ^ 0x80)); 76 77 if (FITS_IN_CHAR_TYPE (wc)) 78 { 79 if (pwc != NULL) 80 *pwc = wc; 81 res = 3; 82 goto success; 83 } 84 } 85 } 86 } 87 } 88 } 89 else if (c <= 0xf4) 90 { 91 if (m == 1) 92 goto incomplete; 93 else 94 { 95 unsigned char c2 = (unsigned char) p[1]; 96 97 if ((c2 ^ 0x80) < 0x40 98 && (c >= 0xf1 || c2 >= 0x90) 99 && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90))) 100 { 101 if (m == 2) 102 goto incomplete; 103 else 104 { 105 unsigned char c3 = (unsigned char) p[2]; 106 107 if ((c3 ^ 0x80) < 0x40) 108 { 109 if (m == 3) 110 goto incomplete; 111 else /* m >= 4 */ 112 { 113 unsigned char c4 = (unsigned char) p[3]; 114 115 if ((c4 ^ 0x80) < 0x40) 116 { 117 unsigned int wc = 118 (((unsigned int) (c & 0x07) << 18) 119 | ((unsigned int) (c2 ^ 0x80) << 12) 120 | ((unsigned int) (c3 ^ 0x80) << 6) 121 | (unsigned int) (c4 ^ 0x80)); 122 123 if (FITS_IN_CHAR_TYPE (wc)) 124 { 125 if (pwc != NULL) 126 *pwc = wc; 127 res = 4; 128 goto success; 129 } 130 } 131 } 132 } 133 } 134 } 135 } 136 } 137 } 138 goto invalid; 139