xref: /Universal-ctags/gnulib/mbrtowc-impl-utf8.h (revision 820c1a8d46849a90376d8eb15b319ac05439f656)
/* Convert multibyte character to wide character.
   Copyright (C) 1999-2002, 2005-2021 Free Software Foundation, Inc.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>, 2008.  */

/* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
   that handles the special case of the UTF-8 encoding.  */

22*820c1a8dSHiroo HAYASHI         /* Cf. unistr/u8-mbtouc.c.  */
23*820c1a8dSHiroo HAYASHI         unsigned char c = (unsigned char) p[0];
24*820c1a8dSHiroo HAYASHI 
25*820c1a8dSHiroo HAYASHI         if (c < 0x80)
26*820c1a8dSHiroo HAYASHI           {
27*820c1a8dSHiroo HAYASHI             if (pwc != NULL)
28*820c1a8dSHiroo HAYASHI               *pwc = c;
29*820c1a8dSHiroo HAYASHI             res = (c == 0 ? 0 : 1);
30*820c1a8dSHiroo HAYASHI             goto success;
31*820c1a8dSHiroo HAYASHI           }
32*820c1a8dSHiroo HAYASHI         if (c >= 0xc2)
33*820c1a8dSHiroo HAYASHI           {
34*820c1a8dSHiroo HAYASHI             if (c < 0xe0)
35*820c1a8dSHiroo HAYASHI               {
36*820c1a8dSHiroo HAYASHI                 if (m == 1)
37*820c1a8dSHiroo HAYASHI                   goto incomplete;
38*820c1a8dSHiroo HAYASHI                 else /* m >= 2 */
39*820c1a8dSHiroo HAYASHI                   {
40*820c1a8dSHiroo HAYASHI                     unsigned char c2 = (unsigned char) p[1];
41*820c1a8dSHiroo HAYASHI 
42*820c1a8dSHiroo HAYASHI                     if ((c2 ^ 0x80) < 0x40)
43*820c1a8dSHiroo HAYASHI                       {
44*820c1a8dSHiroo HAYASHI                         if (pwc != NULL)
45*820c1a8dSHiroo HAYASHI                           *pwc = ((unsigned int) (c & 0x1f) << 6)
46*820c1a8dSHiroo HAYASHI                                  | (unsigned int) (c2 ^ 0x80);
47*820c1a8dSHiroo HAYASHI                         res = 2;
48*820c1a8dSHiroo HAYASHI                         goto success;
49*820c1a8dSHiroo HAYASHI                       }
50*820c1a8dSHiroo HAYASHI                   }
51*820c1a8dSHiroo HAYASHI               }
52*820c1a8dSHiroo HAYASHI             else if (c < 0xf0)
53*820c1a8dSHiroo HAYASHI               {
54*820c1a8dSHiroo HAYASHI                 if (m == 1)
55*820c1a8dSHiroo HAYASHI                   goto incomplete;
56*820c1a8dSHiroo HAYASHI                 else
57*820c1a8dSHiroo HAYASHI                   {
58*820c1a8dSHiroo HAYASHI                     unsigned char c2 = (unsigned char) p[1];
59*820c1a8dSHiroo HAYASHI 
60*820c1a8dSHiroo HAYASHI                     if ((c2 ^ 0x80) < 0x40
61*820c1a8dSHiroo HAYASHI                         && (c >= 0xe1 || c2 >= 0xa0)
62*820c1a8dSHiroo HAYASHI                         && (c != 0xed || c2 < 0xa0))
63*820c1a8dSHiroo HAYASHI                       {
64*820c1a8dSHiroo HAYASHI                         if (m == 2)
65*820c1a8dSHiroo HAYASHI                           goto incomplete;
66*820c1a8dSHiroo HAYASHI                         else /* m >= 3 */
67*820c1a8dSHiroo HAYASHI                           {
68*820c1a8dSHiroo HAYASHI                             unsigned char c3 = (unsigned char) p[2];
69*820c1a8dSHiroo HAYASHI 
70*820c1a8dSHiroo HAYASHI                             if ((c3 ^ 0x80) < 0x40)
71*820c1a8dSHiroo HAYASHI                               {
72*820c1a8dSHiroo HAYASHI                                 unsigned int wc =
73*820c1a8dSHiroo HAYASHI                                   (((unsigned int) (c & 0x0f) << 12)
74*820c1a8dSHiroo HAYASHI                                    | ((unsigned int) (c2 ^ 0x80) << 6)
75*820c1a8dSHiroo HAYASHI                                    | (unsigned int) (c3 ^ 0x80));
76*820c1a8dSHiroo HAYASHI 
77*820c1a8dSHiroo HAYASHI                                 if (FITS_IN_CHAR_TYPE (wc))
78*820c1a8dSHiroo HAYASHI                                   {
79*820c1a8dSHiroo HAYASHI                                     if (pwc != NULL)
80*820c1a8dSHiroo HAYASHI                                       *pwc = wc;
81*820c1a8dSHiroo HAYASHI                                     res = 3;
82*820c1a8dSHiroo HAYASHI                                     goto success;
83*820c1a8dSHiroo HAYASHI                                   }
84*820c1a8dSHiroo HAYASHI                               }
85*820c1a8dSHiroo HAYASHI                           }
86*820c1a8dSHiroo HAYASHI                       }
87*820c1a8dSHiroo HAYASHI                   }
88*820c1a8dSHiroo HAYASHI               }
89*820c1a8dSHiroo HAYASHI             else if (c <= 0xf4)
90*820c1a8dSHiroo HAYASHI               {
91*820c1a8dSHiroo HAYASHI                 if (m == 1)
92*820c1a8dSHiroo HAYASHI                   goto incomplete;
93*820c1a8dSHiroo HAYASHI                 else
94*820c1a8dSHiroo HAYASHI                   {
95*820c1a8dSHiroo HAYASHI                     unsigned char c2 = (unsigned char) p[1];
96*820c1a8dSHiroo HAYASHI 
97*820c1a8dSHiroo HAYASHI                     if ((c2 ^ 0x80) < 0x40
98*820c1a8dSHiroo HAYASHI                         && (c >= 0xf1 || c2 >= 0x90)
99*820c1a8dSHiroo HAYASHI                         && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90)))
100*820c1a8dSHiroo HAYASHI                       {
101*820c1a8dSHiroo HAYASHI                         if (m == 2)
102*820c1a8dSHiroo HAYASHI                           goto incomplete;
103*820c1a8dSHiroo HAYASHI                         else
104*820c1a8dSHiroo HAYASHI                           {
105*820c1a8dSHiroo HAYASHI                             unsigned char c3 = (unsigned char) p[2];
106*820c1a8dSHiroo HAYASHI 
107*820c1a8dSHiroo HAYASHI                             if ((c3 ^ 0x80) < 0x40)
108*820c1a8dSHiroo HAYASHI                               {
109*820c1a8dSHiroo HAYASHI                                 if (m == 3)
110*820c1a8dSHiroo HAYASHI                                   goto incomplete;
111*820c1a8dSHiroo HAYASHI                                 else /* m >= 4 */
112*820c1a8dSHiroo HAYASHI                                   {
113*820c1a8dSHiroo HAYASHI                                     unsigned char c4 = (unsigned char) p[3];
114*820c1a8dSHiroo HAYASHI 
115*820c1a8dSHiroo HAYASHI                                     if ((c4 ^ 0x80) < 0x40)
116*820c1a8dSHiroo HAYASHI                                       {
117*820c1a8dSHiroo HAYASHI                                         unsigned int wc =
118*820c1a8dSHiroo HAYASHI                                           (((unsigned int) (c & 0x07) << 18)
119*820c1a8dSHiroo HAYASHI                                            | ((unsigned int) (c2 ^ 0x80) << 12)
120*820c1a8dSHiroo HAYASHI                                            | ((unsigned int) (c3 ^ 0x80) << 6)
121*820c1a8dSHiroo HAYASHI                                            | (unsigned int) (c4 ^ 0x80));
122*820c1a8dSHiroo HAYASHI 
123*820c1a8dSHiroo HAYASHI                                         if (FITS_IN_CHAR_TYPE (wc))
124*820c1a8dSHiroo HAYASHI                                           {
125*820c1a8dSHiroo HAYASHI                                             if (pwc != NULL)
126*820c1a8dSHiroo HAYASHI                                               *pwc = wc;
127*820c1a8dSHiroo HAYASHI                                             res = 4;
128*820c1a8dSHiroo HAYASHI                                             goto success;
129*820c1a8dSHiroo HAYASHI                                           }
130*820c1a8dSHiroo HAYASHI                                       }
131*820c1a8dSHiroo HAYASHI                                   }
132*820c1a8dSHiroo HAYASHI                               }
133*820c1a8dSHiroo HAYASHI                           }
134*820c1a8dSHiroo HAYASHI                       }
135*820c1a8dSHiroo HAYASHI                   }
136*820c1a8dSHiroo HAYASHI               }
137*820c1a8dSHiroo HAYASHI           }
138*820c1a8dSHiroo HAYASHI         goto invalid;
139