1*a939078aSHiroo HAYASHI /* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2021
2*a939078aSHiroo HAYASHI Free Software Foundation, Inc.
3*a939078aSHiroo HAYASHI
4*a939078aSHiroo HAYASHI Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
5*a939078aSHiroo HAYASHI with help from Dan Sahlin (dan@sics.se) and
6*a939078aSHiroo HAYASHI commentary by Jim Blandy (jimb@ai.mit.edu);
7*a939078aSHiroo HAYASHI adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
8*a939078aSHiroo HAYASHI and implemented by Roland McGrath (roland@ai.mit.edu).
9*a939078aSHiroo HAYASHI
10*a939078aSHiroo HAYASHI NOTE: The canonical source of this file is maintained with the GNU C Library.
11*a939078aSHiroo HAYASHI Bugs can be reported to bug-glibc@prep.ai.mit.edu.
12*a939078aSHiroo HAYASHI
13*a939078aSHiroo HAYASHI This file is free software: you can redistribute it and/or modify
14*a939078aSHiroo HAYASHI it under the terms of the GNU Lesser General Public License as
15*a939078aSHiroo HAYASHI published by the Free Software Foundation; either version 2.1 of the
16*a939078aSHiroo HAYASHI License, or (at your option) any later version.
17*a939078aSHiroo HAYASHI
18*a939078aSHiroo HAYASHI This file is distributed in the hope that it will be useful,
19*a939078aSHiroo HAYASHI but WITHOUT ANY WARRANTY; without even the implied warranty of
20*a939078aSHiroo HAYASHI MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21*a939078aSHiroo HAYASHI GNU Lesser General Public License for more details.
22*a939078aSHiroo HAYASHI
23*a939078aSHiroo HAYASHI You should have received a copy of the GNU Lesser General Public License
24*a939078aSHiroo HAYASHI along with this program. If not, see <https://www.gnu.org/licenses/>. */
25*a939078aSHiroo HAYASHI
26*a939078aSHiroo HAYASHI #ifndef _LIBC
27*a939078aSHiroo HAYASHI # include <config.h>
28*a939078aSHiroo HAYASHI #endif
29*a939078aSHiroo HAYASHI
30*a939078aSHiroo HAYASHI #include <string.h>
31*a939078aSHiroo HAYASHI
32*a939078aSHiroo HAYASHI #include <stddef.h>
33*a939078aSHiroo HAYASHI
34*a939078aSHiroo HAYASHI #if defined _LIBC
35*a939078aSHiroo HAYASHI # include <memcopy.h>
36*a939078aSHiroo HAYASHI #else
37*a939078aSHiroo HAYASHI # define reg_char char
38*a939078aSHiroo HAYASHI #endif
39*a939078aSHiroo HAYASHI
40*a939078aSHiroo HAYASHI #include <limits.h>
41*a939078aSHiroo HAYASHI
42*a939078aSHiroo HAYASHI #if HAVE_BP_SYM_H || defined _LIBC
43*a939078aSHiroo HAYASHI # include <bp-sym.h>
44*a939078aSHiroo HAYASHI #else
45*a939078aSHiroo HAYASHI # define BP_SYM(sym) sym
46*a939078aSHiroo HAYASHI #endif
47*a939078aSHiroo HAYASHI
48*a939078aSHiroo HAYASHI #undef __memchr
49*a939078aSHiroo HAYASHI #ifdef _LIBC
50*a939078aSHiroo HAYASHI # undef memchr
51*a939078aSHiroo HAYASHI #endif
52*a939078aSHiroo HAYASHI
53*a939078aSHiroo HAYASHI #ifndef weak_alias
54*a939078aSHiroo HAYASHI # define __memchr memchr
55*a939078aSHiroo HAYASHI #endif
56*a939078aSHiroo HAYASHI
57*a939078aSHiroo HAYASHI /* Search no more than N bytes of S for C. */
58*a939078aSHiroo HAYASHI void *
__memchr(void const * s,int c_in,size_t n)59*a939078aSHiroo HAYASHI __memchr (void const *s, int c_in, size_t n)
60*a939078aSHiroo HAYASHI {
61*a939078aSHiroo HAYASHI /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
62*a939078aSHiroo HAYASHI long instead of a 64-bit uintmax_t tends to give better
63*a939078aSHiroo HAYASHI performance. On 64-bit hardware, unsigned long is generally 64
64*a939078aSHiroo HAYASHI bits already. Change this typedef to experiment with
65*a939078aSHiroo HAYASHI performance. */
66*a939078aSHiroo HAYASHI typedef unsigned long int longword;
67*a939078aSHiroo HAYASHI
68*a939078aSHiroo HAYASHI const unsigned char *char_ptr;
69*a939078aSHiroo HAYASHI const longword *longword_ptr;
70*a939078aSHiroo HAYASHI longword repeated_one;
71*a939078aSHiroo HAYASHI longword repeated_c;
72*a939078aSHiroo HAYASHI unsigned reg_char c;
73*a939078aSHiroo HAYASHI
74*a939078aSHiroo HAYASHI c = (unsigned char) c_in;
75*a939078aSHiroo HAYASHI
76*a939078aSHiroo HAYASHI /* Handle the first few bytes by reading one byte at a time.
77*a939078aSHiroo HAYASHI Do this until CHAR_PTR is aligned on a longword boundary. */
78*a939078aSHiroo HAYASHI for (char_ptr = (const unsigned char *) s;
79*a939078aSHiroo HAYASHI n > 0 && (size_t) char_ptr % sizeof (longword) != 0;
80*a939078aSHiroo HAYASHI --n, ++char_ptr)
81*a939078aSHiroo HAYASHI if (*char_ptr == c)
82*a939078aSHiroo HAYASHI return (void *) char_ptr;
83*a939078aSHiroo HAYASHI
84*a939078aSHiroo HAYASHI longword_ptr = (const longword *) char_ptr;
85*a939078aSHiroo HAYASHI
86*a939078aSHiroo HAYASHI /* All these elucidatory comments refer to 4-byte longwords,
87*a939078aSHiroo HAYASHI but the theory applies equally well to any size longwords. */
88*a939078aSHiroo HAYASHI
89*a939078aSHiroo HAYASHI /* Compute auxiliary longword values:
90*a939078aSHiroo HAYASHI repeated_one is a value which has a 1 in every byte.
91*a939078aSHiroo HAYASHI repeated_c has c in every byte. */
92*a939078aSHiroo HAYASHI repeated_one = 0x01010101;
93*a939078aSHiroo HAYASHI repeated_c = c | (c << 8);
94*a939078aSHiroo HAYASHI repeated_c |= repeated_c << 16;
95*a939078aSHiroo HAYASHI if (0xffffffffU < (longword) -1)
96*a939078aSHiroo HAYASHI {
97*a939078aSHiroo HAYASHI repeated_one |= repeated_one << 31 << 1;
98*a939078aSHiroo HAYASHI repeated_c |= repeated_c << 31 << 1;
99*a939078aSHiroo HAYASHI if (8 < sizeof (longword))
100*a939078aSHiroo HAYASHI {
101*a939078aSHiroo HAYASHI size_t i;
102*a939078aSHiroo HAYASHI
103*a939078aSHiroo HAYASHI for (i = 64; i < sizeof (longword) * 8; i *= 2)
104*a939078aSHiroo HAYASHI {
105*a939078aSHiroo HAYASHI repeated_one |= repeated_one << i;
106*a939078aSHiroo HAYASHI repeated_c |= repeated_c << i;
107*a939078aSHiroo HAYASHI }
108*a939078aSHiroo HAYASHI }
109*a939078aSHiroo HAYASHI }
110*a939078aSHiroo HAYASHI
111*a939078aSHiroo HAYASHI /* Instead of the traditional loop which tests each byte, we will test a
112*a939078aSHiroo HAYASHI longword at a time. The tricky part is testing if *any of the four*
113*a939078aSHiroo HAYASHI bytes in the longword in question are equal to c. We first use an xor
114*a939078aSHiroo HAYASHI with repeated_c. This reduces the task to testing whether *any of the
115*a939078aSHiroo HAYASHI four* bytes in longword1 is zero.
116*a939078aSHiroo HAYASHI
117*a939078aSHiroo HAYASHI We compute tmp =
118*a939078aSHiroo HAYASHI ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
119*a939078aSHiroo HAYASHI That is, we perform the following operations:
120*a939078aSHiroo HAYASHI 1. Subtract repeated_one.
121*a939078aSHiroo HAYASHI 2. & ~longword1.
122*a939078aSHiroo HAYASHI 3. & a mask consisting of 0x80 in every byte.
123*a939078aSHiroo HAYASHI Consider what happens in each byte:
124*a939078aSHiroo HAYASHI - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
125*a939078aSHiroo HAYASHI and step 3 transforms it into 0x80. A carry can also be propagated
126*a939078aSHiroo HAYASHI to more significant bytes.
127*a939078aSHiroo HAYASHI - If a byte of longword1 is nonzero, let its lowest 1 bit be at
128*a939078aSHiroo HAYASHI position k (0 <= k <= 7); so the lowest k bits are 0. After step 1,
129*a939078aSHiroo HAYASHI the byte ends in a single bit of value 0 and k bits of value 1.
130*a939078aSHiroo HAYASHI After step 2, the result is just k bits of value 1: 2^k - 1. After
131*a939078aSHiroo HAYASHI step 3, the result is 0. And no carry is produced.
132*a939078aSHiroo HAYASHI So, if longword1 has only non-zero bytes, tmp is zero.
133*a939078aSHiroo HAYASHI Whereas if longword1 has a zero byte, call j the position of the least
134*a939078aSHiroo HAYASHI significant zero byte. Then the result has a zero at positions 0, ...,
135*a939078aSHiroo HAYASHI j-1 and a 0x80 at position j. We cannot predict the result at the more
136*a939078aSHiroo HAYASHI significant bytes (positions j+1..3), but it does not matter since we
137*a939078aSHiroo HAYASHI already have a non-zero bit at position 8*j+7.
138*a939078aSHiroo HAYASHI
139*a939078aSHiroo HAYASHI So, the test whether any byte in longword1 is zero is equivalent to
140*a939078aSHiroo HAYASHI testing whether tmp is nonzero. */
141*a939078aSHiroo HAYASHI
142*a939078aSHiroo HAYASHI while (n >= sizeof (longword))
143*a939078aSHiroo HAYASHI {
144*a939078aSHiroo HAYASHI longword longword1 = *longword_ptr ^ repeated_c;
145*a939078aSHiroo HAYASHI
146*a939078aSHiroo HAYASHI if ((((longword1 - repeated_one) & ~longword1)
147*a939078aSHiroo HAYASHI & (repeated_one << 7)) != 0)
148*a939078aSHiroo HAYASHI break;
149*a939078aSHiroo HAYASHI longword_ptr++;
150*a939078aSHiroo HAYASHI n -= sizeof (longword);
151*a939078aSHiroo HAYASHI }
152*a939078aSHiroo HAYASHI
153*a939078aSHiroo HAYASHI char_ptr = (const unsigned char *) longword_ptr;
154*a939078aSHiroo HAYASHI
155*a939078aSHiroo HAYASHI /* At this point, we know that either n < sizeof (longword), or one of the
156*a939078aSHiroo HAYASHI sizeof (longword) bytes starting at char_ptr is == c. On little-endian
157*a939078aSHiroo HAYASHI machines, we could determine the first such byte without any further
158*a939078aSHiroo HAYASHI memory accesses, just by looking at the tmp result from the last loop
159*a939078aSHiroo HAYASHI iteration. But this does not work on big-endian machines. Choose code
160*a939078aSHiroo HAYASHI that works in both cases. */
161*a939078aSHiroo HAYASHI
162*a939078aSHiroo HAYASHI for (; n > 0; --n, ++char_ptr)
163*a939078aSHiroo HAYASHI {
164*a939078aSHiroo HAYASHI if (*char_ptr == c)
165*a939078aSHiroo HAYASHI return (void *) char_ptr;
166*a939078aSHiroo HAYASHI }
167*a939078aSHiroo HAYASHI
168*a939078aSHiroo HAYASHI return NULL;
169*a939078aSHiroo HAYASHI }
170*a939078aSHiroo HAYASHI #ifdef weak_alias
171*a939078aSHiroo HAYASHI weak_alias (__memchr, BP_SYM (memchr))
172*a939078aSHiroo HAYASHI #endif
173