]>
git.proxmox.com Git - mirror_edk2.git/blob - StdLib/LibC/Stdio/vfscanf.c
2 Implementation of scanf internals for <stdio.h>.
4 Copyright (c) 2010 - 2014, Intel Corporation. All rights reserved.<BR>
5 This program and the accompanying materials are licensed and made available
6 under the terms and conditions of the BSD License that accompanies this
7 distribution. The full text of the license may be found at
8 http://opensource.org/licenses/bsd-license.
10 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 Copyright (c) 1990, 1993
14 The Regents of the University of California. All rights reserved.
16 This code is derived from software contributed to Berkeley by
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions
22 - Redistributions of source code must retain the above copyright
23 notice, this list of conditions and the following disclaimer.
24 - Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27 - Neither the name of the University nor the names of its contributors
28 may be used to endorse or promote products derived from this software
29 without specific prior written permission.
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
43 NetBSD: vfscanf.c,v 1.37.4.1 2007/05/07 19:49:08 pavel Exp
44 FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.41 2007/01/09 00:28:07 imp Exp
45 vfscanf.c 8.1 (Berkeley) 6/4/93
47 #include <LibConfig.h>
49 #include "namespace.h"
59 #include <sys/types.h>
63 #include "reentrant.h"
66 #ifndef NO_FLOATING_POINT
71 * Provide an external name for vfscanf. Note, EFI uses the normal
72 * namespace.h method; stdio routines explicitly use the internal name
76 __weak_alias(vfscanf
,__svfscanf
)
79 #define BUF 513 /* Maximum length of numeric string. */
82 * Flags used during conversion.
84 #define LONG 0x0001 /* l: long or double */
85 #define LONGDBL 0x0002 /* L: long double */
86 #define SHORT 0x0004 /* h: short */
87 #define SUPPRESS 0x0008 /* *: suppress assignment */
88 #define POINTER 0x0010 /* p: void * (as hex) */
89 #define NOSKIP 0x0020 /* [ or c: do not skip blanks */
90 #define LONGLONG 0x0400 /* ll: long long (+ deprecated q: quad) */
91 #define INTMAXT 0x0800 /* j: intmax_t */
92 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */
93 #define SIZET 0x2000 /* z: size_t */
94 #define SHORTSHORT 0x4000 /* hh: char */
95 #define UNSIGNED 0x8000 /* %[oupxX] conversions */
98 * The following are used in integral conversions only:
99 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
101 #define SIGNOK 0x00040 /* +/- is (still) legal */
102 #define NDIGITS 0x00080 /* no digits detected */
103 #define PFXOK 0x00100 /* 0x prefix is (still) legal */
104 #define NZDIGITS 0x00200 /* no zero digits detected */
105 #define HAVESIGN 0x10000 /* sign detected */
110 #define CT_CHAR 0 /* %c conversion */
111 #define CT_CCL 1 /* %[...] conversion */
112 #define CT_STRING 2 /* %s conversion */
113 #define CT_INT 3 /* %[dioupxX] conversion */
114 #define CT_FLOAT 4 /* %[efgEFG] conversion */
116 static const u_char
*__sccl(char *, const u_char
*);
117 #ifndef NO_FLOATING_POINT
118 static int parsefloat(FILE *, char *, char *);
121 int __scanfdebug
= 0;
123 #define __collate_load_error /*CONSTCOND*/0
125 __collate_range_cmp(int c1
, int c2
)
127 static char s1
[2] = { 0 };
128 static char s2
[2] = { 0 };
132 return strcoll(s1
, s2
);
137 * __svfscanf - MT-safe version
140 __svfscanf(FILE *fp
, char const *fmt0
, va_list ap
)
149 ret
= __svfscanf_unlocked(fp
, fmt0
, ap
);
155 * __svfscanf_unlocked - non-MT-safe version of __svfscanf
158 __svfscanf_unlocked(FILE *fp
, const char *fmt0
, va_list ap
)
160 const u_char
*fmt
= (const u_char
*)fmt0
;
161 int c
; /* character from format, or conversion */
162 size_t width
; /* field width, or 0 */
163 char *p
; /* points into all kinds of strings */
164 size_t n
; /* handy size_t */
165 int flags
; /* flags as defined above */
166 char *p0
; /* saves original value of p when necessary */
167 int nassigned
; /* number of fields assigned */
168 int nconversions
; /* number of conversions */
169 int nread
; /* number of characters consumed from fp */
170 int base
; /* base argument to conversion function */
171 char ccltab
[256]; /* character class table for %[...] */
172 char buf
[BUF
]; /* buffer for numeric and mb conversions */
173 wchar_t *wcp
; /* handy wide character pointer */
174 size_t nconv
; /* length of multibyte sequence converted */
175 static const mbstate_t initial
= { 0 };
178 /* `basefix' is used to avoid `if' tests in the integer scanner */
179 static const short basefix
[17] =
180 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
182 _DIAGASSERT(fp
!= NULL
);
183 _DIAGASSERT(fmt0
!= NULL
);
189 _SET_ORIENTATION(fp
, -1);
191 //Print(L"%a( %d, \"%a\", ...)\n", __func__, fp->_file, fmt0);
197 c
= (unsigned char)*fmt
++;
201 while ((fp
->_r
> 0 || __srefill(fp
) == 0) &&
203 nread
++, fp
->_r
--, fp
->_p
++;
206 //Print(L"%a: %d\n", __func__, __LINE__);
212 * switch on the format. continue if done;
213 * break once format type is derived.
216 //Print(L"%a: %d\n", __func__, __LINE__);
220 //Print(L"%a: %d\n", __func__, __LINE__);
221 if (fp
->_r
<= 0 && __srefill(fp
))
243 flags
|= LONGLONG
; /* not quite */
262 case '0': case '1': case '2': case '3': case '4':
263 case '5': case '6': case '7': case '8': case '9':
264 width
= width
* 10 + c
- '0';
294 flags
|= PFXOK
; /* enable 0x prefixing */
300 #ifndef NO_FLOATING_POINT
301 case 'A': case 'E': case 'F': case 'G':
302 case 'a': case 'e': case 'f': case 'g':
315 fmt
= __sccl(ccltab
, fmt
);
328 case 'p': /* pointer format is like hex */
329 flags
|= POINTER
| PFXOK
;
330 c
= CT_INT
; /* assumes sizeof(uintmax_t) */
331 flags
|= UNSIGNED
; /* >= sizeof(uintptr_t) */
337 if (flags
& SUPPRESS
) /* ??? */
339 if (flags
& SHORTSHORT
)
340 *va_arg(ap
, char *) = (char)nread
;
341 else if (flags
& SHORT
)
342 *va_arg(ap
, short *) = (short)nread
;
343 else if (flags
& LONG
)
344 *va_arg(ap
, long *) = nread
;
345 else if (flags
& LONGLONG
)
346 *va_arg(ap
, long long *) = nread
;
347 else if (flags
& INTMAXT
)
348 *va_arg(ap
, intmax_t *) = nread
;
349 else if (flags
& SIZET
)
350 *va_arg(ap
, size_t *) = nread
;
351 else if (flags
& PTRDIFFT
)
352 *va_arg(ap
, ptrdiff_t *) = nread
;
354 *va_arg(ap
, int *) = nread
;
361 * Disgusting backwards compatibility hack. XXX
363 case '\0': /* compat */
366 //Print(L"%a: %d\n", __func__, __LINE__);
369 * We have a conversion that requires input.
371 if (fp
->_r
<= 0 && __srefill(fp
))
373 //Print(L"%a: %d\n", __func__, __LINE__);
378 * Consume leading white space, except for formats
379 * that suppress this.
381 if ((flags
& NOSKIP
) == 0) {
382 while (isspace(*fp
->_p
)) {
386 else if (__srefill(fp
))
388 //Print(L"%a: %d\n", __func__, __LINE__);
393 * Note that there is at least one character in
394 * the buffer, so conversions that do not set NOSKIP
395 * can no longer result in an input failure.
402 //Print(L"%a: %d\n", __func__, __LINE__);
406 /* scan arbitrary characters (sets NOSKIP) */
410 if ((flags
& SUPPRESS
) == 0)
411 wcp
= va_arg(ap
, wchar_t *);
416 if (n
== MB_CUR_MAX
) {
417 fp
->_flags
|= __SERR
;
424 nconv
= mbrtowc(wcp
, buf
, n
, &mbs
);
425 if (nconv
== (size_t)-1) {
426 fp
->_flags
|= __SERR
;
429 if (nconv
== 0 && !(flags
& SUPPRESS
))
431 if (nconv
!= (size_t)-2) {
434 if (!(flags
& SUPPRESS
))
438 if (fp
->_r
<= 0 && __srefill(fp
)) {
440 fp
->_flags
|= __SERR
;
446 if (!(flags
& SUPPRESS
))
448 } else if (flags
& SUPPRESS
) {
451 if ((n
= fp
->_r
) < width
) {
462 fp
->_r
-= (int)width
;
469 size_t r
= fread(va_arg(ap
, char *), 1,
481 /* scan a (nonempty) character class (sets NOSKIP) */
483 width
= (size_t)~0; /* `infinity' */
484 /* take only those things in the class */
489 if ((flags
& SUPPRESS
) == 0)
490 wcp
= va_arg(ap
, wchar_t *);
496 if (n
== MB_CUR_MAX
) {
497 fp
->_flags
|= __SERR
;
504 nconv
= mbrtowc(wcp
, buf
, n
, &mbs
);
505 if (nconv
== (size_t)-1) {
506 fp
->_flags
|= __SERR
;
511 if (nconv
!= (size_t)-2) {
512 if (wctob(*wcp
) != EOF
&&
513 !ccltab
[wctob(*wcp
)]) {
523 if (!(flags
& SUPPRESS
))
528 if (fp
->_r
<= 0 && __srefill(fp
)) {
530 fp
->_flags
|= __SERR
;
537 fp
->_flags
|= __SERR
;
543 if (!(flags
& SUPPRESS
)) {
547 } else if (flags
& SUPPRESS
) {
549 while (ccltab
[*fp
->_p
]) {
550 n
++, fp
->_r
--, fp
->_p
++;
553 if (fp
->_r
<= 0 && __srefill(fp
)) {
562 p0
= p
= va_arg(ap
, char *);
563 while (ccltab
[*fp
->_p
]) {
568 if (fp
->_r
<= 0 && __srefill(fp
)) {
585 /* like CCL, but zero-length string OK, & no NOSKIP */
591 if ((flags
& SUPPRESS
) == 0)
592 wcp
= va_arg(ap
, wchar_t *);
596 while (!isspace(*fp
->_p
) && width
!= 0) {
597 if (n
== MB_CUR_MAX
) {
598 fp
->_flags
|= __SERR
;
605 nconv
= mbrtowc(wcp
, buf
, n
, &mbs
);
606 if (nconv
== (size_t)-1) {
607 fp
->_flags
|= __SERR
;
612 if (nconv
!= (size_t)-2) {
613 if (iswspace(*wcp
)) {
623 if (!(flags
& SUPPRESS
))
627 if (fp
->_r
<= 0 && __srefill(fp
)) {
629 fp
->_flags
|= __SERR
;
635 if (!(flags
& SUPPRESS
)) {
639 } else if (flags
& SUPPRESS
) {
641 while (!isspace(*fp
->_p
)) {
642 n
++, fp
->_r
--, fp
->_p
++;
645 if (fp
->_r
<= 0 && __srefill(fp
))
650 p0
= p
= va_arg(ap
, char *);
651 while (!isspace(*fp
->_p
)) {
656 if (fp
->_r
<= 0 && __srefill(fp
))
660 nread
+= (int)(p
- p0
);
667 //Print(L"%a: %d\n", __func__, __LINE__);
668 /* scan an integer as if by the conversion function */
670 if (width
== 0 || width
> sizeof(buf
) - 1)
671 width
= sizeof(buf
) - 1;
673 /* size_t is unsigned, hence this optimisation */
674 if (--width
> sizeof(buf
) - 2)
675 width
= sizeof(buf
) - 2;
678 flags
|= SIGNOK
| NDIGITS
| NZDIGITS
;
679 for (p
= buf
; width
; width
--) {
682 * Switch on the character; `goto ok'
683 * if we accept it as a part of number.
688 * The digit 0 is always legal, but is
689 * special. For %i conversions, if no
690 * digits (zero or nonzero) have been
691 * scanned (only signs), we will have
692 * base==0. In that case, we should set
693 * it to 8 and enable 0x prefixing.
694 * Also, if we have not scanned zero digits
695 * before this, do not turn off prefixing
696 * (someone else will turn it off if we
697 * have scanned any nonzero digits).
704 if (flags
& NZDIGITS
)
705 flags
&= ~(SIGNOK
|NZDIGITS
|NDIGITS
);
707 flags
&= ~(SIGNOK
|PFXOK
|NDIGITS
);
710 /* 1 through 7 always legal */
711 case '1': case '2': case '3':
712 case '4': case '5': case '6': case '7':
713 base
= basefix
[base
];
714 flags
&= ~(SIGNOK
| PFXOK
| NDIGITS
);
717 /* digits 8 and 9 ok iff decimal or hex */
719 base
= basefix
[base
];
721 break; /* not legal here */
722 flags
&= ~(SIGNOK
| PFXOK
| NDIGITS
);
725 /* letters ok iff hex */
726 case 'A': case 'B': case 'C':
727 case 'D': case 'E': case 'F':
728 case 'a': case 'b': case 'c':
729 case 'd': case 'e': case 'f':
730 /* no need to fix base here */
732 break; /* not legal here */
733 flags
&= ~(SIGNOK
| PFXOK
| NDIGITS
);
736 /* sign ok only as first character */
738 if (flags
& SIGNOK
) {
746 * x ok iff flag still set & 2nd char (or
747 * 3rd char if we have a sign).
750 if (flags
& PFXOK
&& p
==
751 buf
+ 1 + !!(flags
& HAVESIGN
)) {
752 base
= 16; /* if %i */
760 * If we got here, c is not a legal character
761 * for a number. Stop accumulating digits.
766 * c is legal: store it and look at the next.
771 else if (__srefill(fp
))
775 * If we had only a sign, it is no good; push
776 * back the sign. If the number ends in `x',
777 * it was [sign] '0' 'x', so push back the x
778 * and treat it as [sign] '0'.
780 if (flags
& NDIGITS
) {
782 (void)ungetc(*(u_char
*)--p
, fp
);
785 c
= ((u_char
*)p
)[-1];
786 if (c
== 'x' || c
== 'X') {
790 if ((flags
& SUPPRESS
) == 0) {
792 // Use a union to get around the truncation warnings.
807 if ((flags
& UNSIGNED
) == 0)
808 res
.imax
= strtoimax(buf
, (char **)NULL
, base
);
810 res
.umax
= strtoumax(buf
, (char **)NULL
, base
);
812 *va_arg(ap
, void **) = res
.vp
;
813 //(void *)((uintptr_t)res);
814 else if (flags
& SHORTSHORT
)
815 *va_arg(ap
, char *) = res
.ch
;
816 else if (flags
& SHORT
)
817 *va_arg(ap
, short *) = res
.hw
;
818 else if (flags
& LONG
)
819 *va_arg(ap
, long *) = res
.lo
;
820 else if (flags
& LONGLONG
)
821 *va_arg(ap
, long long *) = res
.ll
;
822 else if (flags
& INTMAXT
)
823 *va_arg(ap
, intmax_t *) = res
.imax
;
824 else if (flags
& PTRDIFFT
)
825 *va_arg(ap
, ptrdiff_t *) = res
.pdt
;
827 else if (flags
& SIZET
)
828 *va_arg(ap
, size_t *) = res
.sz
;
830 *va_arg(ap
, int *) = res
.in
;
833 nread
+= (int)(p
- buf
);
835 //Print(L"%a: %d\n", __func__, __LINE__);
838 #ifndef NO_FLOATING_POINT
840 /* scan a floating point number as if by strtod */
841 if (width
== 0 || width
> sizeof(buf
) - 1)
842 width
= sizeof(buf
) - 1;
843 if ((width
= parsefloat(fp
, buf
, buf
+ width
)) == 0)
845 if ((flags
& SUPPRESS
) == 0) {
846 if (flags
& LONGDBL
) {
847 long double res
= strtold(buf
, &p
);
848 *va_arg(ap
, long double *) = res
;
849 } else if (flags
& LONG
) {
850 double res
= strtod(buf
, &p
);
851 *va_arg(ap
, double *) = res
;
853 float res
= strtof(buf
, &p
);
854 *va_arg(ap
, float *) = res
;
856 if (__scanfdebug
&& p
- buf
!= (ptrdiff_t)width
)
863 #endif /* !NO_FLOATING_POINT */
867 //Print(L"%a: %d\n", __func__, __LINE__);
868 return (nconversions
!= 0 ? nassigned
: EOF
);
874 * Fill in the given table from the scanset at the given format
875 * (just after `['). Return a pointer to the character past the
876 * closing `]'. The table has a 1 wherever characters should be
877 * considered part of the scanset.
879 static const u_char
*
880 __sccl(char *tab
, const u_char
*fmt
)
884 _DIAGASSERT(tab
!= NULL
);
885 _DIAGASSERT(fmt
!= NULL
);
886 /* first `clear' the whole table */
887 c
= *fmt
++; /* first char hat => negated scanset */
889 v
= 1; /* default => accept */
890 c
= *fmt
++; /* get new first char */
892 v
= 0; /* default => reject */
894 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
895 (void)memset(tab
, v
, 256);
898 return (fmt
- 1);/* format ended before closing ] */
901 * Now set the entries corresponding to the actual scanset
902 * to the opposite of the above.
904 * The first character may be ']' (or '-') without being special;
905 * the last character may be '-'.
909 tab
[c
] = (char)v
; /* take character c */
911 n
= *fmt
++; /* and examine the next */
914 case 0: /* format ended too soon */
919 * A scanset of the form
921 * is defined as `the digit 0, the digit 1,
922 * the character +, the character -', but
923 * the effect of a scanset such as
925 * is implementation defined. The V7 Unix
926 * scanf treats `a-z' as `the letters a through
927 * z', but treats `a-a' as `the letter a, the
928 * character -, and the letter a'.
930 * For compatibility, the `-' is not considered
931 * to define a range if the character following
932 * it is either a close bracket (required by ANSI)
933 * or is not numerically greater than the character
934 * we just stored in the table (c).
937 if (n
== ']' || (__collate_load_error
? n
< c
:
938 __collate_range_cmp(n
, c
) < 0)) {
940 break; /* resume the for(;;) */
943 /* fill in the range */
944 if (__collate_load_error
) {
949 for (i
= 0; i
< 256; i
++)
950 if (__collate_range_cmp(c
, i
) < 0 &&
951 __collate_range_cmp(i
, n
) <= 0)
954 #if 1 /* XXX another disgusting compatibility hack */
957 * Alas, the V7 Unix scanf also treats formats
958 * such as [a-c-e] as `the letters a through e'.
959 * This too is permitted by the standard....
970 case ']': /* end of scanset */
973 default: /* just another character */
981 #ifndef NO_FLOATING_POINT
983 parsefloat(FILE *fp
, char *buf
, char *end
)
988 S_START
, S_GOTSIGN
, S_INF
, S_NAN
, S_MAYBEHEX
,
989 S_DIGITS
, S_FRAC
, S_EXP
, S_EXPDIGITS
992 char decpt
= *localeconv()->decimal_point
;
993 _Bool gotmantdig
= 0, ishex
= 0;
1001 * We set commit = p whenever the string we have read so far
1002 * constitutes a valid representation of a floating point
1003 * number by itself. At some point, the parse will complete
1004 * or fail, and we will ungetc() back to the last commit point.
1005 * To ensure that the file offset gets updated properly, it is
1006 * always necessary to read at least one character that doesn't
1007 * match; thus, we can't short-circuit "infinity" or "nan(...)".
1010 for (p
= buf
; p
< end
; ) {
1016 if (c
== '-' || c
== '+')
1040 if (infnanpos
> 6 ||
1041 (c
!= "nfinity"[infnanpos
] &&
1042 c
!= "NFINITY"[infnanpos
]))
1044 if (infnanpos
== 1 || infnanpos
== 6)
1045 commit
= p
; /* inf or infinity */
1049 switch (infnanpos
) {
1050 case -1: /* XXX kludge to deal with nan(...) */
1053 if (c
!= 'A' && c
!= 'a')
1057 if (c
!= 'N' && c
!= 'n')
1070 } else if (!isalnum(c
) && c
!= '_')
1078 if (c
== 'X' || c
== 'x') {
1081 } else { /* we saw a '0', but no 'x' */
1086 if ((ishex
&& isxdigit(c
)) || isdigit(c
))
1097 if (((c
== 'E' || c
== 'e') && !ishex
) ||
1098 ((c
== 'P' || c
== 'p') && ishex
)) {
1103 } else if ((ishex
&& isxdigit(c
)) || isdigit(c
)) {
1110 state
= S_EXPDIGITS
;
1111 if (c
== '-' || c
== '+')
1127 else if (__srefill(fp
))
1132 while (commit
< --p
)
1133 (void)ungetc(*(u_char
*)p
, fp
);
1135 return (int)(commit
- buf
);