]>
git.proxmox.com Git - mirror_edk2.git/blob - StdLib/LibC/Stdio/vfscanf.c
861fb5330176cb8c5ff0afd9fc183fc4849b7418
2 Implementation of scanf internals for <stdio.h>.
4 Copyright (c) 2010 - 2011, Intel Corporation. All rights reserved.<BR>
5 This program and the accompanying materials are licensed and made available
6 under the terms and conditions of the BSD License that accompanies this
7 distribution. The full text of the license may be found at
8 http://opensource.org/licenses/bsd-license.
10 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 Copyright (c) 1990, 1993
14 The Regents of the University of California. All rights reserved.
16 This code is derived from software contributed to Berkeley by
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions
22 - Redistributions of source code must retain the above copyright
23 notice, this list of conditions and the following disclaimer.
24 - Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27 - Neither the name of the University nor the names of its contributors
28 may be used to endorse or promote products derived from this software
29 without specific prior written permission.
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
43 NetBSD: vfscanf.c,v 1.37.4.1 2007/05/07 19:49:08 pavel Exp
44 FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.41 2007/01/09 00:28:07 imp Exp
45 vfscanf.c 8.1 (Berkeley) 6/4/93
47 #include <LibConfig.h>
49 #include "namespace.h"
59 #include <sys/types.h>
63 #include "reentrant.h"
66 #ifndef NO_FLOATING_POINT
71 * Provide an external name for vfscanf. Note, we don't use the normal
72 * namespace.h method; stdio routines explicitly use the internal name
76 __weak_alias(vfscanf
,__svfscanf
)
79 #define BUF 513 /* Maximum length of numeric string. */
82 * Flags used during conversion.
84 #define LONG 0x0001 /* l: long or double */
85 #define LONGDBL 0x0002 /* L: long double */
86 #define SHORT 0x0004 /* h: short */
87 #define SUPPRESS 0x0008 /* *: suppress assignment */
88 #define POINTER 0x0010 /* p: void * (as hex) */
89 #define NOSKIP 0x0020 /* [ or c: do not skip blanks */
90 #define LONGLONG 0x0400 /* ll: long long (+ deprecated q: quad) */
91 #define INTMAXT 0x0800 /* j: intmax_t */
92 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */
93 #define SIZET 0x2000 /* z: size_t */
94 #define SHORTSHORT 0x4000 /* hh: char */
95 #define UNSIGNED 0x8000 /* %[oupxX] conversions */
98 * The following are used in integral conversions only:
99 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
101 #define SIGNOK 0x00040 /* +/- is (still) legal */
102 #define NDIGITS 0x00080 /* no digits detected */
103 #define PFXOK 0x00100 /* 0x prefix is (still) legal */
104 #define NZDIGITS 0x00200 /* no zero digits detected */
105 #define HAVESIGN 0x10000 /* sign detected */
110 #define CT_CHAR 0 /* %c conversion */
111 #define CT_CCL 1 /* %[...] conversion */
112 #define CT_STRING 2 /* %s conversion */
113 #define CT_INT 3 /* %[dioupxX] conversion */
114 #define CT_FLOAT 4 /* %[efgEFG] conversion */
116 static const u_char
*__sccl(char *, const u_char
*);
117 #ifndef NO_FLOATING_POINT
118 static int parsefloat(FILE *, char *, char *);
121 int __scanfdebug
= 0;
123 #define __collate_load_error /*CONSTCOND*/0
125 __collate_range_cmp(int c1
, int c2
)
127 static char s1
[2], s2
[2];
131 return strcoll(s1
, s2
);
136 * __svfscanf - MT-safe version
139 __svfscanf(FILE *fp
, char const *fmt0
, va_list ap
)
148 ret
= __svfscanf_unlocked(fp
, fmt0
, ap
);
154 * __svfscanf_unlocked - non-MT-safe version of __svfscanf
157 __svfscanf_unlocked(FILE *fp
, const char *fmt0
, va_list ap
)
159 const u_char
*fmt
= (const u_char
*)fmt0
;
160 int c
; /* character from format, or conversion */
161 size_t width
; /* field width, or 0 */
162 char *p
; /* points into all kinds of strings */
163 size_t n
; /* handy size_t */
164 int flags
; /* flags as defined above */
165 char *p0
; /* saves original value of p when necessary */
166 int nassigned
; /* number of fields assigned */
167 int nconversions
; /* number of conversions */
168 int nread
; /* number of characters consumed from fp */
169 int base
; /* base argument to conversion function */
170 char ccltab
[256]; /* character class table for %[...] */
171 char buf
[BUF
]; /* buffer for numeric and mb conversions */
172 wchar_t *wcp
; /* handy wide character pointer */
173 size_t nconv
; /* length of multibyte sequence converted */
174 static const mbstate_t initial
= { 0 };
177 /* `basefix' is used to avoid `if' tests in the integer scanner */
178 static const short basefix
[17] =
179 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
181 _DIAGASSERT(fp
!= NULL
);
182 _DIAGASSERT(fmt0
!= NULL
);
188 _SET_ORIENTATION(fp
, -1);
190 //Print(L"%a( %d, \"%a\", ...)\n", __func__, fp->_file, fmt0);
196 c
= (unsigned char)*fmt
++;
200 while ((fp
->_r
> 0 || __srefill(fp
) == 0) &&
202 nread
++, fp
->_r
--, fp
->_p
++;
205 //Print(L"%a: %d\n", __func__, __LINE__);
211 * switch on the format. continue if done;
212 * break once format type is derived.
215 //Print(L"%a: %d\n", __func__, __LINE__);
219 //Print(L"%a: %d\n", __func__, __LINE__);
220 if (fp
->_r
<= 0 && __srefill(fp
))
242 flags
|= LONGLONG
; /* not quite */
261 case '0': case '1': case '2': case '3': case '4':
262 case '5': case '6': case '7': case '8': case '9':
263 width
= width
* 10 + c
- '0';
293 flags
|= PFXOK
; /* enable 0x prefixing */
299 #ifndef NO_FLOATING_POINT
300 case 'A': case 'E': case 'F': case 'G':
301 case 'a': case 'e': case 'f': case 'g':
314 fmt
= __sccl(ccltab
, fmt
);
327 case 'p': /* pointer format is like hex */
328 flags
|= POINTER
| PFXOK
;
329 c
= CT_INT
; /* assumes sizeof(uintmax_t) */
330 flags
|= UNSIGNED
; /* >= sizeof(uintptr_t) */
336 if (flags
& SUPPRESS
) /* ??? */
338 if (flags
& SHORTSHORT
)
339 *va_arg(ap
, char *) = (char)nread
;
340 else if (flags
& SHORT
)
341 *va_arg(ap
, short *) = (short)nread
;
342 else if (flags
& LONG
)
343 *va_arg(ap
, long *) = nread
;
344 else if (flags
& LONGLONG
)
345 *va_arg(ap
, long long *) = nread
;
346 else if (flags
& INTMAXT
)
347 *va_arg(ap
, intmax_t *) = nread
;
348 else if (flags
& SIZET
)
349 *va_arg(ap
, size_t *) = nread
;
350 else if (flags
& PTRDIFFT
)
351 *va_arg(ap
, ptrdiff_t *) = nread
;
353 *va_arg(ap
, int *) = nread
;
360 * Disgusting backwards compatibility hack. XXX
362 case '\0': /* compat */
365 //Print(L"%a: %d\n", __func__, __LINE__);
368 * We have a conversion that requires input.
370 if (fp
->_r
<= 0 && __srefill(fp
))
372 //Print(L"%a: %d\n", __func__, __LINE__);
377 * Consume leading white space, except for formats
378 * that suppress this.
380 if ((flags
& NOSKIP
) == 0) {
381 while (isspace(*fp
->_p
)) {
385 else if (__srefill(fp
))
387 //Print(L"%a: %d\n", __func__, __LINE__);
392 * Note that there is at least one character in
393 * the buffer, so conversions that do not set NOSKIP
394 * can no longer result in an input failure.
401 //Print(L"%a: %d\n", __func__, __LINE__);
405 /* scan arbitrary characters (sets NOSKIP) */
409 if ((flags
& SUPPRESS
) == 0)
410 wcp
= va_arg(ap
, wchar_t *);
415 if (n
== MB_CUR_MAX
) {
416 fp
->_flags
|= __SERR
;
423 nconv
= mbrtowc(wcp
, buf
, n
, &mbs
);
424 if (nconv
== (size_t)-1) {
425 fp
->_flags
|= __SERR
;
428 if (nconv
== 0 && !(flags
& SUPPRESS
))
430 if (nconv
!= (size_t)-2) {
433 if (!(flags
& SUPPRESS
))
437 if (fp
->_r
<= 0 && __srefill(fp
)) {
439 fp
->_flags
|= __SERR
;
445 if (!(flags
& SUPPRESS
))
447 } else if (flags
& SUPPRESS
) {
450 if ((n
= fp
->_r
) < width
) {
461 fp
->_r
-= (int)width
;
468 size_t r
= fread(va_arg(ap
, char *), 1,
480 /* scan a (nonempty) character class (sets NOSKIP) */
482 width
= (size_t)~0; /* `infinity' */
483 /* take only those things in the class */
488 if ((flags
& SUPPRESS
) == 0)
489 wcp
= va_arg(ap
, wchar_t *);
495 if (n
== MB_CUR_MAX
) {
496 fp
->_flags
|= __SERR
;
503 nconv
= mbrtowc(wcp
, buf
, n
, &mbs
);
504 if (nconv
== (size_t)-1) {
505 fp
->_flags
|= __SERR
;
510 if (nconv
!= (size_t)-2) {
511 if (wctob(*wcp
) != EOF
&&
512 !ccltab
[wctob(*wcp
)]) {
522 if (!(flags
& SUPPRESS
))
527 if (fp
->_r
<= 0 && __srefill(fp
)) {
529 fp
->_flags
|= __SERR
;
536 fp
->_flags
|= __SERR
;
542 if (!(flags
& SUPPRESS
)) {
546 } else if (flags
& SUPPRESS
) {
548 while (ccltab
[*fp
->_p
]) {
549 n
++, fp
->_r
--, fp
->_p
++;
552 if (fp
->_r
<= 0 && __srefill(fp
)) {
561 p0
= p
= va_arg(ap
, char *);
562 while (ccltab
[*fp
->_p
]) {
567 if (fp
->_r
<= 0 && __srefill(fp
)) {
584 /* like CCL, but zero-length string OK, & no NOSKIP */
590 if ((flags
& SUPPRESS
) == 0)
591 wcp
= va_arg(ap
, wchar_t *);
595 while (!isspace(*fp
->_p
) && width
!= 0) {
596 if (n
== MB_CUR_MAX
) {
597 fp
->_flags
|= __SERR
;
604 nconv
= mbrtowc(wcp
, buf
, n
, &mbs
);
605 if (nconv
== (size_t)-1) {
606 fp
->_flags
|= __SERR
;
611 if (nconv
!= (size_t)-2) {
612 if (iswspace(*wcp
)) {
622 if (!(flags
& SUPPRESS
))
626 if (fp
->_r
<= 0 && __srefill(fp
)) {
628 fp
->_flags
|= __SERR
;
634 if (!(flags
& SUPPRESS
)) {
638 } else if (flags
& SUPPRESS
) {
640 while (!isspace(*fp
->_p
)) {
641 n
++, fp
->_r
--, fp
->_p
++;
644 if (fp
->_r
<= 0 && __srefill(fp
))
649 p0
= p
= va_arg(ap
, char *);
650 while (!isspace(*fp
->_p
)) {
655 if (fp
->_r
<= 0 && __srefill(fp
))
659 nread
+= (int)(p
- p0
);
666 //Print(L"%a: %d\n", __func__, __LINE__);
667 /* scan an integer as if by the conversion function */
669 if (width
== 0 || width
> sizeof(buf
) - 1)
670 width
= sizeof(buf
) - 1;
672 /* size_t is unsigned, hence this optimisation */
673 if (--width
> sizeof(buf
) - 2)
674 width
= sizeof(buf
) - 2;
677 flags
|= SIGNOK
| NDIGITS
| NZDIGITS
;
678 for (p
= buf
; width
; width
--) {
681 * Switch on the character; `goto ok'
682 * if we accept it as a part of number.
687 * The digit 0 is always legal, but is
688 * special. For %i conversions, if no
689 * digits (zero or nonzero) have been
690 * scanned (only signs), we will have
691 * base==0. In that case, we should set
692 * it to 8 and enable 0x prefixing.
693 * Also, if we have not scanned zero digits
694 * before this, do not turn off prefixing
695 * (someone else will turn it off if we
696 * have scanned any nonzero digits).
703 if (flags
& NZDIGITS
)
704 flags
&= ~(SIGNOK
|NZDIGITS
|NDIGITS
);
706 flags
&= ~(SIGNOK
|PFXOK
|NDIGITS
);
709 /* 1 through 7 always legal */
710 case '1': case '2': case '3':
711 case '4': case '5': case '6': case '7':
712 base
= basefix
[base
];
713 flags
&= ~(SIGNOK
| PFXOK
| NDIGITS
);
716 /* digits 8 and 9 ok iff decimal or hex */
718 base
= basefix
[base
];
720 break; /* not legal here */
721 flags
&= ~(SIGNOK
| PFXOK
| NDIGITS
);
724 /* letters ok iff hex */
725 case 'A': case 'B': case 'C':
726 case 'D': case 'E': case 'F':
727 case 'a': case 'b': case 'c':
728 case 'd': case 'e': case 'f':
729 /* no need to fix base here */
731 break; /* not legal here */
732 flags
&= ~(SIGNOK
| PFXOK
| NDIGITS
);
735 /* sign ok only as first character */
737 if (flags
& SIGNOK
) {
745 * x ok iff flag still set & 2nd char (or
746 * 3rd char if we have a sign).
749 if (flags
& PFXOK
&& p
==
750 buf
+ 1 + !!(flags
& HAVESIGN
)) {
751 base
= 16; /* if %i */
759 * If we got here, c is not a legal character
760 * for a number. Stop accumulating digits.
765 * c is legal: store it and look at the next.
770 else if (__srefill(fp
))
774 * If we had only a sign, it is no good; push
775 * back the sign. If the number ends in `x',
776 * it was [sign] '0' 'x', so push back the x
777 * and treat it as [sign] '0'.
779 if (flags
& NDIGITS
) {
781 (void)ungetc(*(u_char
*)--p
, fp
);
784 c
= ((u_char
*)p
)[-1];
785 if (c
== 'x' || c
== 'X') {
789 if ((flags
& SUPPRESS
) == 0) {
791 // Use a union to get around the truncation warnings.
806 if ((flags
& UNSIGNED
) == 0)
807 res
.imax
= strtoimax(buf
, (char **)NULL
, base
);
809 res
.umax
= strtoumax(buf
, (char **)NULL
, base
);
811 *va_arg(ap
, void **) = res
.vp
;
812 //(void *)((uintptr_t)res);
813 else if (flags
& SHORTSHORT
)
814 *va_arg(ap
, char *) = res
.ch
;
815 else if (flags
& SHORT
)
816 *va_arg(ap
, short *) = res
.hw
;
817 else if (flags
& LONG
)
818 *va_arg(ap
, long *) = res
.lo
;
819 else if (flags
& LONGLONG
)
820 *va_arg(ap
, long long *) = res
.ll
;
821 else if (flags
& INTMAXT
)
822 *va_arg(ap
, intmax_t *) = res
.imax
;
823 else if (flags
& PTRDIFFT
)
824 *va_arg(ap
, ptrdiff_t *) = res
.pdt
;
826 else if (flags
& SIZET
)
827 *va_arg(ap
, size_t *) = res
.sz
;
829 *va_arg(ap
, int *) = res
.in
;
832 nread
+= (int)(p
- buf
);
834 //Print(L"%a: %d\n", __func__, __LINE__);
837 #ifndef NO_FLOATING_POINT
839 /* scan a floating point number as if by strtod */
840 if (width
== 0 || width
> sizeof(buf
) - 1)
841 width
= sizeof(buf
) - 1;
842 if ((width
= parsefloat(fp
, buf
, buf
+ width
)) == 0)
844 if ((flags
& SUPPRESS
) == 0) {
845 if (flags
& LONGDBL
) {
846 long double **mp
= (long double **)ap
;
847 long double res
= strtold(buf
, &p
);
850 ap
+= sizeof(long double *);
851 /*???*/ //*va_arg(ap, long double *) = res;
852 } else if (flags
& LONG
) {
853 double res
= strtod(buf
, &p
);
854 *va_arg(ap
, double *) = res
;
856 float res
= strtof(buf
, &p
);
857 *va_arg(ap
, float *) = res
;
859 if (__scanfdebug
&& p
- buf
!= (ptrdiff_t)width
)
866 #endif /* !NO_FLOATING_POINT */
870 //Print(L"%a: %d\n", __func__, __LINE__);
871 return (nconversions
!= 0 ? nassigned
: EOF
);
877 * Fill in the given table from the scanset at the given format
878 * (just after `['). Return a pointer to the character past the
879 * closing `]'. The table has a 1 wherever characters should be
880 * considered part of the scanset.
882 static const u_char
*
883 __sccl(char *tab
, const u_char
*fmt
)
887 _DIAGASSERT(tab
!= NULL
);
888 _DIAGASSERT(fmt
!= NULL
);
889 /* first `clear' the whole table */
890 c
= *fmt
++; /* first char hat => negated scanset */
892 v
= 1; /* default => accept */
893 c
= *fmt
++; /* get new first char */
895 v
= 0; /* default => reject */
897 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
898 (void)memset(tab
, v
, 256);
901 return (fmt
- 1);/* format ended before closing ] */
904 * Now set the entries corresponding to the actual scanset
905 * to the opposite of the above.
907 * The first character may be ']' (or '-') without being special;
908 * the last character may be '-'.
912 tab
[c
] = (char)v
; /* take character c */
914 n
= *fmt
++; /* and examine the next */
917 case 0: /* format ended too soon */
922 * A scanset of the form
924 * is defined as `the digit 0, the digit 1,
925 * the character +, the character -', but
926 * the effect of a scanset such as
928 * is implementation defined. The V7 Unix
929 * scanf treats `a-z' as `the letters a through
930 * z', but treats `a-a' as `the letter a, the
931 * character -, and the letter a'.
933 * For compatibility, the `-' is not considered
934 * to define a range if the character following
935 * it is either a close bracket (required by ANSI)
936 * or is not numerically greater than the character
937 * we just stored in the table (c).
940 if (n
== ']' || (__collate_load_error
? n
< c
:
941 __collate_range_cmp(n
, c
) < 0)) {
943 break; /* resume the for(;;) */
946 /* fill in the range */
947 if (__collate_load_error
) {
952 for (i
= 0; i
< 256; i
++)
953 if (__collate_range_cmp(c
, i
) < 0 &&
954 __collate_range_cmp(i
, n
) <= 0)
957 #if 1 /* XXX another disgusting compatibility hack */
960 * Alas, the V7 Unix scanf also treats formats
961 * such as [a-c-e] as `the letters a through e'.
962 * This too is permitted by the standard....
973 case ']': /* end of scanset */
976 default: /* just another character */
984 #ifndef NO_FLOATING_POINT
986 parsefloat(FILE *fp
, char *buf
, char *end
)
991 S_START
, S_GOTSIGN
, S_INF
, S_NAN
, S_MAYBEHEX
,
992 S_DIGITS
, S_FRAC
, S_EXP
, S_EXPDIGITS
995 char decpt
= *localeconv()->decimal_point
;
996 _Bool gotmantdig
= 0, ishex
= 0;
1004 * We set commit = p whenever the string we have read so far
1005 * constitutes a valid representation of a floating point
1006 * number by itself. At some point, the parse will complete
1007 * or fail, and we will ungetc() back to the last commit point.
1008 * To ensure that the file offset gets updated properly, it is
1009 * always necessary to read at least one character that doesn't
1010 * match; thus, we can't short-circuit "infinity" or "nan(...)".
1013 for (p
= buf
; p
< end
; ) {
1019 if (c
== '-' || c
== '+')
1043 if (infnanpos
> 6 ||
1044 (c
!= "nfinity"[infnanpos
] &&
1045 c
!= "NFINITY"[infnanpos
]))
1047 if (infnanpos
== 1 || infnanpos
== 6)
1048 commit
= p
; /* inf or infinity */
1052 switch (infnanpos
) {
1053 case -1: /* XXX kludge to deal with nan(...) */
1056 if (c
!= 'A' && c
!= 'a')
1060 if (c
!= 'N' && c
!= 'n')
1073 } else if (!isalnum(c
) && c
!= '_')
1081 if (c
== 'X' || c
== 'x') {
1084 } else { /* we saw a '0', but no 'x' */
1089 if ((ishex
&& isxdigit(c
)) || isdigit(c
))
1100 if (((c
== 'E' || c
== 'e') && !ishex
) ||
1101 ((c
== 'P' || c
== 'p') && ishex
)) {
1106 } else if ((ishex
&& isxdigit(c
)) || isdigit(c
)) {
1113 state
= S_EXPDIGITS
;
1114 if (c
== '-' || c
== '+')
1130 else if (__srefill(fp
))
1135 while (commit
< --p
)
1136 (void)ungetc(*(u_char
*)p
, fp
);
1138 return (int)(commit
- buf
);