logoalt Hacker News

noriryesterday at 6:38 PM1 replyview on HN

This is not a hard thing to do without using a library. The code below is easily adapted to the unsigned case and/or arbitrary base rather than 10.

    #include <stdio.h>
    int main(int argc, char **argv) {
        if (argc != 2) {
            fprintf(stderr, "usage: require one numeric argument");
        }
        char *nump = argv[1];
        unsigned neg = 0;
        unsigned long long ures = 0;
        if (*nump == '-') {
            neg = 1;
            nump = nump + 1;
        }
        if (!*nump) {
            fprintf(stderr, "require non empty string\n");
            return 1;
        }
        char b;
        while (b = *nump++) {
            if (b >= '0' && b <= '9') {
                unsigned long long nres = (ures * 10) + (b - '0'); 
                if (nres < ures) {
                    fprintf(stderr, "overflow in '%s'\n", argv[1]);
                    return 1;
                }   
                ures = nres;
            } else {
                if (b >= ' ') {
                    fprintf(stderr, "invalid char '%c' in '%s'\n", b, argv[1]); 
                } else {
                    fprintf(stderr, "invalid byte '%d' in '%s'\n", b, argv[1]);
                }
                return 1;  
            }
        }
        long long res = (long long) ures;
        if (neg) {
            if (ures <= 0x8000000000000000ULL) {
                res = -res;
            } else {
                fprintf(stderr, "underflow in '%s'\n", argv[1]);
                return 1;
            }
        } else if (ures > 0x7FFFFFFFFFFFFFFFULL) {
            fprintf(stderr, "overflow in '%s'\n", argv[1]);
            return 1;
        }
        fprintf(stdout, "result: %lld\n", res);
        return 0;
    }

Replies

wCxV8HzziQBbyesterday at 6:59 PM

The bound on ures <= 0x80[...] should be either ures < 0x80[...] or ures <= 0x7F[...]. Otherwise, parsing negative `0x8000000000000000` will run code to negate the signed integer INT64_MIN (-0x80[...]) to 0x80[...], which doesn't fit in an integer (INT*_MAX is 0x80[...]).

    $ clang parseint.c -fsanitize=undefined -O0 -g -o parseint
    $ ./parseint -9223372036854775808
    parseint.c:38:23: runtime error: negation of -9223372036854775808 cannot be represented in type 'long long'; cast to an unsigned type to negate this value to itself
    result: -9223372036854775808
edit: this is just to show that getting undefined behavior right is hard!