diff --git a/Makefile b/Makefile index e5fd67b..97c5179 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ SRCS = ft_memset.c \ \ ft_isalpha.c \ ft_isdigit.c \ + ft_isdigit_superscript.c \ ft_isalnum.c \ ft_isnumber.c \ ft_isascii.c \ @@ -67,7 +68,11 @@ SRCS = ft_memset.c \ ft_split.c \ \ ft_atoi.c \ + ft_atoi_superscript.c \ ft_atol.c \ + ft_atol_superscript.c \ + ft_atoll.c \ + ft_atoll_superscript.c \ ft_atof.c \ ft_itoa.c \ ft_utoa.c \ diff --git a/includes/libft.h b/includes/libft.h index b7f35a6..7946cd0 100644 --- a/includes/libft.h +++ b/includes/libft.h @@ -14,6 +14,8 @@ #define LIBFT_H #include <unistd.h> // for write #include <stdlib.h> // for malloc and free +#include <stdint.h> // for uint8_t for utf8 characters +#include <limits.h> // for INT_MAX, LONG_MAX, etc #include "ft_gnl.h" #include "ft_printf.h" @@ -27,6 +29,7 @@ int ft_memcmp(const void *s1, const void *s2, size_t n); size_t ft_strlen(const char *str); int ft_isalpha(int c); int ft_isdigit(int c); +int ft_isdigit_superscript(const char *input, int *size); int ft_isnumber(char *nb); int ft_isalnum(int c); int ft_isascii(int c); @@ -42,7 +45,11 @@ size_t ft_strlcpy(char *dst, const char *src, size_t size); size_t ft_strlcat(char *dst, const char *src, size_t size); char *ft_strnstr(const char *b, const char *l, size_t s); int ft_atoi(const char *str); +int ft_atoi_superscript(const char *str); long ft_atol(const char *str); +long ft_atol_superscript(const char *str); +long long ft_atoll(const char *str); +long long ft_atoll_superscript(const char *str); double ft_atof(const char *str); void *ft_calloc(size_t count, size_t size); char *ft_strdup(const char *s1); @@ -65,9 +72,6 @@ void ft_putstr_fd(const char *s, int fd); void ft_putnbr_fd(int n, int fd); void ft_putnbrbase_fd(int nbr, const char *base, int fd); -// void ft_putendl(char const *str); -// void ft_putendl_fd(char *s, int fd); - typedef struct s_list { void *content; @@ -75,19 +79,6 @@ typedef struct s_list struct s_list *next; } t_list; -// t_list *ft_lstnew(void 
*content);
-// void ft_lstadd_front(t_list **alst, t_list *n);
-// void *ft_lstadd_back(t_list **alst, t_list *n);
-// int ft_lstsize(t_list *lst);
-// t_list *ft_lstlast(t_list *lst);
-// void ft_lstdelone(t_list *lst, void (*del)(void *));
-// void ft_lstclear(t_list **lst, void (*del)(void *));
-// void ft_lstiter(t_list *lst, void (*f)(void *));
-// t_list *ft_lstmap(t_list *l, void *(*f)(void *), void (*d)(void *));
-
-// void ft_lstremove(t_list *lst, void (*del)(void *));
-// t_list *ft_lstfind(t_list *lst, void *to_find, int (*comp)(void *, void *));
-
 t_list *ft_lstcreate(void *content);
 void *ft_lstpush_back(t_list **lst, t_list *new);
 void ft_lstpush_front(t_list **alst, t_list *new);
diff --git a/srcs/ft_atoi.c b/srcs/ft_atoi.c
index 4c74378..385285e 100644
--- a/srcs/ft_atoi.c
+++ b/srcs/ft_atoi.c
@@ -14,27 +14,18 @@
 
+/**
+ * Converts the leading integer in `str` to an int.
+ * Parsing is delegated to ft_atoll; a result outside [INT_MIN, INT_MAX]
+ * is clamped to the nearest bound instead of being truncated.
+ */
 int ft_atoi(const char *str)
 {
-	long nbr;
-	int i;
-	int negatif;
+	long long result = ft_atoll(str);
 
-	i = 0;
-	negatif = 1;
-	nbr = 0;
+	// clamp to int range
+	if (result > INT_MAX)
+		return INT_MAX;
+	if (result < INT_MIN)
+		return INT_MIN;
 
-	// Skip leading whitespace
-	while ((str[i] == ' ') || (str[i] > 8 && str[i] < 14))
-		i++;
-
-	// Handle optional sign
-	if (str[i] == '-')
-		negatif = -1;
-	if (str[i] == '+' || str[i] == '-')
-		i++;
-
-	// parse integer
-	while (str[i] >= '0' && str[i] <= '9')
-		nbr = nbr * 10 + (str[i++] - '0');
-
-	return (nbr * negatif);
+	return (int)result;
 }
diff --git a/srcs/ft_atoi_superscript.c b/srcs/ft_atoi_superscript.c
new file mode 100644
index 0000000..e90c888
--- /dev/null
+++ b/srcs/ft_atoi_superscript.c
@@ -0,0 +1,17 @@
+
+#include "libft.h"
+
+/**
+ * Parses a string of superscript digits into an int.
+ * Parsing is delegated to ft_atoll_superscript; the result is clamped, so
+ * INT_MAX/INT_MIN is returned when the value does not fit in an int.
+ */
+int ft_atoi_superscript(const char *str)
+{
+	long long result = ft_atoll_superscript(str);
+	if (result > INT_MAX)
+		return INT_MAX;
+	if (result < INT_MIN)
+		return INT_MIN;
+	return (int)result;
+}
diff --git a/srcs/ft_atol.c b/srcs/ft_atol.c
index 817d26c..d118aea 100644
--- a/srcs/ft_atol.c
+++ b/srcs/ft_atol.c
@@ -3,27 +3,18 @@
 
+/**
+ * Converts the leading integer in `str` to a long.
+ * Parsing is delegated to ft_atoll; a result outside [LONG_MIN, LONG_MAX]
+ * is clamped, which only matters where long is narrower than long long.
+ */
 long ft_atol(const char *str)
 {
-	long long nbr;
-	int i;
-	int negatif;
+	long long result = ft_atoll(str);
 
-	i = 0;
-	negatif = 1;
-	nbr = 0;
+	// clamp to long range
+	if (result > LONG_MAX)
+		return LONG_MAX;
+	if (result < LONG_MIN)
+		return LONG_MIN;
 
-	// Skip leading whitespace
-	while ((str[i] == ' ') || (str[i] > 8 && str[i] < 14))
-		i++;
-
-	// Handle optional sign
-	if (str[i] == '-')
-		negatif = -1;
-	if (str[i] == '+' || str[i] == '-')
-		i++;
-
-	// parse integer
-	while (str[i] >= '0' && str[i] <= '9')
-		nbr = nbr * 10 + (str[i++] - '0');
-
-	return (nbr * negatif);
+	return (long)result;
 }
diff --git a/srcs/ft_atol_superscript.c b/srcs/ft_atol_superscript.c
new file mode 100644
index 0000000..7528a50
--- /dev/null
+++ b/srcs/ft_atol_superscript.c
@@ -0,0 +1,16 @@
+
+#include "libft.h"
+
+/**
+ * Parses a string of superscript digits into a long.
+ * Returns the integer value, or LONG_MAX/LONG_MIN on overflow.
+ */
+long ft_atol_superscript(const char *str)
+{
+	long long result = ft_atoll_superscript(str);
+	if (result > LONG_MAX)
+		return LONG_MAX;
+	if (result < LONG_MIN)
+		return LONG_MIN;
+	return (long)result;
+}
diff --git a/srcs/ft_atoll.c b/srcs/ft_atoll.c
new file mode 100644
index 0000000..6457465
--- /dev/null
+++ b/srcs/ft_atoll.c
@@ -0,0 +1,46 @@
+
+#include "libft.h"
+
+/**
+ * Converts the leading integer in `str` to a long long.
+ * Skips leading whitespace (as reported by ft_isspace), accepts one optional
+ * '+' or '-' sign, then consumes ASCII digits until the first non-digit.
+ * Returns LLONG_MAX (positive) or LLONG_MIN (negative) when the magnitude
+ * does not fit in a long long; returns 0 when no digit is found.
+ */
+long long ft_atoll(const char *str)
+{
+	long long nbr;
+	int i;
+	int sign;
+
+	i = 0;
+	sign = 1;
+	nbr = 0;
+
+	// skip leading whitespace
+	while (ft_isspace(str[i]))
+		i++;
+
+	// handle optional sign
+	if (str[i] == '-')
+		sign = -1;
+	if (str[i] == '+' || str[i] == '-')
+		i++;
+
+	// parse digits with overflow check for `long long`
+	while (str[i] >= '0' && str[i] <= '9')
+	{
+		// check if multiplying by 10 would overflow
+		if (nbr > LLONG_MAX / 10)
+			return (sign > 0) ? LLONG_MAX : LLONG_MIN;
+		// check if adding the next digit would overflow
+		if (nbr == LLONG_MAX / 10 && (str[i] - '0') > LLONG_MAX % 10)
+			return (sign > 0) ? LLONG_MAX : LLONG_MIN;
+
+		nbr = nbr * 10 + (str[i] - '0');
+		i++;
+	}
+
+	return (nbr * sign);
+}
diff --git a/srcs/ft_atoll_superscript.c b/srcs/ft_atoll_superscript.c
new file mode 100644
index 0000000..ce44f0b
--- /dev/null
+++ b/srcs/ft_atoll_superscript.c
@@ -0,0 +1,78 @@
+
+#include "libft.h"
+
+/**
+ * Returns the value (0-9) of the superscript digit encoded at `s`, whose
+ * UTF-8 length `size` was reported by ft_isdigit_superscript, or -1 when the
+ * bytes are not a superscript digit.
+ *   2 bytes: 0xC2 0xB9 = ¹ (U+00B9), 0xC2 0xB2 = ² (U+00B2), 0xC2 0xB3 = ³ (U+00B3)
+ *   3 bytes: 0xE2 0x81 0xB0 = ⁰ (U+2070), 0xE2 0x81 0xB4..0xB9 = ⁴-⁹ (U+2074-U+2079)
+ */
+static int superscript_value(const char *s, int size)
+{
+	uint8_t last;
+
+	if (size == 2)
+	{
+		last = (uint8_t)s[1];
+		if (last == 0xB9)
+			return 1;
+		if (last == 0xB2 || last == 0xB3)
+			return last - 0xB0;
+		return -1;
+	}
+	if (size == 3)
+	{
+		last = (uint8_t)s[2];
+		// U+2071 is the letter ⁱ and U+2072/U+2073 are unassigned: not digits
+		if (last == 0xB0 || (last >= 0xB4 && last <= 0xB9))
+			return last - 0xB0;
+	}
+	return -1;
+}
+
+/**
+ * Parses a string of superscript digits (e.g., "¹²³⁴") into a long long.
+ * Leading whitespace (per ft_isspace) and one optional ASCII '+' or '-' sign
+ * are accepted; parsing stops at the first byte sequence that is not a
+ * superscript digit.
+ * Returns the integer value, or LLONG_MAX/LLONG_MIN on overflow.
+ */
+long long ft_atoll_superscript(const char *str)
+{
+	long long nbr = 0;
+	int i = 0;
+	int negatif = 1;
+	int superscript_size;
+	int digit;
+
+	// Skip leading whitespace
+	while (ft_isspace(str[i]))
+		i++;
+
+	// Handle optional sign
+	if (str[i] == '-')
+		negatif = -1;
+	if (str[i] == '+' || str[i] == '-')
+		i++;
+
+	// Parse superscript digits
+	while (ft_isdigit_superscript(&str[i], &superscript_size))
+	{
+		digit = superscript_value(&str[i], superscript_size);
+		if (digit < 0)
+			break; // Accepted by the checker but not a digit: stop here
+
+		// Saturate before nbr * 10 + digit can overflow
+		if (nbr > LLONG_MAX / 10 ||
+			(nbr == LLONG_MAX / 10 && digit > LLONG_MAX % 10))
+		{
+			return (negatif > 0) ? LLONG_MAX : LLONG_MIN;
+		}
+
+		nbr = nbr * 10 + digit;
+		i += superscript_size; // Skip the entire UTF-8 character
+	}
+
+	return (nbr * negatif);
+}
diff --git a/srcs/ft_isdigit_superscript.c b/srcs/ft_isdigit_superscript.c
new file mode 100644
index 0000000..857f360
--- /dev/null
+++ b/srcs/ft_isdigit_superscript.c
@@ -0,0 +1,38 @@
+#include "libft.h"
+
+/**
+ * Checks if the UTF-8 character at `input` is a superscript digit (⁰-⁹).
+ *
+ * Recognised encodings:
+ *   - 0xC2 0xB9 / 0xB2 / 0xB3  : ¹ (U+00B9), ² (U+00B2), ³ (U+00B3)
+ *   - 0xE2 0x81 0xB0           : ⁰ (U+2070)
+ *   - 0xE2 0x81 0xB4 .. 0xB9   : ⁴-⁹ (U+2074-U+2079)
+ * 0xE2 0x81 0xB1..0xB3 are rejected: U+2071 is the superscript letter ⁱ and
+ * U+2072/U+2073 are unassigned.
+ *
+ * If `size` is not NULL, `*size` receives the byte length of the character
+ * (2 or 3), or 0 when `input` does not start with a superscript digit.
+ * A NULL `input` is treated as "not a digit".
+ * Returns 1 if true, 0 otherwise.
+ */
+int ft_isdigit_superscript(const char *input, int *size)
+{
+	const uint8_t *bytes = (const uint8_t *)input;
+	int len = 0;
+
+	// Each byte is read only after the previous one matched, so the scan
+	// never runs past the terminating NUL of a shorter string.
+	if (bytes != NULL)
+	{
+		if (bytes[0] == 0xC2
+			&& (bytes[1] == 0xB2 || bytes[1] == 0xB3 || bytes[1] == 0xB9))
+			len = 2;
+		else if (bytes[0] == 0xE2 && bytes[1] == 0x81
+			&& (bytes[2] == 0xB0 || (bytes[2] >= 0xB4 && bytes[2] <= 0xB9)))
+			len = 3;
+	}
+
+	if (size != NULL)
+		*size = len;
+	return (len != 0);
+}
diff --git a/testing/srcs/test_atoi.c b/testing/srcs/test_atoi.c
index 01660a9..120065d 100644
--- a/testing/srcs/test_atoi.c
+++ b/testing/srcs/test_atoi.c
@@ -12,28 +12,56 @@
 
 #include "libft.h"
 
-int ft_atoi(const char *str)
-{
-	long long nbr;
-	int i;
-	int n;
+// int ft_atoi(const char *str)
+// {
+// long long nbr; // Use `long long` to detect overflow before it happens in `int`
+// int i; // Current position in the string
+// int n; // Sign multiplier: 1 (positive) or -1 (negative)
 
-	i = 0;
-	n = 1;
-	nbr = 0;
-	while ((str[i] == ' ') || (str[i] > 8 && str[i] < 14))
-		i++;
-	if (str[i] == '-')
-		n = -1;
-	if (str[i] == '+' || str[i] == '-')
-		i++;
-	while (str[i] 
>= '0' && str[i] <= '9') - { - if ((nbr >= 922337203685477580 - && ((str[i] > 8 && n < 0) || (str[i] > 7 && n > 0)))) - return ((n > 0) ? -1 : 0); - else - nbr = nbr * 10 + (str[i++] - '0'); - } - return (nbr * n); -} +// i = 0; +// n = 1; +// nbr = 0; + +// // skip leading whitespace +// while (ft_isspace(str[i])) +// i++; + +// // handle optional sign +// if (str[i] == '-') +// n = -1; +// if (str[i] == '+' || str[i] == '-') +// i++; + +// // convert digits to integer +// while (str[i] >= '0' && str[i] <= '9') +// { +// // --- Overflow Check --- +// // LLONG_MAX = 9223372036854775807 (maximum value for a 64-bit signed integer) +// // If `nbr` is already >= 922337203685477580 (LLONG_MAX / 10), +// // then multiplying by 10 and adding another digit could overflow. +// // +// // Cases: +// // - For positive numbers: If `nbr >= 922337203685477580` and the next digit > 7, +// // then `nbr * 10 + digit` would exceed LLONG_MAX (e.g., 9223372036854775808). +// // - For negative numbers: If `nbr >= 922337203685477580` and the next digit > 8, +// // then `nbr * 10 + digit` would fall below LLONG_MIN (e.g., -9223372036854775809). +// // +// // Why 7 and 8? +// // - LLONG_MAX = 9223372036854775807 → Last digit is 7. +// // So, if `nbr = 922337203685477580` and the next digit is > 7, overflow occurs. +// // - LLONG_MIN = -9223372036854775808 → Last digit is 8. +// // So, if `nbr = 922337203685477580` and the next digit is > 8, underflow occurs. NOTE: the condition below compares the raw character `str[i]` with 7/8, not the digit value `str[i] - '0'`. +// if ((nbr >= 922337203685477580 && ((str[i] > 8 && n < 0) || (str[i] > 7 && n > 0)))) +// { +// // Overflow: Return -1 for positive, 0 for negative (sentinel values; these are not INT_MAX/INT_MIN) +// return ((n > 0) ? -1 : 0); +// } + +// // convert digit to integer : `str[i] - '0'` converts the ASCII character to its numeric value (e.g., '5' → 5) +// nbr = nbr * 10 + (str[i] - '0'); +// i++; +// } + +// // apply sign and return +// return (nbr * n); +// }