From aa35d294f2ff1bb28cc6c20e89ed2e0deedf11e1 Mon Sep 17 00:00:00 2001 From: hugogogo Date: Thu, 7 May 2026 14:29:36 +0200 Subject: [PATCH] fix isdigit_superscript --- srcs/ft_isdigit_superscript.c | 63 +++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/srcs/ft_isdigit_superscript.c b/srcs/ft_isdigit_superscript.c index 857f360..270ed0a 100644 --- a/srcs/ft_isdigit_superscript.c +++ b/srcs/ft_isdigit_superscript.c @@ -1,7 +1,28 @@ #include "libft.h" +// --- UTF-8 byte sequence macros for 2-byte superscript digits --- +#define TWO_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1 0xC2 + +// Superscript digits: ¹, ², ³ +#define SUPERSCRIPT_1_BYTE2 0xB9 +#define SUPERSCRIPT_2_BYTE2 0xB2 +#define SUPERSCRIPT_3_BYTE2 0xB3 + +// --- UTF-8 byte sequence macros for 3-byte superscript digits --- +#define THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1 0xE2 +#define THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE2 0x81 + +// Superscript digits: ⁰, ⁴, ⁵, ⁶, ⁷, ⁸, ⁹ +#define SUPERSCRIPT_0_BYTE3 0xB0 +#define SUPERSCRIPT_4_BYTE3 0xB4 +#define SUPERSCRIPT_5_BYTE3 0xB5 +#define SUPERSCRIPT_6_BYTE3 0xB6 +#define SUPERSCRIPT_7_BYTE3 0xB7 +#define SUPERSCRIPT_8_BYTE3 0xB8 +#define SUPERSCRIPT_9_BYTE3 0xB9 + /** - * Checks if the UTF-8 character at `input` is a superscript digit (², ³, ⁰-⁹). + * Checks if the UTF-8 character at `input` is a superscript digit (¹, ², ³, ⁰-⁹). * If it is, sets `*size` to the number of bytes in the character (2 or 3). * Returns 1 if true, 0 otherwise. */ @@ -12,42 +33,40 @@ int ft_isdigit_superscript(const char *input, int *size) *size = 0; // Default to 0 if not a superscript digit } - // 2-byte UTF-8 superscript digits: ² (U+00B2) and ³ (U+00B3) --- - // In UTF-8, 2-byte characters start with a byte in the range 0xC0-0xDF. - // For superscript ² and ³: - // - First byte: 0xC2 (binary: 11000010) - // - Second byte: 0xB2 (²) or 0xB3 (³) (binary: 10110010 or 10110011) - if ((uint8_t)*input == 0xC2) // Check if first byte is 0xC2 (start of 2-byte UTF-8) + // --- Check for 2-byte superscript digits (¹, ², ³) --- + if ((uint8_t)*input == TWO_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1) { - // Check if the second byte matches ² (0xB2) or ³ (0xB3) - if ((uint8_t)*(input + 1) == 0xB2 || (uint8_t)*(input + 1) == 0xB3) + uint8_t second_byte = (uint8_t)*(input + 1); + if (second_byte == SUPERSCRIPT_1_BYTE2 || + second_byte == SUPERSCRIPT_2_BYTE2 || + second_byte == SUPERSCRIPT_3_BYTE2) { if (size != NULL) { *size = 2; // 2-byte character } - return 1; // Valid superscript digit (² or ³) + return 1; // Valid superscript digit (¹, ², or ³) } } - // 3-byte UTF-8 superscript digits: ⁰ (U+2070) to ⁹ (U+2079) --- - // In UTF-8, 3-byte characters start with a byte in the range 0xE0-0xEF. - // For superscript ⁰-⁹: - // - First byte: 0xE2 (binary: 11100010) - // - Second byte: 0x81 (binary: 10000001) - // - Third byte: 0xB0 (⁰) to 0xB9 (⁹) (binary: 10110000 to 10111001) - else if ((uint8_t)*input == 0xE2) // Check if first byte is 0xE2 (start of 3-byte UTF-8) + // --- Check for 3-byte superscript digits (⁰, ⁴-⁹) --- + else if ((uint8_t)*input == THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1) { - // Check if the second byte is 0x81 (part of the 3-byte sequence for ⁰-⁹) - if ((uint8_t)*(input + 1) == 0x81) + if ((uint8_t)*(input + 1) == THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE2) { - // Check if the third byte is in the range 0xB0-0xB9 (⁰ to ⁹) - if ((uint8_t)*(input + 2) >= 0xB0 && (uint8_t)*(input + 2) <= 0xB9) + uint8_t third_byte = (uint8_t)*(input + 2); + if (third_byte == SUPERSCRIPT_0_BYTE3 || + third_byte == SUPERSCRIPT_4_BYTE3 || + third_byte == SUPERSCRIPT_5_BYTE3 || + third_byte == SUPERSCRIPT_6_BYTE3 || + third_byte == SUPERSCRIPT_7_BYTE3 || + third_byte == SUPERSCRIPT_8_BYTE3 || + third_byte == SUPERSCRIPT_9_BYTE3) { if (size != NULL) { *size = 3; // 3-byte character } - return 1; // Valid superscript digit (⁰-⁹) + return 1; // Valid superscript digit (⁰, ⁴-⁹) } } }