Compare commits

..

1 Commits

Author SHA1 Message Date
hugogogo
aa35d294f2 fix isdigit_superscript 2026-05-07 14:29:36 +02:00

View File

@@ -1,7 +1,28 @@
#include "libft.h" #include "libft.h"
// --- UTF-8 byte sequence macros for 2-byte superscript digits ---
// The superscript digits 1, 2 and 3 are encoded in UTF-8 as two bytes:
// the lead byte 0xC2 followed by one continuation byte.
#define TWO_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1 0xC2
// Superscript digits: ¹, ², ³
// Note that the byte values are not in digit order: ¹ (0xB9) sorts after
// ² (0xB2) and ³ (0xB3), so the three cannot be matched as one contiguous range.
#define SUPERSCRIPT_1_BYTE2 0xB9 // ¹ = U+00B9 -> C2 B9
#define SUPERSCRIPT_2_BYTE2 0xB2 // ² = U+00B2 -> C2 B2
#define SUPERSCRIPT_3_BYTE2 0xB3 // ³ = U+00B3 -> C2 B3
// --- UTF-8 byte sequence macros for 3-byte superscript digits ---
// The remaining superscript digits are encoded in UTF-8 as three bytes that
// share the prefix 0xE2 0x81; only the third byte differs.
#define THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1 0xE2
#define THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE2 0x81
// Superscript digits: ⁰, ⁴, ⁵, ⁶, ⁷, ⁸, ⁹
// Third bytes 0xB1-0xB3 are deliberately absent: U+2071 is the superscript
// letter i (not a digit) and U+2072/U+2073 are unassigned code points.
#define SUPERSCRIPT_0_BYTE3 0xB0 // ⁰ = U+2070 -> E2 81 B0
#define SUPERSCRIPT_4_BYTE3 0xB4 // ⁴ = U+2074 -> E2 81 B4
#define SUPERSCRIPT_5_BYTE3 0xB5 // ⁵ = U+2075 -> E2 81 B5
#define SUPERSCRIPT_6_BYTE3 0xB6 // ⁶ = U+2076 -> E2 81 B6
#define SUPERSCRIPT_7_BYTE3 0xB7 // ⁷ = U+2077 -> E2 81 B7
#define SUPERSCRIPT_8_BYTE3 0xB8 // ⁸ = U+2078 -> E2 81 B8
#define SUPERSCRIPT_9_BYTE3 0xB9 // ⁹ = U+2079 -> E2 81 B9
/** /**
* Checks if the UTF-8 character at `input` is a superscript digit (², ³, ⁰-⁹). * Checks if the UTF-8 character at `input` is a superscript digit (¹, ², ³, ⁰-⁹).
* If it is, sets `*size` to the number of bytes in the character (2 or 3). * If it is, sets `*size` to the number of bytes in the character (2 or 3).
* Returns 1 if true, 0 otherwise. * Returns 1 if true, 0 otherwise.
*/ */
@@ -12,42 +33,40 @@ int ft_isdigit_superscript(const char *input, int *size)
*size = 0; // Default to 0 if not a superscript digit *size = 0; // Default to 0 if not a superscript digit
} }
// 2-byte UTF-8 superscript digits: ² (U+00B2) and ³ (U+00B3) --- // --- Check for 2-byte superscript digits (¹, ², ³) ---
// In UTF-8, 2-byte characters start with a byte in the range 0xC0-0xDF. if ((uint8_t)*input == TWO_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1)
// For superscript ² and ³:
// - First byte: 0xC2 (binary: 11000010)
// - Second byte: 0xB2 (²) or 0xB3 (³) (binary: 10110010 or 10110011)
if ((uint8_t)*input == 0xC2) // Check if first byte is 0xC2 (start of 2-byte UTF-8)
{ {
// Check if the second byte matches ² (0xB2) or ³ (0xB3) uint8_t second_byte = (uint8_t)*(input + 1);
if ((uint8_t)*(input + 1) == 0xB2 || (uint8_t)*(input + 1) == 0xB3) if (second_byte == SUPERSCRIPT_1_BYTE2 ||
second_byte == SUPERSCRIPT_2_BYTE2 ||
second_byte == SUPERSCRIPT_3_BYTE2)
{ {
if (size != NULL) if (size != NULL)
{ {
*size = 2; // 2-byte character *size = 2; // 2-byte character
} }
return 1; // Valid superscript digit (² or ³) return 1; // Valid superscript digit (¹, ², or ³)
} }
} }
// 3-byte UTF-8 superscript digits: ⁰ (U+2070) to ⁹ (U+2079) --- // --- Check for 3-byte superscript digits (⁰, ⁴-⁹) ---
// In UTF-8, 3-byte characters start with a byte in the range 0xE0-0xEF. else if ((uint8_t)*input == THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE1)
// For superscript ⁰-⁹:
// - First byte: 0xE2 (binary: 11100010)
// - Second byte: 0x81 (binary: 10000001)
// - Third byte: 0xB0 (⁰) to 0xB9 (⁹) (binary: 10110000 to 10111001)
else if ((uint8_t)*input == 0xE2) // Check if first byte is 0xE2 (start of 3-byte UTF-8)
{ {
// Check if the second byte is 0x81 (part of the 3-byte sequence for ⁰-⁹) if ((uint8_t)*(input + 1) == THREE_BYTE_UTF8_SUPERSCRIPT_DIGIT_BYTE2)
if ((uint8_t)*(input + 1) == 0x81)
{ {
// Check if the third byte is in the range 0xB0-0xB9 (⁰ to ⁹) uint8_t third_byte = (uint8_t)*(input + 2);
if ((uint8_t)*(input + 2) >= 0xB0 && (uint8_t)*(input + 2) <= 0xB9) if (third_byte == SUPERSCRIPT_0_BYTE3 ||
third_byte == SUPERSCRIPT_4_BYTE3 ||
third_byte == SUPERSCRIPT_5_BYTE3 ||
third_byte == SUPERSCRIPT_6_BYTE3 ||
third_byte == SUPERSCRIPT_7_BYTE3 ||
third_byte == SUPERSCRIPT_8_BYTE3 ||
third_byte == SUPERSCRIPT_9_BYTE3)
{ {
if (size != NULL) if (size != NULL)
{ {
*size = 3; // 3-byte character *size = 3; // 3-byte character
} }
return 1; // Valid superscript digit (⁰-⁹) return 1; // Valid superscript digit (⁰, ⁴-⁹)
} }
} }
} }