adding atoi_superscript and isdigit_superscript

This commit is contained in:
hugogogo
2026-05-06 20:02:44 +02:00
parent b64ede50af
commit f93c635234
10 changed files with 269 additions and 82 deletions

View File

@@ -0,0 +1,55 @@
#include "libft.h"
/**
 * Checks whether the UTF-8 sequence starting at `input` encodes a Unicode
 * superscript digit.
 *
 * Recognised code points and their UTF-8 encodings:
 *   - U+00B9 (¹), U+00B2 (²), U+00B3 (³)   2 bytes: 0xC2, then 0xB9/0xB2/0xB3
 *   - U+2070 (⁰)                            3 bytes: 0xE2 0x81 0xB0
 *   - U+2074..U+2079 (⁴-⁹)                  3 bytes: 0xE2 0x81 0xB4..0xB9
 *
 * U+2071 is the superscript letter 'ⁱ' and U+2072/U+2073 are unassigned, so
 * the sequences 0xE2 0x81 0xB1..0xB3 are deliberately rejected.
 *
 * Because ¹, ², ³ are encoded apart from ⁰ and ⁴-⁹, the numeric value of a
 * digit cannot be derived with one uniform offset from its last byte; code
 * converting these characters to numbers has to map ¹/²/³ explicitly.
 *
 * @param input  Pointer to the first byte to examine. NULL is accepted and
 *               treated as "not a superscript digit". Bytes after the first
 *               are only read once the preceding byte matched, so a
 *               NUL-terminated string is never read past its terminator.
 * @param size   Optional (may be NULL). When non-NULL, receives the length in
 *               bytes of the matched character (2 or 3), or 0 when there is
 *               no match.
 * @return 1 if a superscript digit starts at `input`, 0 otherwise.
 */
int ft_isdigit_superscript(const char *input, int *size)
{
	// Work on unsigned bytes: plain `char` may be signed, which would make
	// comparisons against values >= 0x80 fail.
	const unsigned char *bytes = (const unsigned char *)input;
	int length = 0;

	if (size != NULL)
	{
		*size = 0; // Reported length when nothing matches
	}
	if (bytes == NULL)
	{
		return 0;
	}
	if (bytes[0] == 0xC2)
	{
		// Latin-1 Supplement: ² (U+00B2), ³ (U+00B3), ¹ (U+00B9)
		if (bytes[1] == 0xB2 || bytes[1] == 0xB3 || bytes[1] == 0xB9)
		{
			length = 2;
		}
	}
	else if (bytes[0] == 0xE2 && bytes[1] == 0x81)
	{
		// Superscripts and Subscripts block: ⁰ (U+2070) and ⁴-⁹
		// (U+2074..U+2079). 0xB1..0xB3 are skipped: not digits.
		if (bytes[2] == 0xB0 || (bytes[2] >= 0xB4 && bytes[2] <= 0xB9))
		{
			length = 3;
		}
	}
	if (length == 0)
	{
		return 0;
	}
	if (size != NULL)
	{
		*size = length;
	}
	return 1;
}