minishell/tokenizer/tokenizer.c
2025-06-04 14:58:04 +02:00

98 lines
2.9 KiB
C

/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: anarama <anarama@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/07/16 19:49:35 by anarama #+# #+# */
/* Updated: 2024/08/23 14:24:32 by vvobis ### ########.fr */
/* */
/* ************************************************************************** */
#include "../minishell.h"
#include <stdbool.h>
static t_token *initialise_tokens(uint32_t word_count)
{
t_token *tokens;
if (word_count == 0)
return (NULL);
tokens = ft_calloc(word_count + 1, sizeof(t_token));
lst_memory(tokens, free, ADD);
tokens[word_count].token_type = TOKEN_EOL;
return (tokens);
}
static t_token check_symbol_and_create_token(const char **input)
{
if (is_double_special(*input))
return (create_token_double_special_symbol((char **)input));
else if (is_single_special(**input))
return (create_token_single_special_symbol(input));
else if (**input)
return (create_token_word(input));
else
return ((t_token){0, 0});
}
static uint32_t get_word_count(char *input)
{
uint32_t i;
uint32_t word_count;
i = 0;
word_count = 0;
while (input[i])
{
while (ft_isspace(input[i]))
i++;
if (is_double_special(&input[i]))
i += 2;
else if (is_in_alphabet(input[i]))
i++;
else if (ft_isalnum(input[i]))
while (input[i] && !ft_isspace(input[i]) \
&& !is_double_special(&input[i]) && !is_special_char(input[i]))
i++;
else if (input[i])
i++;
if (input[i] == '\n' || input[i] == ';')
word_count++;
word_count++;
}
return (word_count);
}
t_token *tokens_setup(char *input)
{
if (!input || !*input || *input == '\n' || *input == ';')
return (NULL);
if (input[ft_strlen(input) - 1] == '\n')
input[ft_strlen(input) - 1] = '\0';
return (initialise_tokens(get_word_count((char *)input)));
}
t_token *lexical_analysis(char *input)
{
t_token *tokens;
uint32_t i;
t_ring_buffer heredoc_buffer;
tokens = tokens_setup(input);
if (!tokens)
return (NULL);
i = -1;
heredoc_buffer = (t_ring_buffer){0};
heredoc_buffer.buffer_capacity = BUFFER_CAPACITY;
while (*input)
{
tokenizing_loop(&input, tokens, &i, &heredoc_buffer);
tokens[++i] = check_symbol_and_create_token((const char **)&input);
if (tokens[i].token_type == TOKEN_HEREDOC)
ring_buffer_put(&heredoc_buffer, &tokens[i]);
}
return (tokens[++i].token_type = TOKEN_EOL, tokens);
}