failing to impl stringbuilder

This commit is contained in:
Victor Vobis 2025-05-13 18:24:52 +02:00
parent 1df58ad7c3
commit 30fb9231d8
10 changed files with 360 additions and 49 deletions

197
Makefile
View File

@ -1,104 +1,243 @@
# Basic configuration
TARGET := langc TARGET := langc
AFLAGS = -felf64 -F dwarf -g AFLAGS = -felf64 -F dwarf -g
ifdef DEBUG ifdef DEBUG
AFLAGS += -DDEBUG_BUILD AFLAGS += -DDEBUG_BUILD
endif endif
# Directory structure
SRCDIR := src SRCDIR := src
OBJDIR := obj
LIBDIR := lib
# Core directories
COREDIR := $(SRCDIR)/core COREDIR := $(SRCDIR)/core
MATHDIR := $(COREDIR)/math MATHDIR := $(COREDIR)/math
VECDIR := $(COREDIR)/vector VECDIR := $(COREDIR)/vector
STRDIR := $(COREDIR)/string STRDIR := $(COREDIR)/string
SBDIR := $(COREDIR)/string_builder
PRINTDIR := $(COREDIR)/print PRINTDIR := $(COREDIR)/print
MEMDIR := $(COREDIR)/mem MEMDIR := $(COREDIR)/mem
SYSCALLDIR := $(COREDIR)/syscall SYSCALLDIR := $(COREDIR)/syscall
FILEDIR := $(COREDIR)/file FILEDIR := $(COREDIR)/file
# Parser directories
PARSEDIR := $(SRCDIR)/parse PARSEDIR := $(SRCDIR)/parse
EXPRDIR := $(PARSEDIR)/expression EXPRDIR := $(PARSEDIR)/expression
TOKDIR := $(PARSEDIR)/token TOKDIR := $(PARSEDIR)/token
# Lexer directories
LEXDIR := $(SRCDIR)/lexer LEXDIR := $(SRCDIR)/lexer
VARDIR := $(LEXDIR)/vars VARDIR := $(LEXDIR)/vars
# Other directories
GLOBALDIR := $(SRCDIR)/global GLOBALDIR := $(SRCDIR)/global
# Define source files # Object file directories (mirroring source structure)
OBJCOREDIR := $(OBJDIR)/core
OBJMATHDIR := $(OBJCOREDIR)/math
OBJVECDIR := $(OBJCOREDIR)/vector
OBJSTRDIR := $(OBJCOREDIR)/string
OBJSBDIR := $(OBJCOREDIR)/string_builder
OBJPRINTDIR := $(OBJCOREDIR)/print
OBJMEMDIR := $(OBJCOREDIR)/mem
OBJSYSCALLDIR := $(OBJCOREDIR)/syscall
OBJFILEDIR := $(OBJCOREDIR)/file
OBJPARSEDIR := $(OBJDIR)/parse
OBJEXPRDIR := $(OBJPARSEDIR)/expression
OBJTOKDIR := $(OBJPARSEDIR)/token
OBJLEXDIR := $(OBJDIR)/lexer
OBJVARDIR := $(OBJLEXDIR)/vars
OBJGLOBALDIR := $(OBJDIR)/global
# All object directories in dependency order
OBJDIRS := $(OBJDIR) \
$(OBJCOREDIR) \
$(OBJMATHDIR) \
$(OBJVECDIR) \
$(OBJSTRDIR) \
$(OBJSBDIR) \
$(OBJPRINTDIR) \
$(OBJMEMDIR) \
$(OBJSYSCALLDIR) \
$(OBJFILEDIR) \
$(OBJPARSEDIR) \
$(OBJEXPRDIR) \
$(OBJTOKDIR) \
$(OBJLEXDIR) \
$(OBJVARDIR) \
$(OBJGLOBALDIR)
# Source file definitions by module
MATHSRC := $(addprefix $(MATHDIR)/, $(addsuffix .s, \ MATHSRC := $(addprefix $(MATHDIR)/, $(addsuffix .s, \
operators \ operators \
)) ))
STRSRC := $(addprefix $(STRDIR)/, $(addsuffix .s, \ STRSRC := $(addprefix $(STRDIR)/, $(addsuffix .s, \
strlen split strcpy substr is_num strcmp is_alpha \ strlen split strcpy substr is_num strcmp is_alpha \
)) ))
MEMSRC := $(addprefix $(MEMDIR)/, $(addsuffix .s, \ MEMSRC := $(addprefix $(MEMDIR)/, $(addsuffix .s, \
malloc memchr memcpy memset \ malloc memchr memcpy memset \
)) ))
VECSRC := $(addprefix $(VECDIR)/, $(addsuffix .s, \ VECSRC := $(addprefix $(VECDIR)/, $(addsuffix .s, \
vec_create vec_push vec_get vec_pop \ vec_create vec_push vec_get vec_pop \
)) ))
PRINTSRC := $(addprefix $(PRINTDIR)/, $(addsuffix .s, \ PRINTSRC := $(addprefix $(PRINTDIR)/, $(addsuffix .s, \
print putnumber \ print putnumber \
)) ))
FILESRC := $(addprefix $(FILEDIR)/, $(addsuffix .s, \ FILESRC := $(addprefix $(FILEDIR)/, $(addsuffix .s, \
read_file get_file_content \ read_file get_file_content \
)) ))
SYSCALLSRC := $(addprefix $(SYSCALLDIR)/, $(addsuffix .s, \ SYSCALLSRC := $(addprefix $(SYSCALLDIR)/, $(addsuffix .s, \
exit file_ops syscall_err\ exit file_ops syscall_err fork\
)) ))
TOKSRC := $(addprefix $(TOKDIR)/, $(addsuffix .s, \ TOKSRC := $(addprefix $(TOKDIR)/, $(addsuffix .s, \
parse_tokens debug_token \ parse_tokens debug_token \
)) ))
EXPRSRC := $(addprefix $(EXPRDIR)/, $(addsuffix .s, \ EXPRSRC := $(addprefix $(EXPRDIR)/, $(addsuffix .s, \
create_expressions debug_expression \ create_expressions debug_expression \
)) ))
LEXSRC := $(addprefix $(LEXDIR)/, $(addsuffix .s, \ LEXSRC := $(addprefix $(LEXDIR)/, $(addsuffix .s, \
lexer lex_err lex_load lex_func program_prologue \ lexer lex_err lex_load lex_func program_prologue \
func_boiler_plate \ func_boiler_plate \
)) ))
VARSRC := $(addprefix $(VARDIR)/, $(addsuffix .s, \ VARSRC := $(addprefix $(VARDIR)/, $(addsuffix .s, \
get_vars insert_var \ get_vars insert_var \
)) ))
GLOBALSRC := $(addprefix $(GLOBALDIR)/, $(addsuffix .s, \ GLOBALSRC := $(addprefix $(GLOBALDIR)/, $(addsuffix .s, \
function_table regs \ function_table regs \
)) ))
# Collect all source files SBSRC := $(addprefix $(SBDIR)/, $(addsuffix .s, \
SRC := $(SRCDIR)/start.s $(MATHSRC) $(STRSRC) $(PRINTSRC) $(FILESRC) $(VARSRC) $(SYSCALLSRC) $(MEMSRC) $(TOKSRC) $(EXPRSRC) $(LEXSRC) $(GLOBALSRC) $(VECSRC) string_builder\
))
# Fix: Preserve directory structure in object files # Collect all sources and objects
OBJDIR := obj MAIN_SRC := $(SRCDIR)/start.s
OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(SRC)) ALL_SRC := $(MAIN_SRC) $(MATHSRC) $(STRSRC) $(SBSRC) $(PRINTSRC) $(FILESRC) $(VARSRC) $(SYSCALLSRC) $(MEMSRC) $(TOKSRC) $(EXPRSRC) $(LEXSRC) $(GLOBALSRC) $(VECSRC)
LIBOBJ := $(filter-out start.o, $(OBJ)) # Generate object file paths
LIBNAME := core.lang ALL_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(ALL_SRC))
all: $(TARGET) # Library settings
LIBNAME := $(LIBDIR)/core.a
LIB_OBJ := $(filter-out $(OBJDIR)/start.o, $(ALL_OBJ))
lib: $(OBJ) # Module-specific object files for staged compilation
ar rcs $(LIBNAME) $(LIBOBJ) MATH_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(MATHSRC))
STR_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(STRSRC))
MEM_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(MEMSRC))
VEC_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(VECSRC))
PRINT_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(PRINTSRC))
FILE_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(FILESRC))
SYSCALL_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(SYSCALLSRC))
TOK_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(TOKSRC))
EXPR_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(EXPRSRC))
LEX_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(LEXSRC))
VAR_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(VARSRC))
GLOBAL_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(GLOBALSRC))
SB_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(SBSRC))
MAIN_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(MAIN_SRC))
# Create output directories # Main targets
$(OBJDIR)/core/math $(OBJDIR)/core/string $(OBJDIR)/core/print $(OBJDIR)/core/mem $(OBJDIR)/core/syscall $(OBJDIR)/core/file $(OBJDIR)/parse/expression $(OBJDIR)/parse/token $(OBJDIR)/lexer $(OBJDIR)/lexer/vars $(OBJDIR)/global: all: prepare-build build-core build-parser build-lexer build-global build-main link-executable create-library
# Stage 1: Prepare build environment
prepare-build: create-directories
create-directories: | $(OBJDIRS) $(LIBDIR)
$(OBJDIRS):
mkdir -p $@ mkdir -p $@
# Main target $(LIBDIR):
$(TARGET): $(OBJDIR) $(OBJDIR)/core/math $(OBJDIR)/core/string $(OBJDIR)/core/print $(OBJDIR)/core/mem $(OBJDIR)/core/syscall $(OBJDIR)/core/file $(OBJDIR)/parse/expression $(OBJDIR)/parse/token $(OBJDIR)/lexer $(OBJDIR)/lexer/vars $(OBJDIR)/global $(OBJ) mkdir -p $@
ld -g -o $@ $(OBJ) -nostdlib -static
# Fix: Use a more specific pattern rule that preserves paths # Stage 2: Build core modules
$(OBJDIR)/%.o: $(SRCDIR)/%.s build-core: build-math build-string build-memory build-vector build-print build-file build-syscall build-string-builder
mkdir -p $(dir $@)
build-math: $(MATH_OBJ)
build-string: $(STR_OBJ)
build-memory: $(MEM_OBJ)
build-vector: $(VEC_OBJ)
build-print: $(PRINT_OBJ)
build-file: $(FILE_OBJ)
build-syscall: $(SYSCALL_OBJ)
build-string-builder: $(SB_OBJ)
# Stage 3: Build parser modules
build-parser: build-tokens build-expressions
build-tokens: $(TOK_OBJ)
build-expressions: $(EXPR_OBJ)
# Stage 4: Build lexer modules
build-lexer: build-lex-core build-lex-vars
build-lex-core: $(LEX_OBJ)
build-lex-vars: $(VAR_OBJ)
# Stage 5: Build global modules
build-global: $(GLOBAL_OBJ)
# Stage 6: Build main entry point
build-main: $(MAIN_OBJ)
# Stage 7: link every object file into the final static executable.
# The real file rule comes first; link-executable is only a stage alias for it.
$(TARGET): $(ALL_OBJ)
	ld -g -o $@ $^ -nostdlib -static

link-executable: $(TARGET)
# Stage 8: archive all objects except the entry point into the static library.
# $(LIBDIR) is order-only: it must exist, but its timestamp never forces a
# re-archive (and order-only prerequisites are not part of $^).
$(LIBNAME): $(LIB_OBJ) | $(LIBDIR)
	ar rcs $@ $^

create-library: $(LIBNAME)
# Individual file compilation rule
$(OBJDIR)/%.o: $(SRCDIR)/%.s | $(OBJDIRS)
nasm $(AFLAGS) $< -o $@ nasm $(AFLAGS) $< -o $@
$(OBJDIR): # Utility targets
mkdir -p $@
clean: clean:
rm -rf $(OBJDIR) rm -rf $(OBJDIR)
fclean: clean clean-library:
rm -rf $(TARGET) rm -f $(LIBNAME)
re: clean all clean-executable:
rm -f $(TARGET)
fclean: clean clean-library clean-executable
re: fclean all
# Debug target: print the resolved build configuration without building.
show-config:
	@echo "Target: $(TARGET)"
	@echo "Flags: $(AFLAGS)"
	@echo "Source files: $(words $(ALL_SRC)) files"
	@echo "Object files: $(words $(ALL_OBJ)) files"
	@echo "Object directories: $(OBJDIRS)"

# Every name below is a command or build stage, not a file. Declaring them
# phony keeps a stray file with the same name (e.g. "build-core" or "fclean")
# from making the target look up to date and silently skipping it.
.PHONY: all re show-config \
        prepare-build create-directories \
        build-core build-math build-string build-memory build-vector \
        build-print build-file build-syscall build-string-builder \
        build-parser build-tokens build-expressions \
        build-lexer build-lex-core build-lex-vars \
        build-global build-main \
        link-executable create-library \
        clean clean-library clean-executable fclean

BIN
langc Executable file

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,77 @@
%include "./src/core/string_builder/sb.s"

section .text
	global	sb_append
	extern	strlen
	extern	strcpy
	extern	memcpy
	extern	malloc
	extern	err_malloc

; Stack locals. Every slot is a full 8 bytes so that 64-bit loads and stores
; never spill into a neighbouring local or into the saved rbp.
; SB        - pointer to the string struct being appended to
; APPENDIX  - pointer to the C string to append
; SB_LEN    - length of the builder before the append
; NEW_LEN   - length of the builder after the append
; NEW_CAP   - capacity chosen when the buffer has to grow
; NEW_DATA  - freshly allocated buffer when the buffer has to grow
%define SB		[rbp - 8]
%define APPENDIX	[rbp - 16]
%define SB_LEN		[rbp - 24]
%define NEW_LEN		[rbp - 32]
%define NEW_CAP		[rbp - 40]
%define NEW_DATA	[rbp - 48]

; sb_append - append a NUL-terminated string to a string builder.
;   in:  rdi = pointer to the string struct (STR_LEN / STR_CAP / STR_DATA)
;        rsi = NUL-terminated string to append
;   Does nothing when either pointer is NULL.
;   When the result (plus terminator) does not fit in the current capacity,
;   the capacity is doubled until it does, a new buffer is allocated, the
;   existing bytes are copied over and the struct is updated to point at it.
;   Jumps to err_malloc if that allocation fails.
;   NOTE(review): the previous buffer is not released; no free routine is
;   visible next to malloc - confirm how buffers are meant to be reclaimed.
sb_append: ; (rdi: *sb, rsi: char*)
	push	rbp
	mov	rbp, rsp
	sub	rsp, 48

	test	rdi, rdi
	jz	.done
	test	rsi, rsi
	jz	.done

	mov	SB, rdi
	mov	APPENDIX, rsi

	; 32-bit load zero-extends, so the 8-byte slot holds the exact length
	mov	eax, dword [rdi + STR_LEN]
	mov	SB_LEN, rax

	; new length = current length + strlen(appendix)
	mov	rdi, rsi
	call	strlen
	add	rax, SB_LEN
	mov	NEW_LEN, rax

	; fits only if new length is strictly below cap (room for the terminator)
	mov	rdi, SB
	mov	ecx, dword [rdi + STR_CAP]
	cmp	rax, rcx
	jb	.copy_string

	; the new string is too large for the current cap: grow the buffer
	test	rcx, rcx
	jnz	.double_cap
	; doubling zero would never terminate, start from the default instead
	mov	rcx, STRING_INIT_CAP
	jmp	.check_cap
.double_cap:
	shl	rcx, 1
.check_cap:
	cmp	rcx, rax
	jbe	.double_cap
	mov	NEW_CAP, rcx

	mov	rdi, rcx
	call	malloc
	test	rax, rax
	jz	err_malloc
	mov	NEW_DATA, rax

	; carry the existing bytes over; length-based so it does not depend on
	; the old buffer being terminated
	mov	rdx, SB_LEN
	test	rdx, rdx
	jz	.publish_buffer
	mov	rdi, rax
	mov	rsi, SB
	mov	rsi, [rsi + STR_DATA]
	call	memcpy

.publish_buffer:
	mov	rdi, SB
	mov	rax, NEW_DATA
	mov	[rdi + STR_DATA], rax
	mov	rax, NEW_CAP
	mov	dword [rdi + STR_CAP], eax

.copy_string:
	mov	rdi, SB
	mov	rax, NEW_LEN
	mov	dword [rdi + STR_LEN], eax
	; destination = data + old length; strcpy also writes the terminator
	mov	rdi, [rdi + STR_DATA]
	add	rdi, SB_LEN
	mov	rsi, APPENDIX
	call	strcpy

.done:
	mov	rsp, rbp
	pop	rbp
	ret

View File

@ -0,0 +1,12 @@
; Layout of the string record (STR_SIZE bytes in total):
;   offset 0   .len    uint    -> STR_LEN
;   offset 4   .cap    uint    -> STR_CAP
;   offset 8   .data   char*   -> STR_DATA
%define STR_LEN 0
%define STR_CAP 4
%define STR_DATA 8
%define STR_SIZE 16

; Capacity constant for a string's data buffer.
%define STRING_INIT_CAP 1024

View File

@ -0,0 +1,56 @@
%include "./src/core/string_builder/sb.s"
section .text
extern malloc
extern err_malloc
extern memcpy
extern strlen
global sb_new
; Stack locals for sb_new. They live on the stack rather than in scratch
; registers so that nothing is lost across the strlen / malloc / memcpy calls.
; SBN_SRC - source C string (may be NULL)
; SBN_OUT - string struct to initialise
; SBN_LEN - length of the source, 0 when the source is NULL
; SBN_CAP - capacity chosen for the data buffer
%define SBN_SRC	[rbp - 8]
%define SBN_OUT	[rbp - 16]
%define SBN_LEN	[rbp - 24]
%define SBN_CAP	[rbp - 32]

; sb_new - initialise a string record from an optional C string.
;   in:  rdi = NUL-terminated source string, or NULL for an empty string
;        rsi = pointer to the string struct to fill in
;   out: rax = rsi, the initialised struct
;   The capacity starts at STRING_INIT_CAP and grows in STRING_INIT_CAP steps
;   until it is strictly greater than the source length, so the terminator
;   always fits. The data buffer is always NUL-terminated on return.
;   Jumps to err_malloc if the allocation fails.
sb_new: ; rax: str*(rdi: char* || NULL, rsi: *hidden_copy_ptr)
	push	rbp
	mov	rbp, rsp
	sub	rsp, 32

	mov	SBN_SRC, rdi
	mov	SBN_OUT, rsi
	mov	qword SBN_LEN, 0
	mov	qword SBN_CAP, STRING_INIT_CAP

	test	rdi, rdi
	jz	.alloc_string

	call	strlen
	mov	SBN_LEN, rax

	; smallest multiple of STRING_INIT_CAP that is strictly above the length
	mov	rcx, STRING_INIT_CAP
.calc_init_cap:
	cmp	rax, rcx
	jb	.cap_found
	add	rcx, STRING_INIT_CAP
	jmp	.calc_init_cap
.cap_found:
	mov	SBN_CAP, rcx

.alloc_string:
	mov	rdi, SBN_CAP
	call	malloc
	test	rax, rax
	jz	err_malloc

	; publish cap, len and data into the caller's struct
	mov	rsi, SBN_OUT
	mov	rcx, SBN_CAP
	mov	dword [rsi + STR_CAP], ecx
	mov	rdx, SBN_LEN
	mov	dword [rsi + STR_LEN], edx
	mov	[rsi + STR_DATA], rax

	; terminate first so an empty string is valid without any copy
	mov	byte [rax + rdx], 0
	test	rdx, rdx
	jz	.done

	; a non-zero length implies a non-NULL source
	mov	rdi, rax
	mov	rsi, SBN_SRC
	call	memcpy

.done:
	mov	rax, SBN_OUT
	mov	rsp, rbp
	pop	rbp
	ret

9
src/core/syscall/fork.s Normal file
View File

@ -0,0 +1,9 @@
%define SYS_FORK 57
section .text
global fork
; fork - thin wrapper around the fork system call.
;   out: rax = raw kernel result: 0 in the child, the child's pid in the
;        parent, or a negative errno value on failure
;   The x86-64 Linux syscall convention takes the call number in rax (rdi is
;   the first *argument*), and the syscall instruction overwrites rcx and r11.
fork: ; rax: pid()
	mov	rax, SYS_FORK
	syscall
	ret

View File

@ -1,7 +1,7 @@
section .data section .data
EARGCNT: db "[ERROR] Invalid arg count: expected 1", 0xa, 0 EARGCNT: db 0xa, "[ERROR] Invalid arg count: expected 1", 0xa, 0
EMALLOC: db "[ERROR] Malloc fialed!", 0xa, 0 EMALLOC: db 0xa, "[ERROR] Malloc failed!", 0xa, 0
ELSEEK: db "[ERROR] lseek failed!", 0xa, 0 ELSEEK: db 0xa, "[ERROR] lseek failed!", 0xa, 0
section .text section .text
global err_args global err_args

View File

@ -19,6 +19,7 @@ section .text
extern func_prologue extern func_prologue
extern func_epilogue extern func_epilogue
extern memset
extern program_prologue extern program_prologue
@ -37,6 +38,14 @@ lex: ; rax: lex* (rdi: char *file_content)
cmp rax, 0 cmp rax, 0
je err_malloc je err_malloc
; zero out lexer
push rax
mov rdi, rax
mov rsi, 0
mov rdx, LEX_SIZE
call memset
pop rax
mov [rbp - 24], rax ; store lex on stack mov [rbp - 24], rax ; store lex on stack
lea rsi, [rbp - 16] ; int* expr_cnt lea rsi, [rbp - 16] ; int* expr_cnt

View File

@ -13,31 +13,40 @@ section .text
extern vec_get extern vec_get
extern putchar extern putchar
extern vec_pop extern vec_pop
extern sb_new
print_usage: print_usage:
mov rdi, usage mov rdi, usage
call putstr call putstr
_start: _start:
pop rdi
cmp rdi, 2
jne err_args
mov rdi, [rsp + 8] ; argv[1]
push rbp push rbp
mov rbp, rsp mov rbp, rsp
sub rsp, 16 sub rsp, 16
mov rdi, usage
lea rsi, [rsp - 16]
call sb_new
call get_file_content ; pop rdi
mov rdi, rax ; cmp rdi, 2
mov [rbp - 8], rax ; jne err_args
; mov rdi, [rsp + 8] ; argv[1]
mov rdi, rax ; push rbp
call lex ; mov rbp, rsp
;
mov rsp, rbp ; sub rsp, 16
pop rbp ;
; call get_file_content
; mov rdi, rax
; mov [rbp - 8], rax
;
; mov rdi, rax
; call lex
;
; mov rsp, rbp
; pop rbp
;
done: done:
xor rdi, rdi ; xor rdi, rdi
call exit ; call exit