diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,104 +1,255 @@
+# Basic configuration
 TARGET := langc
 
-
-
 AFLAGS = -felf64 -F dwarf -g
 ifdef DEBUG
 	AFLAGS += -DDEBUG_BUILD
 endif
 
+# Directory structure
 SRCDIR := src
+OBJDIR := obj
+LIBDIR := lib
+
+# Core directories
 COREDIR := $(SRCDIR)/core
 MATHDIR := $(COREDIR)/math
 VECDIR := $(COREDIR)/vector
 STRDIR := $(COREDIR)/string
+SBDIR := $(COREDIR)/string_builder
 PRINTDIR := $(COREDIR)/print
 MEMDIR := $(COREDIR)/mem
 SYSCALLDIR := $(COREDIR)/syscall
 FILEDIR := $(COREDIR)/file
+
+# Parser directories
 PARSEDIR := $(SRCDIR)/parse
 EXPRDIR := $(PARSEDIR)/expression
 TOKDIR := $(PARSEDIR)/token
+
+# Lexer directories
 LEXDIR := $(SRCDIR)/lexer
 VARDIR := $(LEXDIR)/vars
+
+# Other directories
 GLOBALDIR := $(SRCDIR)/global
 
-# Define source files
+# Object file directories (mirroring source structure)
+OBJCOREDIR := $(OBJDIR)/core
+OBJMATHDIR := $(OBJCOREDIR)/math
+OBJVECDIR := $(OBJCOREDIR)/vector
+OBJSTRDIR := $(OBJCOREDIR)/string
+OBJSBDIR := $(OBJCOREDIR)/string_builder
+OBJPRINTDIR := $(OBJCOREDIR)/print
+OBJMEMDIR := $(OBJCOREDIR)/mem
+OBJSYSCALLDIR := $(OBJCOREDIR)/syscall
+OBJFILEDIR := $(OBJCOREDIR)/file
+OBJPARSEDIR := $(OBJDIR)/parse
+OBJEXPRDIR := $(OBJPARSEDIR)/expression
+OBJTOKDIR := $(OBJPARSEDIR)/token
+OBJLEXDIR := $(OBJDIR)/lexer
+OBJVARDIR := $(OBJLEXDIR)/vars
+OBJGLOBALDIR := $(OBJDIR)/global
+
+# All object directories in dependency order
+OBJDIRS := $(OBJDIR) \
+	$(OBJCOREDIR) \
+	$(OBJMATHDIR) \
+	$(OBJVECDIR) \
+	$(OBJSTRDIR) \
+	$(OBJSBDIR) \
+	$(OBJPRINTDIR) \
+	$(OBJMEMDIR) \
+	$(OBJSYSCALLDIR) \
+	$(OBJFILEDIR) \
+	$(OBJPARSEDIR) \
+	$(OBJEXPRDIR) \
+	$(OBJTOKDIR) \
+	$(OBJLEXDIR) \
+	$(OBJVARDIR) \
+	$(OBJGLOBALDIR)
+
+# Source file definitions by module
 MATHSRC := $(addprefix $(MATHDIR)/, $(addsuffix .s, \
 	operators \
 	))
+
 STRSRC := $(addprefix $(STRDIR)/, $(addsuffix .s, \
 	strlen split strcpy substr is_num strcmp is_alpha \
 	))
+
 MEMSRC := $(addprefix $(MEMDIR)/, $(addsuffix .s, \
 	malloc memchr memcpy memset \
 	))
+
 VECSRC := $(addprefix $(VECDIR)/, $(addsuffix .s, \
 	vec_create vec_push vec_get vec_pop \
 	))
+
 PRINTSRC := $(addprefix $(PRINTDIR)/, $(addsuffix .s, \
 	print putnumber \
 	))
+
 FILESRC := $(addprefix $(FILEDIR)/, $(addsuffix .s, \
 	read_file get_file_content \
 	))
+
 SYSCALLSRC := $(addprefix $(SYSCALLDIR)/, $(addsuffix .s, \
-	exit file_ops syscall_err\
+	exit file_ops syscall_err fork\
 	))
+
 TOKSRC := $(addprefix $(TOKDIR)/, $(addsuffix .s, \
 	parse_tokens debug_token \
 	))
+
 EXPRSRC := $(addprefix $(EXPRDIR)/, $(addsuffix .s, \
 	create_expressions debug_expression \
 	))
+
 LEXSRC := $(addprefix $(LEXDIR)/, $(addsuffix .s, \
 	lexer lex_err lex_load lex_func program_prologue \
 	func_boiler_plate \
 	))
+
 VARSRC := $(addprefix $(VARDIR)/, $(addsuffix .s, \
 	get_vars insert_var \
 	))
+
 GLOBALSRC := $(addprefix $(GLOBALDIR)/, $(addsuffix .s, \
 	function_table regs \
 	))
 
-# Collect all source files
-SRC := $(SRCDIR)/start.s $(MATHSRC) $(STRSRC) $(PRINTSRC) $(FILESRC) $(VARSRC) $(SYSCALLSRC) $(MEMSRC) $(TOKSRC) $(EXPRSRC) $(LEXSRC) $(GLOBALSRC) $(VECSRC)
+SBSRC := $(addprefix $(SBDIR)/, $(addsuffix .s, \
+	string_builder\
+	))
 
-# Fix: Preserve directory structure in object files
-OBJDIR := obj
-OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(SRC))
+# Collect all sources and objects
+MAIN_SRC := $(SRCDIR)/start.s
+ALL_SRC := $(MAIN_SRC) $(MATHSRC) $(STRSRC) $(SBSRC) $(PRINTSRC) $(FILESRC) $(VARSRC) $(SYSCALLSRC) $(MEMSRC) $(TOKSRC) $(EXPRSRC) $(LEXSRC) $(GLOBALSRC) $(VECSRC)
 
-LIBOBJ := $(filter-out start.o, $(OBJ))
-LIBNAME := core.lang
+# Generate object file paths
+ALL_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(ALL_SRC))
 
-all: $(TARGET)
+# Library settings
+LIBNAME := $(LIBDIR)/core.a
+LIB_OBJ := $(filter-out $(OBJDIR)/start.o, $(ALL_OBJ))
 
-lib: $(OBJ)
-	ar rcs $(LIBNAME) $(LIBOBJ)
+# Module-specific object files for staged compilation
+MATH_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(MATHSRC))
+STR_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(STRSRC))
+MEM_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(MEMSRC))
+VEC_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(VECSRC))
+PRINT_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(PRINTSRC))
+FILE_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(FILESRC))
+SYSCALL_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(SYSCALLSRC))
+TOK_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(TOKSRC))
+EXPR_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(EXPRSRC))
+LEX_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(LEXSRC))
+VAR_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(VARSRC))
+GLOBAL_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(GLOBALSRC))
+SB_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(SBSRC))
+MAIN_OBJ := $(patsubst $(SRCDIR)/%.s,$(OBJDIR)/%.o,$(MAIN_SRC))
 
-# Create output directories
-$(OBJDIR)/core/math $(OBJDIR)/core/string $(OBJDIR)/core/print $(OBJDIR)/core/mem $(OBJDIR)/core/syscall $(OBJDIR)/core/file $(OBJDIR)/parse/expression $(OBJDIR)/parse/token $(OBJDIR)/lexer $(OBJDIR)/lexer/vars $(OBJDIR)/global:
+# Main targets
+all: prepare-build build-core build-parser build-lexer build-global build-main link-executable create-library
+
+# Stage 1: Prepare build environment
+prepare-build: create-directories
+
+create-directories: | $(OBJDIRS) $(LIBDIR)
+
+$(OBJDIRS):
 	mkdir -p $@
 
-# Main target
-$(TARGET): $(OBJDIR) $(OBJDIR)/core/math $(OBJDIR)/core/string $(OBJDIR)/core/print $(OBJDIR)/core/mem $(OBJDIR)/core/syscall $(OBJDIR)/core/file $(OBJDIR)/parse/expression $(OBJDIR)/parse/token $(OBJDIR)/lexer $(OBJDIR)/lexer/vars $(OBJDIR)/global $(OBJ)
-	ld -g -o $@ $(OBJ) -nostdlib -static
+$(LIBDIR):
+	mkdir -p $@
 
-# Fix: Use a more specific pattern rule that preserves paths
-$(OBJDIR)/%.o: $(SRCDIR)/%.s
-	mkdir -p $(dir $@)
+# Stage 2: Build core modules
+build-core: build-math build-string build-memory build-vector build-print build-file build-syscall build-string-builder
+
+build-math: $(MATH_OBJ)
+
+build-string: $(STR_OBJ)
+
+build-memory: $(MEM_OBJ)
+
+build-vector: $(VEC_OBJ)
+
+build-print: $(PRINT_OBJ)
+
+build-file: $(FILE_OBJ)
+
+build-syscall: $(SYSCALL_OBJ)
+
+build-string-builder: $(SB_OBJ)
+
+# Stage 3: Build parser modules
+build-parser: build-tokens build-expressions
+
+build-tokens: $(TOK_OBJ)
+
+build-expressions: $(EXPR_OBJ)
+
+# Stage 4: Build lexer modules
+build-lexer: build-lex-core build-lex-vars
+
+build-lex-core: $(LEX_OBJ)
+
+build-lex-vars: $(VAR_OBJ)
+
+# Stage 5: Build global modules
+build-global: $(GLOBAL_OBJ)
+
+# Stage 6: Build main entry point
+build-main: $(MAIN_OBJ)
+
+# Stage 7: Link executable
+link-executable: $(TARGET)
+
+$(TARGET): $(ALL_OBJ)
+	ld -g -o $@ $(ALL_OBJ) -nostdlib -static
+
+# Stage 8: Create library
+create-library: $(LIBNAME)
+
+$(LIBNAME): $(LIB_OBJ) | $(LIBDIR)
+	ar rcs $@ $(LIB_OBJ)
+
+# Individual file compilation rule
+$(OBJDIR)/%.o: $(SRCDIR)/%.s | $(OBJDIRS)
 	nasm $(AFLAGS) $< -o $@
 
-$(OBJDIR):
-	mkdir -p $@
-
+# Utility targets
 clean:
 	rm -rf $(OBJDIR)
 
-fclean: clean
-	rm -rf $(TARGET)
+clean-library:
+	rm -f $(LIBNAME)
 
-re: clean all
+clean-executable:
+	rm -f $(TARGET)
+
+fclean: clean clean-library clean-executable
+
+# Run as two sub-makes so "make -j re" cannot clean while it builds.
+re:
+	$(MAKE) fclean
+	$(MAKE) all
+
+# Debug target to show what would be built
+show-config:
+	@echo "Target: $(TARGET)"
+	@echo "Flags: $(AFLAGS)"
+	@echo "Source files: $(words $(ALL_SRC)) files"
+	@echo "Object files: $(words $(ALL_OBJ)) files"
+	@echo "Object directories: $(OBJDIRS)"
 
-.PHONY: all clean re
+# None of these targets is a file on disk; declaring them phony keeps a
+# stray file named "all" or "clean" from making the target look up to date.
+.PHONY: all prepare-build create-directories \
+	build-core build-math build-string build-memory build-vector \
+	build-print build-file build-syscall build-string-builder \
+	build-parser build-tokens build-expressions \
+	build-lexer build-lex-core build-lex-vars \
+	build-global build-main link-executable create-library \
+	clean clean-library clean-executable fclean re show-config
diff --git a/langc b/langc
new file mode 100755
index 0000000..38f77c9
Binary files /dev/null and b/langc differ
diff --git a/core.lang b/lib/core.a
similarity index 71%
rename from core.lang
rename to lib/core.a
index 1d30553..e3ad679 100644
Binary files a/core.lang and b/lib/core.a differ
diff --git a/src/core/string_builder/sappend.s b/src/core/string_builder/sappend.s
new file mode 100644
--- /dev/null
+++ b/src/core/string_builder/sappend.s
@@ -0,0 +1,102 @@
+%include "./src/core/string_builder/sb.s"
+
+section .text
+	extern strlen
+	extern strcpy
+	extern memcpy
+	extern malloc
+	extern err_malloc
+
+; Stack frame of sb_append (rbp-relative). Every slot is a full qword, so a
+; 64-bit load or store never reaches into a neighbouring slot.
+;   SB        string builder being extended
+;   APPENDIX  NUL-terminated text to append
+;   OLD_LEN   length before the append
+;   NEW_LEN   length after the append
+;   NEW_CAP   capacity (updated when the buffer has to grow)
+;   NEW_DATA  buffer returned by malloc when the buffer has to grow
+%define SB qword [rbp - 8]
+%define APPENDIX qword [rbp - 16]
+%define OLD_LEN qword [rbp - 24]
+%define NEW_LEN qword [rbp - 32]
+%define NEW_CAP qword [rbp - 40]
+%define NEW_DATA qword [rbp - 48]
+
+; sb_append: append a string to a string builder, growing its buffer on demand.
+;   rdi: string builder (layout in sb.s)
+;   rsi: text to append; NULL makes the call a no-op
+; NOTE(review): assumes strcpy copies the terminating NUL and that called
+; routines preserve rbp - confirm against their sources.
+global sb_append
+sb_append: ; (rdi: *sb, rsi: char*)
+	push rbp
+	mov rbp, rsp
+	sub rsp, 48
+
+	test rsi, rsi
+	jz .done
+
+	mov SB, rdi
+	mov APPENDIX, rsi
+
+	mov eax, dword [rdi + STR_LEN]	; 32-bit load zero-extends into rax
+	mov OLD_LEN, rax
+	mov eax, dword [rdi + STR_CAP]
+	mov NEW_CAP, rax
+
+	mov rdi, rsi
+	call strlen
+	add rax, OLD_LEN
+	mov NEW_LEN, rax
+
+	; fits only when strictly below cap: one byte stays free for the NUL
+	cmp rax, NEW_CAP
+	jb .copy_string
+
+	; new string will be too large for current cap, need to realloc
+	mov rcx, NEW_CAP
+	test rcx, rcx
+	jnz .get_new_cap
+	mov rcx, STRING_INIT_CAP / 2	; cap 0 would never grow by doubling
+.get_new_cap:
+	shl rcx, 1
+	cmp rcx, rax
+	jbe .get_new_cap
+	mov NEW_CAP, rcx
+
+	mov rdi, rcx
+	call malloc
+	test rax, rax
+	jz err_malloc
+	mov NEW_DATA, rax
+
+	; move the current contents by length; they need not be NUL-terminated
+	mov rdx, OLD_LEN
+	test rdx, rdx
+	jz .install_buffer
+	mov rdi, rax
+	mov rsi, SB
+	mov rsi, [rsi + STR_DATA]
+	call memcpy
+
+.install_buffer:
+	; NOTE(review): the previous buffer is not released here - no free routine is visible
+	mov rsi, SB
+	mov rax, NEW_DATA
+	mov [rsi + STR_DATA], rax
+	mov rax, NEW_CAP
+	mov dword [rsi + STR_CAP], eax
+
+.copy_string:
+	mov r9, SB
+	mov rax, NEW_LEN
+	mov dword [r9 + STR_LEN], eax
+	mov rdi, [r9 + STR_DATA]
+	add rdi, OLD_LEN
+	mov rsi, APPENDIX
+	call strcpy
+
+.done:
+	mov rsp, rbp
+	pop rbp
+	ret
diff --git a/src/core/string_builder/sb.s b/src/core/string_builder/sb.s
new file mode 100644
index 0000000..d8522c3
--- /dev/null
+++ b/src/core/string_builder/sb.s
@@ -0,0 +1,12 @@
+%define STRING_INIT_CAP 1024
+
+
+%define STR_SIZE 16
+%define STR_LEN 0
+%define STR_CAP 4
+%define STR_DATA 8
+
+; struct string:
+;	.len	0	uint
+;	.cap	4	uint
+;	.data	8	char*
diff --git a/src/core/string_builder/string_builder.s b/src/core/string_builder/string_builder.s
new file mode 100644
--- /dev/null
+++ b/src/core/string_builder/string_builder.s
@@ -0,0 +1,80 @@
+%include "./src/core/string_builder/sb.s"
+
+section .text
+	extern malloc
+	extern err_malloc
+
+	extern memcpy
+	extern strlen
+
+; Stack frame of sb_new (rbp-relative); nothing is kept in a register across a call.
+;   NEW_SRC  initial text, or NULL
+;   NEW_OUT  caller-provided struct string to fill in
+;   NEW_LEN  length of the initial text (0 when NULL)
+;   NEW_CAP  capacity to allocate
+%define NEW_SRC qword [rbp - 8]
+%define NEW_OUT qword [rbp - 16]
+%define NEW_LEN qword [rbp - 24]
+%define NEW_CAP qword [rbp - 32]
+
+; sb_new: initialise a string builder, optionally seeded with a string.
+;   rdi: initial text, or NULL for an empty builder
+;   rsi: struct string (layout in sb.s) that receives len, cap and data
+;   returns rax: the pointer passed in rsi
+; The capacity is STRING_INIT_CAP grown in STRING_INIT_CAP steps until it is
+; strictly greater than the length, and the buffer is always NUL-terminated.
+global sb_new
+sb_new: ; rax: str*(rdi: char* || NULL, rsi: *hidden_copy_ptr)
+	push rbp
+	mov rbp, rsp
+	sub rsp, 32
+
+	mov NEW_SRC, rdi
+	mov NEW_OUT, rsi
+	mov NEW_LEN, 0
+	mov NEW_CAP, STRING_INIT_CAP
+
+	test rdi, rdi
+	jz .alloc_string
+
+	call strlen
+	mov NEW_LEN, rax
+
+	mov rcx, NEW_CAP
+.calc_init_cap:
+	cmp rax, rcx
+	jb .cap_found
+	add rcx, STRING_INIT_CAP
+	jmp .calc_init_cap
+.cap_found:
+	mov NEW_CAP, rcx
+
+.alloc_string:
+	mov rdi, NEW_CAP
+	call malloc
+	test rax, rax
+	jz err_malloc
+
+	mov rsi, NEW_OUT
+	mov [rsi + STR_DATA], rax
+	mov rcx, NEW_CAP
+	mov dword [rsi + STR_CAP], ecx
+	mov rdx, NEW_LEN
+	mov dword [rsi + STR_LEN], edx
+
+	mov rsi, NEW_SRC
+	test rsi, rsi
+	jnz .copy_text
+	mov byte [rax], 0	; empty builder: just the terminator
+	jmp .done
+
+.copy_text:
+	mov rdi, rax
+	inc rdx			; copy the terminating NUL as well
+	call memcpy
+
+.done:
+	mov rax, NEW_OUT
+	mov rsp, rbp
+	pop rbp
+	ret
diff --git a/src/core/syscall/fork.s b/src/core/syscall/fork.s
new file mode 100644
--- /dev/null
+++ b/src/core/syscall/fork.s
@@ -0,0 +1,13 @@
+%define SYS_FORK 57
+
+section .text
+
+; fork: issue the Linux x86-64 fork system call (number 57).
+;   returns rax: whatever the kernel returns - 0 in the child, the child's
+;   pid in the parent, a negative errno value on failure
+; The system call number is passed in rax; syscall overwrites rcx and r11.
+global fork
+fork: ; rax: pid()
+	mov rax, SYS_FORK
+	syscall
+	ret
diff --git a/src/core/syscall/syscall_err.s b/src/core/syscall/syscall_err.s
index f1a5d65..2eccd8d 100644
--- a/src/core/syscall/syscall_err.s
+++ b/src/core/syscall/syscall_err.s
@@ -1,7 +1,7 @@
 section .data
-	EARGCNT: db "[ERROR] Invalid arg count: expected 1", 0xa, 0
-	EMALLOC: db "[ERROR] Malloc fialed!", 0xa, 0
-	ELSEEK: db "[ERROR] lseek failed!", 0xa, 0
+	EARGCNT: db 0xa, "[ERROR] Invalid arg count: expected 1", 0xa, 0
+	EMALLOC: db 0xa, "[ERROR] Malloc failed!", 0xa, 0
+	ELSEEK: db 0xa, "[ERROR] lseek failed!", 0xa, 0
 
 section .text
 	global err_args
diff --git a/src/lexer/lexer.s b/src/lexer/lexer.s
index 465c1d2..b534236 100644
--- a/src/lexer/lexer.s
+++ b/src/lexer/lexer.s
@@ -19,6 +19,7 @@ section .text
 
 	extern func_prologue
 	extern func_epilogue
+	extern memset
 
 	extern program_prologue
 
@@ -37,6 +38,14 @@ lex: ; rax: lex* (rdi: char *file_content)
 	cmp rax, 0
 	je err_malloc
 
+	; zero out lexer
+	push rax
+	mov rdi, rax
+	mov rsi, 0
+	mov rdx, LEX_SIZE
+	call memset
+
+	pop rax
 	mov [rbp - 24], rax ; store lex on stack
 
 	lea rsi, [rbp - 16] ; int* expr_cnt
diff --git a/src/start.s b/src/start.s
--- a/src/start.s
+++ b/src/start.s
@@ -13,31 +13,43 @@ section .text
 	extern vec_get
 	extern putchar
 	extern vec_pop
+	extern sb_new
 
 print_usage:
 	mov rdi, usage
 	call putstr
 
+
 _start:
-	pop rdi
-	cmp rdi, 2
-	jne err_args
-	mov rdi, [rsp + 8] ; argv[1]
 	push rbp
 	mov rbp, rsp
 
 	sub rsp, 16
+	; scratch check of sb_new: the 16 bytes reserved just above hold the
+	; struct string, so the call and its pushes cannot overwrite it
+	mov rdi, usage
+	lea rsi, [rbp - 16]
+	call sb_new
 
-	call get_file_content
-	mov rdi, rax
-	mov [rbp - 8], rax
-
-	mov rdi, rax
-	call lex
-
-	mov rsp, rbp
-	pop rbp
-
+;	pop rdi
+;	cmp rdi, 2
+;	jne err_args
+;	mov rdi, [rsp + 8] ; argv[1]
+;	push rbp
+;	mov rbp, rsp
+;
+;	sub rsp, 16
+;
+;	call get_file_content
+;	mov rdi, rax
+;	mov [rbp - 8], rax
+;
+;	mov rdi, rax
+;	call lex
+;
+;	mov rsp, rbp
+;	pop rbp
+;
 done:
 	xor rdi, rdi
 	call exit