first lexing

This commit is contained in:
victor 2025-04-29 01:30:30 +02:00
parent 26c2bec61a
commit d46febe6fc
17 changed files with 493 additions and 62 deletions

View File

@ -1,4 +1,3 @@
[executable] [executable]
path=/home/victor/git/ctools/lang/debug path=/home/victor/git/lang/debug
arguments= arguments=/home/victor/git/lang/test.lang
ask_directory=1

View File

@ -13,7 +13,7 @@ MATHSRC := $(addprefix $(MATHDIR)/, $(addsuffix .s, \
operators \ operators \
)) ))
STRSRC := $(addprefix $(STRDIR)/, $(addsuffix .s, \ STRSRC := $(addprefix $(STRDIR)/, $(addsuffix .s, \
strlen split strcpy substr is_num strcmp\ strlen split strcpy substr is_num strcmp is_alpha \
)) ))
MEMSRC := $(addprefix $(MEMDIR)/, $(addsuffix .s, \ MEMSRC := $(addprefix $(MEMDIR)/, $(addsuffix .s, \
malloc memchr \ malloc memchr \
@ -28,7 +28,8 @@ SYSCALLSRC := $(addprefix $(SYSCALLDIR)/, $(addsuffix .s, \
exit file_ops syscall_err\ exit file_ops syscall_err\
)) ))
PARSESRC := $(addprefix $(PARSEDIR)/, $(addsuffix .s, \ PARSESRC := $(addprefix $(PARSEDIR)/, $(addsuffix .s, \
parse debug_token create_expressions expression debug_expression \ parse debug_token create_expressions debug_expression \
lexer \
)) ))
# Collect all source files - now using the file variables, not directory variables # Collect all source files - now using the file variables, not directory variables

View File

@ -0,0 +1,46 @@
section .text
global is_alpha
global is_alpha_str
is_alpha: ; rax: bool (rdi: int c)
; Returns 1 when the low byte of c is an ASCII letter (A-Z or a-z), else 0.
; Only dil is inspected; no register other than rax is written.
mov eax, edi
or al, 0x20 ; fold 'A'..'Z' onto 'a'..'z'; no other byte lands in that range
sub al, 'a' ; letters are now 0..25, everything else wraps above 25
cmp al, 26
setb al ; unsigned below 26 -> letter ('Z' and 'z' included)
movzx eax, al ; widen to a clean 0/1 in rax
ret
is_alpha_str: ; rax: bool (rdi: char *)
; Returns 1 when every byte before the terminating 0 passes is_alpha
; (an empty string therefore yields 1), otherwise 0.
; r8 walks the string and must survive the call: is_alpha only writes rax.
mov r8, rdi
.next_char:
movzx edi, byte [r8]
test dil, dil
jz .all_alpha
call is_alpha
inc r8
test rax, rax
jnz .next_char
ret ; rax is already 0 from the failed is_alpha
.all_alpha:
mov eax, 1
ret

View File

@ -3,10 +3,10 @@ section .text
global is_num_str global is_num_str
is_num: ; rax: bool (rdi: int c) is_num: ; rax: bool (rdi: int c)
cmp rdi, 48 sub dil, '0'
jl not_num cmp dil, 9
cmp rdi, 57 jnc not_num
jg not_num
mov rax, 1 mov rax, 1
ret ret
not_num: not_num:

View File

@ -78,7 +78,7 @@ split: ; RAX: char ** split(RDI: char *, RSI: int)
mov [rbp - 16], rax mov [rbp - 16], rax
mov rcx, rax mov rcx, rax
cmp rbx, 1 cmp rbx, 0
je .no_match je .no_match
call strlen call strlen

2
src/inc/asm_output.s Normal file
View File

@ -0,0 +1,2 @@
; Text fragments of generated assembly.
section .data
; NUL-terminated, two newline-terminated lines: "mov eax, esi" then "add eax, edi".
; NOTE(review): no reader of ADD_INST appears in the files that include this one - confirm intended use.
ADD_INST: db "mov eax, esi", 0xa, "add eax, edi", 0xa, 0

View File

@ -1,8 +1,10 @@
section .data
%define EXPR_SIZE 32 %define EXPR_SIZE 32
%define EXPR_TYPE 0
%define EXPR_TOK_CNT 8
%define EXPR_TOK 16
; struct expression size = 32 ; struct expression size = 32
; .type ; .type
; .tok_count + 8 ; .tok_count + 8

22
src/inc/lexer.s Normal file
View File

@ -0,0 +1,22 @@
; Byte offsets into struct lexer, followed by its total size.
; The two counters sit 4 bytes apart, so each is a 32-bit field.
%define LEX_EXPR_CNT 0
%define LEX_VAR_CNT 4
%define LEX_EXPR 8
%define LEX_VAR 16
%define LEX_OUT 24
%define LEX_SIZE 32
; Byte offsets into struct var, followed by its total size.
%define VAR_NAME 0
%define VAR_OFFS 8
%define VAR_SIZE 16
; Layout summary (field, byte offset):
; struct var
; .name* 0
; .stack_off 8
; struct lexer
; .expr_cnt 0
; .var_cnt 4
; .expr* 8
; .vars 16
; .output** 24

View File

@ -1,7 +0,0 @@
%define EXPR_SIZE 32
%define EXPR_TYPE 0
%define EXPR_TOK_CNT 8
%define EXPR_TOK 16

15
src/inc/token.s Normal file
View File

@ -0,0 +1,15 @@
; Total size of struct token and the byte offsets of its two fields.
%define SIZE_TOK 16
%define TOK_TYPE 0
%define TOK_VALUE 8
; Values held in the .type field.
%define TOK_LOAD 0
%define TOK_VAR 1
%define TOK_CONST 2
%define TOK_ADD 3
%define TOK_SUB 4
%define TOK_FUNC 5
; Layout summary (field, byte offset):
; struct token
; .type 0
; .value 8

View File

@ -1,4 +1,4 @@
%include "./lang/src/inc/macros.inc" %include "./src/inc/expression.s"
section .text section .text
global create_expressions global create_expressions
@ -11,11 +11,13 @@ section .text
extern print_expression extern print_expression
create_expressions: ; rax: exp* (rdi: char *filecontent) create_expressions: ; rax: exp* (rdi: char *filecontent, rsi: *cnt)
push rbp push rbp
mov rbp, rsp mov rbp, rsp
sub rsp, 32 ; allocate stack sub rsp, 32 ; allocate stack
push rsi
mov rsi, 0x0a mov rsi, 0x0a
call split call split
@ -55,7 +57,7 @@ create_expressions: ; rax: exp* (rdi: char *filecontent)
.splitting_done: .splitting_done:
; allocate expressions ; allocate expressions
mov rax, [expr_size] mov rax, EXPR_SIZE
mul rcx ; rcx contains the amount of splits aka expr count mul rcx ; rcx contains the amount of splits aka expr count
mov rdi, rax mov rdi, rax
call malloc call malloc
@ -88,10 +90,8 @@ create_expressions: ; rax: exp* (rdi: char *filecontent)
mul rcx mul rcx
lea rax, [rbx + rax] lea rax, [rbx + rax]
pop rbx pop rbx
mov rdx, [expr_tok] mov [rax + EXPR_TOK], rbx
mov [rax + rdx], rbx mov [rax + EXPR_TOK_CNT], rdi
mov rdx, [expr_tok_cnt]
mov [rax + rdx], rdi
inc rcx inc rcx
jmp .loop_expressions jmp .loop_expressions
@ -114,6 +114,9 @@ create_expressions: ; rax: exp* (rdi: char *filecontent)
jmp .expr_loop_print jmp .expr_loop_print
.done: .done:
pop rsi
mov rdi, [rbp - 24]
mov dword [rsi], edi
mov rax, [rbp - 16] mov rax, [rbp - 16]
add rsp, 32 add rsp, 32
mov rsp, rbp mov rsp, rbp

View File

@ -1,11 +1,10 @@
%include "./src/inc/expression.s"
section .data section .data
header: db 0xa, "---------", 0xa, "Expr ", 0xa, "---------", 0 header: db 0xa, "---------", 0xa, "Expr ", 0xa, "---------", 0
type: db 0xa, "type: ", 0 type: db 0xa, "type: ", 0
section .text section .text
extern print_tokens extern print_tokens
extern expr_type
extern expr_tok
extern expr_tok_cnt
extern putendl extern putendl
global print_expression global print_expression
@ -18,10 +17,8 @@ print_expression: ; (rdi: expr*)
call putendl call putendl
pop rdi pop rdi
mov rbx, [expr_tok_cnt] mov rsi, [rdi + EXPR_TOK_CNT]
mov rsi, [rdi + rbx] mov r12, [rdi + EXPR_TOK] ; r12 = tok*
add rdi, [expr_tok] ; tok**
mov r12, [rdi] ; r12 = tok*
mov rdi, r12 mov rdi, r12
call print_tokens call print_tokens

View File

@ -1,7 +1,15 @@
%include "./src/inc/token.s"
section .data section .data
token: db 0xa, "Token ", 0 token: db 0xa, "Token ", 0
type: db "type = ", 0 type: db "type = ", 0
value: db "value = ", 0 value: db "value = ", 0
VAL_CONST: db "const", 0
VAL_VAR: db "variable", 0
VAL_OP_ADD: db "operator '+'", 0
VAL_OP_SUB: db "operator '-'", 0
VAL_OP_LOAD: db "operator '='", 0
VAL_FUNC: db "function call", 0
section .text section .text
global print_tokens global print_tokens
@ -11,11 +19,47 @@ section .text
extern putnumberendl extern putnumberendl
extern get_split_count extern get_split_count
print_token_type: ; (rdi: int)
; Prints the label string for a token type through putendl.
cmp rdi, TOK_LOAD
je .tok_load
cmp rdi, TOK_ADD
je .tok_add
cmp rdi, TOK_SUB
je .tok_sub
cmp rdi, TOK_CONST
je .tok_const
cmp rdi, TOK_VAR
je .tok_var
cmp rdi, TOK_FUNC
je .tok_func
; NOTE(review): a value matching none of the types above falls through
; to .tok_load and is reported as the '=' operator - confirm that is wanted.
.tok_load:
mov rdi, VAL_OP_LOAD
jmp .print
.tok_add:
mov rdi, VAL_OP_ADD
jmp .print
.tok_sub:
mov rdi, VAL_OP_SUB
jmp .print
.tok_const:
mov rdi, VAL_CONST
jmp .print
.tok_var:
mov rdi, VAL_VAR
jmp .print
.tok_func:
mov rdi, VAL_FUNC
.print:
call putendl
ret
print_tokens: ; (rdi: tok*, rsi: tok_count) print_tokens: ; (rdi: tok*, rsi: tok_count)
push rbp push rbp
@ -49,9 +93,9 @@ print_tokens: ; (rdi: tok*, rsi: tok_count)
mul r12 mul r12
mov rbx, [rbp - 8] mov rbx, [rbp - 8]
lea r13, [rbx + rax] lea r13, [rbx + rax]
mov rdi, [r13] mov rdi, [r13 + TOK_TYPE]
push rax push rax
call putnumberendl call print_token_type
mov rdi, value mov rdi, value
call putstr call putstr
@ -59,7 +103,7 @@ print_tokens: ; (rdi: tok*, rsi: tok_count)
mov rbx, [rbp - 8] mov rbx, [rbp - 8]
lea r13, [rbx + rax] lea r13, [rbx + rax]
mov rdi, [r13 + 8] mov rdi, [r13 + TOK_VALUE]
call putendl call putendl
mov rcx, r12 mov rcx, r12

View File

@ -1,8 +1,307 @@
%include "./src/inc/token.s"
%include "./src/inc/lexer.s"
%include "./src/inc/expression.s"
%include "./src/inc/asm_output.s"

; Prefix of every lexer diagnostic: a newline followed by the tag.
%define LEX_ERROR 0xa, "[LEX_ERROR] "

section .data
; NUL-terminated text fragments written through putstr.
EEXPECT: db LEX_ERROR, "expected: ", 0
MOV: db "mov ", 0
OPEN_STACK_VAR: db "[rbp - ", 0
CLOSE_STACK_VAR: db "], ", 0
section .text
extern malloc
extern err_malloc
extern exit
extern putstr
extern create_expressions
extern strcmp
extern VAL_OP_LOAD
extern putchar
extern putnumber
extern putendl
lex_eexpect: ; (rdi: char *expected)
; Writes the EEXPECT message followed by the caller's string, then calls
; exit with status 1. There is no ret after the exit call.
push rdi ; keep the caller's string across the first putstr
mov rdi, EEXPECT
call putstr
mov rdi, [rsp]
add rsp, 8
call putstr
mov edi, 1
call exit
count_vars: ; rdi: lex*
; Counts every TOK_VAR token across all expressions of the lexer and stores
; the total as a dword in lex->var_cnt. rdi is left untouched.
; rbx = expression cursor, rcx = expression index,
; rax = token cursor, rdx = tokens left in the current expression,
; r12 = running total.
push rbx
push r12
xor r12d, r12d
mov rbx, [rdi + LEX_EXPR]
xor ecx, ecx
.next_expr:
cmp ecx, dword [rdi + LEX_EXPR_CNT]
je .store
mov rdx, [rbx + EXPR_TOK_CNT]
mov rax, [rbx + EXPR_TOK]
add rbx, EXPR_SIZE
inc rcx
.tok_loop:
test rdx, rdx
jz .next_expr
cmp qword [rax + TOK_TYPE], TOK_VAR
jne .advance
inc r12
.advance:
add rax, SIZE_TOK
dec rdx
jmp .tok_loop
.store:
mov dword [rdi + LEX_VAR_CNT], r12d
pop r12
pop rbx
ret
get_vars: ; (rdi: lex*)
; Allocates lex->vars with one struct var per TOK_VAR token (count taken from
; count_vars) and fills each entry with the token's value pointer as the name
; and a stack offset of 8 * ordinal (first variable 8, second 16, ...).
; Jumps to err_malloc when malloc returns 0.
; Locals: [rbp - 16] = expr array, [rbp - 24] = lex.
; r12 = byte offset of the next free entry, r9 = variables stored so far.
push rbp
mov rbp, rsp
sub rsp, 32
push rbx
push r12

call count_vars ; leaves rdi (lex) intact
mov [rbp - 24], rdi ; store lex
mov eax, dword [rdi + LEX_VAR_CNT]
mov rdi, VAR_SIZE
mul rdi
mov rdi, rax
call malloc
cmp rax, 0
je err_malloc
mov rdi, [rbp - 24]
mov [rdi + LEX_VAR], rax

mov rax, [rdi + LEX_EXPR]
mov [rbp - 16], rax

; counters are initialised only after the last call, so no callee can clobber them
xor r12, r12
xor r9, r9

xor rcx, rcx
push rcx
.loop_expr:
pop rcx
cmp ecx, dword [rdi + LEX_EXPR_CNT]
je .done
mov rax, EXPR_SIZE
mul rcx
mov rbx, [rbp - 16]
lea rax, [rbx + rax]
mov rdx, [rax + EXPR_TOK_CNT]
inc rcx
push rcx ; expression index waits on the stack while rcx indexes tokens
xor rcx, rcx
mov rax, [rax + EXPR_TOK]

.loop_toks:
cmp rcx, rdx
je .loop_expr
cmp qword [rax + TOK_TYPE], TOK_VAR
jne .skip_alloc
mov rbx, [rbp - 24]
mov rbx, [rbx + LEX_VAR]
lea rbx, [rbx + r12] ; rbx = next free struct var
mov r8, [rax + TOK_VALUE]
mov [rbx + VAR_NAME], r8
inc r9
lea r8, [r9 * 8] ; lea keeps rax (token cursor) and rdx (token count) intact
mov [rbx + VAR_OFFS], r8
add r12, VAR_SIZE ; advance so the next variable gets its own entry

.skip_alloc:
add rax, SIZE_TOK
inc rcx
jmp .loop_toks

.done:
pop r12
pop rbx
mov rsp, rbp
pop rbp
ret
global lex global lex
lex: ; rax: lex* (rdi: char *file_content)
; Allocates a struct lexer, fills it with the expressions built from
; file_content and the variable table from get_vars, then runs
; lex_assignment on every expression. Returns the lexer in rax.
; Jumps to err_malloc when malloc returns 0.
; Locals: [rbp - 8] = file_content, [rbp - 16] = expr count (dword), [rbp - 24] = lex.
push rbp
mov rbp, rsp
sub rsp, 32
push rbx

mov [rbp - 8], rdi

; allocate lexer
mov rdi, LEX_SIZE
call malloc
cmp rax, 0
je err_malloc
mov [rbp - 24], rax ; store lex on stack

lea rsi, [rbp - 16] ; int* expr_cnt
mov rdi, [rbp - 8] ; restore file_content
call create_expressions
mov rdi, [rbp - 24]
mov [rdi + LEX_EXPR], rax
; expr_cnt is a dword; a qword copy would spill stack garbage into var_cnt
mov eax, dword [rbp - 16]
mov dword [rdi + LEX_EXPR_CNT], eax
call get_vars

xor rcx, rcx
.process_expressions:
mov rdi, [rbp - 24]
mov esi, [rdi + LEX_EXPR_CNT]
cmp ecx, esi
je .done
mov rbx, [rdi + LEX_EXPR]
mov rax, EXPR_SIZE
mul rcx
push rcx ; loop index must survive the call
mov rdi, [rbx + rax + EXPR_TOK]
mov rsi, [rbx + rax + EXPR_TOK_CNT] ; n = token count of this expression
mov rdx, [rbp - 24]
call lex_assignment
pop rcx
inc rcx
jmp .process_expressions

.done:
mov rax, [rbp - 24] ; return the lexer, as the signature promises
pop rbx
mov rsp, rbp
pop rbp
ret
; number of tokens in `var = operand`
%define ASSIGN_TOK_CNT 3
lex_assignment: ; rax: bool (rdi: tok*, rsi: n, rdx: lex*)
; Emits the start of a store for an expression of the form `var = operand`:
; prints "mov [rbp - <offset>], " and, for a constant operand, the constant
; followed by a newline. A variable operand prints nothing further.
; Returns 0 when the first token is not a variable or the operand is neither
; a constant nor a variable. Otherwise returns 1 only when n equals
; ASSIGN_TOK_CNT.
; Calls lex_eexpect when the second token is not TOK_LOAD.
; NOTE(review): tokens 2 and 3 are read without checking n - confirm callers
; always pass at least three tokens.
; Locals: [rbp - 8] = var_cnt, [rbp - 16] = tok array, [rbp - 24] = vars, [rbp - 32] = n.
push rbp
mov rbp, rsp
sub rsp, 32

mov [rbp - 16], rdi ; store tok array
mov [rbp - 32], rsi ; keep n: rsi is reused for the look_up_var argument
mov edi, dword [rdx + LEX_VAR_CNT] ; 32-bit load zero-extends into rdi
mov [rbp - 8], rdi ; var_cnt, kept as a clean qword
mov rdi, [rdx + LEX_VAR]
mov [rbp - 24], rdi ; vars

; check first token: if not TOK_VAR, cant be assign
mov rdi, [rbp - 16]
mov rdx, [rdi + TOK_TYPE]
cmp rdx, TOK_VAR
jne .done_false

push rdi ; token cursor, restored after the prints
mov rsi, [rdi + TOK_VALUE]
mov rdi, [rbp - 24]
mov rdx, [rbp - 8]
call look_up_var

push rax ; stack offset of the target variable
mov rdi, MOV
call putstr
mov rdi, OPEN_STACK_VAR
call putstr
pop rdi
call putnumber
mov rdi, CLOSE_STACK_VAR
call putstr

pop rdi
add rdi, SIZE_TOK
mov rdx, [rdi + TOK_TYPE]
cmp rdx, TOK_LOAD
jne .err_found

add rdi, SIZE_TOK
mov rdx, [rdi + TOK_TYPE]
cmp rdx, TOK_CONST
je .print_const
cmp rdx, TOK_VAR
je .done_true

.done_false:
xor eax, eax
jmp .leave

.done_true:
xor eax, eax
cmp qword [rbp - 32], ASSIGN_TOK_CNT
sete al

.leave: ; single exit: every path tears the frame down before ret
mov rsp, rbp
pop rbp
ret

.err_found:
mov rdi, VAL_OP_LOAD
call lex_eexpect ; prints the error and calls exit

.print_const:
mov rdi, [rdi + TOK_VALUE]
call putendl
jmp .done_true
look_up_var: ; rax: stack offset, 0 when absent (rdi: vars*, rsi: name*, rdx: n)
; Scans n struct var entries for one whose name pointer equals rsi
; (pointers are compared, not string contents) and returns its stack offset.
; On return rcx is the index reached and rdi points at the entry reached.
xor eax, eax
xor ecx, ecx
jmp .check
.next:
inc rcx
add rdi, VAR_SIZE
.check:
cmp rcx, rdx
je .out
cmp [rdi + VAR_NAME], rsi
jne .next
mov rax, [rdi + VAR_OFFS]
.out:
ret

View File

@ -1,11 +1,24 @@
%define SIZE_TOK 16 %include "./src/inc/token.s"
%define TOK_ASSIGN 0
%define TOK_ADD 1
%define TOK_PRINT 2
%define TOK_VAR 3
%define TOK_CONST 4
section .data section .data
global VAL_CONST
VAL_CONST: db "const", 0
global VAL_VAR
VAL_VAR: db "variable", 0
global VAL_OP_ADD
VAL_OP_ADD: db "operator '+'", 0
global VAL_OP_SUB
VAL_OP_SUB: db "operator '-'", 0
global VAL_OP_LOAD
VAL_OP_LOAD: db "operator '='", 0
global VAL_FUNC
VAL_FUNC: db "function call", 0
OP_ASSIGN: db "=", 0 OP_ASSIGN: db "=", 0
OP_ADD: db "+", 0 OP_ADD: db "+", 0
OP_PRINT: db "print", 0 OP_PRINT: db "print", 0
@ -21,11 +34,6 @@ section .text
extern get_split_count extern get_split_count
; struct token
; .type 0
; .value +8
token_alloc: ; rax: tok* (rdi: int cnt) token_alloc: ; rax: tok* (rdi: int cnt)
mov rax, rdi mov rax, rdi
mov rdi, SIZE_TOK mov rdi, SIZE_TOK
@ -81,7 +89,7 @@ parse: ; rax: tok* (rdi: char**)
.is_assign: .is_assign:
pop rcx pop rcx
push rdi push rdi
mov rdi, TOK_ASSIGN mov rdi, TOK_LOAD
jmp .set_token jmp .set_token
.is_add: .is_add:
@ -93,7 +101,7 @@ parse: ; rax: tok* (rdi: char**)
.is_print: .is_print:
pop rcx pop rcx
push rdi push rdi
mov rdi, TOK_PRINT mov rdi, TOK_FUNC
jmp .set_token jmp .set_token
.is_const: .is_const:

View File

@ -23,6 +23,7 @@ section .text
extern err_malloc extern err_malloc
extern get_file_content extern get_file_content
extern create_expressions extern create_expressions
extern lex
print_usage: print_usage:
mov rdi, usage mov rdi, usage
@ -40,9 +41,11 @@ _start:
call get_file_content call get_file_content
mov rdi, rax mov rdi, rax
call create_expressions
mov [rbp - 8], rax mov [rbp - 8], rax
mov rdi, rax
call lex
mov rsp, rbp mov rsp, rbp
pop rbp pop rbp

View File

@ -1,4 +1 @@
a = 5 a = 5
b = a + 6
print b
b = a + a