From caf0a6a9eb720ce6dfcc1d487419bb96fca6b195 Mon Sep 17 00:00:00 2001 From: bunny Date: Sat, 4 Oct 2025 17:00:10 +0100 Subject: [PATCH] =?UTF-8?q?=D0=B1=D0=BB=D1=8F=20=D0=B1=D1=83=D0=B4=D1=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 4 + README.md | 0 labubu.c | 337 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 341 insertions(+) create mode 100644 Makefile create mode 100644 README.md create mode 100644 labubu.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c15eb5f --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ +labubu: + gcc -o labubu labubu.c +clean: + rm -rf labubu diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/labubu.c b/labubu.c new file mode 100644 index 0000000..6f6fff5 --- /dev/null +++ b/labubu.c @@ -0,0 +1,337 @@ +#include +#include +#include +#include + +typedef enum { + LPAREN, + RPAREN, + LAMBDA, + VARIABLE, + DOT, + NL, + DECLARE +} labubu_token_type; + +typedef struct { + labubu_token_type type; + char* content; +} labubu_token; + +typedef struct LABUBU_TOKEN_LIST { + labubu_token token; + struct LABUBU_TOKEN_LIST* next; +} labubu_token_list; + +typedef struct { + labubu_token_list* tokens; + char* rest; +} labubu_lexer; + +labubu_token_list* labubu_token_list_append(labubu_token_list* tokens, labubu_token token) { + labubu_token_list* next = malloc(sizeof (labubu_token_list)); + next->next = NULL; + next->token = token; + tokens->next = next; + return next; +} + +void tokenize(labubu_lexer* lexer) { + labubu_token_list* current_token = lexer->tokens; + labubu_token token; + char* buffer; + + while (*lexer->rest != '\0') { + switch (*lexer->rest) { + case '\n': + case ';': + token = (labubu_token) { .type = NL, .content = "\n" }; + current_token = labubu_token_list_append( + current_token, + token); + lexer->rest++; + break; + case ' ': + lexer->rest++; + continue; + case '\\': + token = (labubu_token) {.type = LAMBDA, .content = "\\" }; + current_token = labubu_token_list_append( + current_token, + token); + lexer->rest++; + break; + case '.': + token = (labubu_token) {.type = DOT, .content = "." }; + current_token = labubu_token_list_append( + current_token, + token); + lexer->rest++; + break; + case '(': + token = (labubu_token) {.type = LPAREN, .content = "(" }; + current_token = labubu_token_list_append( + current_token, + token); + lexer->rest++; + break; + case ')': + token = (labubu_token) {.type = RPAREN, .content = ")" }; + current_token = labubu_token_list_append( + current_token, + token); + lexer->rest++; + break; + case ':': + lexer->rest++; + if (*lexer->rest == '=') { + token = (labubu_token) {.type = DECLARE, .content = ":=" }; + current_token = labubu_token_list_append( + current_token, + token); + lexer->rest++; + } + break; + default: + buffer = malloc(100 * sizeof (char)); + size_t j = 0; + char current = *lexer->rest; + while(current != '\0' && + *lexer->rest != ' ' + && current != '\n' + && (current != ':' && current+1 != '=') + && (current != '(') + && (current != ')') + && (current != '\\') + && (current != '.') + ) { + buffer[j++] = current; + current = *(++lexer->rest); + } + buffer[j] = '\0'; + token = (labubu_token) {.type = VARIABLE, .content = buffer}; + current_token = labubu_token_list_append( + current_token, + token); + } + } +} + +void labubu_tokens_print (labubu_token_list* tokens) { + labubu_token_list* current = tokens; + while ( (current = current->next) != NULL ) { + switch(current->token.type) { + case LPAREN: + printf("LPAREN"); + break; + case RPAREN: + printf("RPAREN"); + break; + case LAMBDA: + printf("LAMBDA"); + break; + case DOT: + printf("DOT"); + break; + case DECLARE: + printf("DECLARE"); + break; + case VARIABLE: + printf("VARIABLE"); + break; + case NL: + printf("NL"); + break; + } + if (current->token.type != NL) printf(" %s\n", current->token.content); + else printf(" \\n\n"); + } +} + +typedef struct AST { + enum { + AST_DECLARATION, // VARIABLE := AST + AST_APPLICATION, // (AST AST) + AST_FUNCTION, // LAMBDA VARIABLE . AST + AST_VARIABLE // VARIABLE + } tag; + union { + struct AST_DECLARATION { char* name; struct AST* value; } AST_DECLARATION; + struct AST_APPLICATION { struct AST* f; struct AST* v; } AST_APPLICATION; + struct AST_FUNCTION { char* lambda; struct AST* body; } AST_FUNCTION; + struct AST_VARIABLE { char* name; } AST_VARIABLE; + } data; +} labubu_ast; + +// shamelessly stolen: https://keleshev.com/abstract-syntax-tree-an-example-in-c/ +void ast_print(labubu_ast *ptr) { + labubu_ast ast = *ptr; + switch (ast.tag) { + case AST_FUNCTION: + printf("\\%s.", ast.data.AST_FUNCTION.lambda); + ast_print(ast.data.AST_FUNCTION.body); + return; + case AST_VARIABLE: + printf("%s", ast.data.AST_VARIABLE.name); + return; + case AST_APPLICATION: + printf("("); + ast_print(ast.data.AST_APPLICATION.f); + printf(" "); + ast_print(ast.data.AST_APPLICATION.v); + printf(")"); + return; + } +} + +bool tokens_lookahead_check(labubu_token_list* tokens, labubu_token_type type) { + if (tokens != NULL && tokens->token.type == type) return true; + return false; +} + +labubu_token_list* parse(labubu_token_list* tokens, labubu_ast *ast) { + if (tokens == NULL) return NULL; + while (tokens->next != NULL) { + labubu_token_list* current = tokens->next; + if (current->token.type == VARIABLE) { + //printf("VARIBLE NAMED: %s\n", current->token.content); + ast->tag = AST_VARIABLE; + ast->data.AST_VARIABLE.name = current->token.content; + return current; + } + if (current->token.type == LAMBDA && + tokens_lookahead_check(current->next, VARIABLE) && + tokens_lookahead_check(current->next->next, DOT)) { + ast->tag = AST_FUNCTION; + //printf("FUNCTION WITH VAR: %s\n", current->next->token.content); + //printf("\n"); + ast->data.AST_FUNCTION.lambda = current->next->token.content; + + ast->data.AST_FUNCTION.body = malloc(sizeof (labubu_ast)); + + labubu_token_list* current1 = parse(current->next->next, ast->data.AST_FUNCTION.body); + //printf("\n"); + return current1; + } + if (current->token.type == LPAREN) { + ///printf("\n"); + ast->tag = AST_APPLICATION; + ast->data.AST_APPLICATION.f = malloc(sizeof (labubu_ast)); + + current = parse(current, ast->data.AST_APPLICATION.f); + + //printf("\n"); + + ast->data.AST_APPLICATION.v = malloc(sizeof (labubu_ast)); + + labubu_token_list* current1 = parse(current, ast->data.AST_APPLICATION.v); + + //printf("\n"); + + return current1->next; + } + } + return NULL; +} + +typedef struct { + char* name; + labubu_ast* value; +} labubu_variable; + +typedef struct LABUBU_VARIABLES { + labubu_variable value; + struct LABUBU_VARIABLES* next; +} labubu_variables; + +typedef struct { + bool found; + labubu_ast* value; +} labubu_variables_lookup_response; + +labubu_variables_lookup_response lookup(labubu_variables* env, char* name) { + while( (env = env->next) != NULL ) { + if (strcmp(env->value.name, name) == 0) + return (labubu_variables_lookup_response) { .found = true, .value = env->value.value }; + } + return (labubu_variables_lookup_response) {.found = false, .value = NULL }; +} + +void push(labubu_variables* env, char* name, labubu_ast* value) { + labubu_variables* next1 = malloc(sizeof (labubu_variables)); + next1->value = (labubu_variable) { + .name = name, + .value = value + }; + next1->next=env->next; + env->next = next1; +} + +void pop(labubu_variables* env) { + if (env->next != NULL) env->next = env->next->next; +} + +labubu_ast* reduce (labubu_ast* ast, labubu_variables* env) { + labubu_variables_lookup_response res; + labubu_ast* f; + labubu_ast* v; + switch (ast->tag) { + case AST_FUNCTION: + f = malloc(sizeof (labubu_ast)); + f->tag = AST_FUNCTION; + f->data.AST_FUNCTION.body = reduce(ast->data.AST_FUNCTION.body, env); + f->data.AST_FUNCTION.lambda = ast->data.AST_FUNCTION.lambda; + return f; + case AST_VARIABLE: + res = lookup( + env, + ast->data.AST_VARIABLE.name); + if (res.found) { + if (res.value == NULL) return ast; + return reduce(res.value, env); + } + + return ast; + case AST_APPLICATION: + f = reduce(ast->data.AST_APPLICATION.f, env); + + v = reduce(ast->data.AST_APPLICATION.v, env); + + if (f->tag == AST_FUNCTION) { + push(env, f->data.AST_FUNCTION.lambda, v); + + labubu_ast* ast1 = reduce(f->data.AST_FUNCTION.body, env); + pop(env); + return ast1; + } else { + labubu_ast* ast1 = malloc(sizeof (labubu_ast)); + *ast1 = (labubu_ast) { + .tag = AST_APPLICATION, + .data.AST_APPLICATION = (struct AST_APPLICATION) { + .f = f, + .v = v + } + }; + return ast1; + } + break; + } + return NULL; +} + +int main (void) { + labubu_lexer lexer = { + .tokens = malloc(1024 * sizeof (labubu_token)), + .rest = "((\\a.\\b.((a b) \\x.\\y.y) \\x.\\y.x) \\x.\\y.x)" + }; + printf("%s = ", lexer.rest); + tokenize(&lexer); + //labubu_tokens_print(lexer.tokens); + labubu_ast ast; + //printf("\n"); + parse(lexer.tokens, &ast); + labubu_variables env = (labubu_variables) { .next = NULL } ; + labubu_ast* reduced = reduce(&ast, &env); + ast_print(reduced); + printf("\n"); +}