Files
labubu/labubu.c
2025-10-04 20:39:15 +01:00

343 lines
8.4 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
typedef enum {
LPAREN,
RPAREN,
LAMBDA,
VARIABLE,
DOT,
NL,
DECLARE
} labubu_token_type;
typedef struct {
labubu_token_type type;
char* content;
} labubu_token;
typedef struct LABUBU_TOKEN_LIST {
labubu_token token;
struct LABUBU_TOKEN_LIST* next;
} labubu_token_list;
typedef struct {
labubu_token_list* tokens;
char* rest;
} labubu_lexer;
labubu_token_list* labubu_token_list_append(labubu_token_list* tokens, labubu_token token) {
labubu_token_list* next = malloc(sizeof (labubu_token_list));
next->next = NULL;
next->token = token;
tokens->next = next;
return next;
}
void tokenize(labubu_lexer* lexer) {
labubu_token_list* current_token = lexer->tokens;
labubu_token token;
char* buffer;
while (*lexer->rest != '\0') {
switch (*lexer->rest) {
case '\n':
case ';':
token = (labubu_token) { .type = NL, .content = "\n" };
current_token = labubu_token_list_append(
current_token,
token);
lexer->rest++;
break;
case ' ':
lexer->rest++;
continue;
case '\\':
token = (labubu_token) {.type = LAMBDA, .content = "\\" };
current_token = labubu_token_list_append(
current_token,
token);
lexer->rest++;
break;
case '.':
token = (labubu_token) {.type = DOT, .content = "." };
current_token = labubu_token_list_append(
current_token,
token);
lexer->rest++;
break;
case '(':
token = (labubu_token) {.type = LPAREN, .content = "(" };
current_token = labubu_token_list_append(
current_token,
token);
lexer->rest++;
break;
case ')':
token = (labubu_token) {.type = RPAREN, .content = ")" };
current_token = labubu_token_list_append(
current_token,
token);
lexer->rest++;
break;
case ':':
lexer->rest++;
if (*lexer->rest == '=') {
token = (labubu_token) {.type = DECLARE, .content = ":=" };
current_token = labubu_token_list_append(
current_token,
token);
lexer->rest++;
}
break;
default:
buffer = malloc(100 * sizeof (char));
size_t j = 0;
char current = *lexer->rest;
while(current != '\0' &&
*lexer->rest != ' '
&& current != '\n'
&& (current != ':' && current+1 != '=')
&& (current != '(')
&& (current != ')')
&& (current != '\\')
&& (current != '.')
) {
buffer[j++] = current;
current = *(++lexer->rest);
}
buffer[j] = '\0';
token = (labubu_token) {.type = VARIABLE, .content = buffer};
current_token = labubu_token_list_append(
current_token,
token);
}
}
}
void labubu_tokens_print (labubu_token_list* tokens) {
labubu_token_list* current = tokens;
while ( (current = current->next) != NULL ) {
switch(current->token.type) {
case LPAREN:
printf("LPAREN");
break;
case RPAREN:
printf("RPAREN");
break;
case LAMBDA:
printf("LAMBDA");
break;
case DOT:
printf("DOT");
break;
case DECLARE:
printf("DECLARE");
break;
case VARIABLE:
printf("VARIABLE");
break;
case NL:
printf("NL");
break;
}
if (current->token.type != NL) printf(" %s\n", current->token.content);
else printf(" \\n\n");
}
}
typedef struct AST {
enum {
AST_DECLARATION, // VARIABLE := AST
AST_APPLICATION, // (AST AST)
AST_FUNCTION, // LAMBDA VARIABLE . AST
AST_VARIABLE // VARIABLE
} tag;
union {
struct AST_DECLARATION { char* name; struct AST* value; } AST_DECLARATION;
struct AST_APPLICATION { struct AST* f; struct AST* v; } AST_APPLICATION;
struct AST_FUNCTION { char* lambda; struct AST* body; } AST_FUNCTION;
struct AST_VARIABLE { char* name; } AST_VARIABLE;
} data;
} labubu_ast;
// shamelessly stolen: https://keleshev.com/abstract-syntax-tree-an-example-in-c/
void ast_print(labubu_ast *ptr) {
labubu_ast ast = *ptr;
switch (ast.tag) {
case AST_FUNCTION:
printf("\\%s.", ast.data.AST_FUNCTION.lambda);
ast_print(ast.data.AST_FUNCTION.body);
return;
case AST_VARIABLE:
printf("%s", ast.data.AST_VARIABLE.name);
return;
case AST_APPLICATION:
printf("(");
ast_print(ast.data.AST_APPLICATION.f);
printf(" ");
ast_print(ast.data.AST_APPLICATION.v);
printf(")");
return;
}
}
bool tokens_lookahead_check(labubu_token_list* tokens, labubu_token_type type) {
if (tokens != NULL && tokens->token.type == type) return true;
return false;
}
labubu_token_list* parse(labubu_token_list* tokens, labubu_ast *ast) {
if (tokens == NULL) return NULL;
while (tokens->next != NULL) {
labubu_token_list* current = tokens->next;
if (current->token.type == VARIABLE) {
//printf("VARIBLE NAMED: %s\n", current->token.content);
ast->tag = AST_VARIABLE;
ast->data.AST_VARIABLE.name = current->token.content;
return current;
}
if (current->token.type == LAMBDA &&
tokens_lookahead_check(current->next, VARIABLE) &&
tokens_lookahead_check(current->next->next, DOT)) {
ast->tag = AST_FUNCTION;
//printf("FUNCTION WITH VAR: %s\n", current->next->token.content);
//printf("<FUNCTION>\n");
ast->data.AST_FUNCTION.lambda = current->next->token.content;
ast->data.AST_FUNCTION.body = malloc(sizeof (labubu_ast));
labubu_token_list* current1 = parse(current->next->next, ast->data.AST_FUNCTION.body);
//printf("</FUNCTION>\n");
return current1;
}
if (current->token.type == LPAREN) {
///printf("<APPLICATION>\n");
ast->tag = AST_APPLICATION;
ast->data.AST_APPLICATION.f = malloc(sizeof (labubu_ast));
current = parse(current, ast->data.AST_APPLICATION.f);
//printf("\n");
ast->data.AST_APPLICATION.v = malloc(sizeof (labubu_ast));
labubu_token_list* current1 = parse(current, ast->data.AST_APPLICATION.v);
//printf("</APPLICATION>\n");
return current1->next;
}
}
return NULL;
}
typedef struct {
char* name;
labubu_ast* value;
} labubu_variable;
typedef struct LABUBU_VARIABLES {
labubu_variable value;
struct LABUBU_VARIABLES* next;
} labubu_variables;
typedef struct {
bool found;
labubu_ast* value;
} labubu_variables_lookup_response;
labubu_variables_lookup_response lookup(labubu_variables* env, char* name) {
while( (env = env->next) != NULL ) {
if (strcmp(env->value.name, name) == 0)
return (labubu_variables_lookup_response) { .found = true, .value = env->value.value };
}
return (labubu_variables_lookup_response) {.found = false, .value = NULL };
}
void push(labubu_variables* env, char* name, labubu_ast* value) {
labubu_variables* next1 = malloc(sizeof (labubu_variables));
next1->value = (labubu_variable) {
.name = name,
.value = value
};
next1->next=env->next;
env->next = next1;
}
void pop(labubu_variables* env) {
if (env->next != NULL) env->next = env->next->next;
}
labubu_ast* reduce (labubu_ast* ast, labubu_variables* env) {
labubu_variables_lookup_response res;
labubu_ast* f;
labubu_ast* v;
switch (ast->tag) {
case AST_FUNCTION:
f = malloc(sizeof (labubu_ast));
f->tag = AST_FUNCTION;
f->data.AST_FUNCTION.body = reduce(ast->data.AST_FUNCTION.body, env);
f->data.AST_FUNCTION.lambda = ast->data.AST_FUNCTION.lambda;
return f;
case AST_VARIABLE:
res = lookup(
env,
ast->data.AST_VARIABLE.name);
if (res.found && res.value != NULL) {
if (res.value->tag == AST_VARIABLE)
return reduce(res.value, env);
else return res.value;
}
labubu_ast* ast1 = malloc(sizeof (labubu_ast));
*ast1 = *ast;
return ast1;
case AST_APPLICATION:
v = reduce(ast->data.AST_APPLICATION.v, env);
f = reduce(ast->data.AST_APPLICATION.f, env);
if (f->tag == AST_FUNCTION) {
push(env, f->data.AST_FUNCTION.lambda, v);
labubu_ast* ast1 = reduce(f->data.AST_FUNCTION.body, env);
pop(env);
return ast1;
} else {
labubu_ast* ast1 = malloc(sizeof (labubu_ast));
*ast1 = (labubu_ast) {
.tag = AST_APPLICATION,
.data.AST_APPLICATION = (struct AST_APPLICATION) {
.f = f,
.v = v
}
};
return ast1;
}
break;
}
return NULL;
}
int main (void) {
labubu_lexer lexer = {
.tokens = malloc(1024 * sizeof (labubu_token)),
.rest = "((\\a.\\b.((a b) \\x.\\y.y) \\x.\\y.y) \\x.\\y.x)"
};
printf("%s = ", lexer.rest);
tokenize(&lexer);
//labubu_tokens_print(lexer.tokens);
labubu_ast ast;
//printf("\n");
parse(lexer.tokens, &ast);
labubu_variables env = (labubu_variables) { .next = NULL } ;
labubu_ast* reduced = reduce(&ast, &env);
ast_print(reduced);
printf("\n");
}