352 lines
8.7 KiB
C
352 lines
8.7 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
|
|
typedef enum {
|
|
LPAREN,
|
|
RPAREN,
|
|
LAMBDA,
|
|
VARIABLE,
|
|
DOT,
|
|
NL,
|
|
DECLARE
|
|
} labubu_token_type;
|
|
|
|
typedef struct {
|
|
labubu_token_type type;
|
|
char* content;
|
|
} labubu_token;
|
|
|
|
typedef struct LABUBU_TOKEN_LIST {
|
|
labubu_token token;
|
|
struct LABUBU_TOKEN_LIST* next;
|
|
} labubu_token_list;
|
|
|
|
typedef struct {
|
|
labubu_token_list* tokens;
|
|
char* rest;
|
|
} labubu_lexer;
|
|
|
|
labubu_token_list* labubu_token_list_append(labubu_token_list* tokens, labubu_token token) {
|
|
labubu_token_list* next = malloc(sizeof (labubu_token_list));
|
|
next->next = NULL;
|
|
next->token = token;
|
|
tokens->next = next;
|
|
return next;
|
|
}
|
|
|
|
void tokenize(labubu_lexer* lexer) {
|
|
labubu_token_list* current_token = lexer->tokens;
|
|
labubu_token token;
|
|
char* buffer;
|
|
|
|
while (*lexer->rest != '\0') {
|
|
switch (*lexer->rest) {
|
|
case '\n':
|
|
case ';':
|
|
token = (labubu_token) { .type = NL, .content = "\n" };
|
|
current_token = labubu_token_list_append(
|
|
current_token,
|
|
token);
|
|
lexer->rest++;
|
|
break;
|
|
case ' ':
|
|
lexer->rest++;
|
|
continue;
|
|
case '\\':
|
|
token = (labubu_token) {.type = LAMBDA, .content = "\\" };
|
|
current_token = labubu_token_list_append(
|
|
current_token,
|
|
token);
|
|
lexer->rest++;
|
|
break;
|
|
case '.':
|
|
token = (labubu_token) {.type = DOT, .content = "." };
|
|
current_token = labubu_token_list_append(
|
|
current_token,
|
|
token);
|
|
lexer->rest++;
|
|
break;
|
|
case '(':
|
|
token = (labubu_token) {.type = LPAREN, .content = "(" };
|
|
current_token = labubu_token_list_append(
|
|
current_token,
|
|
token);
|
|
lexer->rest++;
|
|
break;
|
|
case ')':
|
|
token = (labubu_token) {.type = RPAREN, .content = ")" };
|
|
current_token = labubu_token_list_append(
|
|
current_token,
|
|
token);
|
|
lexer->rest++;
|
|
break;
|
|
case ':':
|
|
lexer->rest++;
|
|
if (*lexer->rest == '=') {
|
|
token = (labubu_token) {.type = DECLARE, .content = ":=" };
|
|
current_token = labubu_token_list_append(
|
|
current_token,
|
|
token);
|
|
lexer->rest++;
|
|
}
|
|
break;
|
|
default:
|
|
buffer = malloc(100 * sizeof (char));
|
|
size_t j = 0;
|
|
char current = *lexer->rest;
|
|
while(current != '\0' &&
|
|
*lexer->rest != ' '
|
|
&& current != '\n'
|
|
&& (current != ':' && current+1 != '=')
|
|
&& (current != '(')
|
|
&& (current != ')')
|
|
&& (current != '\\')
|
|
&& (current != '.')
|
|
) {
|
|
buffer[j++] = current;
|
|
current = *(++lexer->rest);
|
|
}
|
|
buffer[j] = '\0';
|
|
token = (labubu_token) {.type = VARIABLE, .content = buffer};
|
|
current_token = labubu_token_list_append(
|
|
current_token,
|
|
token);
|
|
}
|
|
}
|
|
}
|
|
|
|
void labubu_tokens_print (labubu_token_list* tokens) {
|
|
labubu_token_list* current = tokens;
|
|
while ( (current = current->next) != NULL ) {
|
|
switch(current->token.type) {
|
|
case LPAREN:
|
|
printf("LPAREN");
|
|
break;
|
|
case RPAREN:
|
|
printf("RPAREN");
|
|
break;
|
|
case LAMBDA:
|
|
printf("LAMBDA");
|
|
break;
|
|
case DOT:
|
|
printf("DOT");
|
|
break;
|
|
case DECLARE:
|
|
printf("DECLARE");
|
|
break;
|
|
case VARIABLE:
|
|
printf("VARIABLE");
|
|
break;
|
|
case NL:
|
|
printf("NL");
|
|
break;
|
|
}
|
|
if (current->token.type != NL) printf(" %s\n", current->token.content);
|
|
else printf(" \\n\n");
|
|
}
|
|
}
|
|
|
|
typedef struct AST {
|
|
enum {
|
|
AST_DECLARATION, // VARIABLE := AST
|
|
AST_APPLICATION, // (AST AST)
|
|
AST_FUNCTION, // LAMBDA VARIABLE . AST
|
|
AST_VARIABLE // VARIABLE
|
|
} tag;
|
|
union {
|
|
struct AST_DECLARATION { char* name; struct AST* value; } AST_DECLARATION;
|
|
struct AST_APPLICATION { struct AST* f; struct AST* v; } AST_APPLICATION;
|
|
struct AST_FUNCTION { char* lambda; struct AST* body; } AST_FUNCTION;
|
|
struct AST_VARIABLE { char* name; } AST_VARIABLE;
|
|
} data;
|
|
} labubu_ast;
|
|
|
|
// shamelessly stolen: https://keleshev.com/abstract-syntax-tree-an-example-in-c/
|
|
void ast_print(labubu_ast *ptr) {
|
|
labubu_ast ast = *ptr;
|
|
switch (ast.tag) {
|
|
case AST_FUNCTION:
|
|
printf("\\%s.", ast.data.AST_FUNCTION.lambda);
|
|
ast_print(ast.data.AST_FUNCTION.body);
|
|
return;
|
|
case AST_VARIABLE:
|
|
printf("%s", ast.data.AST_VARIABLE.name);
|
|
return;
|
|
case AST_APPLICATION:
|
|
printf("(");
|
|
ast_print(ast.data.AST_APPLICATION.f);
|
|
printf(" ");
|
|
ast_print(ast.data.AST_APPLICATION.v);
|
|
printf(")");
|
|
return;
|
|
}
|
|
}
|
|
|
|
bool tokens_lookahead_check(labubu_token_list* tokens, labubu_token_type type) {
|
|
if (tokens != NULL && tokens->token.type == type) return true;
|
|
return false;
|
|
}
|
|
|
|
labubu_token_list* parse(labubu_token_list* tokens, labubu_ast *ast) {
|
|
if (tokens == NULL) return NULL;
|
|
while (tokens->next != NULL) {
|
|
labubu_token_list* current = tokens->next;
|
|
if (current->token.type == VARIABLE) {
|
|
//printf("VARIBLE NAMED: %s\n", current->token.content);
|
|
ast->tag = AST_VARIABLE;
|
|
ast->data.AST_VARIABLE.name = current->token.content;
|
|
return current;
|
|
}
|
|
if (current->token.type == LAMBDA &&
|
|
tokens_lookahead_check(current->next, VARIABLE) &&
|
|
tokens_lookahead_check(current->next->next, DOT)) {
|
|
ast->tag = AST_FUNCTION;
|
|
//printf("FUNCTION WITH VAR: %s\n", current->next->token.content);
|
|
//printf("<FUNCTION>\n");
|
|
ast->data.AST_FUNCTION.lambda = current->next->token.content;
|
|
|
|
ast->data.AST_FUNCTION.body = malloc(sizeof (labubu_ast));
|
|
|
|
labubu_token_list* current1 = parse(current->next->next, ast->data.AST_FUNCTION.body);
|
|
//printf("</FUNCTION>\n");
|
|
return current1;
|
|
}
|
|
if (current->token.type == LPAREN) {
|
|
///printf("<APPLICATION>\n");
|
|
ast->tag = AST_APPLICATION;
|
|
ast->data.AST_APPLICATION.f = malloc(sizeof (labubu_ast));
|
|
|
|
current = parse(current, ast->data.AST_APPLICATION.f);
|
|
|
|
//printf("\n");
|
|
|
|
ast->data.AST_APPLICATION.v = malloc(sizeof (labubu_ast));
|
|
|
|
labubu_token_list* current1 = parse(current, ast->data.AST_APPLICATION.v);
|
|
|
|
//printf("</APPLICATION>\n");
|
|
|
|
return current1->next;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
typedef struct {
|
|
char* name;
|
|
labubu_ast* value;
|
|
} labubu_variable;
|
|
|
|
typedef struct LABUBU_VARIABLES {
|
|
labubu_variable value;
|
|
struct LABUBU_VARIABLES* next;
|
|
} labubu_variables;
|
|
|
|
typedef struct {
|
|
bool found;
|
|
labubu_ast* value;
|
|
} labubu_variables_lookup_response;
|
|
|
|
labubu_variables_lookup_response lookup(labubu_variables* env, char* name) {
|
|
while( (env = env->next) != NULL ) {
|
|
if (strcmp(env->value.name, name) == 0)
|
|
return (labubu_variables_lookup_response) { .found = true, .value = env->value.value };
|
|
}
|
|
return (labubu_variables_lookup_response) {.found = false, .value = NULL };
|
|
}
|
|
|
|
void push(labubu_variables* env, char* name, labubu_ast* value) {
|
|
labubu_variables* next1 = malloc(sizeof (labubu_variables));
|
|
next1->value = (labubu_variable) {
|
|
.name = name,
|
|
.value = value
|
|
};
|
|
next1->next=env->next;
|
|
env->next = next1;
|
|
}
|
|
|
|
void pop(labubu_variables* env) {
|
|
if (env->next != NULL) env->next = env->next->next;
|
|
}
|
|
|
|
labubu_ast* reduce (labubu_ast* ast, labubu_variables* env) {
|
|
labubu_variables_lookup_response res;
|
|
labubu_ast* f;
|
|
labubu_ast* v;
|
|
switch (ast->tag) {
|
|
case AST_FUNCTION:
|
|
f = malloc(sizeof (labubu_ast));
|
|
f->tag = AST_FUNCTION;
|
|
f->data.AST_FUNCTION.body = ast->data.AST_FUNCTION.body;
|
|
f->data.AST_FUNCTION.lambda = ast->data.AST_FUNCTION.lambda;
|
|
return f;
|
|
case AST_VARIABLE:
|
|
res = lookup(
|
|
env,
|
|
ast->data.AST_VARIABLE.name);
|
|
|
|
|
|
if (res.found && res.value != NULL) {
|
|
if (res.value->tag == AST_VARIABLE &&
|
|
strcmp(res.value->data.AST_VARIABLE.name, ast->data.AST_VARIABLE.name) != 0)
|
|
return reduce(res.value, env);
|
|
else return res.value;
|
|
}
|
|
|
|
labubu_ast* ast1 = malloc(sizeof (labubu_ast));
|
|
*ast1 = *ast;
|
|
return ast1;
|
|
case AST_APPLICATION:
|
|
f = reduce(ast->data.AST_APPLICATION.f, env);
|
|
|
|
if (f->tag == AST_FUNCTION) {
|
|
push(env, f->data.AST_FUNCTION.lambda, ast->data.AST_APPLICATION.v);
|
|
labubu_ast* ast1 = reduce(f->data.AST_FUNCTION.body, env);
|
|
pop(env);
|
|
return ast1;
|
|
} else {
|
|
v = reduce(ast->data.AST_APPLICATION.v, env);
|
|
labubu_ast* ast1 = malloc(sizeof (labubu_ast));
|
|
*ast1 = (labubu_ast) {
|
|
.tag = AST_APPLICATION,
|
|
.data.AST_APPLICATION = (struct AST_APPLICATION) {
|
|
.f = f,
|
|
.v = v
|
|
}
|
|
};
|
|
return ast1;
|
|
}
|
|
break;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
void read (char* content) {
|
|
labubu_lexer lexer = {
|
|
.tokens = malloc(1024 * sizeof (labubu_token)),
|
|
.rest = content
|
|
};
|
|
printf("%s = ", lexer.rest);
|
|
tokenize(&lexer);
|
|
//labubu_tokens_print(lexer.tokens);
|
|
labubu_ast ast;
|
|
//printf("\n");
|
|
parse(lexer.tokens, &ast);
|
|
labubu_variables env = (labubu_variables) { .next = NULL } ;
|
|
labubu_ast* reduced = reduce(&ast, &env);
|
|
ast_print(reduced);
|
|
printf("\n");
|
|
}
|
|
|
|
int main (void) {
|
|
read("x");
|
|
read("\\x.x");
|
|
read("(\\x.x x)");
|
|
read("(\\x.x \\y.y)");
|
|
read("((\\a.\\b.((a b) \\x.\\y.y) \\x.\\y.y) \\x.\\y.x)");
|
|
read("\\f.(\\x.(f (x x)) \\x.(f (x x)))");
|
|
read("\\f.(\\x.(f(x x))\\x.(f(x x)))");
|
|
}
|