commit bc6aee7bb75d1662ecf3c072bf487355d6bdff90 Author: bunny Date: Fri Sep 19 19:54:59 2025 +0100 starter diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fb8e1cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.cmo +*.cmi +*.cmx +*.o +*.out +*.cma diff --git a/.ocamlinit b/.ocamlinit new file mode 100644 index 0000000..702eca1 --- /dev/null +++ b/.ocamlinit @@ -0,0 +1,3 @@ +#load "parser.cmo";; +#load "periodic.cmo";; +#load "main.cmo";; diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a5e656b --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +common_files = parser.ml +extensions = *.cmo *.cmi *.cma *.o *.out *.cmx *.a + +lib: + ocamlc -c $(common_files) + ocamlc -a parser.cmo -o parser.cma + +clean: + rm -f $(extensions) diff --git a/README b/README new file mode 100644 index 0000000..4b8c2f4 --- /dev/null +++ b/README @@ -0,0 +1,13 @@ +# usagi - simple ocaml parser combinator library + +## compilation + +```bash +$ make lib +``` + +generate `.cma` that can be linked with `ocamlc -I . parser.cma` + +## todo + +- [ ] error recovery diff --git a/parser.ml b/parser.ml new file mode 100644 index 0000000..aa20ad6 --- /dev/null +++ b/parser.ml @@ -0,0 +1,111 @@ +type input = + { content : string + ; line : int + ; offset : int + } + +let make_input s = { content = s; line = 0; offset = 0 } + +type error = + { content : string + ; line : int + ; offset : int + } + +type 'a parser_result = (('a * input) list, error list) result + +type 'a parser = + { run : input -> 'a parser_result + } + +let fold_left1 (f: 'a -> 'a -> 'a) (xs: 'a list) : 'a = match xs with + | [] -> failwith "TODO: make method total; empty list" + | x :: xs -> List.fold_left f x xs + +let append (a: 'a parser_result) (b: 'a parser_result) : 'a parser_result = + match (a,b) with + | (Ok v, Ok v') -> Ok (List.append v v') + | (Error e, Error e') -> Error (List.append e e') + | (v, _) -> v + +let fail (s: string) : 'a parser = + { run = fun x -> Error [{content = s; line = x.line; offset = x.offset}] + } + +let get : char parser = + { run = function + | { content = ""; line = l; offset = o } -> + Error [{ content = "unexpected EOF"; line = l; offset = o }] + | { content = xs; line = l; offset = o } -> + let x = String.get xs 0 in + let xs' = String.sub xs 1 (String.length xs - 1) in + let (line', offset') = if x == '\n' then (l+1,0) else (l,o+1) in + Ok [(x, {content = xs'; line = line'; offset = offset'})] + } + +let return (x: 'a) : 'a parser = + { run = fun inp -> + Ok [(x,inp)] + } + +let (>>=) (p: 'a parser) (f: 'a -> 'b parser) : 'b parser = + { run = fun inp -> + match p.run inp with + | Ok v -> + let f (x: 'a * input) = + let (v',out) = x + in + (f v').run out + in + fold_left1 (fun acc v -> append acc v) @@ List.map f v + | Error e -> Error e + } + +let (let*) = (>>=) + +let sat (p: char -> bool) : char parser = + let* x = get in + if p x then return x else fail "unexpected character" + +let (+++) (p: 'a parser) (q: 'a parser) : 'a parser = + { run = fun inp -> match (p.run inp,q.run inp) with + | (Error _, Ok v) -> Ok v + | (a,b) -> append a b + } + +let (<++) (p: 'a parser) (q: 'a parser) : 'a parser = + { run = fun inp -> match (p.run inp,q.run inp) with + | (Error _, Ok v) -> Ok v + | (Ok v , _) -> Ok v + | (a,b) -> append a b + } + +let digit : 'a parser = sat (fun x -> x >= '0' && x <= '9') +let lower : 'a parser = sat (fun x -> x >= 'a' && x <= 'z') +let upper : 'a parser = sat (fun x -> x >= 'A' && x <= 'Z') + +let rec many (p: 'a parser): 'a list parser = + ( + let* x = p in + let* xs = many p in + x::xs |> return + ) +++ return [] + +let many1 (p: 'a parser): 'a list parser = + let* x = p in + let* xs = many p in + x::xs |> return + +let rec manyTill (p: 'a parser) (q: 'b parser): 'a list parser = + ( + let* x = p in + let* xs = manyTill p q in + x::xs |> return + ) <++ (q >>= fun _ -> return []) + +let eof : unit parser = + { run = fun inp -> + if inp.content = "" + then Ok [(), inp] + else Error[{content = "expected EOF"; line = inp.line; offset = inp.offset+1}] + }