open ExtLib
open Textile
open Textile_parsercomb
let (>>) f g = g f
let ($) a b = fun x -> a (b x)
let default_options = ([], None, (0, 0))
let default_tableoptions = (default_options, None)
let default_celloptions = (Data, default_tableoptions, (None, None))
let empty_line = []
let num_of_char c = (int_of_char c) - 48
let rec njunk stream n =
if n > 0
then begin Stream.junk stream; njunk stream (n-1) end
let rec peekn stream n =
let l = Stream.npeek (n+1) stream in
try Some (List.nth l n)
with Failure _ | ExtList.List.Invalid_index _ ->
None
let p_string_not_empty = function "" -> fail | s -> return s
let whitespace = function ' ' | '\t' -> true | _ -> false
let punct = function
| '!' | '"' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | ':' | ';' | '<' | '=' | '>' | '?' -> true | _ -> false
let p_whitespace = p_pred whitespace
let p_not_whitespace = p_pred (fun c -> not (whitespace c))
let p_punct = p_pred punct
let check_prev p (s, pos) =
let prev_pos = pos - 1 in
(p >>= fun r -> fun _ -> Parsed (r, (s, pos))) (s, prev_pos)
let check_current p (s, pos) =
(p >>= fun r -> fun _ -> Parsed (r, (s, pos))) (s, pos)
let class_and_ids =
p_char '(' >>>
p_until (p_pred ((<>) '(')) (p_char ')') >>=
fun (s, _) ->
match String.nsplit s "#" with
| [] -> fail
| "" :: ids ->
return (List.map (fun x -> Id x) ids)
| classname :: ids ->
return ((Class classname) :: (List.map (fun x -> Id x) ids))
let style = p_char '{' >>> p_str_until (p_char '}') >>= p_string_not_empty
let language = p_char '[' >>> p_str_until (p_char ']') >>= p_string_not_empty
let attr_decl =
class_and_ids |||
(style >>= fun s -> return [Style s]) |||
(language >>= fun s -> return [Language s])
let try_attrs f =
(p_seq attr_decl >>= (return $ List.flatten) >>= f) |||
(f [])
let img_float =
(p_char '<' >>> return Float_left) |||
(p_char '>' >>> return Float_right)
let img_opts =
let add_opt (attrs, float_opt) = function
| `Attr a -> (a @ attrs, float_opt)
| `Img_float f -> (attrs, Some f) in
p_manyf
((attr_decl >>= fun a -> return (`Attr a)) ||| (img_float >>= fun f -> return (`Img_float f)))
add_opt
([], None)
let begin_of_phrase begin_of_line follow =
(p_pos begin_of_line >>> follow) |||
(
((p_whitespace) |||
(p_pred (function '(' | '\'' | '"' -> true | _ -> false))) >>> follow
)
let end_of_phrase =
dont_jump
(p_end |||
(p_whitespace >>> return ()) |||
(p_many p_punct >>> (p_end ||| (p_whitespace >>> return ()))))
let collect_phrases_with phrase until (s, begin_of_line) =
let rec loop acc beg (s, pos) =
let go_on () = loop acc beg (s, succ pos) in
match phrase (s, pos) with
| Parsed ((phrase_r, last_cdata_pos), (s, next_p)) ->
let acc_values =
if last_cdata_pos <= beg
then
[phrase_r]
else
let prev_cdata =
CData (String.slice ~first:beg ~last:last_cdata_pos s) in
[prev_cdata; phrase_r] in
loop (List.rev_append acc_values acc) next_p (s, next_p)
| Failed ->
(match until (s, pos) with
| Parsed (until_r, (s, new_pos)) ->
if pos = begin_of_line
then go_on ()
else
let acc =
if beg = pos
then acc
else
let last_cdata =
CData (String.slice ~first:beg ~last:pos s) in
last_cdata::acc in
Parsed ((List.rev acc, until_r), (s, new_pos))
| Failed ->
if pos >= String.length s
then
Failed
else go_on ()) in
loop [] begin_of_line (s, begin_of_line)
let phrase_surrounding end_of_phrase beg_of_line phrase =
(
begin_of_phrase beg_of_line (
current_pos >>= fun last_cdata_pos ->
phrase end_of_phrase >>= fun r ->
return (r, last_cdata_pos))
)
|||
(
p_char '[' >>>
current_pos >>= fun _pos ->
phrase (p_char ']' >>> return ()) >>= fun r ->
return (r, (_pos-1))
)
let reference beg_of_line =
(p_pos beg_of_line |||
(p_not_whitespace >>> current_pos)) >>= fun bracket ->
p_unsign_int >>= fun i ->
p_char ']' >>> end_of_phrase >>>
return ((Reference i), bracket-1)
let collect ~what ~ended_with ~from ~until =
collect_phrases_with
((phrase_surrounding
ended_with
from
what) ||| reference from)
until
let rec phrases_except_hyperlinks end_of_phrase =
let opened_modifier m =
m >>= fun r -> check_current p_not_whitespace >>> return r in
let closed_modifier m =
check_prev p_not_whitespace >>> m >>> end_of_phrase in
let sp modifier =
opened_modifier modifier >>= fun (f, cm) ->
try_attrs (fun a ->
current_pos >>= fun from ->
let until = closed_modifier cm in
collect
~what:all_phrases
~ended_with:(end_of_phrase ||| (dont_jump until >>> return ()))
~from
~until >>= fun (line, _) ->
return (f (a, line))) in
sp (p_str "__" >>> return ((fun x -> Italic x), p_str "__")) |||
sp (p_str "**" >>> return ((fun x -> Bold x), p_str "**")) |||
sp (p_pred2 (function
| '_' -> Some (((fun x -> Emphasis x), p_char '_'))
| '*' -> Some (((fun x -> Strong x), p_char '*'))
| '-' -> Some (((fun x -> Deleted x), p_char '-'))
| '+' -> Some (((fun x -> Inserted x), p_char '+'))
| '^' -> Some (((fun x -> Superscript x), p_char '^'))
| '~' -> Some (((fun x -> Subscript x), p_char '~'))
| '%' -> Some (((fun x -> Span x), p_char '%'))
| _ -> None)) |||
sp (p_str "??" >>> return ((fun x -> Citation x), p_str "??")) |||
(
opened_modifier (p_char '@') >>>
try_attrs (fun a ->
p_str_until (closed_modifier (p_char '@')) >>= fun s ->
return (Code (a, s)))
) |||
(
opened_modifier (p_str "==") >>>
p_str_until (closed_modifier (p_str "==")) >>= fun s ->
return (Notextile s)
) |||
(
let link_opt =
(p_char ':' >>>
p_until (p_not_whitespace) end_of_phrase >>= fun (url, _) ->
return (Some url)) |||
(end_of_phrase >>> return None) in
let end_with_title =
p_char '(' >>>
p_str_until (p_str ")!") >>= fun title ->
link_opt >>= fun link_opt ->
return (title, link_opt) in
let end_with_no_title =
p_char '!' >>>
link_opt in
p_char '!' >>>
img_opts >>= fun (attrs, float) ->
p_until p_not_whitespace (
(end_with_title >>= fun (title, link_opt) -> return (Some title, link_opt)) |||
(end_with_no_title >>= fun link_opt -> return (None, link_opt))
) >>= fun (src, (title_opt, link_opt)) ->
let r =
let image = Image (attrs, float, src, title_opt) in
match link_opt with
| Some url -> Link (([], [image]), None, url)
| None -> image in
return r
) ||| (
p_until
(p_pred (fun c -> c >= 'A' && c <= 'Z'))
(p_char '(') >>= fun (acr, _) ->
p_string_not_empty acr >>>
p_str_until (p_char ')' >>> end_of_phrase) >>= fun desc ->
return (Acronym (acr, desc))
)
and all_phrases end_of_phrase =
(phrases_except_hyperlinks end_of_phrase) |||
(
let url =
p_char ':' >>>
p_until (p_not_whitespace) end_of_phrase >>= fun (url, _) -> return url in
let end_with_title =
p_char '(' >>>
p_str_until (p_str ")\"") >>= fun title ->
url >>= fun url ->
return (title, url) in
let end_with_no_title =
p_char '"' >>> url in
p_char '"' >>>
check_current p_not_whitespace >>>
try_attrs (fun a ->
current_pos >>= fun from ->
collect
~what:phrases_except_hyperlinks
~ended_with:(end_of_phrase ||| dont_jump ((end_with_title >>> return ()) ||| (end_with_no_title >>> return ())))
~from
~until:(
(end_with_title >>= fun (title, url) -> return (Some title, url)) |||
(end_with_no_title >>= fun url -> return (None, url))
) >>= fun (line, (title_opt, url)) ->
let r = Link ((a, line), title_opt, url) in
return r)
)
let line (s, pos) =
(collect
~what:all_phrases
~ended_with:end_of_phrase
~from:pos
~until:p_end >>= fun (line, _) ->
return line) (s, pos)
let line_of_string s =
match line (s, 0) with
| Parsed (r, _) -> r
| Failed -> empty_line
let align =
(p_str "<>" >>> return Justify) |||
(p_char '<' >>> return Left) |||
(p_char '=' >>> return Center) |||
(p_char '>' >>> return Right)
let option =
(attr_decl >>= fun x -> return (`Attr x)) |||
(align >>= fun x -> return (`Align x)) |||
(p_char '(' >>> return `Left_padding) |||
(p_char ')' >>> return `Right_padding)
let add_option (attrs, talign, (lp, rp)) = function
| `Attr a -> (a @ attrs, talign, (lp, rp))
| `Align a -> (attrs, Some a, (lp, rp))
| `Left_padding -> (attrs, talign, (succ lp, rp))
| `Right_padding -> (attrs, talign, (lp, succ rp))
let options =
p_manyf option add_option default_options
let valign =
(p_char '^' >>> return Top ) |||
(p_char '-' >>> return Middle) |||
(p_char '~' >>> return Bottom)
let tableoption =
(option >>= fun x -> return (`Option x)) |||
(valign >>= fun x -> return (`Valign x))
let add_tableoption (opts, valign) = function
| `Valign x -> (opts, Some x)
| `Option x -> (add_option opts x, valign)
let tableoptions =
p_manyf tableoption add_tableoption default_tableoptions
let tableoptions_plus =
p_plusf tableoption add_tableoption default_tableoptions
let block_type =
(p_char 'h' >>>
p_pred (fun c -> c >= '1' && c <= '6') >>= fun c ->
return (`Textblock (`Header (num_of_char c)))) |||
(p_str "bq" >>> return (`Textblock `Blockquote)) |||
(p_str "fn" >>> p_unsign_int >>= fun i ->
return (`Textblock (`Footnote i))) |||
(p_str "bc" >>> return (`Textblock `Blockcode)) |||
(p_str "pre" >>> return (`Textblock `Pre)) |||
(p_str "notextile" >>> return (`Textblock `Blocknott)) |||
(p_char 'p' >>> return (`Textblock `Paragraph)) |||
(p_str "table" >>> return `Table)
let block_modifier =
p_many p_whitespace >>>
block_type >>= function
| `Table ->
tableoptions >>= fun topts ->
p_opt () (p_char '.' >>> return ()) >>>
p_many p_whitespace >>>
p_end >>>
return (`Table topts)
| `Textblock bm ->
options >>= fun opts ->
p_char '.' >>>
((p_char '.' >>> return true) ||| (return false)) >>= fun extended ->
p_char ' ' >>>
return (`Textblock (bm, opts, extended))
let of_stream stream =
let get_content parse_first parse empty extended (s, pos) =
let rec loop acc =
try
let str = Stream.next stream in
(match parse (str, 0) with
| Parsed (r, _) -> loop (r::acc)
| Failed when extended ->
(match Stream.peek stream with
| Some next_str ->
(match (block_modifier (next_str, 0)) with
| Parsed _ -> List.rev acc
| Failed -> (loop (empty::acc)))
| None -> List.rev acc)
| Failed -> List.rev acc)
with Stream.Failure -> List.rev acc in
match parse_first (s, pos) with
| Parsed (first, _) -> Parsed (loop [first], (s, pos))
| Failed -> Failed in
let get_lines extended (s, pos) =
let parse_line = line in
let parse_first_line = line in
get_content parse_first_line parse_line [] extended (s, pos) in
let get_strings extended (s, pos) =
let parse_string (s, pos) =
match s with
| "" -> Failed
| _ -> Parsed (s, (s, (String.length s))) in
let parse_first_string (s, first) =
let s = String.slice ~first s in
parse_string (s, first) in
get_content parse_first_string parse_string "" extended (s, pos) in
let celloptions =
let option =
(p_char '_' >>> return `Head) |||
(tableoption >>= fun x -> return (`Topt x)) |||
(p_char '\\' >>> p_int >>= fun x -> return (`Colspan x)) |||
(p_char '/' >>> p_int >>= fun x -> return (`Rowspan x)) in
let add (celltype, topts, ((colspan, rowspan) as cellspan)) = function
| `Head -> (Head, topts, cellspan)
| `Topt x -> (celltype, add_tableoption topts x, cellspan)
| `Colspan x -> (celltype, topts, (Some x, rowspan))
| `Rowspan x -> (celltype, topts, (colspan, Some x)) in
p_plusf option add default_celloptions in
let element c prev_level =
let bullet = p_many p_whitespace >>> c in
bullet >>>
p_upto_timesf prev_level
(p_many p_whitespace >>> c)
(fun l _ -> succ l) 1 >>= fun lvl ->
p_plus p_whitespace >>>
line >>= fun line ->
return (lvl, line) in
let get_element c prev_level x =
match Stream.peek stream with
| Some s ->
(element c prev_level >>= fun e ->
return (Stream.junk stream; e)) (s, 0)
| None -> Failed in
let get_elements c =
element (p_char c) 0 >>= fun ((f_e_lvl, _) as first_element) ->
p_manyf_arg
(fun (prev_lvl, elements) -> get_element (p_char c) prev_lvl)
(fun (_, acc) (lvl, line) -> lvl, (lvl, line)::acc)
(f_e_lvl, [first_element]) >>= fun (_, rev_elements) ->
return (List.rev (rev_elements)) in
let row peeks =
let peeks = ref peeks in
let get_cell =
let continue_cell x =
let rec loop acc cell_peeks x =
match peekn stream (!peeks + cell_peeks) with
| None -> Failed
| Some s ->
(collect
~what:all_phrases
~ended_with:(end_of_phrase |||
dont_jump (
p_many p_punct >>>
p_char '|' >>> return ()))
~from:0
~until:(
(p_char '|' >>> return true) |||
(p_end >>> return false)
) >>= function
| line, true ->
return (peeks := !peeks + (succ cell_peeks); List.rev (line::acc))
| line, false ->
loop (line::acc) (succ cell_peeks)
) (s, 0) in
loop [] 0 x in
p_opt default_celloptions (
celloptions >>= fun copts ->
p_str ". " >>>
return copts) >>= fun copts ->
(
(p_char '|' >>> return (empty_line, true)) |||
(current_pos >>= fun beg_of_line ->
collect
~what:all_phrases
~ended_with:(end_of_phrase |||
dont_jump (p_many p_punct >>> p_char '|' >>> return ()))
~from:beg_of_line
~until:(
(p_char '|' >>> return true) |||
(p_end >>> return false)
))
) >>= function
| first_line, true -> return (copts, [first_line])
| first_line, false -> continue_cell >>= fun lines ->
return (copts, first_line::lines) in
p_many p_whitespace >>>
p_opt default_tableoptions (
tableoptions_plus >>= fun topts ->
p_char '.' >>>
p_plus p_whitespace >>>
return topts) >>= fun topts ->
p_char '|' >>>
get_cell >>= fun first_cell ->
p_manyf_ends_with
get_cell
(fun acc x -> x :: acc)
[first_cell]
p_end >>= fun rev_cells ->
return (njunk stream !peeks; (topts, List.rev rev_cells)) in
let get_extra_rows =
p_seq
(fun _ ->
match Stream.peek stream with
| None -> Failed
| Some s -> row 1 (s, 0)) in
let get_rows =
row 0 >>= fun first_row ->
get_extra_rows >>= fun extra_rows ->
return (first_row::extra_rows) in
let get_block s =
(
(block_modifier >>= function
| `Textblock (bm, opts, extended) ->
let lines f = get_lines extended >>= fun r -> return (f r) in
let strings f = get_strings extended >>= fun r -> return (f r) in
(match bm with
| `Header lvl -> lines (fun x -> Header (lvl, (opts, x)))
| `Blockquote -> lines (fun x -> Blockquote (opts, x))
| `Footnote n -> lines (fun x -> Footnote (n, (opts, x)))
| `Blockcode -> strings (fun x -> Blockcode (opts, x))
| `Pre -> strings (fun x -> Pre (opts, x))
| `Blocknott -> strings (fun x -> Blocknott (opts, x))
| `Paragraph -> lines (fun x -> Paragraph (opts, x)))
| `Table topts ->
(get_extra_rows >>= function
| [] -> fail
| rows -> return (Table (topts, rows)))
) ||| (
get_rows >>= fun rows ->
return (Table (default_tableoptions, rows))
) ||| (
get_elements '*' >>= fun el -> return (Bulllist el)
) ||| (
get_elements '#' >>= fun el -> return (Numlist el)
) ||| (
get_lines false >>= fun lines ->
return (Paragraph (default_options, lines))
)
) (s, 0) >> function
| Parsed (r, _) -> r
| Failed -> assert false in
let rec next_block () =
try
match Stream.next stream with
| "" -> next_block ()
| fstr -> Some (get_block fstr)
with Stream.Failure -> None in
Stream.from (fun _ -> next_block ())