Module Cmarkit_base

Low-level internal tools.

module Dict : sig ... end

Heterogeneous dictionaries.

module Textloc : sig ... end

Text locations.

module Meta : sig ... end

Node metadata.

type line_span = {
  1. line_pos : Textloc.line_pos;
  2. first : Textloc.byte_pos;
  3. last : Textloc.byte_pos;
}

The type for line spans. A line position, the first and last bytes of the span. If the former is greater than the latter, the span is empty.

type line_start = Textloc.byte_pos

The type for denoting a line start inside a CommonMark container (i.e. may not match the text line's first character).

type rev_spans = (line_start * line_span) list

A reversed list of line spans, each paired with the byte position where the line starts (inside a CommonMark container). The line_start is the start of line in the container, the line_span has the actual data. The characters in the range [line_start;line_span.first - 1] are blanks.

type 'a next_line = 'a -> ('a * line_span) option

The type for getting a new line of input. This is used by certain multi-line matchers (e.g. raw HTML).

US-ASCII matching

module Ascii : sig ... end

US-ASCII matching.

Unicode matching

val prev_uchar : string -> first:int -> before:int -> Stdlib.Uchar.t

prev_uchar s ~first ~before is the first Unicode character before byte position before in the range [first;before-1]. If before <= first, U+0020 is returned (a Unicode whitespace character).

val next_uchar : string -> last:int -> after:int -> Stdlib.Uchar.t

next_uchar s ~last ~after is the next Unicode character after byte position after in the range [after+1;last]. If after >= last, U+0020 is returned (a Unicode whitespace character).

Textual content

module Text : sig ... end

Textual content.

Result types

type indent = int

The type for indentation magnitude.

type byte_pos = Textloc.byte_pos

The type for positions.

type first = Textloc.byte_pos

The type for the first byte position of a parsed construct.

type last = Textloc.byte_pos

The type for the last byte position of a parsed construct.

type next = Textloc.byte_pos

The type for a byte position after a parsed construct. The byte position may be invalid (end of input range) or on the newline.

type heading_level = int

The type for heading levels.

Newlines, runs, blanks and indents

val run_of : char:char -> string -> last:byte_pos -> start:byte_pos -> last

run_of ~char s ~last ~start is the last byte position of a consecutive run of char in the range [start;last] or start - 1 if the byte at start is not char.

val first_non_blank : string -> last:byte_pos -> start:byte_pos -> byte_pos

first_non_blank s ~last ~start is the first byte position in the range [start;last] that is not blank and last + 1 if there is none.

val first_non_blank_in_span : string -> line_span -> byte_pos

first_non_blank_in_span s span is first_non_blank s ~last:span.last ~start:span.first.

val last_non_blank : string -> first:byte_pos -> start:byte_pos -> byte_pos

last_non_blank s ~first ~start is the last position in the range [first;start] that is non blank and first - 1 if there is none.

val rev_drop_spaces : string -> first:byte_pos -> start:byte_pos -> byte_pos

rev_drop_spaces s ~first ~start is the last position in the range [first;start] that is not U+0020 and first - 1 if there is none.

val first_non_blank_over_nl : next_line:'a next_line -> string -> 'a -> line:line_span -> start:int -> [ `None | `This_line of byte_pos | `Next_line of 'a * line_span * byte_pos ]

first_non_blank_over_nl ~next_line s lines ~line ~start is the first byte position starting with start that is not blank in line or on the next line as determined by next_line on lines. Returns `None if there is no such position.

val first_non_escaped_char : char -> string -> last:byte_pos -> start:byte_pos -> byte_pos

first_non_escaped_char c s ~last ~start is the first byte position in the range [start;last] that has c unescaped and last + 1 if there is none.

autolink_email s ~last ~start matches an email autolink starting at start in the range [start;last] (assumed on the same line).

autolink_uri s ~last ~start matches a URI autolink starting at start in the range [start;last] (assumed on the same line).

Raw HTML

val raw_html : next_line:'a next_line -> string -> 'a -> line:line_span -> start:byte_pos -> ('a * line_span * rev_spans * last) option

raw_html ~next_line s lines ~line ~start matches raw HTML on line line starting at start. next_line is used to get new lines on lines. Returns Some (lines, last_line, spans, last_byte) with lines the lines after consuming the raw HTML, last_line the line where it stops, spans the byte ranges of s that make up the raw HTML in reverse order and last_byte the last byte included in it (guaranteed to be on last_line).

link_destination s ~last ~start matches a link destination starting at start in the range [start;last] (assumed on the same line). This is Some (delimited, first, last) with the data in [first;last] the destination data. delimited is true if first-1 is '<' and last + 1 is '>'.

link_title ~next_line s lines ~line ~start is a link title on line line starting at start. Returns Some (lines, last_line, spans, last) with lines the lines after consuming the title, last_line the line where it stops, spans the byte ranges of s that make up the title in reverse order, last is on the closing delimiter and guaranteed to be on last_line.

link_label buf ~next_line s lines ~line ~start matches a link label on line line starting at start. The byte ranges have the label's content, the string is the normalized label. buf is used as scratch space.

Leaf blocks

Unless otherwise noted start is always after leading blanks.

type html_block_end_cond = [
  1. | `End_str of string
  2. | `End_cond_1
  3. | `End_blank
  4. | `End_blank_7
]

The type for HTML block end conditions.

type line_type =
  1. | Atx_heading_line of heading_level * byte_pos * first * last
  2. | Blank_line
  3. | Block_quote_line
  4. | Fenced_code_block_line of first * last * (first * last) option
  5. | Html_block_line of html_block_end_cond
  6. | Indented_code_block_line
  7. | List_marker_line of [ `Ordered of int * char | `Unordered of char ] * last
  8. | Paragraph_line
  9. | Setext_underline_line of heading_level * last
  10. | Thematic_break_line of last
  11. | Ext_table_row of last
  12. | Ext_footnote_label of rev_spans * last * string
  13. | Nomatch
val thematic_break : string -> last:byte_pos -> start:byte_pos -> line_type

thematic_break s ~last ~start matches a thematic break in the range [start;last]. The returned position is the last non-blank.

val atx_heading : string -> last:byte_pos -> start:byte_pos -> line_type

atx_heading s ~last ~start is an ATX heading in the range [start;last].

val setext_heading_underline : string -> last:byte_pos -> start:byte_pos -> line_type

setext_heading_underline s ~last ~start is a setext heading underline in the range [start;last]. The returned position is the last underline char.

val fenced_code_block_start : string -> last:byte_pos -> start:byte_pos -> line_type

fenced_code_block_start s ~last ~start is the start of a fenced code block line in the range [start;last]. The first span is the fence and the second one is the info string (if any).

val fenced_code_block_continue : fence:(char * int) -> string -> last:byte_pos -> start:byte_pos -> [ `Close of first * last | `Code ]

fenced_code_block_continue ~fence s ~last ~start indicates whether the fenced code continues or closes in the range [start;last] given the opening fence, which indicates the fence char and the number of fence chars.

val html_block_start : string -> last:byte_pos -> start:byte_pos -> line_type

html_block_start s ~last ~start matches the start of an HTML block starting at start in the range [start;last] and on success returns the condition to end it.

val html_block_end : end_cond:html_block_end_cond -> string -> last:byte_pos -> start:byte_pos -> bool

html_block_end ~end_cond s ~last ~start is true if the HTML block ends with end_cond in the range [start;last].

val ext_table_row : string -> last:byte_pos -> start:byte_pos -> line_type

ext_table_row s ~last ~start matches a table row in the range [start;last]. The returned position is the rightmost |.

val ext_footnote_label : Stdlib.Buffer.t -> string -> line_pos:Textloc.line_pos -> last:byte_pos -> start:byte_pos -> line_type

ext_footnote_label buf s ~line_pos ~last ~start matches a footnote label in the range [start;last]. The returned position is the rightmost :. This remains on the same line.

could_be_link_reference_definition s ~last ~start is true if the range [start;last] could hold a link reference definition.

Container blocks

val list_marker : string -> last:byte_pos -> start:byte_pos -> line_type

list_marker s ~last ~start is a list marker in the range [start;last]. This checks there's at least one space following unless the item is empty.

val ext_task_marker : string -> last:byte_pos -> start:byte_pos -> (Stdlib.Uchar.t * last) option

ext_task_marker s ~last ~start is a list task item marker in the range [start;last].