Cmarkit_base
Low-level internal tools.
module Dict : sig ... end
Heterogeneous dictionaries.
module Textloc : sig ... end
Text locations.
module Meta : sig ... end
Node metadata.
The type for line spans. A line position, the first and last bytes of the span. If the former is greater than the latter, the span is empty.
type line_start = Textloc.byte_pos
The type for denoting a line start inside a CommonMark container (i.e. may not match the text line's first character).
type rev_spans = (line_start * line_span) list
A reversed list of line spans, tupled with the byte position where the line starts (inside a CommonMark container). The line_start
is the start of line in the container, the line_span
has the actual data. The characters in the [line_start
;line_span.first - 1
] are blanks.
type 'a next_line = 'a -> ('a * line_span) option
The type for getting a new line of input. This is used by certain multi-line matchers (e.g. raw HTML).
module Ascii : sig ... end
US-ASCII matching.
prev_uchar s ~first ~before
is the first Unicode character before byte position before
in the range [first
;before-1
]. If before <= first
, U+0020 is returned (a Unicode whitespace character).
next_uchar s ~last ~after
is the next Unicode character after byte position after
in the range [after+1
;last
]. If after
>= last
, U+0020 is returned (a Unicode whitespace character).
module Text : sig ... end
Textual content.
type byte_pos = Textloc.byte_pos
The type for positions.
type first = Textloc.byte_pos
The type for the first byte position of a parsed construct.
type last = Textloc.byte_pos
The type for the last byte position of a parsed construct.
type next = Textloc.byte_pos
The type for a byte position after a parsed construct. The byte position may be invalid (end of input range) or on the newline.
run_of ~char s ~last ~start
is the last byte position of a consecutive run of char
in the range [start
;last
] or start - 1
if start
is not char
.
first_non_blank s ~last ~start
is the first byte position in the range [start
;last
] that is not blank and last + 1
if there is none.
first_non_blank_in_span s span
is first_non_blank s ~last:span.last ~start:span.first
.
last_non_blank s ~first ~start
is the last position in the range [first
;start
] that is non blank and first - 1
if there is none.
rev_drop_spaces s ~first ~start
is the last position in the range [first
;start
] that is not U+0020 and first - 1
if there is none.
val first_non_blank_over_nl :
next_line:'a next_line ->
string ->
'a ->
line:line_span ->
start:int ->
[ `None | `This_line of byte_pos | `Next_line of 'a * line_span * byte_pos ]
first_non_blank_over_nl ~next_line s lines ~line ~start
is the first byte position starting with start
that is not blank in line
or on the next line as determined by next_line
. Returns `None
if there is no such position.
first_non_escaped_char c s ~last ~start
is the first byte position in the range [start
;last
] that has c
unescaped and last + 1
if there is none.
autolink_email s ~last ~start
matches an email autolink starting at start
in the range [start
;last
] (assumed on the same line).
autolink_uri s ~last ~start
matches a URI autolink starting at start
in the range [start
;last
] (assumed on the same line).
val raw_html :
next_line:'a next_line ->
string ->
'a ->
line:line_span ->
start:byte_pos ->
('a * line_span * rev_spans * last) option
raw_html ~next_line s lines ~line ~start
matches raw HTML on line line
starting at start
. next_line
is used to get new lines on lines
. Returns Some (lines, last_line, spans,
last_byte)
with lines
the lines after consuming the raw HTML, last_line
the line where it stops, spans
the byte ranges of s
that make up the raw HTML in reverse order and last_byte
the last byte included in it (guaranteed to be on last_line
).
link_destination s ~last ~start
matches a link destination starting at start
in the range [start
;last
] (assumed on the same line). This is Some (delimited, first, last)
with the data in [first
;last
] the destination data. delimited
is true
if first-1
is '<' and last + 1
is '>'.
val link_title :
next_line:'a next_line ->
string ->
'a ->
line:line_span ->
start:byte_pos ->
('a * line_span * rev_spans * last) option
link_title ~next_line s lines ~line ~start
is a link title on line line
starting at start
. Returns Some (lines, last_line, spans, last)
with lines
the lines after consuming the title, last_line
the line where it stops, spans
the byte ranges of s
that make up the title in reverse order, last
is on the closing delimiter and guaranteed to be on last_line
.
val link_label :
Stdlib.Buffer.t ->
next_line:'a next_line ->
string ->
'a ->
line:line_span ->
start:byte_pos ->
('a * line_span * rev_spans * last * string) option
link_label buf ~next_line s lines ~line ~start
matches a link label on line line
starting at start
. The byte ranges have the label's content, the string is the normalized label. buf
is used as scratch space.
Unless otherwise noted start
is always after leading blanks.
The type for HTML block end conditions.
type line_type =
| Atx_heading_line of heading_level * byte_pos * first * last
| Blank_line
| Block_quote_line
| Fenced_code_block_line of first * last * (first * last) option
| Html_block_line of html_block_end_cond
| Indented_code_block_line
| List_marker_line of [ `Ordered of int * char | `Unordered of char ] * last
| Paragraph_line
| Setext_underline_line of heading_level * last
| Thematic_break_line of last
| Ext_table_row of last
| Ext_footnote_label of rev_spans * last * string
| Nomatch
thematic_break s ~last ~start
matches a thematic break in the range [start
;last
]. The returned position is the last non-blank.
atx_heading s ~last ~start
is an ATX heading in the range [start
;last
].
setext_heading_underline s ~last ~start
is a setext heading underline in the range [start
;last
]. The returned position is the last underline char.
fenced_code_block_start s ~last ~start
is the start of a fenced code block line in the range [start
;last
]. The first span is the fence and the second one is the info string (if any).
val fenced_code_block_continue :
fence:(char * int) ->
string ->
last:byte_pos ->
start:byte_pos ->
[ `Close of first * last | `Code ]
fenced_code_block_continue ~fence s ~last ~start
indicates whether the fenced code continues or closes in the range [start
;last
] given the opening fence
which indicates the indent, fence char and number of fence chars.
html_block_start s ~last ~start
matches the start of an HTML block starting at start
in the range [start
;last
] and on success returns the condition to end it.
val html_block_end :
end_cond:html_block_end_cond ->
string ->
last:byte_pos ->
start:byte_pos ->
bool
html_block_end ~end_cond s ~last ~start
is true
if the HTML block ends with end_cond
in the range [start
;last
]
ext_table s ~last ~start
matches a table row in the range [start
;last
]. The returned position is the rightmost |
.
val ext_footnote_label :
Stdlib.Buffer.t ->
string ->
line_pos:Textloc.line_pos ->
last:byte_pos ->
start:byte_pos ->
line_type
ext_footnote_label buf s ~line_pos ~last ~start
matches a footnote label in the range [start
;last
]. The returned position is the rightmost :
. This remains on the same line.
could_be_link_reference_definition s ~last ~start
is true
if the range [start
;last
] could hold a link reference definition.
list_marker s ~last ~start
is a list marker in the range [start
;last
]. This checks there's at least one space following unless the item is empty.