module Jsonm:sig
..end
Jsonm
is a non-blocking streaming codec to
decode and encode the
JSON data format. It can
process JSON text without blocking on IO and without a complete
in-memory representation of the data.
The uncut codec also processes whitespace and (non-standard) JSON with JavaScript comments.
Consult the data model, limitations and examples of use.
v1.0.1-1-g57d84fe - homepage
type lexeme =
[ `Ae
| `As
| `Bool of bool
| `Float of float
| `Name of string
| `Null
| `Oe
| `Os
| `String of string ]
`As
and `Ae
start and end arrays and `Os
and `Oe
start
and end objects. `Name
is for the member names of objects.
A well-formed sequence of lexemes belongs to the language of
the json
grammar:
json = value
object = `Os *member `Oe
member = (`Name s) value
array = `As *value `Ae
value = `Null / `Bool b / `Float f / `String s / object / array
A decoder returns only well-formed sequences of
lexemes or `Error
s are returned. The
UTF-8,
UTF-16, UTF-16LE and
UTF-16BE encoding schemes are supported. The strings of decoded
`Name
and `String
lexemes are however always UTF-8 encoded. In
these strings, characters originally escaped in the input are in
their unescaped representation.
An encoder accepts only well-formed sequences
of lexemes or Invalid_argument
is raised. Only the UTF-8
encoding scheme is supported. The strings of encoded `Name
and
`String
lexemes are assumed to be immutable and must be UTF-8
encoded, this is not checked by the module. In these strings,
the delimiter characters U+0022
and U+005C
('"'
, '\'
)
as well as the control characters U+0000-U+001F
are automatically
escaped by the encoders, as mandated by the standard.
val pp_lexeme : Format.formatter -> [< lexeme ] -> unit
pp_lexeme ppf l
prints an unspecified non-JSON representation of l
on ppf
.
type error =
[ `Expected of
[ `Aval of bool
| `Comment
| `Eoi
| `Json
| `Name
| `Name_sep
| `Omem of bool
| `Value ]
| `Illegal_BOM
| `Illegal_bytes of string
| `Illegal_escape of
[ `Lone_hi_surrogate of int
| `Lone_lo_surrogate of int
| `Not_esc_uchar of Uchar.t
| `Not_hex_uchar of Uchar.t
| `Not_lo_surrogate of int ]
| `Illegal_literal of string
| `Illegal_number of string
| `Illegal_string_uchar of Uchar.t
| `Unclosed of [ `As | `Comment | `Os | `String ] ]
val pp_error : Format.formatter -> [< error ] -> unit
pp_error ppf e
prints an unspecified UTF-8 representation of e
on ppf
.
type encoding =
[ `UTF_16 | `UTF_16BE | `UTF_16LE | `UTF_8 ]
type src =
[ `Channel of Pervasives.in_channel | `Manual | `String of string ]
`Manual
source the client
must provide input with Jsonm.Manual.src
.type
decoder
val decoder : ?encoding:[< encoding ] -> [< src ] -> decoder
decoder encoding src
is a JSON decoder that inputs from src
.
encoding
specifies the character encoding of the data. If unspecified
the encoding is guessed as
suggested by
the old RFC4627 standard.val decode : decoder ->
[> `Await | `End | `Error of error | `Lexeme of lexeme ]
decode d
is:
`Await
if d
has a `Manual
source and awaits for more input.
The client must use Jsonm.Manual.src
to provide it.`Lexeme l
if a lexeme l
was decoded.`End
if the end of input was reached.`Error e
if a decoding error occurred. If the client is interested
in a best-effort decoding it can still continue to decode
after an error (see Error recovery) although the resulting sequence
of `Lexeme
s is undefined and may not be well-formed.
The Jsonm.Uncut.pp_decode
function can be used to inspect decode results.
Note. Repeated invocation always eventually returns `End
, even
in case of errors.
val decoded_range : decoder -> (int * int) * (int * int)
decoded_range d
is the range of characters spanning the last
`Lexeme
or `Error
(or `White
or `Comment
for a
Jsonm.Uncut.decode
) decoded by d
. A pair of line and column numbers
respectively one and zero based.val decoder_encoding : decoder -> encoding
decoder_encoding d
is d
's encoding.
Warning. If the decoder guesses the encoding, rely on this
value only after the first `Lexeme
was decoded.
val decoder_src : decoder -> src
decoder_src d
is d
's input source.typedst =
[ `Buffer of Buffer.t | `Channel of Pervasives.out_channel | `Manual ]
`Manual
destination the
client must provide output storage with Jsonm.Manual.dst
.type
encoder
val encoder : ?minify:bool -> [< dst ] -> encoder
encoder minify dst
is an encoder that outputs to dst
. If
minify
is true
(default) the output is made as compact as
possible, otherwise the output is indented. If you want better
control on whitespace use minify = true
and Jsonm.Uncut.encode
.val encode : encoder ->
[< `Await | `End | `Lexeme of lexeme ] -> [ `Ok | `Partial ]
encode e v
is:
`Partial
iff e
has a `Manual
destination and needs more
output storage. The client must use Jsonm.Manual.dst
to provide
a new buffer and then call Jsonm.encode
with `Await
until `Ok
is returned.`Ok
when the encoder is ready to encode a new `Lexeme
or `End
.
For `Manual
destinations, encoding `End
always returns `Partial
,
the client should as usual use Jsonm.Manual.dst
and continue with `Await
until `Ok
is returned at which point Jsonm.Manual.dst_rem
e
is guaranteed
to be the size of the last provided buffer (i.e. nothing was written).
Raises. Invalid_argument
if a non well-formed
sequence of lexemes is encoded or if `Lexeme
or `End
is
encoded after a `Partial
encode.
val encoder_dst : encoder -> dst
encoder_dst e
is e
's output destination.val encoder_minify : encoder -> bool
encoder_minify e
is true
if e
's output is minified.module Manual:sig
..end
module Uncut:sig
..end
Decoders parse valid JSON with the following limitations:
float
values.
This means that it can only represent integers exactly
in the interval [-2^53;2^53]. This is equivalent
to the constraints JavaScript has.
Pervasives.float_of_string
, is
used. In particular this parses number with leading zeros, which are
specifically prohibited by the standard.`String
, `Name
, `White
and `Comment
are limited by Sys.max_string_length
. There is no built-in
protection against the fact that the internal OCaml Buffer.t
value may raise Failure
on Jsonm.decode
. This should
however only be a problem on 32-bits platforms if your
strings are greater than 16Mo.
Position tracking assumes that each decoded Unicode scalar value
has a column width of 1. The same assumption may not be made by
the display program (e.g. for emacs
' compilation mode you need
to set compilation-error-screen-columns
to nil
).
The newlines LF (U+000A
), CR (U+000D
), and CRLF are all normalized
to LF internally. This may have an impact in some corner `Error
cases. For example the invalid escape sequence <U+005C,U+000D>
in
a string will be reported as being `Illegal_escape (`Not_esc_uchar
0x000A)
.
Encoders produce valid JSON provided the client ensures that the following holds.
Jsonm
.`Float
lexemes must not be Pervasives.nan
,
Pervasives.infinity
or Pervasives.neg_infinity
. They
are encoded with the format string "%.16g"
, this allows
to roundtrip all the integers that can be precisely represented
in OCaml float
values, i.e. the integers in the interval
[-2^53;2^53]. This is equivalent to the constraints
JavaScript has.`White
must be made
of JSON whitespace and `Comment
must never be encoded.
After a decoding error, if best-effort decoding is performed, the following happens before continuing:
`Illegal_BOM
, the initial
BOM is skipped.`Illegal_bytes
, `Illegal_escape
, `Illegal_string_uchar
, a
Unicode
replacement
character (U+FFFD
) is substituted for the illegal sequence.`Illegal_literal
, `Illegal_number
the corresponding
`Lexeme
is skipped.`Expected r
, input is discarded until a synchronizing lexeme
that depends on r
is found.`Unclosed
, the end of input is reached, further decodes will be
`End
The result of trip src dst
has the JSON from src
written on dst
.
(* [trip ?encoding ?minify src dst] decodes the JSON text read from [src]
   and re-encodes each decoded lexeme on [dst].

   Evaluates to [`Ok] once the end of input has been encoded, or to
   [`Error (range, err)] on the first decoding error, where [range] is
   the decoder's [decoded_range] at that point. Results of [Jsonm.encode]
   are ignored. *)
let trip ?encoding ?minify
    (src : [`Channel of in_channel | `String of string])
    (dst : [`Channel of out_channel | `Buffer of Buffer.t])
  =
  let decoder = Jsonm.decoder ?encoding src in
  let encoder = Jsonm.encoder ?minify dst in
  (* Push one value to the encoder, discarding the encoder's answer. *)
  let emit v = ignore (Jsonm.encode encoder v) in
  let rec pump () =
    match Jsonm.decode decoder with
    | `Await -> assert false (* [src] is never a [`Manual] source here *)
    | `Error err -> `Error (Jsonm.decoded_range decoder, err)
    | `End -> emit `End; `Ok
    | `Lexeme _ as v -> emit v; pump ()
  in
  pump ()
Using the `Manual
interface, trip_fd
does the same but between Unix
file descriptors.
(* [trip_fd ?encoding ?minify fdi fdo] decodes JSON read from the file
   descriptor [fdi] and re-encodes it on the file descriptor [fdo], using
   a [`Manual] decoder and a [`Manual] encoder, each backed by its own
   65536-byte buffer.

   Evaluates to [`Ok] once [`End] has been fully encoded, or to
   [`Error (range, err)] on the first decoding error. [Unix.read] and
   [Unix.single_write] are retried when interrupted by [Unix.EINTR];
   any other [Unix.Unix_error] propagates to the caller. *)
let trip_fd ?encoding ?minify
    (fdi : Unix.file_descr)
    (fdo : Unix.file_descr)
  =
  (* Encode [v] on [e]. Whenever the encoder reports [`Partial], write the
     filled part of buffer [s] to [fd], hand the whole buffer back to the
     encoder and resume with [`Await] until [`Ok] is returned. *)
  let rec encode fd s e v = match Jsonm.encode e v with
  | `Ok -> ()
  | `Partial ->
      (* Write exactly [l] bytes of [s] starting at [j], looping over
         short writes. *)
      let rec unix_write fd s j l =
        let rec write fd s j l = try Unix.single_write fd s j l with
        | Unix.Unix_error (Unix.EINTR, _, _) -> write fd s j l
        in
        let wc = write fd s j l in
        if wc < l then unix_write fd s (j + wc) (l - wc) else ()
      in
      (* The filled part is the buffer length minus what the encoder
         reports as remaining. *)
      unix_write fd s 0 (Bytes.length s - Jsonm.Manual.dst_rem e);
      (* [s] is a [bytes] buffer: measure it with [Bytes.length], as is
         done everywhere else in this function. *)
      Jsonm.Manual.dst e s 0 (Bytes.length s);
      encode fd s e `Await
  in
  let rec loop fdi fdo ds es d e = match Jsonm.decode d with
  | `Lexeme _ as v -> encode fdo es e v; loop fdi fdo ds es d e
  | `End -> encode fdo es e `End; `Ok
  | `Error err -> `Error (Jsonm.decoded_range d, err)
  | `Await ->
      (* The decoder needs input: refill [ds] from [fdi] and hand the
         [rc] bytes actually read (possibly 0) to the decoder. *)
      let rec unix_read fd s j l = try Unix.read fd s j l with
      | Unix.Unix_error (Unix.EINTR, _, _) -> unix_read fd s j l
      in
      let rc = unix_read fdi ds 0 (Bytes.length ds) in
      Jsonm.Manual.src d ds 0 rc; loop fdi fdo ds es d e
  in
  let ds = Bytes.create 65536 (* UNIX_BUFFER_SIZE in 4.0.0 *) in
  let es = Bytes.create 65536 (* UNIX_BUFFER_SIZE in 4.0.0 *) in
  let d = Jsonm.decoder ?encoding `Manual in
  let e = Jsonm.encoder ?minify `Manual in
  (* Give the encoder its output buffer before the first encode. *)
  Jsonm.Manual.dst e es 0 (Bytes.length es);
  loop fdi fdo ds es d e
The result of memsel names src
is the list of string values of
members of src
that have their name in names
. In this example,
decoding errors are silently ignored.
(* [memsel ?encoding names src] decodes [src] and collects, in decoding
   order, the string value of every member whose name is in [names].
   A selected member whose next decode result is not a [`String] lexeme
   contributes nothing. Decoding errors are skipped silently. *)
let memsel ?encoding names
    (src : [`Channel of in_channel | `String of string])
  =
  let d = Jsonm.decoder ?encoding src in
  (* [found] accumulates the selected strings in reverse order. *)
  let rec collect found =
    match Jsonm.decode d with
    | `Await -> assert false (* [src] is never a [`Manual] source here *)
    | `End -> List.rev found
    | `Lexeme (`Name n) when List.mem n names ->
        (* A wanted member name: look at what is decoded right after it. *)
        (match Jsonm.decode d with
         | `Lexeme (`String s) -> collect (s :: found)
         | _ -> collect found)
    | `Lexeme _ | `Error _ -> collect found
  in
  collect []
A generic OCaml representation of JSON text is the following one.
(* Generic tree representation of a JSON text: [`A] holds the elements
   of an array, [`O] the (member name, value) pairs of an object. *)
type json =
  [ `Null
  | `Bool of bool
  | `Float of float
  | `String of string
  | `A of json list
  | `O of (string * json) list ]
The result of json_of_src src
is the JSON text from src
in this
representation. The function is tail recursive.
(* Raised internally by [json_of_src] to abort decoding on the first
   error; carries the decoded range and the decoding error. *)
exception Escape of ((int * int) * (int * int)) * Jsonm.error

(* [json_of_src ?encoding src] decodes one JSON value from [src].

   Evaluates to [`JSON v] on success and to [`Error (range, err)] on the
   first decoding error. The value is built in continuation-passing
   style: [k] receives each completed value together with the decoder. *)
let json_of_src ?encoding
    (src : [`Channel of in_channel | `String of string])
  =
  (* Next lexeme of [d]; decoding errors escape through [Escape]. *)
  let next d = match Jsonm.decode d with
  | `Lexeme lex -> lex
  | `Error err -> raise (Escape (Jsonm.decoded_range d, err))
  | `End | `Await -> assert false
  in
  let rec value lex k d = match lex with
  | `Null | `Bool _ | `String _ | `Float _ as atom -> k atom d
  | `As -> arr [] k d
  | `Os -> obj [] k d
  | _ -> assert false
  (* [rev_items] holds the array elements decoded so far, newest first. *)
  and arr rev_items k d = match next d with
  | `Ae -> k (`A (List.rev rev_items)) d
  | lex -> value lex (fun item -> arr (item :: rev_items) k) d
  (* [rev_members] holds the object members decoded so far, newest first. *)
  and obj rev_members k d = match next d with
  | `Oe -> k (`O (List.rev rev_members)) d
  | `Name name ->
      let lex = next d in
      value lex (fun v -> obj ((name, v) :: rev_members) k) d
  | _ -> assert false
  in
  let d = Jsonm.decoder ?encoding src in
  match value (next d) (fun v _ -> v) d with
  | v -> `JSON v
  | exception Escape (range, err) -> `Error (range, err)
The result of json_to_dst dst json
has the JSON text json
written
on dst
. The function is tail recursive.
(* [json_to_dst ~minify dst json] encodes [json] on [dst] as a sequence
   of lexemes followed by [`End]. Results of [Jsonm.encode] are ignored.

   The traversal is written in continuation-passing style: [k] is what
   remains to be encoded once the current value has been emitted. *)
let json_to_dst ~minify
    (dst : [`Channel of out_channel | `Buffer of Buffer.t ])
    (json : json)
  =
  (* Encode a single lexeme on [e]. *)
  let emit e lexeme = ignore (Jsonm.encode e (`Lexeme lexeme)) in
  let rec value v k e = match v with
  | `Null | `Bool _ | `Float _ | `String _ as atom -> emit e atom; k e
  | `A items -> emit e `As; elements items k e
  | `O members -> emit e `Os; bindings members k e
  (* Remaining array elements, then the closing [`Ae]. *)
  and elements items k e = match items with
  | [] -> emit e `Ae; k e
  | item :: rest -> value item (elements rest k) e
  (* Remaining object members, then the closing [`Oe]. *)
  and bindings members k e = match members with
  | [] -> emit e `Oe; k e
  | (name, v) :: rest -> emit e (`Name name); value v (bindings rest k) e
  in
  let e = Jsonm.encoder ~minify dst in
  let close e = ignore (Jsonm.encode e `End) in
  value json close e