# 1 "camomileLibrary.mlp"
module ConfigInt = ConfigInt
module DefaultConfig = CamomileDefaultConfig
(** Individual modules *)
module OOChannel = OOChannel
module UChar = UChar
module USet = USet
module UMap = UMap
module UCharTbl = UCharTbl
module UnicodeString = UnicodeString
module UText = UText
module XString = XString
module SubText = SubText
module ULine = ULine
module Locale = Locale
module UTF8 = UTF8
module UTF16 = UTF16
module UCS4 = UCS4
module UPervasives = UPervasives
module URe = URe
module CharEncoding = CharEncoding
module UCharInfo = UCharInfo
module UNF = UNF
module UCol = UCol
module CaseMap = CaseMap
module UReStr = UReStr
module StringPrep = StringPrep
(** All-in-one, configure once at beginning module*)
module type Type = sig
module OOChannel : sig
# 1 "Camomile/public/oOChannel.mli"
# 1 "Camomile/public/oOChannel.mli"
(** Object Oriented Channel *)
(* Copyright (C) 2002, 2003, 2010 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Generic input channel
Have the same interface of Polymorphic input channel of
http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must confirm
the behaviour defined in the recommendation above.
*)
class type ['a] obj_input_channel =
object
method close_in : unit -> unit
method get : unit -> 'a
end
(** Generic output channel
Have the same interface of Polymorphic output channel of
http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must confirm
the behaviour defined in the recommendation above.
*)
class type ['a] obj_output_channel =
object
(** If close_oout cannot output all buffered objects, flush raises
Failure *)
method close_out : unit -> unit
(** If flush cannot output all buffered objects, flush raises
Failure *)
method flush : unit -> unit
method put : 'a -> unit
end
(** Convert stream to obj_input_channel *)
class ['a] channel_of_stream : 'a Stream.t -> ['a] obj_input_channel
(** Convert obj_input_channel to stream *)
val stream_of_channel : 'a #obj_input_channel -> 'a Stream.t
(** Character(byte) input channel. Have the same interface of octet
input channel of http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must confirm the
behaviour defined in the recommendation above. In addition, all
channels are assumed to be blocking. If you supply a non-blocking
channel to Camomile API, the outcome is undefined.
*)
class type char_input_channel =
object
method input : Bytes.t -> int -> int -> int
method close_in : unit -> unit
end
(** Character(byte) output channel. Have the same interface of octet
input channel of http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must confirm the
behaviour defined in the recommendation above. In addition, all
channels are assumed to be blocking. If you supply a non-blocking
channel to Camomile API, the outcome is undefined.
*)
class type char_output_channel =
object
method output : Bytes.t -> int -> int -> int
method flush : unit -> unit
method close_out : unit -> unit
end
(** Convert a polymorphic input channel to a character input channel *)
class char_input_channel_of : char #obj_input_channel ->
char_input_channel
(** Convert a character input channel to a polymorphic input channel*)
class char_obj_input_channel_of : char_input_channel ->
[char] obj_input_channel
(** Convert a polymorphic output channel to a character output channel *)
class char_output_channel_of : char #obj_output_channel -> char_output_channel
(** Convert a character output channel to a polymorphic output channel *)
class char_obj_output_channel_of : char_output_channel ->
[char] obj_output_channel
(** Convert an OCaml input channel to an OO-based character input channel *)
class of_in_channel : Pervasives.in_channel -> char_input_channel
(** Convert an OCaml output channel to an OO-based character output channel *)
class of_out_channel : Pervasives.out_channel -> char_output_channel
# 60 "camomileLibrary.mlp"
end
module UChar : sig
# 1 "Camomile/public/uChar.mli"
# 1 "Camomile/public/uChar.mli"
(** Unicode (ISO-UCS) characters.
This module implements Unicode (actually ISO-UCS) characters. All
31-bit code points are allowed.
*)
(* Copyright (C) 2002, 2003, 2004 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Unicode characters. All 31bit code points are allowed.*)
type t
exception Out_of_range
(** [char_of u] returns the Latin-1 representation of [u].
If [u] can not be represented by Latin-1, raises Out_of_range *)
val char_of : t -> char
(** [of_char c] returns the Unicode character of the Latin-1 character [c] *)
val of_char : char -> t
(** [code u] returns the Unicode code number of [u].
If the value can not be represented by a positive integer,
raise Out_of_range *)
val code : t -> int
(** [code n] returns the Unicode character with the code number [n].
If n >= 2^32 or n < 0, raises [invalid_arg] *)
val chr : int -> t
(** [uint_code u] returns the Unicode code number of [u].
The returned int is unsigned, that is, on 32-bits platforms,
the sign bit is used for storing the 31-th bit of the code number. *)
external uint_code : t -> int = "%identity"
(** [chr_of_uint n] returns the Unicode character of the code number [n].
[n] is interpreted as unsigned, that is, on 32-bits platforms,
the sign bit is treated as the 31-th bit of the code number.
If n exceed 31-bits values, then raise [invalid_arg]. *)
val chr_of_uint : int -> t
(** Equality by code point comparison *)
val eq : t -> t -> bool
(** [compare u1 u2] returns,
a value > 0 if [u1] has a larger Unicode code number than [u2],
0 if [u1] and [u2] are the same Unicode character,
a value < 0 if [u1] has a smaller Unicode code number than [u2]. *)
val compare : t -> t -> int
(** Aliases of [type t] *)
type uchar = t
(** Alias of [uint_code] *)
val int_of : uchar -> int
(** Alias of [chr_of_uint] *)
val of_int : int -> uchar
# 64 "camomileLibrary.mlp"
end
module USet : sig
# 1 "Camomile/public/uSet.mli"
# 1 "Camomile/public/uSet.mli"
(** Sets of Unicode characters, implemented as sets of intervals.
The signature is mostly same to Set.S in stdlib *)
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
type t
val empty : t
val is_empty : t -> bool
val mem : UChar.t -> t -> bool
val add : UChar.t -> t -> t
(** [add_range u1 u2 s] adds the characters in the range [u1] - [u2]
to [s]. The range is determined by the code point order. *)
val add_range : UChar.t -> UChar.t -> t -> t
val singleton : UChar.t -> t
val remove : UChar.t -> t -> t
(** [remove_range u1 u2 s] removes the characters in the range [u1] - [u2]
from [s]. The range is determined by the code point order. *)
val remove_range : UChar.t -> UChar.t -> t -> t
val union : t -> t -> t
val inter : t -> t -> t
val diff : t -> t -> t
(** [compl s] returns the compliment of [s]. *)
val compl : t -> t
val compare : t -> t -> int
val equal : t -> t -> bool
val subset : t -> t -> bool
(** [from u s] returns the set of elements of [s]
whose code points are equal or greater than [u]. *)
val from : UChar.t -> t -> t
(** [after u s] returns the set of elements of [s]
whose code points are greater than [u]. *)
val after : UChar.t -> t -> t
(** [until u s] returns the set of elements of [s]
whose code points are equal or smaller than [u]. *)
val until : UChar.t -> t -> t
(** [until u s] returns the set of elements of [s]
whose code points are smaller than [u]. *)
val before : UChar.t -> t -> t
val iter : (UChar.t -> unit) -> t -> unit
(** [iter_range proc s] feeds the intervals contained in [s] to
[proc] in increasing order. The intervals given to [proc]
are always separated by the character not in [s]. *)
val iter_range : (UChar.t -> UChar.t -> unit) -> t -> unit
val fold : (UChar.t -> 'a -> 'a) -> t -> 'a -> 'a
(** [fold_range f s x] is equivalent to
[f u_i u_(i+1) (... (f u_3 u_4 (f u_1 u_2 x)))] if [s] is consisted of
the intervals [u1]-[u2], [u3]-[u4], ..., [u_i]-[u_(i + 1)]
in increasing order. The intervals given to [proc]
are always separated by the character not in [s]. *)
val fold_range : (UChar.t -> UChar.t -> 'a -> 'a) -> t -> 'a -> 'a
val for_all : (UChar.t -> bool) -> t -> bool
val exists : (UChar.t -> bool) -> t -> bool
val filter : (UChar.t -> bool) -> t -> t
val partition : (UChar.t -> bool) -> t -> t * t
val cardinal : t -> int
val elements : t -> UChar.t list
(** The list of the intervals contained in the set.
The returned intervals are always separated
by the character not in [s]. *)
val ranges : t -> (UChar.t * UChar.t) list
val min_elt : t -> UChar.t
val max_elt : t -> UChar.t
(** Returns a element roughly in the middle of the set.
It is not guaranteed to return the same element for
the sets with the same elements *)
val choose : t -> UChar.t
val uset_of_iset : ISet.t -> t
val iset_of_uset : t -> ISet.t
# 68 "camomileLibrary.mlp"
end
module UMap : sig
# 1 "Camomile/public/uMap.mli"
# 1 "Camomile/public/uMap.mli"
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Maps over Unicode characters. *)
type 'a t
val empty : 'a t
val is_empty : 'a t -> bool
(** [add ?eq u v m] returns the new map which is same to [m]
except it maps [u] to some value [v'] which satisfies [eq v v'].
If [eq] is not supplied, structural equality is used. *)
val add : ?eq:('a -> 'a -> bool) -> UChar.t -> 'a -> 'a t -> 'a t
(** [add ?eq u1 u2 v m] returns the new map which is same to [m]
except it maps characters in the range [u1]-[u2]
to some value [v'] which satisfies [eq v v'].
If [eq] is not supplied, structural equality is used. *)
val add_range : ?eq:('a -> 'a -> bool) ->
UChar.t -> UChar.t -> 'a -> 'a t -> 'a t
val find : UChar.t -> 'a t -> 'a
val remove : UChar.t -> 'a t -> 'a t
(** [remove_range u1 u2 m] removes [u1]-[u2] from the domain of [m] *)
val remove_range : UChar.t -> UChar.t -> 'a t -> 'a t
(** [from u m] restricts the domain of [m] to the characters whose
code points are equal or greater than [u]. *)
val from : UChar.t -> 'a t -> 'a t
(** [after u m] restricts the domain of [m] to the characters whose
code points are greater than [u]. *)
val after : UChar.t -> 'a t -> 'a t
(** [until u m] restricts the domain of [m] to the characters whose
code points are equal or smaller than [u]. *)
val until : UChar.t -> 'a t -> 'a t
(** [before u m] restricts the domain of [m] to the characters whose
code points are smaller than [u]. *)
val before : UChar.t -> 'a t -> 'a t
val mem : UChar.t -> 'a t -> bool
val iter : (UChar.t -> 'a -> unit) -> 'a t -> unit
(** [iter proc m] : For each contingent region [u1]-[u2]
that is mapped to a constant [v], [proc u1 u2 v] is called.
The order of call is determined by increasing order on [u1]. *)
val iter_range : (UChar.t -> UChar.t -> 'a -> unit) -> 'a t -> unit
(** [map ?eq f m] and [mapi ?eq f m] : Similar to [map] and [mapi]
in stdlib Map, but if the map [m'] is returned, it is only guaranteed
that [eq (find u m') (f (find u m ))] is true for [map] and
[eq (find u m') (f u (find u m ))] is true for [mapi]. If [eq] is
not specified, structural equality is used. *)
val map : ?eq:('b -> 'b -> bool) -> ('a -> 'b) -> 'a t -> 'b t
val mapi : ?eq:('b -> 'b -> bool) -> (UChar.t -> 'a -> 'b) -> 'a t -> 'b t
val fold : (UChar.t -> 'b -> 'a -> 'a) -> 'b t -> 'a -> 'a
(** [fold_range f m x] is equivalent to
[f u_(2n) u_(2n+1) v_n (... (f u_1 u_2 v_1 x))] where all characters in
the range [u_(2k)]-[u_(2k+1)] are mapped to [v_k] and
[u_1] < [u_3] < ... in code point order.
For each range [u_(2k)]-[u_(2k+1)] is separated by a character
which is not mapped to [v_k]. *)
val fold_range : (UChar.t -> UChar.t -> 'b -> 'a -> 'a) -> 'b t -> 'a -> 'a
(** Constant map.*)
val set_to_map : USet.t -> 'a -> 'a t
(** Domain. *)
val domain : 'a t -> USet.t
(** [map_to_set p m] returns the set of characters which are mapped
to values satisfying the predicate [p] by [m]. *)
val map_to_set : ('a -> bool) -> 'a t -> USet.t
val umap_of_imap : 'a IMap.t -> 'a t
val imap_of_umap : 'a t -> 'a IMap.t
# 72 "camomileLibrary.mlp"
end
module UCharTbl : sig
# 1 "Camomile/public/uCharTbl.mli"
# 1 "Camomile/public/uCharTbl.mli"
(** Fast lookup tables for Unicode. Accessible by constant time. *)
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Fast lookup tables. Accessible by constant time. *)
type 'a tbl
type 'a t = 'a tbl
val get : 'a tbl -> UChar.t -> 'a
module type Type = sig
type elt
type t = elt tbl
val get : elt tbl -> UChar.t -> elt
(** [of_map def m] creates the table which has the same value to [m].
The table returns [def] for the characters for which [m] is undefined. *)
val of_map : elt -> elt UMap.t -> t
end
(** Equality and hash are necessary for table generation. *)
module Make :
functor (H : Hashtbl.HashedType) -> (Type with type elt = H.t)
(** Tables for boolean values. *)
module Bool : sig
type t
val get : t -> UChar.t -> bool
val of_set : USet.t -> t
end
(** Tables for small (< 256, >=0) integers *)
module Bits : sig
type t
val of_map : int -> int UMap.t -> t
val get : t -> UChar.t -> int
end
(** Tables for integers. If integers are not span the whole 31-bit or
63-bit values, [Bytes.t] is more space efficient than [int tbl]. *)
module Bytes : sig
type t
val of_map : int -> int UMap.t -> t
val get : t -> UChar.t -> int
end
(** Tables for bytes. *)
module Char : sig
type t
val of_map : char -> char UMap.t -> t
val get : t -> UChar.t -> char
end
# 76 "camomileLibrary.mlp"
end
module UnicodeString : sig
# 1 "Camomile/public/unicodeString.mli"
# 1 "Camomile/public/unicodeString.mli"
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Signature for Unicode strings.
{!UText}, {!XString}, {!UTF8}, {!UTF16}, {!UCS4}
have matched signatures to UStorage
and satisfy the semantics described below. If users want to supply
their own Unicode strings, please design the module with the
following signature and properties. *)
module type Type = sig
(** The type of string. *)
type t
(** [get t i] : [i]-th character of the storage.*)
val get : t -> int -> UChar.t
(** [init len f] creates a new storage.
the returned storage has length [len], its nth-element is [f n].
[f] is called with integers [0 ... len - 1], only once for each integer.
The call is in the increasing order f 0, f 1, f 2, ... *)
val init : int -> (int -> UChar.t) -> t
(** The number of Unicode characters in the storage *)
val length : t -> int
(** locations in storages.*)
type index
(** [look t i] : The character in the location [i] of [t].*)
val look : t -> index -> UChar.t
(** [nth t n] : the location of the [n]-th character in [t].*)
val nth : t -> int -> index
(** [next x i, prev x i] :
The operation is valid if [i] points the valid element, i.e. the
returned value may point the location beyond valid elements by one.
If [i] does not point a valid element, the results are unspecified. *)
val next : t -> index -> index
val prev : t -> index -> index
(* [out_of_range t i] tests whether [i] is inside of [t]. *)
val out_of_range : t -> index -> bool
val iter : (UChar.t -> unit) -> t -> unit
(* Code point comparison *)
val compare : t -> t -> int
(** The location of the first character in the storage. *)
val first : t -> index
(** The location of the last character in the storage. *)
val last : t -> index
(** [move t i n] :
if [n] >= 0, then returns [n]-th character after [i] and
otherwise returns -[n]-th character before [i].
If there is no such character, or [i] does not point
a valid character, the result is unspecified. *)
val move : t -> index -> int -> index
(** [compare_index t i j] returns
a positive integer if [i] is the location placed after [j] in [t],
0 if [i] and [j] point the same location, and
a negative integer if [i] is the location placed before [j] in [t]. *)
val compare_index : t -> index -> index -> int
(** Character buffers. Similar to Buffer. *)
module Buf : sig
type buf
(** [create n] creates the buffer. [n] is used to determine
the initial size of the buffer. The meaning of [n] differs from
modules to modules. *)
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
end
# 80 "camomileLibrary.mlp"
end
module UText : sig
# 1 "Camomile/public/uText.mli"
# 1 "Camomile/public/uText.mli"
(** An implementation of Unicode string. *)
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** An implementation of Unicode string.
Internally, it uses integer array.
The semantics matches the description of UStorage. *)
(** Phantom type for distinguishing mutability *)
type mutability = [ `Mutable | `Immutable ]
type 'a text
type utext = [`Immutable] text
type ustring = [`Mutable] text
type t = utext
val utext_of_ustring : ustring -> utext
val ustring_of_utext : utext -> ustring
val get : 'a text -> int -> UChar.t
(** [set s i u] sets the [i]-th character in [s] to [u]. *)
val set : ustring -> int -> UChar.t -> unit
type index
val look : 'a text -> index -> UChar.t
val nth : 'a text -> int -> index
val first : 'a text -> index
val last : 'a text -> index
val out_of_range : 'a text -> index -> bool
val compare_index : 'a text -> index -> index -> int
val next : 'a text -> index -> index
val prev : 'a text -> index -> index
val move : 'a text -> index -> int -> index
val length : 'a text -> int
(** Conversion from Latin-1 strings. *)
val of_string : string -> utext
val init : int -> (int -> UChar.t) -> utext
val init_ustring : int -> (int -> UChar.t) -> ustring
(** The semantics of these function are similar to
the equivalents of string. *)
val make : int -> UChar.t -> ustring
val copy : ustring -> ustring
val sub : 'a text -> int -> int -> 'a text
val fill : ustring -> int -> int -> UChar.t -> unit
val blit : 'a text -> int -> ustring -> int -> int -> unit
val append : 'a text -> 'b text -> 'a text
val iter : (UChar.t -> unit) -> 'a text -> unit
val compare : 'a text -> 'b text -> int
module Buf : sig
type buf
(** [create n] creates the buffer which initially can contain
[n] Unicode characters. *)
val create : int -> buf
val contents : buf -> t
val contents_string : buf -> ustring
val length : buf -> int
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> 'a text -> unit
val add_buffer : buf -> buf -> unit
end
# 84 "camomileLibrary.mlp"
end
module XString : sig
# 1 "Camomile/public/xString.mli"
# 1 "Camomile/public/xString.mli"
(** eXtensible Unicode string.
The semantics matches the description of UStorage.
The detail may be going to change.*)
(* Copyright 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
type xstring
type t = xstring
val get : xstring -> int -> UChar.t
val set : xstring -> int -> UChar.t -> unit
val length : xstring -> int
val init : int -> (int -> UChar.t) -> xstring
type index
val look : xstring -> index -> UChar.t
val nth : xstring -> int -> index
val first : xstring -> index
val last : xstring -> index
val out_of_range : xstring -> index -> bool
val next : xstring -> index -> index
val prev : xstring -> index -> index
val move : xstring -> index -> int -> index
val compare_index : xstring -> index -> index -> int
val make : ?bufsize:int -> int -> UChar.t -> xstring
val clear : xstring -> unit
val reset : xstring -> unit
val copy : xstring -> xstring
val sub : xstring -> int -> int -> xstring
val add_char : xstring -> UChar.t -> unit
val add_text : xstring -> 'a UText.text -> unit
val add_xstring : xstring -> xstring -> unit
val shrink : xstring -> int -> unit
val append : xstring -> xstring -> xstring
val utext_of : xstring -> UText.t
val ustring_of : xstring -> UText.ustring
val iter : (UChar.t -> unit) -> xstring -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val length : buf -> int
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
# 88 "camomileLibrary.mlp"
end
module SubText : sig
# 1 "Camomile/public/subText.mli"
# 1 "Camomile/public/subText.mli"
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Sub-texts, parts of original (ur-) texts.
The signature and semantics matches those of UStorage. *)
module type Type = sig
type t
val get : t -> int -> UChar.t
val init : int -> (int -> UChar.t) -> t
val length : t -> int
type index
val look : t -> index -> UChar.t
val nth : t -> int -> index
val first : t -> index
val last : t -> index
val next : t -> index -> index
val prev : t -> index -> index
val move : t -> index -> int -> index
val out_of_range : t -> index -> bool
val compare_index : t -> index -> index -> int
val iter : (UChar.t -> unit) -> t -> unit
val compare : t -> t -> int
module Buf : sig
type buf
val create : int -> buf
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
(** The type of original texts. *)
type ur_text
(** The type of indexes of original texts. *)
type ur_index
(** [refer t i j] returns the part of [t] from [i] until [j].
The character pointed by [j] is not included in the result.
If [j] is equal to [i] or located before [j], the result is
an empty string. *)
val refer : ur_text -> ur_index -> ur_index -> t
(** [excerpt t] copies the contents of [t] as a new ur_text. *)
val excerpt : t -> ur_text
(** [context t] returns the tuple [(s, i, j)] such that
[t = refer s i j]. *)
val context : t -> ur_text * ur_index * ur_index
(** Conversion from indexes of sub-texts to ur_texts. *)
val ur_index_of : t -> index -> ur_index
end
module Make : functor (Text : UnicodeString.Type) ->
(Type with type ur_text = Text.t and type ur_index = Text.index)
# 92 "camomileLibrary.mlp"
end
module ULine : sig
# 1 "Camomile/public/uLine.mli"
# 1 "Camomile/public/uLine.mli"
(** Line IO *)
(* Copyright (C) 2003 Yamagata Yoriyuki. distributed with LGPL *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Line I/O, conversion of line separators. *)
open OOChannel
(** Line separators.
- [`CR] specifies carriage return.
- [`LF] specifies linefeed.
- [`CRLF] specifies the sequence of carriage return and linefeed.
- [`NEL] specifies next line (\u0085).
- [`LS] specifies Unicode line separator (\u2028).
- [`PS] specifies Unicode paragraph separator (\u2029). *)
type separator =
[ `CR
| `LF
| `CRLF
| `NEL
| `LS
| `PS ]
(** [new input separator input_obj] creates the new input channel object
{!OOChannel.obj_input_channel} which reads from [input_obj] and
converts line separators (all of CR, LF, CRLF, NEL, LS, PS) to
[separator]. *)
class input : separator ->
UChar.t #obj_input_channel -> [UChar.t] obj_input_channel
(** [new output separator output_obj] creates the new output channel
object {!OOChannel.obj_output_channel} which receives Unicode characters
and converts line separators (all of CR, LF, CRLF, NEL, LS, PS) to
[separator]. *)
class output : separator ->
UChar.t #obj_output_channel -> [UChar.t] obj_output_channel
module type Type = sig
type text
(** [new input_line input_obj] creates the new input channel object
{!OOChannel.obj_input_channel} which reads Unicode characters
from [input_obj] and output lines. All of CR, LF, CRLF, NEL, LS, PS,
as well as FF (formfeed) are recognised as a line separator. *)
class input_line : UChar.t #obj_input_channel -> [text] obj_input_channel
(** [new output_line ~sp output_obj] create the new output channel object
{!OOChannel.obj_output_channel} which output each line to [output_obj]
using [sp] as a line separator.
If [sp] is omitted, linefeed (LF) is used. *)
class output_line : ?sp:separator ->
UChar.t #obj_output_channel -> [text] obj_output_channel
end
module Make : functor (Text : UnicodeString.Type) ->
(Type with type text = Text.t)
# 96 "camomileLibrary.mlp"
end
module Locale : sig
# 1 "Camomile/public/locale.mli"
# 1 "Camomile/public/locale.mli"
(* Copyright (C) 2003 Yamagata Yoriyuki *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Camomile has a locale system similar to Java.
A locale is a string with a form as
"<LANG>_<COUNTRY>_<MODIFIER>..." where <LANG> is
a 2-letter ISO 639 language code, <COUNTRY> is a 2-letter ISO 3166
country code. Some field may not present. *)
(** Type of locales. *)
type t = string
(** [read root suffix reader locale]
reads locale information using [reader].
Locale data is supposed to reside in [root] directory with
the name [locale].[suffix].
[reader] takes [in_channel] as an argument and read data from in_channel.
If data is not found, then [reader] should raise Not_found.
If the file is not found or [reader] raises Not_found, then
more generic locales are tried.
For example, if fr_CA.[suffix] is not found, then [read] tries fr.[suffix].
If fr.[suffix] is also not found, then the file [root].[suffix] is tried.
Still the data is not found, then [Not_found] is raised. *)
val read : string -> string -> (in_channel -> 'a) -> string -> 'a
(** [contain loc1 loc2] :
If [loc1] is contained in [loc2] then true otherwise false.
For example, "fr" is contained in "fr_CA" while "en_CA"
does not contain "fr" *)
val contain : string -> string -> bool
# 100 "camomileLibrary.mlp"
end
module CharEncoding : CharEncoding.Interface
module UTF8 : sig
# 1 "Camomile/public/uTF8.mli"
# 1 "Camomile/public/uTF8.mli"
(** UTF-8 encoded Unicode strings. The type is normal string. *)
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** UTF-8 encoded Unicode strings. The type is normal string. *)
type t = string
exception Malformed_code
(** [validate s]
successes if s is valid UTF-8, otherwise raises Malformed_code.
Other functions assume strings are valid UTF-8, so it is prudent
to test their validity for strings from untrusted origins. *)
val validate : t -> unit
(* All functions below assume string are valid UTF-8. If not,
* the result is unspecified. *)
(** [get s n] returns [n]-th Unicode character of [s].
The call requires O(n)-time. *)
val get : t -> int -> UChar.t
(** [init len f]
returns a new string which contains [len] Unicode characters.
The i-th Unicode character is initialized by [f i] *)
val init : int -> (int -> UChar.t) -> t
(** [length s] returns the number of Unicode characters contained in s *)
val length : t -> int
(** Positions in the string represented by the number of bytes from the head.
The location of the first character is [0] *)
type index = int
(** [nth s n] returns the position of the [n]-th Unicode character.
The call requires O(n)-time *)
val nth : t -> int -> index
(** The position of the head of the first Unicode character. *)
val first : t -> index
(** The position of the head of the last Unicode character. *)
val last : t -> index
(** [look s i]
returns the Unicode character of the location [i] in the string [s]. *)
val look : t -> index -> UChar.t
(** [out_of_range s i]
tests whether [i] is a position inside of [s]. *)
val out_of_range : t -> index -> bool
(** [compare_index s i1 i2] returns
a value < 0 if [i1] is the position located before [i2],
0 if [i1] and [i2] points the same location,
a value > 0 if [i1] is the position located after [i2]. *)
val compare_index : t -> index -> index -> int
(** [next s i]
returns the position of the head of the Unicode character
located immediately after [i].
If [i] is inside of [s], the function always successes.
If [i] is inside of [s] and there is no Unicode character after [i],
the position outside [s] is returned.
If [i] is not inside of [s], the behaviour is unspecified. *)
val next : t -> index -> index
(** [prev s i]
returns the position of the head of the Unicode character
located immediately before [i].
If [i] is inside of [s], the function always successes.
If [i] is inside of [s] and there is no Unicode character before [i],
the position outside [s] is returned.
If [i] is not inside of [s], the behaviour is unspecified. *)
val prev : t -> index -> index
(** [move s i n]
returns [n]-th Unicode character after [i] if n >= 0,
[n]-th Unicode character before [i] if n < 0.
If there is no such character, the result is unspecified. *)
val move : t -> index -> int -> index
(** [iter f s]
applies [f] to all Unicode characters in [s].
The order of application is same to the order
of the Unicode characters in [s]. *)
val iter : (UChar.t -> unit) -> t -> unit
(** Code point comparison by the lexicographic order.
[compare s1 s2] returns
a positive integer if [s1] > [s2],
0 if [s1] = [s2],
a negative integer if [s1] < [s2]. *)
val compare : t -> t -> int
(** Buffer module for UTF-8 strings *)
module Buf : sig
(** Buffers for UTF-8 strings. *)
type buf
(** [create n] creates the buffer with the initial size [n]-bytes. *)
val create : int -> buf
(* The rest of functions is similar to the ones of Buffer in stdlib. *)
(** [contents buf] returns the contents of the buffer. *)
val contents : buf -> t
(** Empty the buffer,
but retains the internal storage which was holding the contents *)
val clear : buf -> unit
(** Empty the buffer and de-allocate the internal storage. *)
val reset : buf -> unit
(** Add one Unicode character to the buffer. *)
val add_char : buf -> UChar.t -> unit
(** Add the UTF-8 string to the buffer. *)
val add_string : buf -> t -> unit
(** [add_buffer b1 b2] adds the contents of [b2] to [b1].
The contents of [b2] is not changed. *)
val add_buffer : buf -> buf -> unit
end with type buf = Buffer.t
# 106 "camomileLibrary.mlp"
end
module UTF16 : sig
# 1 "Camomile/public/uTF16.mli"
# 1 "Camomile/public/uTF16.mli"
(* Copyright (C) 2002, 2003, Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** UTF-16 encoded string. the type is the bigarray of 16-bit integers.
The characters must be 21-bits code points, and not surrogate points,
0xfffe, 0xffff.
Bigarray.cma or Bigarray.cmxa must be linked when this module is used. *)
type t =
(int, Bigarray.int16_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
exception Malformed_code
(** [validate s]
If [s] is valid UTF-16 then successes otherwise raises [Malformed_code].
Other functions assume strings are valid UTF-16, so it is prudent
to test their validity for strings from untrusted origins. *)
val validate : t -> unit
(** All functions below assume strings are valid UTF-16. If not,
the result is unspecified. *)
(** [get s n] returns [n]-th Unicode character of [s].
The call requires O(n)-time. *)
val get : t -> int -> UChar.t
exception Out_of_range
(** [init len f]
returns a new string which contains [len] Unicode characters.
The i-th Unicode character is initialized by [f i]
if the character is not representable, raise [Out_of_range]. *)
val init : int -> (int -> UChar.t) -> t
(** [length s] returns the number of Unicode characters contained in s *)
val length : t -> int
(** Positions in the string represented by the number of 16-bit unit
from the head.
The location of the first character is [0] *)
type index = int
(** [nth s n] returns the position of the [n]-th Unicode character.
The call requires O(n)-time *)
val nth : t -> int -> index
(** [first s] : The position of the head of the last Unicode character. *)
val first : t -> index
(** [last s] : The position of the head of the last Unicode character. *)
val last : t -> index
(** [look s i ]
returns the Unicode character of the location [i] in the string [s]. *)
val look : t -> index -> UChar.t
(** [out_of_range s i] tests whether [i] is inside of [s]. *)
val out_of_range : t -> index -> bool
(** [compare_aux s i1 i2] returns
- If [i1] is the position located before [i2], a value < 0,
- If [i1] and [i2] points the same location, 0,
- If [i1] is the position located after [i2], a value > 0.
*)
val compare_index : t -> index -> index -> int
(** [next s i]
returns the position of the head of the Unicode character
located immediately after [i].
- If [i] is a valid position, the function always success.
- If [i] is a valid position and there is no Unicode character after [i],
the position outside [s] is returned.
- If [i] is not a valid position, the behaviour is undefined.
*)
val next : t -> index -> index
(** [prev s i]
returns the position of the head of the Unicode character
located immediately before [i].
- If [i] is a valid position, the function always success.
- If [i] is a valid position and there is no Unicode character before [i],
the position outside [s] is returned.
- If [i] is not a valid position, the behaviour is undefined.
*)
val prev : t -> index -> index
(* [move s i n]
- If n >= 0, returns [n]-th Unicode character after [i].
- If n < 0, returns [-n]-th Unicode character before [i].
0 If there is no such character, the result is unspecified.
*)
val move : t -> index -> int -> index
(** [iter f s]
Apply [f] to all Unicode characters in [s].
The order of application is same to the order
in the Unicode characters in [s]. *)
val iter : (UChar.t -> unit) -> t -> unit
(** Code point comparison *)
val compare : t -> t -> int
(** Buffer module for UTF-16 *)
module Buf : sig
type buf
(** create n : creates the buffer with the initial size [n]. *)
val create : int -> buf
(** The rest of functions is similar to the ones of Buffer in stdlib. *)
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
(** if the character is not representable, raise Out_of_range *)
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
# 110 "camomileLibrary.mlp"
end
module UCS4 : sig
# 1 "Camomile/public/uCS4.mli"
# 1 "Camomile/public/uCS4.mli"
(** UCS4 encoded string. The type is the bigarray of 32-bit integers.
Bigarray.cma or Bigarray.cmxa must be linked when this module is used. *)
(* Copyright (C) 2002, 2003, 2004 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
type t =
(int32, Bigarray.int32_elt, Bigarray.c_layout) Bigarray.Array1.t
exception Malformed_code
(** [validate s]
If [s] is valid UCS4 then successes otherwise raises [Malformed_code].
Other functions assume strings are valid UCS4, so it is prudent
to test their validity for strings from untrusted origins. *)
val validate : t -> unit
(** All functions below assume strings are valid UCS4. If not,
the result is unspecified. *)
(** [get s n] returns [n]-th Unicode character of [s]. *)
val get : t -> int -> UChar.t
(** [init len f]
returns a new string which contains [len] Unicode characters.
The i-th Unicode character is initialised by [f i] *)
val init : int -> (int -> UChar.t) -> t
(** [length s] returns the number of Unicode characters contained in [s] *)
val length : t -> int
(** Positions in the string represented by the number of characters
from the head.
The location of the first character is [0] *)
type index = int
(** [nth s n] returns the position of the [n]-th Unicode character.
The call requires O(n)-time *)
val nth : t -> int -> index
(** [first s] : The position of the head of the last Unicode character. *)
val first : t -> index
(** [last s] : The position of the head of the last Unicode character. *)
val last : t -> index
(** [look s i]
returns the Unicode character of the location [i] in the string [s]. *)
val look : t -> index -> UChar.t
(** [out_of_range s i]
tests whether [i] points the valid position of [s]. *)
val out_of_range : t -> index -> bool
(** [compare_aux s i1 i2] returns
If [i1] is the position located before [i2], a value < 0,
If [i1] and [i2] points the same location, 0,
If [i1] is the position located after [i2], a value > 0. *)
val compare_index : t -> index -> index -> int
(** [next s i]
returns the position of the head of the Unicode character
located immediately after [i].
If [i] is a valid position, the function always success.
If [i] is a valid position and there is no Unicode character after [i],
the position outside [s] is returned.
If [i] is not a valid position, the behaviour is undefined. *)
val next : t -> index -> index
(** [prev s i]
returns the position of the head of the Unicode character
located immediately before [i].
If [i] is a valid position, the function always success.
If [i] is a valid position and there is no Unicode character before [i],
the position outside [s] is returned.
If [i] is not a valid position, the behaviour is undefined. *)
val prev : t -> index -> index
(** [move s i n] :
If n >= 0, returns [n]-th Unicode character after [i].
If n < 0, returns [-n]-th Unicode character before [i].
If there is no such character, the result is unspecified. *)
val move : t -> index -> int -> index
(** [iter f s] :
Apply [f] to all Unicode characters in [s].
The order of application is same to the order
in the Unicode characters in [s]. *)
val iter : (UChar.t -> unit) -> t -> unit
(** Code point comparison *)
val compare : t -> t -> int
(** Buffer module for UCS4 *)
module Buf : sig
type buf
(** [create n] creates the buffer with the initial size [n]. *)
val create : int -> buf
(** The rest of functions is similar to the ones of Buffer in stdlib. *)
val contents : buf -> t
val clear : buf -> unit
val reset : buf -> unit
val add_char : buf -> UChar.t -> unit
val add_string : buf -> t -> unit
val add_buffer : buf -> buf -> unit
end
# 114 "camomileLibrary.mlp"
end
module UPervasives : sig
# 1 "Camomile/public/uPervasives.mli"
# 1 "Camomile/public/uPervasives.mli"
(** Functions for toplevel *)
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
type uchar = UChar.t
(** Aliases for UChar.uint_code, UChar.chr_of_uint *)
val int_of_uchar : uchar -> int
val uchar_of_int : int -> uchar
val escaped_uchar : uchar -> string
val escaped_utf8 : string -> string
val printer_utf8 : Format.formatter -> string -> unit
val printer_uchar : Format.formatter -> uchar -> unit
# 118 "camomileLibrary.mlp"
end
module URe : sig
# 1 "Camomile/public/uRe.mli"
# 1 "Camomile/public/uRe.mli"
(** Regular expression engine. *)
(* Copyright (C) 2003 Yamagata Yoriyuki. distributed with LGPL *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* You can contact the authour by sending email to *)
(* yori@users.sourceforge.net *)
(** Abstract syntax trees of regular expressions. *)
type regexp =
[ `Alt of regexp * regexp
| `Seq of regexp * regexp
| `Rep of regexp
| `Repn of regexp * int * int option
| `After of regexp
| `Before of regexp
| `Epsilon
| `Group of regexp
| `OneChar
| `String of UChar.t list
| `Set of USet.t
| `BoS
| `EoS ]
(** Match semantics. *)
type match_semantics = [ `First | `Shortest | `Longest ]
(** Remove [`Group] from the regular expressions. *)
val no_group : regexp -> regexp
module type Type = sig
type text
type index
type compiled_regexp
module SubText :
SubText.Type with type ur_text = text and type ur_index = index
(** Compile regular expressions. *)
val compile : regexp -> compiled_regexp
(** [regexp_match ?sem r t i] tries matching [r] and substrings
of [t] beginning from [i]. If match successes, [Some g] is
returned where [g] is the array containing the matched
string of [n]-th group in the [n]-element.
The matched string of the whole [r] is stored in the [0]-th element.
If matching fails, [None] is returned. *)
val regexp_match : ?sem:match_semantics ->
compiled_regexp -> text -> index -> SubText.t option array option
(** [string_match r t i] tests whether [r] can match a substring
of [t] beginning from [i]. *)
val string_match : compiled_regexp -> text -> index -> bool
(** [search_forward ?sem r t i] searches a substring of [t]
matching [r] from [i]. The returned value is similar to
{!URe.Type.regexp_match}. *)
val search_forward : ?sem:match_semantics ->
compiled_regexp -> text -> index -> SubText.t option array option
end
module Make : functor (Text : UnicodeString.Type) ->
Type with type text = Text.t and type index = Text.index
# 122 "camomileLibrary.mlp"
end
module UCharInfo : UCharInfo.Type
module UNF : sig
module type Type = UNF.Type
module Make (Text : UnicodeString.Type) :
Type with type text = Text.t and type index = Text.index
end
module UCol : sig
(** How variables are handled *)
type variable_option =
[ `Blanked
| `Non_ignorable
| `Shifted
| `Shift_Trimmed ]
(** Strength of comparison. For European languages, each strength
roughly means as
`Primary : Ignore accents and case
`Secondary : Ignore case but accents are counted in.
`Tertiary : Accents and case are counted in.
For the case of `Shifted, `Shift_Trimmed, there is the fourth strength.
`Quaternary : Variables such as - (hyphen) are counted in. *)
type precision = [ `Primary | `Secondary | `Tertiary | `Quaternary ]
module type Type = UCol.Type
module Make (Text : UnicodeString.Type) :
Type with type text = Text.t and type index = Text.index
end
module CaseMap : sig
module type Type = CaseMap.Type
module Make (Text : UnicodeString.Type) : (Type with type text = Text.t)
end
module UReStr : UReStr.Interface
module StringPrep : sig
module type Type = StringPrep.Type
module Make (Text : UnicodeString.Type) : (Type with type text = Text.t)
end
end
module Make(Config : ConfigInt.Type) = struct
module OOChannel = OOChannel
module UChar = UChar
module USet = USet
module UMap = UMap
module UCharTbl = UCharTbl
module UnicodeString = UnicodeString
module UText = UText
module XString = XString
module SubText = SubText
module ULine = ULine
module Locale = Locale
module CharEncoding = CharEncoding.Configure(Config)
module UTF8 = UTF8
module UTF16 = UTF16
module UCS4 = UCS4
module UPervasives = UPervasives
module URe = URe
module UCharInfo = UCharInfo.Make(Config)
module UNF = struct
module type Type = UNF.Type
module Make = UNF.Make(Config)
end
module UCol = struct
type variable_option =
[ `Blanked
| `Non_ignorable
| `Shifted
| `Shift_Trimmed ]
type precision = [ `Primary | `Secondary | `Tertiary | `Quaternary ]
module type Type = UCol.Type
module Make = UCol.Make(Config)
end
module CaseMap = struct
module type Type = CaseMap.Type
module Make = CaseMap.Make(Config)
end
module UReStr = UReStr.Configure(Config)
module StringPrep = struct
module type Type = StringPrep.Type
module Make = StringPrep.Make(Config)
end
end