(* $Id: uCharInfo.ml,v 1.22 2004/06/05 16:42:07 yori Exp $ *)
(* Copyright 2002 Yamagata Yoriyuki. distributed with LGPL *)

include Unidata

(* General category *)

let general_category_tbl : UCharTbl.Bits.t = read_data "general_category"
let general_category u =
  match UCharTbl.Bits.get general_category_tbl u with
    0 ->
      let n = UChar.uint_code u in
      if n >= 0x0f0000 && n <= 0x100000 then `Co else
      if n >= 0xe00000 && n <= 0xff0000 then `Co else
      if n >= 0x60000000 && n <= 0x7f000000 then `Co else `Cn
  | x -> cat_of_num x

let load_general_category_map () = read_data "general_category_map"

(* character property *)

type character_property_type =
  [ `Math				(*Derived Core Properties*)
  | `Alphabetic
  | `Lowercase
  | `Uppercase
  | `ID_Start
  | `ID_Continue
  | `XID_Start
  | `XID_Continue
  | `Default_Ignorable_Code_Point
  | `Grapheme_Extend
  | `Grapheme_Base
  | `Bidi_Control			(*Extended Properties*)
  | `White_Space
  | `Hyphen
  | `Quotation_Mark
  | `Terminal_Punctuation
  | `Other_Math
  | `Hex_Digit
  | `Ascii_Hex_Digit
  | `Other_Alphabetic
  | `Ideographic
  | `Diacritic
  | `Extender
  | `Other_Lowercase
  | `Other_Uppercase
  | `Noncharacter_Code_Point
  | `Other_Grapheme_Extend
  | `Grapheme_Link
  | `IDS_Binary_Operator
  | `IDS_Trinary_Operator
  | `Radical
  | `Unified_Ideograph
  | `Other_default_Ignorable_Code_Point
  | `Deprecated
  | `Soft_Dotted
  | `Logical_Order_Exception ]

let name_of_property p =
  match p with
    `Math -> "Math"  			
  | `Alphabetic -> "Alphabetic"
  | `Lowercase -> "Lowercase"
  | `Uppercase -> "Uppercase"
  | `ID_Start -> "ID_Start"
  | `ID_Continue -> "ID_Continue"
  | `XID_Start -> "XID_Start"
  | `XID_Continue -> "XID_Continue"
  | `Default_Ignorable_Code_Point -> "Default_Ignorable_Code_Point"
  | `Grapheme_Extend -> "Grapheme_Extend"
  | `Grapheme_Base -> "Grapheme_Base"
  | `Bidi_Control -> "Bidi_Control"			
  | `White_Space -> "White_Space"
  | `Hyphen -> "Hyphen"
  | `Quotation_Mark -> "Quotation_Mark"
  | `Terminal_Punctuation -> "Terminal_Punctuation"
  | `Other_Math -> "Other_Math"
  | `Hex_Digit -> "Hex_Digit"
  | `Ascii_Hex_Digit -> "Ascii_Hex_Digit"
  | `Other_Alphabetic -> "Other_Alphabetic"
  | `Ideographic -> "Ideographic"
  | `Diacritic -> "Diacritic"
  | `Extender -> "Extender"
  | `Other_Lowercase -> "Other_Lowercase"
  | `Other_Uppercase -> "Other_Uppercase"
  | `Noncharacter_Code_Point -> "Noncharacter_Code_Point"
  | `Other_Grapheme_Extend -> "Other_Grapheme_Extend"
  | `Grapheme_Link -> "Grapheme_Link"
  | `IDS_Binary_Operator -> "IDS_Binary_Operator"
  | `IDS_Trinary_Operator -> "IDS_Trinary_Operator"
  | `Radical -> "Radical"
  | `Unified_Ideograph -> "Unified_Ideograph"
  | `Other_default_Ignorable_Code_Point -> "Other_default_Ignorable_Code_Point"
  | `Deprecated -> "Deprecated"
  | `Soft_Dotted -> "Soft_Dotted"
  | `Logical_Order_Exception -> "Logical_Order_Exception"

let property_of_name : string -> character_property_type = function
    "Math" -> `Math
  | "Alphabetic"  -> `Alphabetic
  | "Lowercase" -> `Lowercase
  | "Uppercase" -> `Uppercase
  | "ID_Start" -> `ID_Start
  | "ID_Continue" -> `ID_Continue
  | "XID_Start" -> `XID_Start
  | "XID_Continue" -> `XID_Continue
  | "Default_Ignorable_Code_Point" -> `Default_Ignorable_Code_Point
  | "Grapheme_Extend" -> `Grapheme_Extend
  | "Grapheme_Base" -> `Grapheme_Base
  | "Bidi_Control" -> `Bidi_Control
  | "White_Space" -> `White_Space
  | "Hyphen" -> `Hyphen
  | "Quotation_Mark" -> `Quotation_Mark
  | "Terminal_Punctuation" -> `Terminal_Punctuation
  | "Other_Math" -> `Other_Math
  | "Hex_Digit" -> `Hex_Digit
  | "Ascii_Hex_Digit" -> `Ascii_Hex_Digit
  | "Other_Alphabetic" -> `Other_Alphabetic
  | "Ideographic" -> `Ideographic
  | "Diacritic" -> `Diacritic
  | "Extender" -> `Extender
  | "Other_Lowercase" -> `Other_Lowercase
  | "Other_Uppercase" -> `Other_Uppercase
  | "Noncharacter_Code_Point" -> `Noncharacter_Code_Point
  | "Other_Grapheme_Extend" -> `Other_Grapheme_Extend
  | "Grapheme_Link" -> `Grapheme_Link
  | "IDS_Binary_Operator" -> `IDS_Binary_Operator
  | "IDS_Trinary_Operator" -> `IDS_Trinary_Operator
  | "Radical" -> `Radical
  | "Unified_Ideograph" -> `Unified_Ideograph
  | "Other_default_Ignorable_Code_Point" -> `Other_default_Ignorable_Code_Point
  | "Deprecated" -> `Deprecated
  | "Soft_Dotted" -> `Soft_Dotted
  | "Logical_Order_Exception" -> `Logical_Order_Exception
  | _ -> raise Not_found

let loaded_props = Hashtbl.create 0

let load_property_tbl p = 
  try 
    let b = Hashtbl.find loaded_props p in
    match Weak.get b 0 with
      None -> 
	Hashtbl.remove loaded_props p;
	raise Not_found
    | Some x -> x
  with Not_found ->
    let tbl = read_data (name_of_property p) in
    let b = Weak.create 1 in
    Weak.set b 0 (Some tbl);
    Hashtbl.add loaded_props p b;
    tbl

let load_property_tbl_by_name s = 
   load_property_tbl (property_of_name s)

let loaded_prop_sets = Hashtbl.create 0

let load_property_set p = 
  try 
    let b = Hashtbl.find loaded_prop_sets p in
    match Weak.get b 0 with
      None -> 
	Hashtbl.remove loaded_prop_sets p;
	raise Not_found
    | Some x -> x
  with Not_found ->
    let tbl = read_data ((name_of_property p) ^ "_set") in
    let b = Weak.create 1 in
    Weak.set b 0 (Some tbl);
    Hashtbl.add loaded_prop_sets p b;
    tbl

let load_property_set_by_name s =
  load_property_set (property_of_name s)

(* Scripts *)

let script_tbl : UCharTbl.Bits.t = read_data "scripts"

let script u = script_of_num (UCharTbl.Bits.get script_tbl u)
let load_script_map () = read_data "scripts_map"

(* Casing *)

let cache = Weak.create 3

let load_to_lower1_tbl () =
  match Weak.get cache 0 with
    Some t -> t
  | None ->
      let t = read_data "to_lower1" in
      Weak.set cache 0 (Some t);
      t
      
let load_to_upper1_tbl () =
  match Weak.get cache 1 with
    Some t -> t
  | None ->
      let t = read_data "to_upper1" in
      Weak.set cache 1 (Some t);
      t

let load_to_title1_tbl () =
  match Weak.get cache 2 with
    Some t -> t
  | None ->
      let t = read_data "to_title1" in
      Weak.set cache 2 (Some t);
      t

type casemap_condition =
  [ `Locale of string
  | `FinalSigma
  | `AfterSoftDotted
  | `MoreAbove
  | `BeforeDot
  | `Not of casemap_condition ]

type special_casing_property =
  {lower : UChar.t list;
  title : UChar.t list;
  upper : UChar.t list;
  condition : casemap_condition list;} 

let cache = Weak.create 1

let load_conditional_casing_tbl () =
  match Weak.get cache 0 with
    Some t -> t
  | None ->
      let t = read_data "special_casing" in
      Weak.set cache 0 (Some t);
      t

let cache = Weak.create 1

let load_casefolding_tbl () =
  match Weak.get cache 0 with
    Some t -> t
  | None ->
      let t = read_data "case_folding" in
      Weak.set cache 0 (Some t);
      t

(* Combined class *)

let combined_class_tbl : UCharTbl.Char.t =
  read_data "combined_class"

let combined_class u = Char.code (UCharTbl.Char.get combined_class_tbl u)

(* Decomposition *)

let cache = Weak.create 1

let load_decomposition_tbl () =
  match Weak.get cache 0 with
    Some t -> t
  | None ->
      let t = read_data "decomposition" in
      Weak.set cache 0 (Some t);
      t

(* Composition *)

let cache = Weak.create 1

let load_composition_tbl () =
  match Weak.get cache 0 with
    Some t -> t
  | None ->
      let t = read_data "composition" in
      Weak.set cache 0 (Some t);
      t

let cache = Weak.create 1

let load_composition_exclusion_tbl () = 
  match Weak.get cache 0 with
    Some t -> t
  | None ->
      let t = read_data "composition_exclusion" in
      Weak.set cache 0 (Some t);
      t
