UTF-8 ⇔ UTF-16 変換関数 (OCaml)
UTF-8とUTF-16の文字列を相互変換する関数を作成した。
リスト処理を贅沢に使っているので,遅いかもしれない。
open ExtString

(** [utf8_to_utf16be s] converts the UTF-8 string [s] to big-endian UTF-16,
    prefixed with a byte order mark (the two bytes [0xFE 0xFF]).

    One- to three-byte sequences yield a single 16-bit unit; four-byte
    sequences (code points U+10000..U+10FFFF) yield a surrogate pair.
    Three-byte sequences that encode surrogate values are passed through
    unchanged, so CESU-8 style input still produces the matching UTF-16 units.

    @raise Invalid_argument carrying [s] when [s] contains an invalid lead
    byte, a missing or malformed continuation byte, or a four-byte sequence
    whose value lies outside U+10000..U+10FFFF. *)
let utf8_to_utf16be s =
  let len = String.length s in
  (* Every input byte produces at most two output bytes, plus the BOM. *)
  let out = Buffer.create ((2 * len) + 2) in
  let malformed () = raise (Invalid_argument s) in
  (* Append one 16-bit code unit, most significant byte first. *)
  let add_unit u =
    Buffer.add_char out (char_of_int ((u lsr 8) land 0xff));
    Buffer.add_char out (char_of_int (u land 0xff))
  in
  (* Payload (low six bits) of the continuation byte at index [i]; fails on
     truncated input and on bytes that are not of the form 10xxxxxx. *)
  let continuation i =
    if i >= len then malformed ()
    else
      let b = int_of_char s.[i] in
      if (b land 0xc0) = 0x80 then b land 0x3f else malformed ()
  in
  let rec loop i =
    if i < len then begin
      let b0 = int_of_char s.[i] in
      if (b0 land 0x80) = 0x00 then begin
        (* 0xxxxxxx: ASCII *)
        add_unit b0;
        loop (i + 1)
      end
      else if (b0 land 0xe0) = 0xc0 then begin
        (* 110xxxxx 10xxxxxx *)
        let c1 = continuation (i + 1) in
        add_unit (((b0 land 0x1f) lsl 6) lor c1);
        loop (i + 2)
      end
      else if (b0 land 0xf0) = 0xe0 then begin
        (* 1110xxxx 10xxxxxx 10xxxxxx *)
        let c1 = continuation (i + 1) in
        let c2 = continuation (i + 2) in
        add_unit (((b0 land 0x0f) lsl 12) lor (c1 lsl 6) lor c2);
        loop (i + 3)
      end
      else if (b0 land 0xf8) = 0xf0 then begin
        (* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: needs a surrogate pair *)
        let c1 = continuation (i + 1) in
        let c2 = continuation (i + 2) in
        let c3 = continuation (i + 3) in
        let cp =
          ((b0 land 0x07) lsl 18) lor (c1 lsl 12) lor (c2 lsl 6) lor c3
        in
        if cp < 0x10000 || cp > 0x10ffff then malformed ();
        let v = cp - 0x10000 in
        add_unit (0xd800 lor (v lsr 10));
        add_unit (0xdc00 lor (v land 0x3ff));
        loop (i + 4)
      end
      else
        (* Stray continuation byte or a lead byte of 0xF8 and above. *)
        malformed ()
    end
  in
  add_unit 0xfeff;
  loop 0;
  Buffer.contents out

(** [utf16be_to_utf8 s] converts big-endian UTF-16 with a leading byte order
    mark (bytes [0xFE 0xFF]) to UTF-8. The byte order mark itself is dropped.

    When [s] does not start with the byte order mark it is returned
    unchanged. A high surrogate immediately followed by a low surrogate is
    combined into one code point and written as a four-byte sequence. An
    unpaired surrogate is written as a three-byte sequence rather than
    rejected.

    @raise Invalid_argument carrying [s] when the data after the byte order
    mark has an odd number of bytes. *)
let utf16be_to_utf8 s =
  let len = String.length s in
  if len < 2 || s.[0] <> '\xfe' || s.[1] <> '\xff' then s
  else begin
    (* The BOM is two bytes, so an odd total means a dangling half unit. *)
    if (len land 1) <> 0 then raise (Invalid_argument s);
    let out = Buffer.create len in
    let add b = Buffer.add_char out (char_of_int b) in
    (* 16-bit code unit stored at byte offset [i]. *)
    let unit_at i = (int_of_char s.[i] lsl 8) lor int_of_char s.[i + 1] in
    (* Append the UTF-8 encoding of [cp] (0 .. 0x10FFFF). *)
    let add_code_point cp =
      if cp <= 0x007f then add cp
      else if cp <= 0x07ff then begin
        add (0xc0 lor (cp lsr 6));
        add (0x80 lor (cp land 0x3f))
      end
      else if cp <= 0xffff then begin
        add (0xe0 lor (cp lsr 12));
        add (0x80 lor ((cp lsr 6) land 0x3f));
        add (0x80 lor (cp land 0x3f))
      end
      else begin
        add (0xf0 lor (cp lsr 18));
        add (0x80 lor ((cp lsr 12) land 0x3f));
        add (0x80 lor ((cp lsr 6) land 0x3f));
        add (0x80 lor (cp land 0x3f))
      end
    in
    let rec loop i =
      if i < len then begin
        let u = unit_at i in
        (* Low surrogate completing a pair that starts at [i], if any. *)
        let low =
          if u >= 0xd800 && u <= 0xdbff && i + 3 < len then
            let l = unit_at (i + 2) in
            if l >= 0xdc00 && l <= 0xdfff then Some l else None
          else None
        in
        match low with
        | Some l ->
          add_code_point
            (0x10000 + (((u land 0x3ff) lsl 10) lor (l land 0x3ff)));
          loop (i + 4)
        | None ->
          add_code_point u;
          loop (i + 2)
      end
    in
    loop 2;
    Buffer.contents out
  end