/*
Classifies each character of a sample string by Chinese character set / Unicode block.

In AutoHotkey regular expressions, "double-byte" characters can be matched with
[^\x00-\xff], and a code-point range is written as [\x{start}-\x{end}].
For example, basic CJK ideographs can be matched with [\x{4E00}-\x{9FA5}]+
*/
str := "字集顚亾𢎁礱龑龖瘇龘〇㠩"
result := {}, chars := ""
GetStrLen(str, Array)

; Group every character under the label returned by CheckChineseCharset.
; Characters for which no label is returned (empty result) are skipped.
for key, value in Array
{
    if (Type := CheckChineseCharset(value))
    {
        if !result[Type]
            result[Type] := value "、"
        else
            result[Type] .= value "、"
    }
}

; Render one "label:{ a、b、c }" line per group.
for key, value in result
{
    if key
        chars .= key ":{ " Trim(value, "、") " }`n"
}
MsgBox % chars

; Chinese character set detection.
;   GBK    hanzi byte range -> 8140-FEFE
;   GB2312 hanzi byte range -> B0A1-F7FE
;
; String : a single character (one code point).
; Returns: "GB2312" or "GBK" when the character lies in one of the blocks that
;          are checked against code page 936 and its first two CP936 bytes fall
;          in the corresponding byte range; otherwise the label of the matching
;          Unicode block; "未知分类" for a single character in U+0000-U+00FF;
;          an empty string when no rule matches.
;
; Rule table columns: [regex pattern, label, check CP936 bytes first?]
; Labels, in order: 非中文 = not Chinese (fallback for U+4E00-U+9FFF),
; 扩A..扩H = CJK Extension A..H, 〇 = ideographic zero, 康熙部首 = Kangxi radicals,
; 部首扩展 = radicals supplement, CJK兼容汉字 = CJK compatibility ideographs,
; CJK兼容扩展 = CJK compatibility supplement, PUA(GBK)部件 = PUA (GBK) components,
; 部件扩展 = component extension, PUA增补 = PUA supplement, 汉字笔画 = CJK strokes,
; 汉字结构 = ideographic description characters, 汉语注音 = Bopomofo,
; 注音扩展 = Bopomofo extended, 未知分类 = unknown category.
CheckChineseCharset(String){
    ; Encode to code page 936. The buffer is zero-filled and two bytes larger
    ; than required so that reading the second byte is always well defined,
    ; even for an empty or single-byte result.
    VarSetCapacity(buf, StrPut(String, "CP936") + 2, 0)
    StrPut(String, &buf, "CP936")
    lead  := NumGet(&buf, 0, "UChar")
    trail := NumGet(&buf, 1, "UChar")

    rules := [ ["^[\x{4E00}-\x{9FFF}]$",   "非中文",       true ]
             , ["^[\x{3400}-\x{4DBF}]$",   "扩A",          false]
             , ["^[\x{20000}-\x{2A6DF}]$", "扩B",          false]
             , ["^[\x{2A700}-\x{2B73F}]$", "扩C",          false]
             , ["^[\x{2B740}-\x{2B81F}]$", "扩D",          false]
             , ["^[\x{2B820}-\x{2CEAF}]$", "扩E",          false]
             , ["^[\x{2CEB0}-\x{2EBEF}]$", "扩F",          false]
             , ["^[\x{30000}-\x{3134F}]$", "扩G",          false]
             , ["^[\x{31350}-\x{323BC}]$", "扩H",          false]
             , ["^[\x{3007}]$",            "〇",           true ]
             , ["^[\x{2F00}-\x{2FD5}]$",   "康熙部首",     true ]
             , ["^[\x{2E80}-\x{2EF3}]$",   "部首扩展",     true ]
             , ["^[\x{F900}-\x{FAD9}]$",   "CJK兼容汉字",  true ]
             , ["^[\x{2F800}-\x{2FA1D}]$", "CJK兼容扩展",  true ]
             , ["^[\x{E815}-\x{E86F}]$",   "PUA(GBK)部件", true ]
             , ["^[\x{E400}-\x{E5E8}]$",   "部件扩展",     true ]
             , ["^[\x{E600}-\x{E6CF}]$",   "PUA增补",      true ]
             , ["^[\x{31C0}-\x{31E3}]$",   "汉字笔画",     true ]
             , ["^[\x{2FF0}-\x{2FFB}]$",   "汉字结构",     true ]
             , ["^[\x{3105}-\x{312F}]$",   "汉语注音",     true ]
             , ["^[\x{31A0}-\x{31BA}]$",   "注音扩展",     true ]
             , ["^[\x00-\xff]$",           "未知分类",     false] ]

    ; The ranges do not overlap, so at most one rule can match.
    for index, rule in rules
    {
        if !(String ~= rule[1])
            continue
        if rule[3]
            return CheckChineseCharset_CP936Label(lead, trail, rule[2])
        return rule[2]
    }
    return
}

; Private helper: maps the first two CP936 bytes of a character to "GB2312" or
; "GBK", or returns `fallback` when the bytes lie in neither hanzi byte range.
CheckChineseCharset_CP936Label(lead, trail, fallback){
    if (lead >= 0xB0 && lead <= 0xF7 && trail >= 0xA1 && trail <= 0xFE)
        return "GB2312"
    if (lead >= 0x81 && lead <= 0xFE && trail >= 0x40 && trail <= 0xFE)
        return "GBK"
    return fallback
}

; Splits a string into its individual characters, ignoring whitespace.
;
; str : the text to split.
; obj : (ByRef, optional) receives an array with one element per character.
; Returns: the number of characters found.
;
; Each element is whatever a single regex "." matches, so a character stored as
; a surrogate pair stays in one element as long as the regex engine treats the
; pair as one character (the \x{20000}+ ranges used above rely on the same
; behaviour).
GetStrLen(str, ByRef obj:="")
{
    str := RegExReplace(str, "[\s\t\r\n]")
    obj := []
    pos := 1
    ; Walk the string match by match instead of inserting a sentinel separator,
    ; so no input character can be mistaken for the separator.
    while (pos := RegExMatch(str, ".", ch, pos))
    {
        obj.Push(ch)
        pos += StrLen(ch)
    }
    return obj.Length()
}
声明:站内资源为整理优化好的代码上传分享与学习研究,如果是开源代码基本都会标明出处,方便大家扩展学习路径。请不要恶意搬运,破坏站长辛苦整理维护的劳动成果。本站为爱好者分享站点,所有内容不作为商业行为。如若本站上传内容侵犯了原著者的合法权益,请联系我们进行删除下架。
评论(0)