/*
  ahk正则判断双字节正则为[^\x00-\xff],判断汉字字集范围表达方式为:[\x{范围}-\x{范围}]。例如判断是否是基本汉字[\x{4E00}-\x{9FA5}]+
*/
; Demo driver: split a sample string into characters, classify each one with
; CheckChineseCharset, group the characters by the returned label, and show
; the grouped result in a message box.
str:="字集顚亾𢎁礱龑龖瘇龘〇㠩"
result:={}
chars:=""
GetStrLen(str,Array)

; Bucket every character under its label; characters with an empty label
; (nothing matched) are skipped.
for index,char In Array
{
  Type:=CheckChineseCharset(char)
  if !Type
    Continue
  if result[Type]
    result[Type].=char "、"
  Else
    result[Type]:=char "、"
}

; Render one "label:{ a、b、c }" line per bucket, dropping the trailing separator.
for label,members In result
{
  if !label
    Continue
  chars.=label ":{ " Trim(members,"、") " }`n"
}
MsgBox % chars
; 中文字集判断
; GBK汉字范围->8140-FEFE
; GB2312汉字范围->B0A1-F7FE
; Classifies a single character by CJK character set / Unicode block.
;
; Parameters:
;   String - the text to classify; every pattern is anchored with ^...$ around
;            one character class, so only a single character can match.
;
; Returns:
;   - "GB2312" or "GBK" when the character lies in one of the code point ranges
;     that are checked against code page 936 (every range below except
;     Extensions A-H and the final Latin-1 range) and its CP936 encoding falls
;     inside the corresponding byte range;
;   - otherwise the label of the matching Unicode range ("扩A", "康熙部首", ...);
;     a CJK Unified Ideograph (U+4E00-U+9FFF) outside both byte ranges
;     yields "非中文";
;   - "未知分类" for a single character in U+0000-U+00FF;
;   - an empty string when no range matches.
CheckChineseCharset(String){
  ; CJK Unified Ideographs (basic block).
  if (String ~= "^[\x{4E00}-\x{9FFF}]$")
    Return CheckChineseCharset_CodepageLabel(String, "非中文")

  ; CJK Unified Ideographs Extensions A-H: reported by block only, no
  ; code page check.
  if (String ~= "^[\x{3400}-\x{4DBF}]$")    ; Extension A
    Return "扩A"
  if (String ~= "^[\x{20000}-\x{2A6DF}]$")  ; Extension B
    Return "扩B"
  if (String ~= "^[\x{2A700}-\x{2B73F}]$")  ; Extension C
    Return "扩C"
  if (String ~= "^[\x{2B740}-\x{2B81F}]$")  ; Extension D
    Return "扩D"
  if (String ~= "^[\x{2B820}-\x{2CEAF}]$")  ; Extension E
    Return "扩E"
  if (String ~= "^[\x{2CEB0}-\x{2EBEF}]$")  ; Extension F
    Return "扩F"
  if (String ~= "^[\x{30000}-\x{3134F}]$")  ; Extension G
    Return "扩G"
  if (String ~= "^[\x{31350}-\x{323BC}]$")  ; Extension H
    Return "扩H"

  ; Remaining ranges: prefer the GB2312/GBK label when the CP936 bytes fall
  ; in those ranges, otherwise report the range's own label.
  if (String ~= "^[\x{3007}]$")             ; ideographic zero
    Return CheckChineseCharset_CodepageLabel(String, "〇")
  if (String ~= "^[\x{2F00}-\x{2FD5}]$")    ; Kangxi radicals
    Return CheckChineseCharset_CodepageLabel(String, "康熙部首")
  if (String ~= "^[\x{2E80}-\x{2EF3}]$")    ; CJK radicals supplement
    Return CheckChineseCharset_CodepageLabel(String, "部首扩展")
  if (String ~= "^[\x{F900}-\x{FAD9}]$")    ; CJK compatibility ideographs
    Return CheckChineseCharset_CodepageLabel(String, "CJK兼容汉字")
  if (String ~= "^[\x{2F800}-\x{2FA1D}]$")  ; CJK compatibility supplement
    Return CheckChineseCharset_CodepageLabel(String, "CJK兼容扩展")
  if (String ~= "^[\x{E815}-\x{E86F}]$")    ; private use area (GBK components)
    Return CheckChineseCharset_CodepageLabel(String, "PUA(GBK)部件")
  if (String ~= "^[\x{E400}-\x{E5E8}]$")    ; private use area (component extension)
    Return CheckChineseCharset_CodepageLabel(String, "部件扩展")
  if (String ~= "^[\x{E600}-\x{E6CF}]$")    ; private use area (supplement)
    Return CheckChineseCharset_CodepageLabel(String, "PUA增补")
  ; The original tested an undefined local variable here instead of String,
  ; which made this branch unreachable.
  if (String ~= "^[\x{31C0}-\x{31E3}]$")    ; CJK strokes
    Return CheckChineseCharset_CodepageLabel(String, "汉字笔画")
  if (String ~= "^[\x{2FF0}-\x{2FFB}]$")    ; ideographic description characters
    Return CheckChineseCharset_CodepageLabel(String, "汉字结构")
  if (String ~= "^[\x{3105}-\x{312F}]$")    ; Bopomofo
    Return CheckChineseCharset_CodepageLabel(String, "汉语注音")
  if (String ~= "^[\x{31A0}-\x{31BA}]$")    ; Bopomofo extended
    Return CheckChineseCharset_CodepageLabel(String, "注音扩展")

  if (String ~= "^[\x00-\xff]$")
    Return "未知分类"
  ; No range matched: fall through and return an empty string.
}

; Helper for CheckChineseCharset: encodes String as code page 936 and labels
; it by its first two bytes.
;   lead 0xB0-0xF7 and trail 0xA1-0xFE -> "GB2312"
;   lead 0x81-0xFE and trail 0x40-0xFE -> "GBK"
;   anything else                      -> Fallback
CheckChineseCharset_CodepageLabel(String, Fallback){
  ; StrPut without a target returns the required size including the
  ; terminator; the x4 margin is kept from the original code.
  VarSetCapacity(buf, StrPut(String, "CP936")*4)
  StrPut(String, &buf, "CP936")
  lead:=NumGet(&buf, 0, "UChar")
  trail:=NumGet(&buf, 1, "UChar")
  if (lead>=0xB0 && lead<=0xF7 && trail>=0xA1 && trail<=0xFE)
    Return "GB2312"
  if (lead>=0x81 && lead<=0xFE && trail>=0x40 && trail<=0xFE)
    Return "GBK"
  Return Fallback
}

; Splits a string into its characters, ignoring whitespace.
;
; Parameters:
;   str - input text; all characters matched by \s are removed first.
;   obj - (ByRef, optional) receives an array with one element per character.
;         Characters are matched with the regex ".", so a character stored as
;         a surrogate pair stays together in one element.
;
; Returns:
;   The number of characters placed in obj.
;
; The characters are collected by walking the string with RegExMatch rather
; than by inserting a sentinel delimiter and splitting on it, so input that
; itself contains the former sentinel character (U+262F) is handled correctly.
GetStrLen(str,ByRef obj:="") {
  str:=RegExReplace(str,"\s")
  obj:=[]
  pos:=1
  while (pos:=RegExMatch(str,".",ch,pos)) {
    obj.Push(ch)
    ; Advance by the matched length (2 code units for a surrogate pair).
    pos+=StrLen(ch)
  }
  Return obj.Length()
}

 

; 声明:站内资源为整理优化好的代码上传分享与学习研究,如果是开源代码基本都会标明出处,方便大家扩展学习路径。请不要恶意搬运,破坏站长辛苦整理维护的劳动成果。本站为爱好者分享站点,所有内容不作为商业行为。如若本站上传内容侵犯了原著者的合法权益,请联系我们进行删除下架。