打包下载地址【包含v1和v2示例】:

 

代码片段:

; Demo script: load Jieba.dll and show a few segmentation results.
; Run without the default per-line sleep so the script executes at full speed.
SetBatchLines -1
; The default dictionary paths used by cppJieba are relative ("./dict/..."),
; so resolve them against the script's own directory.
SetWorkingDir %A_ScriptDir%

; Load the DLL explicitly so it stays resident for the lifetime of the script.
; Without this, DllCall would load and unload it around every single call.
; "ptr" is requested as the return type so the module handle is not truncated
; on 64-bit AutoHotkey; a null handle means the load failed.
if !DllCall("LoadLibrary", "str", "Jieba.dll", "ptr") {
  ; NOTE(review): a common cause is a bitness mismatch between AutoHotkey
  ; and the DLL, or the DLL not being next to the script - confirm locally.
  MsgBox, 16, cppJieba, % "Failed to load Jieba.dll (system error " A_LastError ")."
  ExitApp
}

jieba := New cppJieba()
MsgBox % jieba.cut("我来到北京清华大学") ; => 我/来到/北京/清华大学
MsgBox % jieba.cut("我来到北京清华大学", true) ; => 我/来到/北京/清华/清华大学/华大/大学
MsgBox % jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") ; => 小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/后/在/日本/京都/大学/日本京都大学/深造
MsgBox % jieba.cut("行走的银行") ; => 行走/的/银行

; Thin AutoHotkey v1 wrapper around the exports of Jieba.dll
; (init, cut, cut_all, cut_for_search, load_userdict, free_jieba).
;
; The native instance is created lazily on the first call that needs it and
; released by free() or automatically when the object is destroyed.
; Jieba.dll must already be loaded (e.g. via LoadLibrary) by the caller.
class cppJieba {
  ; Stores the dictionary paths; nothing is loaded until load() runs.
  ;   DICT_PATH       - main dictionary file
  ;   HMM_PATH        - HMM model file
  ;   USER_DICT_PATH  - user dictionary file
  ;   IDF_PATH        - IDF file
  ;   STOP_WORD_PATH  - stop-word file
  ; Relative paths are resolved against the current working directory.
  __new(DICT_PATH := "./dict/jieba.dict.utf8", HMM_PATH := "./dict/hmm_model.utf8", USER_DICT_PATH := "./dict/user.dict.utf8", IDF_PATH := "./dict/idf.utf8", STOP_WORD_PATH := "./dict/stop_words.utf8") {
    this.DICT_PATH := DICT_PATH
    this.HMM_PATH := HMM_PATH
    this.USER_DICT_PATH := USER_DICT_PATH
    this.IDF_PATH := IDF_PATH
    this.STOP_WORD_PATH := STOP_WORD_PATH
    this.cppJieba := 0       ; native handle returned by Jieba.dll\init
    this.loadFlag := false   ; true once the native handle is valid
  }

  ; Creates the native instance from the stored paths.
  ; Throws an Exception if init returns a null handle.
  load() {
    ; The DLL receives the paths as UTF-8 byte strings.
    cppJieba.strToUtf8(this.DICT_PATH, buf_DICT_PATH)
    cppJieba.strToUtf8(this.HMM_PATH, buf_HMM_PATH)
    cppJieba.strToUtf8(this.USER_DICT_PATH, buf_USER_DICT_PATH)
    cppJieba.strToUtf8(this.IDF_PATH, buf_IDF_PATH)
    cppJieba.strToUtf8(this.STOP_WORD_PATH, buf_STOP_WORD_PATH)

    ; Explicit "ptr" return type: the DllCall default is Int, which would
    ; truncate a 64-bit handle.
    handle := DllCall("Jieba.dll\init", "ptr", &buf_DICT_PATH, "ptr", &buf_HMM_PATH, "ptr", &buf_USER_DICT_PATH, "ptr", &buf_IDF_PATH, "ptr", &buf_STOP_WORD_PATH, "ptr")
    if !handle
      throw Exception("Jieba.dll\init returned a null handle; check that Jieba.dll is loaded and the dictionary paths exist.", -1)

    this.cppJieba := handle
    this.loadFlag := true
  }

  ; Segments str and returns the words joined by segFlag.
  ;   cut_all  - true: call the DLL's cut_all export (hmm_flag is not passed)
  ;              false: call the DLL's cut export
  ;   hmm_flag - forwarded to the DLL as an int
  ;   segFlag  - separator placed between words
  cut(str, cut_all := false, hmm_flag := true, segFlag := "/") {
    return this._segment(str, cut_all ? "cut_all" : "cut", hmm_flag, segFlag)
  }

  ; Same as cut(), but uses the DLL's cut_for_search export.
  cut_for_search(str, hmm_flag := true, segFlag := "/") {
    return this._segment(str, "cut_for_search", hmm_flag, segFlag)
  }

  ; Releases the native instance, if any. Safe to call more than once;
  ; a later cut()/load_userdict() transparently creates a new instance.
  free() {
    if !this.loadFlag
      return
    try DllCall("Jieba.dll\free_jieba", "ptr", this.cppJieba)
    ; Forget the handle so it can never be freed or used a second time.
    this.cppJieba := 0
    this.loadFlag := false
  }

  ; Passes an additional user dictionary file (path) to the DLL,
  ; creating the native instance first if necessary.
  load_userdict(path) {
    if !this.loadFlag
      this.load()
    cppJieba.strToUtf8(path, buf_path)
    DllCall("Jieba.dll\load_userdict", "ptr", this.cppJieba, "ptr", &buf_path)
  }

  ; Called by AutoHotkey when the last reference to the object is released.
  __delete() {
    this.free()
  }

  ; Converts str to a null-terminated UTF-8 byte string stored in buf.
  ; Returns the value of StrPut (bytes written).
  strToUtf8(str, ByRef buf) {
    ; StrPut without a target returns the required size incl. terminator.
    VarSetCapacity(buf, StrPut(str, "utf-8"))
    return StrPut(str, &buf, "utf-8")
  }

  ; Private helper shared by cut() and cut_for_search().
  ; Splits str at punctuation, sends every non-empty piece to the export
  ; named by mode ("cut", "cut_all" or "cut_for_search") and joins all
  ; results with segFlag.
  _segment(str, mode, hmm_flag, segFlag) {
    if !this.loadFlag
      this.load()

    ; Punctuation is not segmented: it only acts as a boundary between pieces.
    pieces := StrSplit(RegExReplace(str, "[\pP‘’“”]", ","), ",")
    ; The separator is loop-invariant, so encode it once.
    cppJieba.strToUtf8(segFlag, buf_segFlag)

    ret := ""
    for _, piece in pieces {
      ; Compare against "" explicitly: a piece such as "0" is falsy in
      ; AutoHotkey but is still text that must be segmented.
      if (piece = "")
        continue
      cppJieba.strToUtf8(piece, buf_piece)
      ; "ptr" return type keeps the returned char* intact on 64-bit builds.
      if (mode = "cut_all")
        pResult := DllCall("Jieba.dll\cut_all", "ptr", this.cppJieba, "ptr", &buf_piece, "ptr", &buf_segFlag, "ptr")
      else
        pResult := DllCall("Jieba.dll\" . mode, "ptr", this.cppJieba, "ptr", &buf_piece, "int", hmm_flag, "ptr", &buf_segFlag, "ptr")
      if !pResult
        continue
      ; NOTE(review): ownership of the returned buffer is not visible here;
      ; confirm whether the DLL expects the caller to release it.
      ; Join pieces with the caller's separator so the output is uniform.
      ret .= StrGet(pResult, "utf-8") . segFlag
    }
    ; Drop the trailing separator (which may be longer than one character).
    return SubStr(ret, 1, StrLen(ret) - StrLen(segFlag))
  }
}
声明:站内资源均为整理优化后上传的代码,仅供分享、学习与研究之用;凡属开源代码,基本都会标明出处,方便大家扩展学习路径。请不要恶意搬运,破坏站长辛苦整理维护的劳动成果。本站为爱好者分享站点,所有内容均不用于商业用途。如若本站上传的内容侵犯了原作者的合法权益,请联系我们进行删除下架。