打包下载地址【包含v1和v2示例】:
代码片段:
SetBatchLines -1
SetWorkingDir %A_ScriptDir%

; Pre-load the native library from the script directory so that the
; "Jieba.dll\..." calls below resolve against this module.
if !DllCall("LoadLibrary", "str", "Jieba.dll", "ptr")
{
    MsgBox % "Failed to load Jieba.dll (system error " A_LastError ")"
    ExitApp
}

jieba := New cppJieba()

MsgBox % jieba.cut("我来到北京清华大学")        ; expected => 我/来到/北京/清华大学
MsgBox % jieba.cut("我来到北京清华大学", true)  ; expected => 我/来到/北京/清华/清华大学/华大/大学
MsgBox % jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
; expected => 小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/后/在/日本/京都/大学/日本京都大学/深造
MsgBox % jieba.cut("行走的银行")                ; expected => 行走/的/银行

; Thin AutoHotkey wrapper around the functions exported by Jieba.dll
; (init, cut, cut_all, cut_for_search, load_userdict, free_jieba).
;
; The native handle is created lazily on the first call that needs it and
; released by free() / __delete().
class cppJieba
{
    ; Stores the dictionary/model file paths. Nothing is loaded here; the
    ; native side is initialised on first use (see load()).
    __new(DICT_PATH := "./dict/jieba.dict.utf8", HMM_PATH := "./dict/hmm_model.utf8", USER_DICT_PATH := "./dict/user.dict.utf8", IDF_PATH := "./dict/idf.utf8", STOP_WORD_PATH := "./dict/stop_words.utf8")
    {
        this.DICT_PATH := DICT_PATH
        this.HMM_PATH := HMM_PATH
        this.USER_DICT_PATH := USER_DICT_PATH
        this.IDF_PATH := IDF_PATH
        this.STOP_WORD_PATH := STOP_WORD_PATH
        this.cppJieba := 0       ; native handle returned by Jieba.dll\init
        this.loadFlag := false
    }

    ; Creates the native handle from the configured paths.
    ; Calling it again releases the previous handle first, so a reload does
    ; not leak. Throws if init returns a null handle.
    load()
    {
        if this.loadFlag
            this.free()

        cppJieba.strToUtf8(this.DICT_PATH, buf_DICT_PATH)
        cppJieba.strToUtf8(this.HMM_PATH, buf_HMM_PATH)
        cppJieba.strToUtf8(this.USER_DICT_PATH, buf_USER_DICT_PATH)
        cppJieba.strToUtf8(this.IDF_PATH, buf_IDF_PATH)
        cppJieba.strToUtf8(this.STOP_WORD_PATH, buf_STOP_WORD_PATH)

        ; The return type must be "ptr": DllCall defaults to Int, which
        ; truncates the handle on 64-bit AutoHotkey.
        handle := DllCall("Jieba.dll\init"
            , "ptr", &buf_DICT_PATH
            , "ptr", &buf_HMM_PATH
            , "ptr", &buf_USER_DICT_PATH
            , "ptr", &buf_IDF_PATH
            , "ptr", &buf_STOP_WORD_PATH
            , "ptr")
        if !handle
            throw Exception("Jieba.dll\init failed (ErrorLevel " ErrorLevel ")", -1)

        this.cppJieba := handle
        this.loadFlag := true
    }

    ; Segments str and returns the tokens joined by segFlag.
    ;   cut_all  - true: call the DLL's cut_all export; false: call cut.
    ;   hmm_flag - forwarded to the DLL's cut export as an int.
    ;   segFlag  - separator placed between tokens.
    ; The input is first split on punctuation; each non-empty piece is
    ; segmented separately and the results are joined with segFlag.
    cut(str, cut_all := false, hmm_flag := true, segFlag := "/")
    {
        if !this.loadFlag
            this.load()

        cppJieba.strToUtf8(segFlag, buf_segFlag)   ; loop-invariant, convert once
        ret := ""
        for _, chunk in this._splitOnPunct(str)
        {
            ; Compare against "" explicitly: a bare "!chunk" would also
            ; discard the text "0".
            if (chunk = "")
                continue
            cppJieba.strToUtf8(chunk, cutStr)
            if cut_all
                pResult := DllCall("Jieba.dll\cut_all", "ptr", this.cppJieba, "ptr", &cutStr, "ptr", &buf_segFlag, "ptr")
            else
                pResult := DllCall("Jieba.dll\cut", "ptr", this.cppJieba, "ptr", &cutStr, "int", hmm_flag, "ptr", &buf_segFlag, "ptr")
            ret .= (pResult ? StrGet(pResult, "utf-8") : "") segFlag
        }
        ; Drop the trailing separator (same length as segFlag).
        return SubStr(ret, 1, StrLen(ret) - StrLen(segFlag))
    }

    ; Same pipeline as cut(), but calls the DLL's cut_for_search export.
    cut_for_search(str, hmm_flag := true, segFlag := "/")
    {
        if !this.loadFlag
            this.load()

        cppJieba.strToUtf8(segFlag, buf_segFlag)
        ret := ""
        for _, chunk in this._splitOnPunct(str)
        {
            if (chunk = "")
                continue
            cppJieba.strToUtf8(chunk, cutStr)
            pResult := DllCall("Jieba.dll\cut_for_search", "ptr", this.cppJieba, "ptr", &cutStr, "int", hmm_flag, "ptr", &buf_segFlag, "ptr")
            ret .= (pResult ? StrGet(pResult, "utf-8") : "") segFlag
        }
        return SubStr(ret, 1, StrLen(ret) - StrLen(segFlag))
    }

    ; Releases the native handle, if any. Safe to call repeatedly: the handle
    ; is cleared before the native call so a second free() (for example from
    ; __delete after an explicit free) is a no-op.
    free()
    {
        handle := this.cppJieba
        if !handle
            return
        this.cppJieba := 0
        this.loadFlag := false
        ; Best effort: this also runs from __delete, where throwing is unhelpful.
        try
            DllCall("Jieba.dll\free_jieba", "ptr", handle)
    }

    ; Passes a user dictionary path to the DLL's load_userdict export,
    ; creating the native handle first if it does not exist yet.
    load_userdict(path)
    {
        if !this.loadFlag
            this.load()
        cppJieba.strToUtf8(path, buf_path)
        DllCall("Jieba.dll\load_userdict", "ptr", this.cppJieba, "ptr", &buf_path)
    }

    __delete()
    {
        this.free()
    }

    ; Writes str into buf as a null-terminated UTF-8 string and returns the
    ; StrPut result for that write.
    strToUtf8(str, ByRef buf)
    {
        VarSetCapacity(buf, StrPut(str, "utf-8"))
        return StrPut(str, &buf, "utf-8")
    }

    ; Internal: replaces every punctuation character with "," and splits on
    ; ",". The returned array may contain empty entries.
    _splitOnPunct(str)
    {
        return StrSplit(RegExReplace(str, "[\pP‘’“”]", ","), ",")
    }
}
声明:站内资源均为整理优化后上传的代码,仅供分享与学习研究。如果是开源代码,基本都会标明出处,方便大家扩展学习路径。请不要恶意搬运,破坏站长辛苦整理维护的劳动成果。本站为爱好者分享站点,所有内容均不用于商业用途。如若本站上传的内容侵犯了原作者的合法权益,请联系我们进行删除下架。
评论(0)