MeCabのinput-buffer overflow問題の回避策
入力ファイルの末尾に改行をつけると解決する模様.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Reproduce MeCab's "input-buffer overflow" warning and its workaround.

Three input files are written and passed to the ``mecab`` command:

* TEST1 - text without a trailing newline. MeCab was observed to warn
  ``input-buffer overflow. The line is split. use -b #SIZE option.`` and to
  print the same tokenized sentence twice.
* TEST2 - the same text with ``-b 100000``. The same warning was still
  observed.
* TEST3 - the same text followed by a newline. The warning no longer appears.
"""
import subprocess
import sys

message = "Hello World"


def run_case(name, text, path, mecab_args=(), blank_line_after=False):
    """Write *text* to *path*, run ``mecab`` on that file and return its status.

    Args:
        name: Label printed in the ``[START]`` / ``[END]`` markers.
        text: Exact content written to the input file.
        path: Location of the input file handed to ``mecab``.
        mecab_args: Extra command-line arguments appended after the file path.
        blank_line_after: When true, an empty line follows the ``[END]`` marker.

    Returns:
        The exit status of ``mecab``, or 127 when it could not be started.
    """
    print("[START] " + name)
    with open(path, "w") as fp:
        fp.write(text)

    # The child process writes straight to the inherited stdout, so anything
    # still sitting in our own buffer must go out first to keep the order.
    sys.stdout.flush()
    try:
        # An argument list avoids routing the command through a shell.
        status = subprocess.call(["mecab", path] + list(mecab_args))
    except OSError as exc:
        sys.stderr.write("mecab could not be started: %s\n" % exc)
        status = 127  # conventional "command not found" status

    print("[END] " + name + ("\n" if blank_line_after else ""))
    return status


def main():
    """Run the three demonstration cases in order."""
    # The warning below appears and the same tokenized sentence is shown twice:
    #   input-buffer overflow. The line is split. use -b #SIZE option.
    run_case("TEST1", message, "/tmp/test1", blank_line_after=True)

    # Adding the -b option still produces the same message.
    run_case("TEST2", message, "/tmp/test2", mecab_args=("-b", "100000"),
             blank_line_after=True)

    # Ending the input with a newline makes the warning go away.
    run_case("TEST3", message + "\n", "/tmp/test3")


if __name__ == "__main__":
    main()
出力結果
[START] TEST1
Hello 名詞,固有名詞,*,*,*,*,Hello,*,*,wikipedia,
World 名詞,固有名詞,組織,*,*,*,*
EOS
input-buffer overflow. The line is split. use -b #SIZE option.
Hello 名詞,固有名詞,*,*,*,*,Hello,*,*,wikipedia,
World 名詞,固有名詞,組織,*,*,*,*
EOS
[END] TEST1

[START] TEST2
Hello 名詞,固有名詞,*,*,*,*,Hello,*,*,wikipedia,
World 名詞,固有名詞,組織,*,*,*,*
EOS
input-buffer overflow. The line is split. use -b #SIZE option.
Hello 名詞,固有名詞,*,*,*,*,Hello,*,*,wikipedia,
World 名詞,固有名詞,組織,*,*,*,*
EOS
[END] TEST2

[START] TEST3
Hello 名詞,固有名詞,*,*,*,*,Hello,*,*,wikipedia,
World 名詞,固有名詞,組織,*,*,*,*
EOS
[END] TEST3