# Evaluation settings.
# NOTE: ${path} is not defined in the visible part of this script; it is
# expected to be set by the caller to the directory that holds the tools below.
TASK=ja-ko
SCRIPT="${path}/script.segmentation.distribution"
MOSES_SCRIPT="${path}/mosesdecoder-RELEASE-2.1.1/scripts"
RIBES="${path}/RIBES-1.02.4"

# Output directories for the tokenized files.
# -p keeps a re-run from failing when the directories already exist.
mkdir -p tests.tok results.tok
# MeCab-ko
# Tokenize ${file}.org/${TASK}.txt for both "tests" and "results" and write
# the result to ${file}.tok/${TASK}.mecab.txt. Pipeline stages, in order:
#   1. Remove a bracketed numeric tag such as "[0.95]" at the end of a line.
#      The brackets must be escaped: unescaped, "[[0-9.]+]" is a character
#      class that would also swallow digits preceding the tag.
#   2. Run h2z-utf8-without-space.pl from ${SCRIPT}
#      (presumably half-width -> full-width conversion; confirm in that script).
#   3. Segment with MeCab, emitting space-separated tokens (-O wakati).
#   4. Undo splits inside numbers and Latin words: join digit/digit,
#      digit/<any char>/digit, uppercase/letter and lowercase/lowercase pairs
#      that were separated by a single space, then drop a trailing space.
# NOTE(review): the Perl "encoding" pragma is deprecated in newer Perl
# releases; confirm it is still accepted by the Perl used for evaluation.
for file in tests results; do
  perl -Mencoding=utf8 -pe 's/(.)\[[0-9.]+\]$/${1}/;' \
    < "${file}.org/${TASK}.txt" \
    | perl "${SCRIPT}/h2z-utf8-without-space.pl" \
    | mecab -O wakati \
    | perl -Mencoding=utf8 -pe 'while(s/([0-9]) ([0-9])/$1$2/g){} s/([0-9]) (.) ([0-9])/$1$2$3/g; while(s/([A-Z]) ([A-Za-z])/$1$2/g){} while(s/([a-z]) ([a-z])/$1$2/g){} s/ $//;' \
    > "${file}.tok/${TASK}.mecab.txt"
done
# BLEU
# For each segmenter, run Moses multi-bleu.perl with the tokenized tests file
# as its argument (the reference) and the tokenized results file on stdin
# (the system output). The score is printed to stdout.
for segmenter in mecab; do
  perl "${MOSES_SCRIPT}/generic/multi-bleu.perl" \
    "tests.tok/${TASK}.${segmenter}.txt" \
    < "results.tok/${TASK}.${segmenter}.txt"
done
# RIBES
# For each segmenter, run RIBES.py with the tokenized tests file given via -r
# and the tokenized results file as the file to be scored. The -c flag is
# passed through unchanged from the original invocation.
for segmenter in mecab; do
  python3 "${RIBES}/RIBES.py" -c \
    -r "tests.tok/${TASK}.${segmenter}.txt" \
    "results.tok/${TASK}.${segmenter}.txt"
done
# JST (Japan Science and Technology Agency)
# NICT (National Institute of Information and Communications Technology)
# Kyoto University
# Last Modified: 2015-03-20