# Task identifier; it is the basename of the per-task files used below
# (<dir>.org/${TASK}.txt and <dir>.tok/${TASK}.indic.txt).
TASK=hi-en

# `path` and INDIC_LIBRARY are not assigned anywhere in this file; they are
# expected to come from the caller. Warn (without aborting) when they are
# missing, because the tool locations would otherwise silently resolve
# from the filesystem root.
[ -n "${path:-}" ] ||
  printf 'warning: path is not set; tool locations will be resolved from /\n' >&2
[ -n "${INDIC_LIBRARY:-}" ] ||
  printf 'warning: INDIC_LIBRARY is not set; INDIC_SCRIPT will be incomplete\n' >&2

# Locations of the external tools, all rooted at ${path}.
SCRIPT=${path}/script.segmentation.distribution
INDIC_SCRIPT=${path}/${INDIC_LIBRARY}/src/indicnlp
MOSES_SCRIPT=${path}/mosesdecoder-RELEASE-2.1.1/scripts
RIBES=${path}/RIBES-1.02.4

# Output directories for the tokenized files. -p keeps a re-run from failing
# with "File exists" when the directories are already there.
mkdir -p tests.tok results.tok
# Indic tokenizer.
#
# INDIC_LANG selects the language of the files being tokenized and must be
# set by the caller to exactly one of: bn, hi, ml, ta, te, ur, si.
# (The language is passed to the Indic NLP scripts as a single argument; a
# brace list such as {bn,hi,ml,ta,te} would be expanded by the shell into
# several arguments.)
#
# Input:  tests.org/${TASK}.txt and results.org/${TASK}.txt
# Output: tests.tok/${TASK}.indic.txt and results.tok/${TASK}.indic.txt
: "${INDIC_LANG:?set INDIC_LANG to one of: bn hi ml ta te ur si}"

case "${INDIC_LANG}" in
  bn | hi | ml | ta | te)
    # These languages are normalized first; the tokenizer then reads the
    # normalized file.
    for file in tests results; do
      python "${INDIC_SCRIPT}/normalize/indic_normalize.py" \
        "${file}.org/${TASK}.txt" \
        "${file}.org/${TASK}.normalized.txt" \
        "${INDIC_LANG}"
      python "${INDIC_SCRIPT}/tokenize/indic_tokenize.py" \
        "${file}.org/${TASK}.normalized.txt" \
        "${file}.tok/${TASK}.indic.txt" \
        "${INDIC_LANG}"
    done
    ;;
  ur | si)
    # For ur and si the original text is tokenized directly, with no
    # normalization step.
    for file in tests results; do
      python "${INDIC_SCRIPT}/tokenize/indic_tokenize.py" \
        "${file}.org/${TASK}.txt" \
        "${file}.tok/${TASK}.indic.txt" \
        "${INDIC_LANG}"
    done
    ;;
  *)
    printf 'unsupported INDIC_LANG: %s (expected bn, hi, ml, ta, te, ur or si)\n' \
      "${INDIC_LANG}" >&2
    exit 1
    ;;
esac
# BLEU: multi-bleu.perl takes tests.tok/${TASK}.indic.txt as its file argument
# (the reference) and reads results.tok/${TASK}.indic.txt on standard input.
perl "${MOSES_SCRIPT}/generic/multi-bleu.perl" "tests.tok/${TASK}.indic.txt" \
  < "results.tok/${TASK}.indic.txt"
# RIBES: -r names the reference file (tests.tok); the trailing argument is the
# file to be scored (results.tok).
# NOTE(review): -c is passed through unchanged from the original command;
# confirm its meaning against the RIBES.py usage text.
python3 "${RIBES}/RIBES.py" -c -r "tests.tok/${TASK}.indic.txt" \
  "results.tok/${TASK}.indic.txt"
# JST (Japan Science and Technology Agency)
# NICT (National Institute of Information and Communications Technology)
# Kyoto University
# Last Modified: 2016-07-06