% Conference paper by Wang, Yang and Lepage in the PIC 2014 proceedings (IEEE), pp. 6--10.
% The event name, event dates and copyright notice live in the dedicated
% eventtitle / eventdate / copyright fields instead of a free-form note, so
% they are not dumped verbatim into the rendered reference.
% Braced words in title/booktitle (proper nouns, acronyms) keep their case
% under styles that apply sentence casing.
@inproceedings{805ea078d9c34485a554ed5229b231dc,
  author     = {Wang, Hao and Yang, Wei and Lepage, Yves},
  title      = {Improved {Chinese-Japanese} phrase-based {MT} quality using an extended quasi-parallel corpus},
  booktitle  = {{PIC} 2014 -- Proceedings of 2014 {IEEE} International Conference on Progress in Informatics and Computing},
  editor     = {Wang, Yinglin and Li, Xuelong and Cai, Hongming},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  pages      = {6--10},
  year       = {2014},
  month      = dec,
  day        = {2},
  doi        = {10.1109/PIC.2014.6972285},
  language   = {English},
  eventtitle = {2014 2nd {IEEE} International Conference on Progress in Informatics and Computing, {PIC} 2014},
  eventdate  = {2014-05-16/2014-05-18},
  copyright  = {Publisher Copyright: {\textcopyright} 2014 IEEE.},
  keywords   = {analogy, machine translation, paraphrasing, quasi-parallel data},
  abstract   = {State-of-the-art phrase-based machine translation (MT) systems usually demand large parallel corpora in the step of training. The quality and the quantity of the training data exert a direct influence on the performance of such translation systems. The lack of open-source bilingual corpora for a particular language pair results in lower translation scores reported for such a language pair. This is the case of Chinese-Japanese. In this paper, we propose to build an extension of an initial parallel corpus in the form of quasi-parallel sentences, instead of adding new parallel sentences. The extension of the initial corpus is obtained by using monolingual analogical associations. Our experiments show that the use of such quasi-parallel corpora improves the performance of Chinese-Japanese translation systems.},
}