From 9949db189acb0ebb534adcaeeaeeafd2dbe87787 Mon Sep 17 00:00:00 2001 From: "taku.ku@gmail.com" Date: Sun, 2 Mar 2014 07:17:02 +0000 Subject: [PATCH] --- config.h.in | 6 ++++++ configure | 15 +++++++++++++++ configure.in | 12 ++++++++++++ man/Makefile | 10 +++++----- src/morph.cpp | 10 +++++++++- src/parser.cpp | 1 + src/ucs.cpp | 8 ++++++++ 7 files changed, 56 insertions(+), 6 deletions(-) diff --git a/config.h.in b/config.h.in index 49fed08..9490fa1 100644 --- a/config.h.in +++ b/config.h.in @@ -24,6 +24,12 @@ /* Define if you have the iconv() function and it works. */ #undef HAVE_ICONV +/* */ +#undef HAVE_ICONV_CP932 + +/* */ +#undef HAVE_ICONV_EUC_JP_MS + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H diff --git a/configure b/configure index c39e605..67e72b1 100755 --- a/configure +++ b/configure @@ -2573,6 +2573,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu + + am__api_version='1.11' ac_aux_dir= @@ -17721,6 +17723,19 @@ then LIBS="$LIBS $MECAB_LIBS" fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iconv supports EUC-JP-MS and CP932" >&5 +$as_echo_n "checking whether iconv supports EUC-JP-MS and CP932... " >&6; } +if iconv -l | grep -i 'euc-jp-ms' > /dev/null; then +$as_echo "#define HAVE_ICONV_EUC_JP_MS 1" >>confdefs.h + +fi + +if iconv -l | grep -i 'cp932' > /dev/null; then +$as_echo "#define HAVE_ICONV_CP932 1" >>confdefs.h + +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lstdc++" >&5 $as_echo_n "checking for main in -lstdc++... 
" >&6; } if ${ac_cv_lib_stdcpp_main+:} false; then : diff --git a/configure.in b/configure.in index 296c0fb..56901e1 100644 --- a/configure.in +++ b/configure.in @@ -1,5 +1,7 @@ AC_INIT(src/cabocha.cpp) AH_TEMPLATE([HAVE_TLS_KEYWORD], []) +AH_TEMPLATE([HAVE_ICONV_EUC_JP_MS], []) +AH_TEMPLATE([HAVE_ICONV_CP932], []) AM_INIT_AUTOMAKE(cabocha, 0.68) AM_MAINTAINER_MODE @@ -109,6 +111,16 @@ then LIBS="$LIBS $MECAB_LIBS" fi + +AC_MSG_CHECKING(whether iconv supports EUC-JP-MS and CP932) +if iconv -l | grep -i 'euc-jp-ms' > /dev/null; then +AC_DEFINE([HAVE_ICONV_EUC_JP_MS]) +fi + +if iconv -l | grep -i 'cp932' > /dev/null; then +AC_DEFINE([HAVE_ICONV_CP932]) +fi + AC_CHECK_LIB(stdc++, main, STDCPP_LIBS="-lstdc++") AC_CHECK_LIB(crfpp, crfpp_new, STDCRFPP_LIBS="-lcrfpp") AC_CHECK_LIB(mecab, mecab_new, CRFPP_LIBS="-lmecab") diff --git a/man/Makefile b/man/Makefile index 94c47a5..ddeff7f 100644 --- a/man/Makefile +++ b/man/Makefile @@ -95,7 +95,7 @@ CABOCHA_USE_UTF8_ONLY = CC = gcc CCDEPMODE = depmode=none CFLAGS = -O3 -Wno-deprecated -Wall -CHARSET = utf8 +CHARSET = EUC-JP CPP = gcc -E CPPFLAGS = CXX = g++ @@ -131,7 +131,7 @@ LN_S = ln -s LTLIBICONV = LTLIBOBJS = LTVERSION = 4:0:0 -MAINT = # +MAINT = MAKEINFO = ${SHELL} /home/taku/proj/cabocha/missing --run makeinfo MANIFEST_TOOL = : MECAB_CFLAGS = @@ -218,7 +218,7 @@ EXTRA_DIST = $(man_MANS) all: all-am .SUFFIXES: -$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -243,9 +243,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: # $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): # 
$(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/src/morph.cpp b/src/morph.cpp index 7a6f345..52cdb4d 100644 --- a/src/morph.cpp +++ b/src/morph.cpp @@ -153,9 +153,10 @@ bool MorphAnalyzer::open(const Param &param) { CHECK_FALSE(action_mode() == PARSING_MODE) << "MorphAnalyzer supports PARSING_MODE only"; - const std::string mecabrc = param.get("mecabrc"); std::vector argv; argv.push_back(param.program_name()); + + const std::string mecabrc = param.get("mecabrc"); if (!mecabrc.empty()) { argv.push_back("-r"); argv.push_back(mecabrc.c_str()); @@ -168,6 +169,13 @@ bool MorphAnalyzer::open(const Param &param) { argv.push_back(mecabdic.c_str()); } + const std::string mecabuserdic = + param.get("mecab-userdic"); + if (!mecabuserdic.empty()) { + argv.push_back("-u"); + argv.push_back(mecabuserdic.c_str()); + } + mecab_ = mecab_new_f(argv.size(), const_cast(&argv[0])); CHECK_FALSE(mecab_) << mecab_strerror_f(0); diff --git a/src/parser.cpp b/src/parser.cpp index a1e9081..620b225 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -61,6 +61,7 @@ const CaboCha::Option long_options[] = { { "rcfile", 'r', 0, "FILE", "use FILE as resource file" }, { "mecabrc", 'b', 0, "FILE", "use FILE as mecabrc"}, { "mecab-dicdir", 'd', 0, "DIR", "use DIR as mecab dictionary directory"}, + { "mecab-userdic", 'u', 0, "FILE", "use FILE as mecab user directory"}, { "output", 'o', 0, "FILE", "use FILE as output file"}, { "version", 'v', 0, 0, "show the version and exit"}, { "help", 'h', 0, 0, "show this help and exit"}, diff --git a/src/ucs.cpp b/src/ucs.cpp index 782e1fd..2f85e9d 100644 --- a/src/ucs.cpp +++ b/src/ucs.cpp @@ -162,9 +162,17 @@ const char *encode_charset(CharsetType charset) { case UTF8: return "UTF8"; case EUC_JP: +#ifdef HAVE_ICONV_EUC_JP_MS + return "EUC-JP-MS"; +#else return "EUC-JP"; +#endif case CP932: +#ifdef HAVE_ICONV_CP932 return "CP932"; +#else + return "SHIFT_JIS";
+#endif default: std::cerr << "charset " << charset << " is not defined, use " CABOCHA_DEFAULT_CHARSET;