#include <stdafx.h>
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <set>
#include <afx.h>
#include <algorithm>
#include "C:\\Program Files\\Mecab\\sdk\\mecab.h"
#pragma comment (lib, "C:\\Program Files\\Mecab\\sdk\\libmecab.lib")
#pragma warning (disable:4786)
using namespace std;
int main()
{
int argc =2;
char *argv[] = {"mecab.exe", "--node-format=%m,%f[0]\n", NULL};
vector<string> vsFile;
vector<string> vsOut;
string test1;
string test2;
string test3 = "\n";
string test4;
// string str1;
string str2;
int i,k=0;
unsigned long int j;
// ファイルの読み込み
string folder = "SampleText_0007\\*.txt";
CFileFind cFile;
BOOL bFile = cFile.FindFile( (LPCTSTR)folder.c_str() );
while( bFile ) {
bFile = cFile.FindNextFile();
vsFile.push_back( (LPCTSTR)cFile.GetFilePath() );
}
cout << vsFile.size() << endl;
// 出力用のファイルを開く
ofstream fout;
fout.open("SampleText_0007_result.txt", std::ios::out | std::ios::app );
mecab_t * mecab = mecab_new( argc, argv );
while( k < vsFile.size() ){
cout << k << endl;
// vsFileのk番目のファイルを開く
ifstream fin;
fin.open( vsFile[k].c_str() );
if( fin.fail() ) {
cout << "ERROR : fin.open() : " << endl;
return 0;
}
string str1;
for( i=0; !fin.eof(); i++ ) {
getline( fin , str2 );
str1 += str2;
}
string result = mecab_sparse_tostr( mecab, (char*)str1.c_str() );
result.erase( result.size()-4, result.size() );
int a2 = result.find( test3 );
test2 = result.substr( 0 , a2 );
int a1 = test2.find( ',' );
if( a1 != (-1) ){
test1 = test2.substr( 0 , a1 );
test2 = test2.substr( a1+1 , test2.size() );
test4 = test1;
}
if( test2.compare( "名詞" ) == 0 ){
j = find( vsOut.begin(), vsOut.end(), test4.c_str() ) - vsOut.begin();
if( j != vsOut.size() ){
test4.erase( 0, test4.size() );
}
}
else if( test2.compare( "名詞" ) != 0 ){
test4.erase( 0, test4.size() );
}
while(1){
string tmp;
string::iterator iout= result.begin();
iout += (a2+1);
while( *iout != '\n' ){
tmp += *iout;
iout++;
}
a2 = result.find( test3 , a2+test3.size() );
if( a2 == (-1) )
break;
a1 = tmp.find( ',' );
if( a1 != (-1) ){
test1 = tmp.substr( 0 , a1 );
test2 = tmp.substr( a1+1, tmp.size() );
}
if( test2.compare( "名詞" ) == 0 ){
test4 += test1;
continue;
}else if( test2.compare( "名詞" ) != 0 ) {
if( test4.empty() )
continue;
/* // 重複のチェック
ifstream check;
check.open( "result.txt" );
for( int s = 0; !check.eof(); s++ ){
getline( check , tmp );
j = tmp.compare( test4 );
if( j == 0 )
break;
}
check.close();
*/
/* // 重複のチェック
for( int s = 0; s<vsOut.size(); s++ ){
j = test4.compare( vsOut[s].c_str() );
if( j == 0 ){
cout << test4.c_str() << endl;
cout << vsOut[s].c_str() << endl;
test4.erase( 0, test4.size() );
break;
}
}
if( j == 0 ){
j = -1;
continue;
}
*/
// 重複のチェック(vector)
j = find( vsOut.begin(), vsOut.end(), test4.c_str() ) - vsOut.begin();
if( j != vsOut.size() ){
test4.erase( 0, test4.size() );
continue;
}
vsOut.push_back( test4.c_str() );
test4.erase( 0, test4.size() );
}
}
fin.close();
k++;
}
mecab_destroy(mecab);
sort( vsOut.begin(), vsOut.end() );
for( i=0; i<vsOut.size(); i++ ){
// cout << vsOut[i].c_str() << endl;
fout << vsOut[i].c_str() << endl;
}
fout.close();
return 0;
}
'etc' 카테고리의 다른 글
키보드 자판배열을 일본식으로 바꾸자 (0) | 2012.12.27 |
---|---|
find関数の裏技 (0) | 2008.01.18 |
kadai (0) | 2008.01.09 |
プログラミングB テキストリストファイル (0) | 2008.01.09 |
keitaiso_sourcecord (0) | 2007.12.22 |