-
Notifications
You must be signed in to change notification settings - Fork 1
/
posfilter.cpp
68 lines (62 loc) · 1.89 KB
/
posfilter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include<iostream>
#include<fstream>
#include<string>
#include<set>
#include<cstring>
using namespace std;
/**
 * Filters a part-of-speech tagged text file, keeping only selected tags.
 *
 * Reads whitespace-separated tokens of the form "word/tag" from the file named
 * by argv[1] and writes each kept token, followed by a tab character, to a
 * file named "<argv[1]>_pos".
 *
 * A token is kept when the text from its first '/' onward is one of the tags
 * listed below. Tokens without any '/' are skipped. Tokens whose word part is
 * exactly 3 bytes long are skipped as well, unless the token starts with the
 * full-width full stop.
 *
 * @param argc Number of command-line arguments; must be at least 2.
 * @param argv argv[1] is the path of the tagged input file.
 * @return 0 on success, 1 if the file name is missing or a file cannot be
 *         opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {  // the file to process must be given on the command line
        std::cout << "Usage:command filename" << std::endl;
        return 1;
    }

    // Tags (including the leading '/') of the tokens that are kept.
    static const char *const kept_tags[] = {
        "/n",    // noun
        "/nr",   // person name
        "/nr1",  // Chinese surname
        "/nr2",  // Chinese given name
        "/nrj",  // Japanese person name
        "/ns",   // place name
        "/nt",   // organization / group name
        "/wj",   // full stop
        "/nl",   // nominal idiom
        "/ng",   // nominal morpheme
        "/v",    // verb
        "/vd",   // adverbial verb
        "/vn",   // nominal verb
        "/vl",   // verbal idiom
        "/vg",   // verbal morpheme
        "/a",    // adjective
        "/an",   // nominal adjective
        "/ag",   // adjectival morpheme
        "/al"    // adjectival idiom
    };
    const std::size_t kept_count = sizeof(kept_tags) / sizeof(kept_tags[0]);
    const std::set<std::string> set_pos(kept_tags, kept_tags + kept_count);

    const std::string filename(argv[1]);
    const std::string outfile = filename + "_pos";

    // Open the input first so that a bad input path does not leave behind an
    // empty (or truncated) output file.
    std::ifstream ifs(filename.c_str());
    if (!ifs) {
        std::cerr << "error:open file failed." << std::endl;
        return 1;
    }
    std::ofstream ofs(outfile.c_str());
    if (!ofs) {
        std::cerr << "error:open file failed." << std::endl;
        return 1;
    }

    std::string word;
    while (ifs >> word) {
        const std::string::size_type slash = word.find('/');

        // A token without a tag separator cannot match any tag. It must be
        // skipped explicitly: substr(npos) would throw std::out_of_range.
        if (slash == std::string::npos)
            continue;

        // Drop single Chinese characters (one character occupies 3 bytes,
        // presumably UTF-8 input), but keep the full-width full stop.
        if (slash == 3 && std::strncmp(word.c_str(), "。", 3) != 0)
            continue;

        // The tag is everything from the first '/' to the end of the token.
        if (set_pos.find(word.substr(slash)) != set_pos.end())
            ofs << word << "\t";
    }

    // Both streams are closed by their destructors.
    return 0;
}