/* * $Id: main.cpp,v 1.8 2005/02/16 10:45:39 tsuruoka Exp $ */ #include #include #include #include #include #include #include "maxent.h" #include "common.h" #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; string bidir_postag(const string & s, const vector & vme, const vector & cvme, bool dont_tokenize); void bidir_chunking(vector & vs, const vector & vme); void init_morphdic(); static bool dont_tokenize = false; static vector vme(16); static vector vme_chunking(16); static int client_sockfd; static volatile bool connection_closed = false; void iohandler(int dummy) { const int BUFLEN = 10000; char ch; char buf[BUFLEN]; buf[0] = 0; if (read(client_sockfd, buf, BUFLEN) == -1) return; for (int i = 0; i < BUFLEN-1; i++) { if (buf[i] == '\r') { // buf[i] = '\n'; buf[i] = 0; break; } if (buf[i] == '\n') { buf[i] = 0; break; } } string line(buf); if (line.size() == 0) { close(client_sockfd); cerr << "empty line. connection closed." << endl; connection_closed = true; return; } string postagged = bidir_postag(line, vme, vme_chunking, dont_tokenize); postagged += "\n"; write(client_sockfd, postagged.c_str(), postagged.size()); cerr << "."; /* int len = 0; for (int i = 0; i < BUFLEN; i++) { if (buf[i] == '\n') break; len++; buf[i] = toupper(buf[i]); } // cout << "len = " << len << endl; // note) CR+LF? write(client_sockfd, buf, len+1); */ } int wait_for_connection(int port) { int server_sockfd; socklen_t server_len, client_len; struct sockaddr_in server_address; struct sockaddr_in client_address; if ((server_sockfd = socket(PF_INET, SOCK_STREAM, 0)) < 0) { cerr << "error: socket" << endl; exit(1); } bzero((char *)&server_address, sizeof(server_address)); server_address.sin_family = AF_INET; server_address.sin_addr.s_addr = htonl(INADDR_ANY); // server_address.sin_addr.s_addr = inet_addr("127.0.0.1"); server_address.sin_port = htons(port); int on = 1; setsockopt(server_sockfd, SOL_SOCKET, SO_REUSEADDR, (char*) &on, sizeof(on)); server_len = sizeof(server_address); if (bind(server_sockfd, (struct sockaddr *)&server_address, server_len) < 0){ cerr << "error. bind(): " << strerror(errno) << endl; exit(1); } if (listen(server_sockfd, 5) < 0) { cerr << "error. listen(): " << strerror(errno) << endl; exit(1); } cerr << "GENIA Tagger server is waiting at port " << port << "." << endl; client_len = sizeof(client_address); client_sockfd = accept(server_sockfd, (struct sockaddr *)&client_address, &client_len); if (client_sockfd == -1) { cerr << "error. accept(): " << strerror(errno) << endl; exit(1); } cerr << "connected." << endl; close(server_sockfd); struct sigaction act; act.sa_handler = iohandler; sigemptyset(&act.sa_mask); act.sa_flags = 0; sigaction(SIGIO, &act, NULL); if (fcntl(client_sockfd, F_SETOWN, getpid()) == -1) { cerr << "error. fcntl(): " << strerror(errno) << endl; exit(1); } if (fcntl(client_sockfd, F_SETFL, O_NONBLOCK|O_ASYNC) == -1) { cerr << "error. fcntl(): " << strerror(errno) << endl; exit(1); } while(1) { if (connection_closed) { connection_closed = false; return 0; } } close(client_sockfd); exit(0); } void help() { cout << "Usage: geniatagger [OPTION]... [FILE]..." << endl; cout << "Analyze English sentences and print the base forms, part-of-speech tags, and" << endl; cout << "chunk tags." << endl; cout << endl; cout << "Options:" << endl; cout << " --server [PORT] server mode" << endl; cout << " -nt don't perform tokenization" << endl; cout << " --help display this help and exit" << endl; cout << endl; cout << "Report bugs to ." << endl; } void version() { cout << "GENIA Tagger 2.0.1" << endl << endl; } int main(int argc, char** argv) { istream *is(&std::cin); string ifilename, ofilename; int port = -1; for (int i = 1; i < argc; i++) { string v = argv[i]; if (v == "-nt") { dont_tokenize = true; continue; } if (v == "--server") { port = atoi(argv[i+1]); i++; continue; } if (v == "--help") { help(); exit(0); } ifilename = argv[i]; } ifstream ifile; if (ifilename != "" && ifilename != "-") { ifile.open(ifilename.c_str()); if (!ifile) { cerr << "error: cannot open " << ifilename << endl; exit(1); } is = &ifile; } init_morphdic(); cerr << "loading pos_models"; for (int i = 0; i < 16; i++) { char buf[1000]; sprintf(buf, "./models_medline/model.bidir.%d", i); vme[i].load_from_file(buf); cerr << "."; } cerr << "done." << endl; cerr << "loading chunk_models"; for (int i = 0; i < 8; i +=2 ) { char buf[1000]; sprintf(buf, "./models_chunking/model.bidir.%d", i); vme_chunking[i].load_from_file(buf); cerr << "."; } cerr << "done." << endl; if (port > 0) { while (1) { wait_for_connection(port); } exit(0); } string line; while (getline(*is, line)) { string postagged = bidir_postag(line, vme, vme_chunking, dont_tokenize); cout << postagged << endl; } } /* * $Log: main.cpp,v $ * Revision 1.8 2005/02/16 10:45:39 tsuruoka * acl05 submit * * Revision 1.7 2005/01/10 13:44:36 tsuruoka * a * * Revision 1.6 2004/12/30 10:34:56 tsuruoka * add bidir_decode_search * * Revision 1.5 2004/12/25 10:47:12 tsuruoka * add load_tag_dictionary() * * Revision 1.4 2004/12/25 09:22:57 tsuruoka * add make_tag_dictionary() * * Revision 1.3 2004/12/21 13:54:46 tsuruoka * add bidir.cpp * * Revision 1.2 2004/12/20 12:06:24 tsuruoka * change the data * * Revision 1.1 2004/07/16 13:40:42 tsuruoka * init * */