-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathserver.py
502 lines (451 loc) · 21 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
#### MEMORY-BASED SHALLOW PARSER ######################################################################
# Copyright (c) 2003-2010 University of Antwerp, Belgium and Tilburg University, The Netherlands
# Vincent Van Asch <vincent.vanasch@ua.ac.be>, Tom De Smedt <tom@organisms.be>
# License: GNU General Public License, see LICENSE.txt
### SERVER ###########################################################################################
# Functionality for starting TiMBL and MBT localhost servers at the ports defined in config.py.
# Since TiMBL, MBT and MBLEM require a lot of lookup data
# (they are memory-based instead of rule-based - see the data files in /models),
# it is better to load the data once in a server process instead of every time you run a script.
# The Server class can also be used to easily set up your own TiMBL servers in Python.
import sys, os, tempfile, subprocess, signal, socket, atexit, time
import config
import client
from config import LOCALHOST, WINDOWS
#--- SERVER ------------------------------------------------------------------------------------------
# Paths to the TiMBL and MBT executables (as configured in config.py).
# These are the programs that get launched in server-mode.
TIMBL = config.paths['timbl']
MBT = config.paths['mbt']

# A common mistake is to leave the config.py path at "Timbl" when the installed
# executable is called "TimblServer". If only the latter exists, quietly use it.
if os.path.exists(TIMBL + "Server") and not os.path.exists(TIMBL):
    TIMBL = TIMBL + "Server"

# Maximum number of concurrent connections for a multithreaded server.
CONNECTIONS = 100

# Every server runs as a background process (either /Timbl/Timbl or /Mbt/Mbt).
# Its process id is needed to terminate it afterwards.
# The id is kept in a pid-file inside the system's temporary folder.
tmp = tempfile.gettempdir()

def _pidfile(server, port):
    """ Returns the path of the pid-file for the server with the given name and port.
    """
    filename = 'mbsp_%s_%s.pid' % (str(port), server)
    return os.path.join(tmp, filename)

# To find out whether a server is up and running, a sample request is sent to it
# and an answer is expected within the given number of seconds.
# See also Server.started:
PING_REQUEST = 'x ?'
PING_TIMEOUT = 3
# The events.server dict in config.py defines functions to run when a server's state changes.
# If the given event name (e.g. "on_start_server") is not None,
# its associated function is applied to the server that fired the event.
def _handle_event(name, server):
    """ Fires the server event with the given name (e.g. "on_start") for the given server.
        The handler is looked up in config.events['server'];
        nothing happens if no handler is defined for the name (missing or None).
    """
    handlers = config.events.get('server', {})
    callback = handlers.get(name, None)
    if callback is None:
        return
    callback(server)
class ServerError(Exception):
    """ Base class for all errors raised by this module.
    """

class ServerTimeoutError(ServerError):
    """ Raised by Server.start() when the server is not responding in time.
    """

class ServerResponseError(ServerError):
    """ Raised by Server.started when the server's answer to a ping differs from the expected answer.
    """
class Server:
    """ A TiMBL or MBT executable running in server-mode at a given host and port.
    """

    def __init__(self, name, host=LOCALHOST, port=6060, process=TIMBL, ping=None, features={}, **kwargs):
        """ Starts a TiMBL or MBT server.
            Requests can be sent to the server with a Client object (see Client.tag() function).
            - name     : a unique name for the server.
            - host     : localhost by default.
            - port     : the port at the host through which tcp communication is established.
            - process  : either TIMBL or MBT, the executable to start.
            - features : options to configure the server process, see the TiMBL manual.
                         Extra keyword arguments are merged into the features.
            - ping     : a (request, response)-tuple to check whether the server is responding.
            If ping=None, any answer from the server is accepted.
            However, this is unsafe because it might be an other server running at the desired port.
        """
        self.group    = None     # The Servers collection this server is registered in.
        self.name     = name     # The name of the server.
        self.host     = host     # The host (usually localhost).
        self.port     = port     # The port for tcp communication with the client.
        self.process  = process  # Either TIMBL or MBT.
        # The given dict is copied, so the shared default {} is never modified.
        self.features = dict(features, **kwargs)
        self.ping     = ping     # A (request, response)-tuple for testing.
        self._process = None     # The subprocess.Popen object once started.

    @property
    def pid(self):
        """ Returns the server process id, or None if it can not be determined.
            The pid is retrieved from a temporary file generated by TiMBL.
        """
        # Note: Server._process.pid is not used here (see Server.stop()).
        # We obtain the id from the TiMBL pidfile command option (see Server._program).
        path = _pidfile(self.name, self.port)
        try:
            f = open(path)
        except (IOError, OSError):
            # No pid file (yet, or anymore).
            return None
        try:
            return int(f.read().strip())
        except (IOError, OSError, ValueError):
            # Unreadable, empty or garbled pid file.
            return None
        finally:
            f.close()

    def _clear_pid(self):
        # Removes the temporary pid file.
        # This ensures that we don't try to kill an unrelated process in Server.stop().
        try:
            os.remove(_pidfile(self.name, self.port))
        except OSError:
            # Best effort: the file may simply not exist.
            pass

    @property
    def version(self):
        """ Returns the version string reported by the executable (polled with the -v option).
            For TIMBL this is the second word on the first line of output, otherwise the last word.
        """
        p = subprocess.Popen(self.process + " -v", shell=True, bufsize=-1,
             stdin = subprocess.PIPE,
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
            # close_fds=True can't be combined with redirected handles on Windows before Python 3.7.
            close_fds = not WINDOWS,
            # Get text instead of bytes on Python 3.
            universal_newlines = True)
        # communicate() closes stdin, reads all output and waits for the process,
        # so no pipes or child processes are left dangling.
        output = p.communicate()[0]
        words = output.strip().split("\n")[0].split(" ")
        if self.process == TIMBL and len(words) > 1 and words[1]:
            return words[1]
        return words[-1]

    @property
    def _program(self):
        # Returns a list of arguments that can be passed to subprocess.Popen().
        # For example:
        # s = Server("chunk", port=6061, process=MBT, features={"-s":"chunk.data"})
        # s._program => ["/Mbt/Mbt", "-S", "6061", "-s", "chunk.data", "-C", "100", "--pidfile=..."]
        # (the order of the options is not guaranteed).
        # See the TiMBL manual for all available features.
        # -C restricts the number of concurrent connections to a TiMBL server.
        # -S tells TiMBL to run in server-mode at a given port.
        # -f reads a file with training data to process.
        # -i reads a previously trained file.
        # -s is used for training files with whitespace-delimited exemplar weights.
        # --pidfile= defines a file to store the server process id in.
        options = {}
        for k, v in self.features.items():
            k = str(k)
            if not k.startswith(('-', '+')):
                # Option names may be given without the leading dash.
                k = '-' + k
            options[k] = v
        # TiMBL version 6.1.5 uses -pidfile, version 6.3.0 (correctly) uses --pidfile.
        # Same for MBT 3.1 and 3.2 series.
        # For any other executable we assume the modern --pidfile spelling
        # (the version is only polled for the two known executables).
        pidfile_option = '--pidfile'
        if self.process in (TIMBL, MBT):
            version = self.version
            if self.process == TIMBL:
                legacy = version.startswith(('5.', '6.1.'))
            if self.process == MBT:
                legacy = version.startswith(('3.1', '3.2.'))
            if legacy:
                pidfile_option = '-pidfile'
        # Options set explicitly in Server.features take precedence over these defaults.
        options.setdefault('-S', self.port)
        options.setdefault('-C', CONNECTIONS)
        options.setdefault('%s=%s' % (pidfile_option, _pidfile(self.name, self.port)), None)
        args = [self.process]
        for k, v in options.items():
            args.extend((k, v))
        # Options with a None value are flags: only their name is passed.
        return [str(x) for x in args if x is not None]

    @property
    def program(self):
        """ Yields the shell command string that is used to start the server process.
        """
        return " ".join(self._program)

    def start(self, timeout="default"):
        """ Attempts to start the server at the given host and port.
            If it is already up and running, does nothing.
            If it is not running after timeout seconds, raises a ServerTimeoutError.
            If the executable can not be launched, raises a ServerError.
            Returns True otherwise.
        """
        if timeout == "default":
            timeout = config.timeout # 60
        if self.started:
            return True
        # Server startup info is written to os.devnull (e.g. nowhere),
        # Popen has the habit of waiting for more output even if timeout is exceeded.
        devnull = open(os.devnull, 'w')
        try:
            try:
                # Create the server process from Server._program.
                # close_fds=True can't be combined with redirected handles on Windows before Python 3.7.
                self._process = subprocess.Popen(self._program,
                    close_fds = not WINDOWS,
                       stderr = devnull,
                       stdout = devnull)
            except Exception:
                # We end up here when there is no working executable for TiMBL/MBT.
                s = "can't start %s for server '%s'" % (os.path.basename(self.process), self.name)
                raise ServerError(s)
        finally:
            # The child process has its own copy of the handle, ours can be released.
            devnull.close()
        if config.verbose:
            sys.stderr.write("Starting server '%s' at %s:%s" % (self.name, self.host, str(self.port)))
        # Wait until the server is done processing all training data.
        # The server is polled once more after the last sleep,
        # so a server that comes up during the final second is not reported as timed out.
        t = 0
        running = self.started
        while not running and t < timeout:
            time.sleep(1.0); t += 1.0
            if config.verbose:
                sys.stderr.write('.')
            running = self.started
        if config.verbose:
            sys.stderr.write('\n')
        if not running:
            s = "couldn't start server '%s' in %s seconds" % (self.name, str(timeout))
            if self._process.poll() is not None:
                # - poll() returns None as long as the process is alive (e.g. still loading).
                #   Here the process has already exited while we were waiting for a response,
                #   so something might be wrong with the command line options we sent.
                # - Another reason might be insufficient memory, for example
                #   a virtual machine (e.g. Windows XP + cygwin) with little memory assigned to it.
                s += ",\ncheck the features used to start the server:\n%s" % self.program
            raise ServerTimeoutError(s)
        _handle_event('on_start', self)
        return True

    def stop(self):
        """ Attempts to stop the server.
            Returns True when the server is stopped, False otherwise
            (i.e. the server is responding but its process id is unknown).
        """
        pid = self.pid
        self._clear_pid()
        if not self.started:
            # The server is not running.
            return True
        if pid is None:
            # This only works if we still had the pid stored in a temporary file.
            return False
        # Note: Popen.kill() (Python 2.6+) is not used here. According to the original authors,
        # self._process.pid differs from the pid in the pid-file (it is the id of a parent
        # shell process), so Popen.kill() would leave the actual server running.
        if WINDOWS:
            # Use ctypes on Windows platforms.
            import ctypes
            PROCESS_TERMINATE = 1
            handle = ctypes.windll.kernel32.OpenProcess(PROCESS_TERMINATE, False, pid)
            ctypes.windll.kernel32.TerminateProcess(handle, -1)
            ctypes.windll.kernel32.CloseHandle(handle)
        else:
            # os.kill() works in Python 2.4+ on Unix and Mac OS X.
            os.kill(pid, signal.SIGTERM)
        time.sleep(0.1)
        self._process = None
        _handle_event('on_stop', self)
        return True

    @property
    def started(self):
        """ Yields True if the server is up and running.
            Yields False if the server is down or busy starting up.
            A sample query is sent to the server and compared to the expected answer.
            A ServerResponseError is raised if the answer differs from what is expected.
        """
        # If Server.ping=None, just send something to the server and accept whatever response.
        # If Server.ping is given, it is a (question, answer)-tuple,
        # and the server's response must match answer or a ServerResponseError is raised.
        request, expected = self.ping or (PING_REQUEST, None)
        ping = None
        try:
            try:
                ping = self.client()
                answer = ping.tag(request, timeout=PING_TIMEOUT)
            except client.ClientError:
                # Client can't connect to server, server must be down or loading.
                return False
            if expected is not None and expected.strip() != answer.strip():
                # Ping request and response do not match.
                s = "unexpected answer from server '%s':\n'%s' instead of\n'%s'" % (self.name, answer, expected)
                raise ServerResponseError(s)
            return True
        finally:
            # Care is taken to release the ping client after each attempt,
            # also when an exception (whose traceback references this frame) is raised.
            ping = None

    def client(self):
        """ Returns a Client instance, used to send Server.ping requests.
            It works with raw data (e.g. no request and response formatters).
        """
        return client.Client(self.host, self.port, self.name)

    def __repr__(self):
        return "<server name='%s', host='%s', port='%s' process='%s'>" % (
            self.name,
            self.host,
            self.port,
            os.path.basename(self.process)
        )
class Timbl(Server):
    """ A Server that always runs the TIMBL executable.
    """

    def __init__(self, name, host=LOCALHOST, port=6060, ping=None, features={}, **kwargs):
        """ Takes the same arguments as Server, except for process (which is always TIMBL).
        """
        kwargs["process"] = TIMBL
        # ping and features must be passed by keyword:
        # the fourth positional parameter of Server.__init__() is process, not ping.
        # Passing them positionally shifts both one slot and makes the call fail
        # with "multiple values for keyword argument 'process'".
        Server.__init__(self, name, host, port, ping=ping, features=features, **kwargs)
class Mbt(Server):
    """ A Server that always runs the MBT executable.
    """

    def __init__(self, name, host=LOCALHOST, port=6060, ping=None, features={}, **kwargs):
        """ Takes the same arguments as Server, except for process (which is always MBT).
        """
        kwargs["process"] = MBT
        # ping and features must be passed by keyword:
        # the fourth positional parameter of Server.__init__() is process, not ping.
        # Passing them positionally shifts both one slot and makes the call fail
        # with "multiple values for keyword argument 'process'".
        Server.__init__(self, name, host, port, ping=ping, features=features, **kwargs)
#--- SERVERS -----------------------------------------------------------------------------------------
class Servers(list):
    """ A list of Server objects that can be started and stopped as a group.
        Each server is also available as an attribute by its name.
    """

    def __init__(self, start=False, stop=False):
        """ A list of servers that can be started and stopped as a group.
            If start=True, automatically starts each registered server.
            If stop=True, automatically stops all the servers when Python exits.
        """
        self._start = start
        self._stop  = stop
        # When Python exits, call Servers.stop() if stop=True.
        # Each Servers object registers its own handler, using the public atexit API only
        # (the private atexit._exithandlers list does not exist in Python 3).
        atexit.register(self._exithandler)

    def _exithandler(self):
        if self._stop is True:
            self.stop()

    def append(self, server):
        """ Appends the given server and automatically starts it if necessary.
            Raises a ServerError if the server's name or port is already in use in this list.
        """
        for s in self:
            if server.name == s.name:
                raise ServerError("name '%s' already taken" % str(s.name))
            if server.port == s.port:
                raise ServerError("port %s already taken" % str(s.port))
        list.append(self, server)
        # If the server is owned by another Servers group,
        # remove it from that list and set ownership to this list.
        # The recorded group can be stale (e.g. the server was taken out with list.remove()),
        # so only remove it from a different list that still contains it.
        previous = server.group
        if previous is not None and previous is not self and server in previous:
            previous.remove(server)
        server.group = self
        # Fire the register event.
        _handle_event('on_register', server)
        # Automatically start the server if requested.
        if self._start is True:
            server.start()

    def __delitem__(self, index):
        # Stops the deleted server(s) if stop=True and releases ownership,
        # so they can be appended to another Servers group afterwards.
        removed = self[index]
        if not isinstance(index, slice):
            removed = [removed]
        for server in removed:
            if self._stop is True:
                server.stop()
            server.group = None
        list.__delitem__(self, index)

    def register(self, *args, **kwargs):
        """ Appends a new server from the given arguments (see Server).
        """
        self.append(Server(*args, **kwargs))

    def __getattr__(self, name):
        # Servers are available as attribute by their name.
        for server in self:
            if server.name == name:
                return server
        raise AttributeError("'Servers' object has no attribute '%s'" % name)

    def find(self, name):
        """ Returns the server with the given name, None otherwise.
        """
        for server in self:
            if server.name == name:
                return server
        return None

    def start(self, timeout="default"):
        """ Starts all registered servers.
        """
        if timeout == "default":
            timeout = config.timeout # 60
        for server in self:
            server.start(timeout)

    def stop(self):
        """ Stops all registered servers.
        """
        for server in self:
            server.stop()

    def started(self, server=None):
        """ Yields True if all registered servers are up and running.
            If server name is given, returns True if that specific server is running.
        """
        if server is not None:
            return getattr(self, server).started
        for server in self:
            if not server.started:
                return False
        return True
#--- SERVER TOOLS ------------------------------------------------------------------------------------
def force_quit(processes=(TIMBL, MBT)):
    """ Terminates all running TiMBL and MBT servers.
        This is useful if we lost the pid's of running server processes.
        This can happen if the servers were started in a previous Python session,
        and the temporary files have meanwhile been deleted.
        A process is terminated when one of the columns in its "ps -A" record
        equals one of the given executable paths.
        Availability: Unix (does nothing on Windows).
    """
    if WINDOWS:
        return
    # universal_newlines=True yields text instead of bytes on Python 3.
    listing = subprocess.Popen(('ps', '-A'),
                    stdout = subprocess.PIPE,
        universal_newlines = True).communicate()[0]
    for record in listing.split('\n'):
        columns = record.split()
        if not any(column in processes for column in columns):
            continue
        # A column in this process record has the path to TiMBL or MBT in it.
        # Terminate it. Process id is the first item in the list.
        # The signal is sent once per process, even if the path occurs in several columns.
        os.kill(int(columns[0]), signal.SIGTERM)
        time.sleep(0.1)
def is_free(port):
    """ Returns True if the given port number is free.
        A port is considered free when no tcp connection to it can be established at LOCALHOST.
    """
    s = None
    try:
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((LOCALHOST, port))
        except socket.error:
            # Nothing is accepting connections at this port.
            return True
        return False
    finally:
        # Always release the socket, also when the connection attempt failed.
        if s is not None:
            s.close()
def port_scan(start=6061, range=100, n=4, exclude=[]):
    """ Returns a list of n free ports, scanning upward from the given start number
        and examining at most range candidates (start up to, not including, start+range).
        Ports in the exclude list are reserved: they are never returned, even when free.
        Raises a ServerError if fewer than n free ports are found.
    """
    found = []
    candidate = start
    end = start + range
    # Stop as soon as enough ports are collected or the scan window is exhausted.
    while candidate < end and len(found) < n:
        if candidate not in exclude and is_free(candidate):
            found.append(candidate)
        candidate += 1
    if len(found) != n:
        raise ServerError("not enough free ports in %d-%d range" % (start, end))
    return found
#### MBSP SERVERS #####################################################################################
# Servers always start at localhost (e.g. this script is run on the server).
# See the TiMBL manual for all options.
# Location of the training data.
# You may want to change these if you have your own training data.
# You may need to tweak the command options below if you use newer versions of TiMBL and MBT.
# MODELS is first the folder with the training data,
# and is then rebound to a dict of task name => path of the data file in that folder.
# (The paths are joined while MODELS still refers to the folder.)
MODELS = config.paths['models']
MODELS = dict(
    (task, os.path.join(MODELS, filename)) for task, filename in (
        (      "chunk", "train.tagchunker.settings"),
        (      "lemma", "em.data"),
        (   "relation", "train.instancebase"),
        ("preposition", "pp.instances"),
    )
)
#-----------------------------------------------------------------------------------------------------
# See the TiMBL manual for an explanation of all the options.
# The servers start and stop automatically if configured this way in config.py.
active_servers = Servers(start=config.autostart, stop=config.autostop)

# Server() arguments for each of the known server names.
# The ping is a (request, expected response)-tuple used to verify that the right server is answering.
_SERVER_SPECS = {
    'chunk': dict(
            name = 'chunk',
         process = MBT,
            ping = (
                'Make a red oval .',
                'Make/VB/I-VP a/DT/I-NP red/JJ/I-NP oval/NN/I-NP ././O <utt>\n'),
        features = {
            '-s' : MODELS['chunk'],
        }),
    'lemma': dict(
            name = 'lemma',
         process = TIMBL,
            ping = (
                'c = = = = = = = = = = = = = = = = = = = B ?\n',
                'CATEGORY {ABB-X}\n'),
        features = {
            '-f' : MODELS['lemma'],
            '-m' : 'M',
            '-w' : 2,
            '-k' : 5
        }),
    'relation': dict(
            name = 'relation',
         process = TIMBL,
            ping = (
                'c 1 0 0 Make VB - - - Make VB VP - oval NN NP CC - ?\n',
                'CATEGORY {NP-OBJ}\n'),
        features = {
            '-i' : MODELS['relation'],
            '-a' : 0,
            '-m' : 'M',
            '-L' : 2,
            '-w' : 1,
            '-k' : 19,
            '-d' : 'IL',
            '-v' : 's'
        }),
    'preposition': dict(
            name = 'preposition',
         process = TIMBL,
            ping = (
                'c 2 0 10 - Vinken NNP board NN as director NN 2 0 ?\n',
                'CATEGORY {n-NP} DISTRIBUTION { n-NP 2.27722 } DISTANCE {0}\n'),
        features = {
            '-f' : MODELS['preposition'],
            "-m" : "M",
            "-L" : 2,
            "-w" : 0,
            "-k" : 11,
            "-d" : "IL",
            "+v" : "di+db"
        }),
}

# Register the servers enabled in config.py, each at its own configured port.
# zip() stops at the shortest list, so surplus ports are ignored; unknown names are skipped.
for name, port in zip(config.servers, config.ports):
    if name in _SERVER_SPECS:
        active_servers.append(Server(port=port, **_SERVER_SPECS[name]))