Skip to content

Commit

Permalink
Merge pull request #4 from pitchforks/issue-3
Browse files Browse the repository at this point in the history
fix for issue #3: implemented low-memory processing of huge PGN files using iterators
  • Loading branch information
renatopp committed Jun 23, 2014
2 parents 53841ad + 7707a66 commit 12a8188
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 1 deletion.
14 changes: 13 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@ yaml). The basic usage::
print pgn.loads(pgn_text) # Returns a list of PGNGame
print pgn.dumps(pgn_game) # Returns a string with a pgn game

**Note**:

The basic example above does not scale to huge files (hundreds of megabytes
or more): reading the whole file at once is slow and memory-hungry, and
pgn.loads(big_string) needs even more memory on top of that.

To process huge PGN files, do it like this::

    import pgn

    for game in pgn.GameIterator("bigfile.pgn"):
        print game # or do something else with it

**Features**:

Expand Down Expand Up @@ -47,4 +59,4 @@ yaml). The basic usage::
23. Ne5 Rae8 24. Bxf7+ Rxf7 25. Nxf7 Rxe1+ 26. Qxe1 Kxf7 27. Qe3 Qg5 28. Qxg5
hxg5 29. b3 Ke6 30. a3 Kd6 31. axb4 cxb4 32. Ra5 Nd5 33. f3 Bc8 34. Kf2 Bf5
35. Ra7 g6 36. Ra6+ Kc5 37. Ke1 Nf4 38. g3 Nxh3 39. Kd2 Kb5 40. Rd6 Kc5 41. Ra6
Nf2 42. g4 Bd3 43. Re6 1/2-1/2
Nf2 42. g4 Bd3 43. Re6 1/2-1/2
67 changes: 67 additions & 0 deletions pgn.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,73 @@ def dumps(self):
def __repr__(self):
    """Return a short debugging label naming the white and black players."""
    players = (self.white, self.black)
    return '<PGNGame "%s" vs "%s">' % players

class GameStringIterator(object):
    """
    Iterator over the games of a PGN file, each yielded as one multiline
    string.

    The file is read line by line, so only the lines of the game currently
    being assembled are held in memory. A new game starts at every line that
    begins with ``[Event``; everything up to (but excluding) the next such
    line belongs to the current game. Blank lines before the first content
    line are ignored, and a file without any content yields nothing.

    The underlying file is closed automatically once the iterator is
    exhausted; call ``close()`` to release it earlier.
    """

    def __init__(self, file_name):
        """
        Args:
            file_name (str): PGN file name

        Raises:
            IOError: if the file cannot be opened.
        """
        self.file_name = file_name
        # A file object is its own line iterator, so it can be stored
        # directly and closed later.
        self.file_iter = open(self.file_name)
        # Lines collected so far for the game that will be yielded next.
        self.game_lines = []
        # Set once the file has been fully consumed (or closed early).
        self.end = False

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def close(self):
        """Stop the iteration and close the underlying file."""
        self.end = True
        self.file_iter.close()

    def __next__(self):
        """
        Return the next game as a single string.

        Raises:
            StopIteration: when there are no more games in the file.
        """
        if self.end:
            raise StopIteration

        for line in self.file_iter:
            if not self.game_lines:
                # Nothing collected yet: skip leading blank lines so they
                # are never reported as a (bogus) game of their own.
                if line.strip():
                    self.game_lines.append(line)
                continue
            if line.startswith("[Event"):
                # Header of the following game: hand out the finished game
                # and keep the header as the first line of the next one.
                game_str = "".join(self.game_lines)
                self.game_lines = [line]
                return game_str
            self.game_lines.append(line)

        # End of file: release the handle, then flush what is left, if any.
        self.close()
        if not self.game_lines:
            raise StopIteration
        game_str = "".join(self.game_lines)
        self.game_lines = []
        return game_str

    # Python 2 spelling of the iterator protocol.
    next = __next__

class GameIterator(object):
    """
    Iterator over the games of a PGN file, each yielded as a parsed game.

    Game strings are pulled one at a time from a ``GameStringIterator`` and
    parsed with ``loads``, so the whole file is never held in memory.
    """

    def __init__(self, file_name):
        """
        Args:
            file_name (str): PGN file name
        """
        self.game_str_iterator = GameStringIterator(file_name)

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def __next__(self):
        """
        Return the next game parsed from the file.

        Raises:
            StopIteration: when the underlying file has no more games.
        """
        # Use next() rather than a for loop: a for loop swallows the inner
        # StopIteration, after which this method would fall through and
        # return None forever instead of ending the iteration.
        game_str = next(self.game_str_iterator)
        # Each string holds one game, so only the first parsed entry is used.
        return loads(game_str)[0]

    # Python 2 spelling of the iterator protocol.
    next = __next__

def _pre_process_text(text):
'''
This function is responsible for removal of end line commentarys
Expand Down

0 comments on commit 12a8188

Please sign in to comment.