Get this project running in 2022 #2

Open · wants to merge 2 commits into base: master
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,4 +1,5 @@
.idea
.DS_Store
*~
-*.pyc
+*.pyc
+.venv
15 changes: 9 additions & 6 deletions README.md
@@ -7,39 +7,42 @@ Its most unusual feature is its prediction of semantic annotations based on a fi

![Screenshot of ATC-Anno](media/gui.png)


## Tutorial

To demonstrate the workflow and features of the tool, we have created a brief video tutorial.
The video has optional subtitles.

- Repository location: `media/tutorial.mp4`
- [Download the tutorial](media/tutorial.mp4?raw=true) (you may have to right click and use "Save link as...")
- [Watch on Youtube](https://youtu.be/4me6htnJIBk)


## Requirements
-ATC-Anno is written in Python 2.7 and requires the following additional packages:

+ATC-Anno is written in Python 3 and requires the following additional packages:

- [wxPython](https://www.wxpython.org/)
- [pyaudio](https://people.csail.mit.edu/hubert/pyaudio/)
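
The `.venv` entry added to `.gitignore` in this PR hints that the intended setup is a local virtual environment (e.g. `python3 -m venv .venv`) with wxPython and pyaudio installed into it; the PR does not spell this out, so treat that as an assumption. Below is a minimal sketch for verifying that the two packages are importable before launching the tool, assuming the standard import names `wx` and `pyaudio`:

```python
# Sketch: check that ATC-Anno's third-party dependencies are available.
# Assumes the usual import names: wxPython -> "wx", pyaudio -> "pyaudio".
import sys

missing = []
for module in ("wx", "pyaudio"):
    try:
        __import__(module)
    except ImportError:
        missing.append(module)

if missing:
    sys.exit("Missing required packages: " + ", ".join(missing))
print("Dependencies found on Python %d.%d" % sys.version_info[:2])
```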

### Finite-state Grammar

To access the **_Get XML_** feature, you need to provide a finite-state grammar that uses the [OpenFST](http://www.openfst.org/) format.
The default location for the grammar is `data/grammars/default.fst`
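
A minimal sketch for confirming that a grammar is in place before using the **_Get XML_** feature; only the path comes from this README, the check itself is illustrative and mirrors the warning style used elsewhere in the project:

```python
# Sketch: warn if the default finite-state grammar is missing.
from os import path

GRAMMAR_FILE = path.join("data", "grammars", "default.fst")

if not path.isfile(GRAMMAR_FILE):
    print("WARNING: %s does not exist" % GRAMMAR_FILE)
```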

### Concept Extraction
-The conversion of airline names to callsign representations (e.g. "Lufthansa" to "DLH") is based on the airline dictionary found at `data/airlines/callsigns.txt`. You can expand the file to include all airlines that occur in your grammar.

+The conversion of airline names to callsign representations (e.g. "Lufthansa" to "DLH") is based on the airline dictionary found at `data/airlines/callsigns.txt`. You can expand the file to include all airlines that occur in your grammar.
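
The project ships its own loader for this file (`loadAirlineCallsigns` in `tools/FileTools.py`, imported in the diffs below). As an illustration only, such a dictionary could be parsed along the following lines, assuming a whitespace-separated `CODE airline name` layout per line; that layout is an assumption, not something this README specifies:

```python
# Illustrative sketch, not the project's loader: read an airline dictionary
# mapping spoken airline names to ICAO callsign codes, e.g. "Lufthansa" -> "DLH".
# The "CODE airline name" per-line layout is an assumed format.
def load_callsigns(filename):
    callsigns = {}
    with open(filename) as handle:
        for line in handle:
            parts = line.strip().split(None, 1)
            if len(parts) == 2:
                code, airline = parts
                callsigns[airline.lower()] = code
    return callsigns

# Example: load_callsigns("data/airlines/callsigns.txt").get("lufthansa") == "DLH"
```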

## Attribution

This software is published under an [MIT License](LICENSE).

If you use it in your research or work, please cite the following publication:

[Schulder, Marc](http://marc.schulder.info) and O'Mahony, Johannah and Bakanouski, Yury and [Klakow, Dietrich](https://www.lsv.uni-saarland.de/people/dietrich-klakow/) (2020). **["ATC-Anno: Semantic Annotation for Air Traffic Control with Assistive Auto-Annotation"](https://aclanthology.org/2020.lrec-1.783/)**. _Proceedings of the 12th Conference on Language Resources and Evaluation (LREC)_, pages 6375–6380, Marseille, France, 13 May 2020.


### BibTex
-```

+```bibtex
@InProceedings{schulder2020atcAnno,
author = {Schulder, Marc and O'Mahony, Johannah and Bakanouski, Yury and Klakow, Dietrich},
title = {{ATC-Anno}: Semantic Annotation for Air Traffic Control with Assistive Auto-Annotation},
6 changes: 3 additions & 3 deletions annotator.py
100644 → 100755
@@ -122,7 +122,7 @@ def updateAnnotationStatus(self, filename):
else:
self.SetItemTextColour(i, 'red')
else:
print "WARNING: %s does not exist" % filename
print("WARNING: %s does not exist" % filename)


class WaveformPanel(wx.Window):
@@ -249,12 +249,12 @@ def Draw(self, dc, data):
'go_around': [],
'navigation_own': [],
}
-keywordsXml = tagsXML + specialTagsXML + cmdsXML.keys()
+keywordsXml = tagsXML + specialTagsXML + list(cmdsXML.keys())
autocompXML = ['<callsign><airline></airline><flightnumber></flightnumber></callsign>']
for tag in tagsXML:
autocompXML.append('<{0}> </{0}>'.format(tag))

-for cmd, content in cmdsXML.iteritems():
+for cmd, content in cmdsXML.items():
elems = ['<command="{0}"> '.format(cmd)]
for tag in content:
if len(tag.strip()) > 0:
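
The annotator.py changes above follow the standard Python 2 to 3 pattern: `print` becomes a function, `dict.iteritems()` becomes `dict.items()`, and `dict.keys()` now returns a view object rather than a list, so concatenating it with a list needs an explicit `list()` call. A small standalone illustration, with made-up tag and command names:

```python
# Python 3: dict.keys()/items() return views, not lists.
cmdsXML = {"climb": ["altitude"], "descend": ["altitude"]}   # made-up example data
tagsXML = ["callsign", "flightnumber"]

# tagsXML + cmdsXML.keys()          # TypeError under Python 3
keywordsXml = tagsXML + list(cmdsXML.keys())                 # works on 2 and 3

for cmd, content in cmdsXML.items():                         # .iteritems() is gone in 3
    print(cmd, content)
```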
17 changes: 9 additions & 8 deletions tools/GenerateConcept.py
@@ -4,7 +4,8 @@
import itertools
from os import path

-from FileTools import loadAirlineCallsigns
+from .FileTools import loadAirlineCallsigns
+from functools import reduce

reOpenTag = re.compile('^<([a-z_]+)>$')
reOpenAnyTag = re.compile('^<([a-z_="]+)>$')
@@ -437,7 +438,7 @@ class UtteranceUnit(object):
victor='V', whisky='W', xray='X',
yankee='Y', zoulou='Z')
LETTERWORDS = dict()
-for word, letter in LETTERS.iteritems():
+for word, letter in LETTERS.items():
LETTERWORDS.setdefault(letter, list()).append(word)

SINGLE_DIGITS = dict(one='1', two='2', three='3',
@@ -526,7 +527,7 @@ def parseNumber(self, numberFrame, isFrame=True):
items.insert(nxt, 'zero')
items.pop(i)

-for multiple, multiplier in self.MULTIPLIERS.iteritems():
+for multiple, multiplier in self.MULTIPLIERS.items():
while multiple in items:
i = items.index(multiple)
nxt = i + 1
@@ -537,7 +538,7 @@ def parseNumber(self, numberFrame, isFrame=True):
items.pop(i)

# Tens
-for tenner, tenval in self.TENS_DIGITS.iteritems():
+for tenner, tenval in self.TENS_DIGITS.items():
while tenner in items:
i = items.index(tenner)
nxt = i + 1
@@ -547,7 +548,7 @@ def parseNumber(self, numberFrame, isFrame=True):
elif items[nxt] not in self.SINGLE_DIGITS or items[nxt] == 'zero':
items.insert(nxt, 'zero')
# Teen values (11-19)
-for teen, teenval in self.TEEN_DIGITS.iteritems():
+for teen, teenval in self.TEEN_DIGITS.items():
while teen in items:
i = items.index(teen)
items[i] = teenval
@@ -1221,10 +1222,10 @@ def __str__(self):
return string

def getCallsign(self):
-print self.callsign
+print(self.callsign)

def getCommands(self):
-print self.commands
+print(self.commands)

def addCommand(self, command):
self.commands.append(command)
@@ -1270,7 +1271,7 @@ def getAMANStrings(self, mode, getConfidence=True, getSubconfidences=True):

# Prepare string construction
cmd = [self.callsign.callsign, NO_CONCEPT]
-frames['total'] = itertools.chain.from_iterable(frames.values())
+frames['total'] = itertools.chain.from_iterable(list(frames.values()))

# Compute confidences
items = list()
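
Two Python 3 changes drive the `tools/GenerateConcept.py` import edits: implicit relative imports were removed, so sibling modules need the explicit `from .FileTools import ...` form, and `reduce()` is no longer a builtin, hence `from functools import reduce`. Note that the leading dot also means the script has to be run as part of the `tools` package (e.g. `python -m tools.GenerateConcept`), which is an assumption about how the PR intends it to be invoked. A minimal illustration of the `reduce` side:

```python
# Python 3: reduce() lives in functools instead of being a builtin.
from functools import reduce

# Fold single digits into one number, e.g. ["1", "2", "3"] -> 123.
digits = ["1", "2", "3"]
value = reduce(lambda acc, d: acc * 10 + int(d), digits, 0)
print(value)  # 123
```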
28 changes: 14 additions & 14 deletions tools/Text2XML.py
@@ -15,8 +15,8 @@
from warnings import warn
from functools import partial
from operator import itemgetter
-from vocabularyHandlers import findCommandVocabularies
-from FileTools import loadAirlineCallsigns
+from .vocabularyHandlers import findCommandVocabularies
+from .FileTools import loadAirlineCallsigns


CONF_SEPARATOR = ':'
@@ -137,7 +137,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca

# Prepare auxiliary information
originalSentence = ' '.join(sentence)
-sentence = map(self._lower_, sentence)
+sentence = list(map(self._lower_, sentence))

if callsign_whitelist is None:
callsign_whitelist = self.default_callsign_whitelist
@@ -147,7 +147,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca
filtered_sentence = set(words)
filtered_sentence.difference_update(self.single_digits)
filtered_sentence.difference_update(self.letters)
-for cmd, vocab in cmdVocabularies.iteritems():
+for cmd, vocab in cmdVocabularies.items():
if vocab.isdisjoint(filtered_sentence):
irrelevant_commands.add(cmd)

@@ -191,7 +191,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca
signal(SIGALRM, partial(self.transduceTimerHandler, text=originalSentence))
alarm(timeout)

-bestDistance = sys.maxint
+bestDistance = sys.maxsize

# Start parsing
i = 0
@@ -253,7 +253,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca
complete_parses.append((output, '', skips, tagcost, list(), -1, skipped_words, False))
else:
considered_word = False
-for outnode, transitions in branches.iteritems():
+for outnode, transitions in branches.items():
if outnode != 'is_terminal':
for transition in transitions:
nodeword = transition['outword'].lower()
@@ -376,7 +376,7 @@ def _sortParses_(complete_parses, incomplete_parses=None):
if len(complete_parses) > 0:
cost_complete = sorted_complete[0][3] # Complete cost is skips
else:
-cost_complete = sys.maxint
+cost_complete = sys.maxsize

# Sort incomplete parses
sorted_incomplete = []
@@ -399,7 +399,7 @@ def _sortParses_(complete_parses, incomplete_parses=None):
cost_incomplete = sorted_incomplete[0][3] + len(
sorted_incomplete[0][1]) # Incomplete cost is skips plus remaining unparsed words
else:
-cost_incomplete = sys.maxint
+cost_incomplete = sys.maxsize

# Pick better parse set
if cost_complete <= cost_incomplete:
@@ -455,7 +455,7 @@ def _closeTags_(nodeword_tag, open_tags):
return remaining_open_tags, closed_tags

def addMissingWords(self, sentence, base_xml):
-sentence = map(str.lower, sentence)
+sentence = list(map(str.lower, sentence))
xmlSentence = base_xml.strip().split()
seenWords, remainingWords, xmlSentence = self._addMissingWords(sentence, xmlSentence)
return ' '.join(seenWords)
@@ -512,10 +512,10 @@ def removeOOGWords(sentence, vocabulary, verbose=False):
defluffedSentence.append(token)
if verbose:
if len(sentence) == len(defluffedSentence):
print "No OOG words in sentence:", ' '.join(sentence)
print("No OOG words in sentence:", ' '.join(sentence))
else:
print "original sentence: ", ' '.join(sentence)
print "Defluffed sentence:", ' '.join(defluffedSentence)
print("original sentence: ", ' '.join(sentence))
print("Defluffed sentence:", ' '.join(defluffedSentence))
return defluffedSentence


@@ -567,8 +567,8 @@ def getGrammarName(self):

def getVocabularyFromGrammar(self):
words = set()
-for branches in self.grammar.itervalues():
-for outnode, transitions in branches.iteritems():
+for branches in self.grammar.values():
+for outnode, transitions in branches.items():
if outnode != 'is_terminal':
for transition in transitions:
nodeword = transition['outword'].lower()
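
The remaining `tools/Text2XML.py` edits follow the same porting pattern: `map()` returns a lazy iterator in Python 3, so it is wrapped in `list()` wherever the result is indexed or reused, and `sys.maxint` no longer exists, with `sys.maxsize` taking its place as the "no candidate yet" sentinel. A short standalone illustration:

```python
# Python 3: map() is lazy, and sys.maxint was replaced by sys.maxsize.
import sys

sentence = ["Lufthansa", "One", "Two", "Three"]
words = list(map(str.lower, sentence))   # materialise once; len()/indexing need a list
print(words[0], len(words))              # lufthansa 4

bestDistance = sys.maxsize               # sentinel meaning "no candidate yet"
print(bestDistance > 10**9)              # True (at least 2**31 - 1, usually 2**63 - 1)
```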