Get this project running in 2022 #2

Open · wants to merge 2 commits into base: master
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,4 +1,5 @@
.idea
.DS_Store
*~
-*.pyc
+*.pyc
+.venv
15 changes: 9 additions & 6 deletions README.md
@@ -7,39 +7,42 @@ Its most unusual feature is its prediction of semantic annotations based on a fi

![Screenshot of ATC-Anno](media/gui.png)


## Tutorial

To demonstrate the workflow and features of the tool, we have created a brief video tutorial.
The video has optional subtitles.

- Repository location: `media/tutorial.mp4`
- [Download the tutorial](media/tutorial.mp4?raw=true) (you may have to right click and use "Save link as...")
- [Watch on Youtube](https://youtu.be/4me6htnJIBk)


## Requirements
-ATC-Anno is written in Python 2.7 and requires the following additional packages:

+ATC-Anno is written in Python 3 and requires the following additional packages:

- [wxPython](https://www.wxpython.org/)
- [pyaudio](https://people.csail.mit.edu/hubert/pyaudio/)
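
The `.venv` entry added to `.gitignore` in this PR hints that the intended setup is a local virtual environment (e.g. `python3 -m venv .venv`) with wxPython and pyaudio installed into it; the PR does not spell this out, so treat that as an assumption. Below is a minimal sketch for verifying that the two packages are importable before launching the tool, assuming the standard import names `wx` and `pyaudio`:

```python
# Sketch: check that ATC-Anno's third-party dependencies are available.
# Assumes the usual import names: wxPython -> "wx", pyaudio -> "pyaudio".
import sys

missing = []
for module in ("wx", "pyaudio"):
    try:
        __import__(module)
    except ImportError:
        missing.append(module)

if missing:
    sys.exit("Missing required packages: " + ", ".join(missing))
print("Dependencies found on Python %d.%d" % sys.version_info[:2])
```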

### Finite-state Grammar

To access the **_Get XML_** feature, you need to provide a finite-state grammar that uses the [OpenFST](http://www.openfst.org/) format.
The default location for the grammar is `data/grammars/default.fst`
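
A minimal sketch for confirming that a grammar is in place before using the **_Get XML_** feature; only the path comes from this README, the check itself is illustrative and mirrors the warning style used elsewhere in the project:

```python
# Sketch: warn if the default finite-state grammar is missing.
from os import path

GRAMMAR_FILE = path.join("data", "grammars", "default.fst")

if not path.isfile(GRAMMAR_FILE):
    print("WARNING: %s does not exist" % GRAMMAR_FILE)
```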

### Concept Extraction
-The conversion of airline names to callsign representations (e.g. "Lufthansa" to "DLH") is based on the airline dictionary found at `data/airlines/callsigns.txt`. You can expand the file to include all airlines that occur in your grammar.

+The conversion of airline names to callsign representations (e.g. "Lufthansa" to "DLH") is based on the airline dictionary found at `data/airlines/callsigns.txt`. You can expand the file to include all airlines that occur in your grammar.
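
The project ships its own loader for this file (`loadAirlineCallsigns` in `tools/FileTools.py`, imported in the diffs below). As an illustration only, such a dictionary could be parsed along the following lines, assuming a whitespace-separated `CODE airline name` layout per line; that layout is an assumption, not something this README specifies:

```python
# Illustrative sketch, not the project's loader: read an airline dictionary
# mapping spoken airline names to ICAO callsign codes, e.g. "Lufthansa" -> "DLH".
# The "CODE airline name" per-line layout is an assumed format.
def load_callsigns(filename):
    callsigns = {}
    with open(filename) as handle:
        for line in handle:
            parts = line.strip().split(None, 1)
            if len(parts) == 2:
                code, airline = parts
                callsigns[airline.lower()] = code
    return callsigns

# Example: load_callsigns("data/airlines/callsigns.txt").get("lufthansa") == "DLH"
```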

## Attribution

This software is published under an [MIT License](LICENSE).

If you use it in your research or work, please cite the following publication:

[Schulder, Marc](http://marc.schulder.info) and O'Mahony, Johannah and Bakanouski, Yury and [Klakow, Dietrich](https://www.lsv.uni-saarland.de/people/dietrich-klakow/) (2020). **["ATC-Anno: Semantic Annotation for Air Traffic Control with Assistive Auto-Annotation"](https://aclanthology.org/2020.lrec-1.783/)**. _Proceedings of the 12th Conference on Language Resources and Evaluation (LREC)_, pages 6375–6380, Marseille, France, 13 May 2020.


### BibTex
-```

+```bibtex
@InProceedings{schulder2020atcAnno,
author = {Schulder, Marc and O'Mahony, Johannah and Bakanouski, Yury and Klakow, Dietrich},
title = {{ATC-Anno}: Semantic Annotation for Air Traffic Control with Assistive Auto-Annotation},
6 changes: 3 additions & 3 deletions annotator.py
100644 → 100755
@@ -122,7 +122,7 @@ def updateAnnotationStatus(self, filename):
else:
self.SetItemTextColour(i, 'red')
else:
print "WARNING: %s does not exist" % filename
print("WARNING: %s does not exist" % filename)


class WaveformPanel(wx.Window):
@@ -249,12 +249,12 @@ def Draw(self, dc, data):
'go_around': [],
'navigation_own': [],
}
-keywordsXml = tagsXML + specialTagsXML + cmdsXML.keys()
+keywordsXml = tagsXML + specialTagsXML + list(cmdsXML.keys())
autocompXML = ['<callsign><airline></airline><flightnumber></flightnumber></callsign>']
for tag in tagsXML:
autocompXML.append('<{0}> </{0}>'.format(tag))

-for cmd, content in cmdsXML.iteritems():
+for cmd, content in cmdsXML.items():
elems = ['<command="{0}"> '.format(cmd)]
for tag in content:
if len(tag.strip()) > 0:
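
The annotator.py changes above follow the standard Python 2 to 3 pattern: `print` becomes a function, `dict.iteritems()` becomes `dict.items()`, and `dict.keys()` now returns a view object rather than a list, so concatenating it with a list needs an explicit `list()` call. A small standalone illustration, with made-up tag and command names:

```python
# Python 3: dict.keys()/items() return views, not lists.
cmdsXML = {"climb": ["altitude"], "descend": ["altitude"]}   # made-up example data
tagsXML = ["callsign", "flightnumber"]

# tagsXML + cmdsXML.keys()          # TypeError under Python 3
keywordsXml = tagsXML + list(cmdsXML.keys())                 # works on 2 and 3

for cmd, content in cmdsXML.items():                         # .iteritems() is gone in 3
    print(cmd, content)
```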
17 changes: 9 additions & 8 deletions tools/GenerateConcept.py
@@ -4,7 +4,8 @@
import itertools
from os import path

-from FileTools import loadAirlineCallsigns
+from .FileTools import loadAirlineCallsigns
+from functools import reduce

reOpenTag = re.compile('^<([a-z_]+)>$')
reOpenAnyTag = re.compile('^<([a-z_="]+)>$')
@@ -437,7 +438,7 @@ class UtteranceUnit(object):
victor='V', whisky='W', xray='X',
yankee='Y', zoulou='Z')
LETTERWORDS = dict()
-for word, letter in LETTERS.iteritems():
+for word, letter in LETTERS.items():
LETTERWORDS.setdefault(letter, list()).append(word)

SINGLE_DIGITS = dict(one='1', two='2', three='3',
@@ -526,7 +527,7 @@ def parseNumber(self, numberFrame, isFrame=True):
items.insert(nxt, 'zero')
items.pop(i)

-for multiple, multiplier in self.MULTIPLIERS.iteritems():
+for multiple, multiplier in self.MULTIPLIERS.items():
while multiple in items:
i = items.index(multiple)
nxt = i + 1
@@ -537,7 +538,7 @@ def parseNumber(self, numberFrame, isFrame=True):
items.pop(i)

# Tens
-for tenner, tenval in self.TENS_DIGITS.iteritems():
+for tenner, tenval in self.TENS_DIGITS.items():
while tenner in items:
i = items.index(tenner)
nxt = i + 1
@@ -547,7 +548,7 @@ def parseNumber(self, numberFrame, isFrame=True):
elif items[nxt] not in self.SINGLE_DIGITS or items[nxt] == 'zero':
items.insert(nxt, 'zero')
# Teen values (11-19)
-for teen, teenval in self.TEEN_DIGITS.iteritems():
+for teen, teenval in self.TEEN_DIGITS.items():
while teen in items:
i = items.index(teen)
items[i] = teenval
@@ -1221,10 +1222,10 @@ def __str__(self):
return string

def getCallsign(self):
-print self.callsign
+print(self.callsign)

def getCommands(self):
-print self.commands
+print(self.commands)

def addCommand(self, command):
self.commands.append(command)
@@ -1270,7 +1271,7 @@ def getAMANStrings(self, mode, getConfidence=True, getSubconfidences=True):

# Prepare string construction
cmd = [self.callsign.callsign, NO_CONCEPT]
-frames['total'] = itertools.chain.from_iterable(frames.values())
+frames['total'] = itertools.chain.from_iterable(list(frames.values()))

# Compute confidences
items = list()
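
Two Python 3 changes drive the `tools/GenerateConcept.py` import edits: implicit relative imports were removed, so sibling modules need the explicit `from .FileTools import ...` form, and `reduce()` is no longer a builtin, hence `from functools import reduce`. Note that the leading dot also means the script has to be run as part of the `tools` package (e.g. `python -m tools.GenerateConcept`), which is an assumption about how the PR intends it to be invoked. A minimal illustration of the `reduce` side:

```python
# Python 3: reduce() lives in functools instead of being a builtin.
from functools import reduce

# Fold single digits into one number, e.g. ["1", "2", "3"] -> 123.
digits = ["1", "2", "3"]
value = reduce(lambda acc, d: acc * 10 + int(d), digits, 0)
print(value)  # 123
```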
28 changes: 14 additions & 14 deletions tools/Text2XML.py
@@ -15,8 +15,8 @@
from warnings import warn
from functools import partial
from operator import itemgetter
-from vocabularyHandlers import findCommandVocabularies
-from FileTools import loadAirlineCallsigns
+from .vocabularyHandlers import findCommandVocabularies
+from .FileTools import loadAirlineCallsigns


CONF_SEPARATOR = ':'
@@ -137,7 +137,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca

# Prepare auxiliary information
originalSentence = ' '.join(sentence)
-sentence = map(self._lower_, sentence)
+sentence = list(map(self._lower_, sentence))

if callsign_whitelist is None:
callsign_whitelist = self.default_callsign_whitelist
@@ -147,7 +147,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca
filtered_sentence = set(words)
filtered_sentence.difference_update(self.single_digits)
filtered_sentence.difference_update(self.letters)
-for cmd, vocab in cmdVocabularies.iteritems():
+for cmd, vocab in cmdVocabularies.items():
if vocab.isdisjoint(filtered_sentence):
irrelevant_commands.add(cmd)

@@ -191,7 +191,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca
signal(SIGALRM, partial(self.transduceTimerHandler, text=originalSentence))
alarm(timeout)

-bestDistance = sys.maxint
+bestDistance = sys.maxsize

# Start parsing
i = 0
@@ -253,7 +253,7 @@ def transduce(self, sentence, grammar, allow_skip=True, cmdVocabularies=None, ca
complete_parses.append((output, '', skips, tagcost, list(), -1, skipped_words, False))
else:
considered_word = False
-for outnode, transitions in branches.iteritems():
+for outnode, transitions in branches.items():
if outnode != 'is_terminal':
for transition in transitions:
nodeword = transition['outword'].lower()
@@ -376,7 +376,7 @@ def _sortParses_(complete_parses, incomplete_parses=None):
if len(complete_parses) > 0:
cost_complete = sorted_complete[0][3] # Complete cost is skips
else:
-cost_complete = sys.maxint
+cost_complete = sys.maxsize

# Sort incomplete parses
sorted_incomplete = []
@@ -399,7 +399,7 @@ def _sortParses_(complete_parses, incomplete_parses=None):
cost_incomplete = sorted_incomplete[0][3] + len(
sorted_incomplete[0][1]) # Incomplete cost is skips plus remaining unparsed words
else:
-cost_incomplete = sys.maxint
+cost_incomplete = sys.maxsize

# Pick better parse set
if cost_complete <= cost_incomplete:
@@ -455,7 +455,7 @@ def _closeTags_(nodeword_tag, open_tags):
return remaining_open_tags, closed_tags

def addMissingWords(self, sentence, base_xml):
-sentence = map(str.lower, sentence)
+sentence = list(map(str.lower, sentence))
xmlSentence = base_xml.strip().split()
seenWords, remainingWords, xmlSentence = self._addMissingWords(sentence, xmlSentence)
return ' '.join(seenWords)
@@ -512,10 +512,10 @@ def removeOOGWords(sentence, vocabulary, verbose=False):
defluffedSentence.append(token)
if verbose:
if len(sentence) == len(defluffedSentence):
print "No OOG words in sentence:", ' '.join(sentence)
print("No OOG words in sentence:", ' '.join(sentence))
else:
print "original sentence: ", ' '.join(sentence)
print "Defluffed sentence:", ' '.join(defluffedSentence)
print("original sentence: ", ' '.join(sentence))
print("Defluffed sentence:", ' '.join(defluffedSentence))
return defluffedSentence


@@ -567,8 +567,8 @@ def getGrammarName(self):

def getVocabularyFromGrammar(self):
words = set()
-for branches in self.grammar.itervalues():
-for outnode, transitions in branches.iteritems():
+for branches in self.grammar.values():
+for outnode, transitions in branches.items():
if outnode != 'is_terminal':
for transition in transitions:
nodeword = transition['outword'].lower()
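
The remaining `tools/Text2XML.py` edits follow the same porting pattern: `map()` returns a lazy iterator in Python 3, so it is wrapped in `list()` wherever the result is indexed or reused, and `sys.maxint` no longer exists, with `sys.maxsize` taking its place as the "no candidate yet" sentinel. A short standalone illustration:

```python
# Python 3: map() is lazy, and sys.maxint was replaced by sys.maxsize.
import sys

sentence = ["Lufthansa", "One", "Two", "Three"]
words = list(map(str.lower, sentence))   # materialise once; len()/indexing need a list
print(words[0], len(words))              # lufthansa 4

bestDistance = sys.maxsize               # sentinel meaning "no candidate yet"
print(bestDistance > 10**9)              # True (at least 2**31 - 1, usually 2**63 - 1)
```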