-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubset_lvis_pants_labels.py
53 lines (44 loc) · 2.2 KB
/
subset_lvis_pants_labels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
def process_file(source_directory, target_directory, labels_of_interest, filename):
    """Copy one label file, keeping only the contours of the labels of interest.

    Each line of a label file consists of a label index followed by a series
    of x,y contour coordinates, all separated by whitespace. Lines whose label
    is in ``labels_of_interest`` are written to the output with the label
    replaced by ``'0'`` (the only class the model trains for); every other
    line is dropped. Blank or whitespace-only lines are skipped.

    Args:
        source_directory: Directory containing the input label file.
        target_directory: Directory the filtered file is written to (must
            already exist).
        labels_of_interest: Container of label indices, as strings, to keep.
        filename: Name of the label file, used for both input and output.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    file_path = os.path.join(source_directory, filename)
    output_path = os.path.join(target_directory, filename)

    # Read everything before opening the output so that running with
    # source_directory == target_directory does not truncate the input
    # before it has been read.
    with open(file_path, 'r') as file:
        lines = file.readlines()

    kept_lines = []
    for line in lines:
        parts = line.split()
        # A blank line has no label to inspect; indexing parts[0] would
        # raise IndexError, so skip it.
        if not parts:
            continue
        if parts[0] in labels_of_interest:
            parts[0] = '0'  # Collapse every label of interest into class 0.
            # Re-join with single spaces, normalising the separators.
            kept_lines.append(' '.join(parts) + '\n')

    with open(output_path, 'w') as output_file:
        output_file.writelines(kept_lines)
def subset_labels(source_directory, target_directory):
    """Filter every ``.txt`` label file in a directory down to the pants classes.

    Each ``.txt`` file directly inside ``source_directory`` is handed to
    ``process_file``, which writes a filtered copy with the same name into
    ``target_directory``. Files are processed in parallel on a thread pool
    with one worker per CPU.

    Args:
        source_directory: Directory containing the original label files.
        target_directory: Directory that receives the filtered files; it is
            created (including parents) if it does not exist.

    Raises:
        OSError: If a directory cannot be listed or created, or a file cannot
            be read or written.
        Exception: Any exception raised while processing a file is re-raised
            here when that file's result is collected.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(target_directory, exist_ok=True)

    # Classes in the LVIS dataset that represent pants.
    labels_of_interest = {'950', '1039', '1121'}

    # Only .txt files are label files; list them before starting any workers
    # so a listing failure surfaces before the pool is created.
    txt_files = [name for name in os.listdir(source_directory) if name.endswith('.txt')]

    num_cpus = multiprocessing.cpu_count()
    with ThreadPoolExecutor(max_workers=num_cpus) as executor:
        futures = [
            executor.submit(process_file, source_directory, target_directory, labels_of_interest, name)
            for name in txt_files
        ]
        # result() blocks until the task finishes and re-raises any exception
        # from the worker, so failures are not silently dropped.
        for future in futures:
            future.result()

    print("Files have been processed and labels adjusted successfully.")