-
Notifications
You must be signed in to change notification settings - Fork 0
/
byte2img.py
executable file
·74 lines (68 loc) · 2.65 KB
/
byte2img.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing as mp
#### GLOBAL VARS
# Root of the dataset; '~' is expanded to the current user's home directory.
DIR = '~/workspace/datasets/'
# Directory that is scanned for the input files (one sub-directory per group).
DIR_TRAIN = os.path.expanduser(DIR + 'train/')
# Expanded dataset root; the generated images are written below it.
DIR_HERE = os.path.expanduser(DIR)

# Make sure the output folder for the generated images exists.
_train_imgs_dir = os.path.join(DIR_HERE, 'train_imgs')
if os.path.exists(_train_imgs_dir):
    print("Folder exists: train")
else:
    # makedirs also creates missing parent directories (os.mkdir would raise
    # FileNotFoundError if the dataset root is absent), and exist_ok=True
    # tolerates another process creating the folder between the check above
    # and this call.
    os.makedirs(_train_imgs_dir, exist_ok=True)
    print("Folder created: train")
def _parse_hex_dump(lines, limit: int) -> list:
    """Collect at most ``limit`` byte values from the lines of a hex dump.

    Args:
        lines: Iterable of text lines. On each line the first
            whitespace-separated token is skipped (it is the address) and
            every following token is parsed as a hexadecimal number.
        limit: Maximum number of values to return; parsing stops as soon as
            this many values have been collected.

    Returns:
        A list of ints, in file order, with length <= ``limit``.

    Raises:
        ValueError: If a token (after the '??' substitution) is not valid
            hexadecimal.
    """
    values = []
    for line in lines:
        # Drop the first column: it is the address and is not needed.
        for word in line.split()[1:]:
            if len(values) >= limit:
                # Enough data for the image; no need to parse the rest.
                return values
            # '??' marks a byte that is not present in the dump; treat as 00.
            values.append(int(word.replace('??', '00'), 16))
    return values


def preprocess_file2img(file:str,d:str,IMG_H:int,IMG_W:int)->None:
    """Convert one hex-dump text file into a grayscale PNG image.

    The file ``DIR_TRAIN/<d>/<file>`` is read, the address column is dropped,
    '??' placeholders become 0 and the remaining hexadecimal tokens are turned
    into integers. The first ``IMG_H * IMG_W`` values (zero-padded if the file
    is shorter) are reshaped to ``(IMG_H, IMG_W)`` and saved to
    ``DIR_HERE/train_imgs/<d>/<file>.png``.

    Args:
        file: Name of the input file inside the directory ``d``.
        d: Name of the sub-directory of ``DIR_TRAIN`` that contains ``file``;
            the same name is used for the output sub-directory.
        IMG_H: Height of the output image in pixels.
        IMG_W: Width of the output image in pixels.

    Raises:
        OSError: If the input file cannot be read or the output cannot be
            written.
        ValueError: If the file contains a token that is not hexadecimal.
    """
    SIZE = IMG_H * IMG_W
    print(f"Processing file: {file} in directory {d}",flush=True)
    # Iterate the file lazily so that large dumps are not loaded completely
    # when only the first SIZE bytes end up in the image.
    with open(os.path.join(DIR_TRAIN, d, file)) as f:
        img_file = _parse_hex_dump(f, SIZE)
    # If the file holds fewer than SIZE bytes, pad with 0
    # (a non-positive repeat count yields an empty list, so no check needed).
    img_file.extend([0] * (SIZE - len(img_file)))
    # Make it a matrix with one pixel per byte.
    img_file = np.array(img_file).reshape(IMG_H, IMG_W)
    # The per-directory output folder is not created anywhere else, so create
    # it here; without this imsave fails with FileNotFoundError.
    out_dir = os.path.join(DIR_HERE, 'train_imgs', d)
    os.makedirs(out_dir, exist_ok=True)
    # Pin the gray scale to the byte range: without vmin/vmax imsave rescales
    # every image to its own min/max, so equal byte values could end up with
    # different intensities in different images.
    plt.imsave(os.path.join(out_dir, file + '.png'), img_file, cmap='gray', vmin=0, vmax=255)
if __name__ == '__main__':
    # Dimensions (in pixels) of every generated image.
    IMG_H = 256
    IMG_W = 256
    dirs = os.listdir(DIR_TRAIN)
    print(dirs,flush=True)
    for d in dirs:
        src_dir = os.path.join(DIR_TRAIN, d)
        # Skip stray plain files in DIR_TRAIN; os.listdir on them would raise
        # NotADirectoryError and abort the whole run.
        if not os.path.isdir(src_dir):
            continue
        print("Processing directory: " + d,flush=True)
        # Get the list of files in the directory
        files = os.listdir(src_dir)
        # Process each file
        for file in files:
            print("Processing file: " + file,flush=True)
            preprocess_file2img(file,d,IMG_H,IMG_W)
            # NOTE(review): this break stops after the first file of each
            # directory, which contradicts "Process each file" above and
            # looks like a debugging leftover. The original indentation was
            # not recoverable, so confirm its intended placement (or remove
            # it) before a full conversion run.
            break
    print("Done",flush=True)