-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlecsum.py
executable file
·121 lines (96 loc) · 3.3 KB
/
lecsum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
import argparse
from pathlib import Path
import sys
import warnings
import yaml
from modules.transcribe import transcribe
from modules.summarize import summarize
from modules.utils import check_config, write
# Settings used when no configuration file is found
DEFAULT_CONFIG = {
    "whisper_model": "base.en",
    "ollama_model": "llama3.1:8b",
    "prompt": "Summarize: ",
}

# Name of the configuration file and the locations searched for it, in order
CONFIG_FILE = "lecsum.yaml"
CONFIG_FILE_PATHS = [
    Path.cwd() / CONFIG_FILE,  # ./
    Path.home() / ".config" / CONFIG_FILE,  # ~/.config/
]
# Build the command-line interface and parse the process arguments
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments.

    Returns:
        The parsed namespace with two attributes: ``file`` (the required
        positional audio file name) and ``config`` (the value of the
        optional ``-c``/``--config`` flag).
    """
    cli = argparse.ArgumentParser(
        description="Automatically transcribe and summarize lecture recordings.",
    )

    # Required positional argument: the recording to process
    cli.add_argument("file", type=str, help="name of an audio file")

    # Optional flag: explicit path to a configuration file
    config_help = f"'{CONFIG_FILE}' configuration file"
    cli.add_argument("-c", "--config", type=str, help=config_help)

    namespace = cli.parse_args()
    return namespace
# Parse YAML config file
def load_config(path: str = None) -> dict:
    """Load the configuration, layering file values over the defaults.

    Lookup order:
      1. ``path``, when one is given (typically from the command line).
      2. The first existing file in ``CONFIG_FILE_PATHS``.
      3. ``DEFAULT_CONFIG`` alone when no file is found.

    Args:
        path: Path to a configuration file, or ``None`` to search the
            default locations.

    Returns:
        A new dict containing every key of ``DEFAULT_CONFIG``, overridden
        by (and extended with) whatever the configuration file defines.
        A partial or empty file therefore still yields a complete
        configuration, and callers never receive the shared
        ``DEFAULT_CONFIG`` object itself.

    Exits the process with status 1, after printing an error message, when
    the explicit ``path`` is not a file, when a file cannot be read, when
    it is not valid YAML, or when its top level is not a mapping.
    """

    # Parse a yaml file, immediately exiting if any errors are found
    def load_yaml_file(path: Path = None) -> dict:
        try:
            # Binary mode lets PyYAML detect the encoding itself, and the
            # context manager guarantees the handle is closed.
            with open(path, "rb") as stream:
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects from tagged YAML. If configuration files may come
                # from an untrusted source, switch to yaml.safe_load.
                loaded = yaml.load(stream, Loader=yaml.Loader)
        except OSError:
            print(f"Error: Configuration file '{path}' cannot be opened.")
            sys.exit(1)
        except yaml.YAMLError as e:
            print(f"Error in configuration file '{path}': {e}")
            sys.exit(1)

        # An empty document parses to None; treat it as "no overrides"
        if loaded is None:
            loaded = {}
        if not isinstance(loaded, dict):
            print(
                f"Error in configuration file '{path}': "
                "expected a mapping of settings."
            )
            sys.exit(1)

        # Fill in any setting the file leaves out
        return {**DEFAULT_CONFIG, **loaded}

    # Default to path passed via command-line
    if path:
        p = Path(path)
        if not p.is_file():
            print(f"Error: Configuration file '{p}' cannot be opened.")
            sys.exit(1)
        return load_yaml_file(p)

    # If a config file is not specified, check a couple default locations
    for p in CONFIG_FILE_PATHS:
        if p.is_file():
            return load_yaml_file(p)

    # Use a copy of the default configuration if no config file is found
    return dict(DEFAULT_CONFIG)
def transcribe_and_summarize(
    whisper_model: str, ollama_model: str, prompt: str, file: str
) -> tuple[str, str]:
    """Transcribe an audio file, summarize the transcript, and save both.

    Two text files are written into the audio file's resolved parent
    directory: ``<stem>_transcript.txt`` and ``<stem>_summary.txt``, where
    ``<stem>`` is the audio file name without its final suffix.

    Args:
        whisper_model: Model name handed to ``transcribe``.
        ollama_model: Model name handed to ``summarize``.
        prompt: Prompt handed to ``summarize`` together with the transcript.
        file: Path of the audio file to process.

    Returns:
        The ``(transcript, summary)`` pair.
    """
    audio = Path(file)
    stem = audio.stem
    out_dir = audio.resolve().parent

    # Step 1: speech to text, saved alongside the recording
    transcript = transcribe(model_name=whisper_model, audio_file=file)
    write(path=out_dir / f"{stem}_transcript.txt", text=transcript)

    # Step 2: text to summary, saved alongside the recording
    summary = summarize(model_name=ollama_model, prompt=prompt, text=transcript)
    write(path=out_dir / f"{stem}_summary.txt", text=summary)

    return transcript, summary
def main():
    """Run the command-line tool: parse input, validate, then process.

    Exits with status 1 after printing an error when the audio file named
    on the command line is not an existing file.
    """
    # Silence warnings emitted by imported modules
    warnings.simplefilter("ignore")

    cli = parse_args()
    settings = load_config(cli.config)

    # Bail out early if the recording does not exist
    audio = Path(cli.file)
    if not audio.is_file():
        print(f"Error: Audio file '{audio}' cannot be opened.")
        sys.exit(1)

    whisper_model = settings["whisper_model"]
    ollama_model = settings["ollama_model"]

    # Validate the configured models before doing any work
    check_config(whisper_model=whisper_model, ollama_model=ollama_model)

    transcribe_and_summarize(
        whisper_model=whisper_model,
        ollama_model=ollama_model,
        prompt=settings["prompt"],
        file=cli.file,
    )


# Only run when executed as a script, not when imported
if __name__ == "__main__":
    main()