Add minimum values for --summin, --summax and --segmax

This commit is contained in:
ChaoticByte 2024-08-13 21:20:28 +02:00
parent a480fdcd34
commit 27ee9a7d8b
No known key found for this signature in database
2 changed files with 9 additions and 7 deletions

View file

@@ -38,9 +38,9 @@ audio-summarize.py -m filepath -i filepath -o filepath
options: options:
-h, --help show this help message and exit -h, --help show this help message and exit
--summin n The minimum lenght of a segment summary [10] --summin n The minimum lenght of a segment summary [10, min: 5]
--summax n The maximum lenght of a segment summary [90] --summax n The maximum lenght of a segment summary [90, min: 5]
--segmax n The maximum number of tokens per segment [375, max: 500] --segmax n The maximum number of tokens per segment [375, 5 - 500]
-m filepath The path to a whisper.cpp-compatible model file -m filepath The path to a whisper.cpp-compatible model file
-i filepath The path to the media file -i filepath The path to the media file
-o filepath Where to save the output text to -o filepath Where to save the output text to

View file

@@ -69,14 +69,16 @@ def summarize(chunks: List[str], summary_min: int, summary_max: int) -> str:
if __name__ == "__main__": if __name__ == "__main__":
argp = ArgumentParser() argp = ArgumentParser()
argp.add_argument("--summin", metavar="n", type=int, default=10, help="The minimum lenght of a segment summary [10]") argp.add_argument("--summin", metavar="n", type=int, default=10, help="The minimum lenght of a segment summary [10, min: 5]")
argp.add_argument("--summax", metavar="n", type=int, default=90, help="The maximum lenght of a segment summary [90]") argp.add_argument("--summax", metavar="n", type=int, default=90, help="The maximum lenght of a segment summary [90, min: 5]")
argp.add_argument("--segmax", metavar="n", type=int, default=375, help="The maximum number of tokens per segment [375, max: 500]") argp.add_argument("--segmax", metavar="n", type=int, default=375, help="The maximum number of tokens per segment [375, 5 - 500]")
argp.add_argument("-m", required=True, metavar="filepath", type=Path, help="The path to a whisper.cpp-compatible model file") argp.add_argument("-m", required=True, metavar="filepath", type=Path, help="The path to a whisper.cpp-compatible model file")
argp.add_argument("-i", required=True, metavar="filepath", type=Path, help="The path to the media file") argp.add_argument("-i", required=True, metavar="filepath", type=Path, help="The path to the media file")
argp.add_argument("-o", required=True, metavar="filepath", type=Path, help="Where to save the output text to") argp.add_argument("-o", required=True, metavar="filepath", type=Path, help="Where to save the output text to")
args = argp.parse_args() args = argp.parse_args()
args.segmax = min(args.segmax, 500) args.summin = max(5, args.summin)
args.summax = max(5, args.summax)
args.segmax = max(5, min(args.segmax, 500))
# create tmpdir # create tmpdir
with TemporaryDirectory(suffix="as") as d: with TemporaryDirectory(suffix="as") as d:
converted_audio_path = (Path(d) / "audio.wav").__str__() converted_audio_path = (Path(d) / "audio.wav").__str__()