From f59300977c8dd1e558010bf26788bef58e473b18 Mon Sep 17 00:00:00 2001 From: Alex Belanger Date: Tue, 21 Apr 2020 21:48:12 -0400 Subject: [PATCH 1/3] Fixes issues #28, invalid filenames on some OSes --- youtube.go | 46 ++++++++++++++++++++++++++++++++++++++++------ youtube_test.go | 20 ++++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/youtube.go b/youtube.go index cb43a0d9..32fb575f 100644 --- a/youtube.go +++ b/youtube.go @@ -7,6 +7,7 @@ import ( "io" "io/ioutil" "log" + "mime" "net/http" "net/url" "os" @@ -109,23 +110,56 @@ func (y *Youtube) StartDownloadWithQuality(destFile string, quality string) erro func (y *Youtube) StartDownloadFile() error { //download highest resolution on [0] err := errors.New("Empty stream list") - for _, v := range y.StreamList { - url := v["url"] - y.log(fmt.Sprintln("Download url=", url)) + for _, stream := range y.StreamList { + streamUrl := stream["url"] + streamType := stream["type"] + y.log(fmt.Sprintln("Download url=", streamUrl)) + + // Find out what the file name should be. + fileName := sanitizeFilename(stream["title"]) + + // Find out what the file extension should be. + fileExtensions, err := mime.ExtensionsByType(streamType) + if err != nil { + fileName += ".mov" + } else { + fileName += fileExtensions[0] + } usr, _ := user.Current() - fileName := fmt.Sprintf("%s.mov", v["title"]) destFile := filepath.Join(filepath.Join(usr.HomeDir, "Movies", "youtubedr"), fileName) y.log(fmt.Sprintln("Download to file=", destFile)) - err = y.videoDLWorker(destFile, url) + err = y.videoDLWorker(destFile, streamUrl) if err == nil { - break + return nil } } return err } +func sanitizeFilename(fileName string) string { + // Characters not allowed on mac + // :/ + // Characters not allowed on linux + // / + // Characters not allowed on windows + // <>:"/\|?* + + // Ref https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions + + reg, err := regexp.Compile(`[:/<>\:"\\|?*]`) + if err != nil { + log.Fatal(err) + } + + fileName = reg.ReplaceAllString(fileName, "") + fileName = strings.ReplaceAll(fileName, " ", " ") + fileName = strings.ReplaceAll(fileName, " ", " ") + + return fileName +} + func (y *Youtube) parseVideoInfo() error { answer, err := url.ParseQuery(y.videoInfo) if err != nil { diff --git a/youtube_test.go b/youtube_test.go index dfded0b2..5e5da639 100644 --- a/youtube_test.go +++ b/youtube_test.go @@ -63,3 +63,23 @@ func TestParseVideo(t *testing.T) { return } } + +func TestSanitizeFilename(t *testing.T) { + fileName := "ac:d\\e\"f/g\\h|i?j*k" + sanitized := sanitizeFilename(fileName) + if sanitized != "abcdefghijk" { + t.Error("Invalid characters must get stripped") + } + + fileName = "aB Cd" + sanitized = sanitizeFilename(fileName) + if sanitized != "aB Cd" { + t.Error("Casing and whitespaces must be preserved") + } + + fileName = "~!@#$%^&()[].," + sanitized = sanitizeFilename(fileName) + if sanitized != "~!@#$%^&()[].," { + t.Error("The common harmless symbols should remain valid") + } +} \ No newline at end of file From 7b5d63f8056218d3f13844693c0bfbb34d0f25cc Mon Sep 17 00:00:00 2001 From: Alex Belanger Date: Wed, 22 Apr 2020 20:09:40 -0400 Subject: [PATCH 2/3] Better mime type/extension conversion --- youtube.go | 63 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 17 deletions(-) diff --git a/youtube.go b/youtube.go index 32fb575f..19cf0f6c 100644 --- a/youtube.go +++ b/youtube.go @@ -112,19 +112,11 @@ func (y *Youtube) StartDownloadFile() error { err := errors.New("Empty stream list") for _, stream := range y.StreamList { streamUrl := stream["url"] - streamType := stream["type"] y.log(fmt.Sprintln("Download url=", streamUrl)) // Find out what the file name should be. fileName := sanitizeFilename(stream["title"]) - - // Find out what the file extension should be. - fileExtensions, err := mime.ExtensionsByType(streamType) - if err != nil { - fileName += ".mov" - } else { - fileName += fileExtensions[0] - } + fileName += pickIdealFileExtension(stream["type"]) usr, _ := user.Current() destFile := filepath.Join(filepath.Join(usr.HomeDir, "Movies", "youtubedr"), fileName) @@ -138,6 +130,49 @@ func (y *Youtube) StartDownloadFile() error { return err } +func pickIdealFileExtension(mediaType string) string { + defaultExtension := ".mov" + + mediaType, _, err := mime.ParseMediaType(mediaType) + if err != nil { + return defaultExtension + } + + // Rely on hardcoded canonical mime types, as the ones provided by Go aren't exhaustive [1]. + // This seems to be a recurring problem for youtube downloaders, see [2]. + // The implementation is based on mozilla's list [3], IANA [4] and Youtube's support [5]. + // [1] https://github.com/golang/go/blob/ed7888aea6021e25b0ea58bcad3f26da2b139432/src/mime/type.go#L60 + // [2] https://github.com/ZiTAL/youtube-dl/blob/master/mime.types + // [3] https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types + // [4] https://www.iana.org/assignments/media-types/media-types.xhtml#video + // [5] https://support.google.com/youtube/troubleshooter/2888402?hl=en + canonicals := map[string]string { + "video/quicktime": ".mov", + "video/x-msvideo": ".avi", + "video/x-matroska": ".mkv", + "video/mpeg": ".mpeg", + "video/webm": ".webm", + "video/3gpp2": ".3g2", + "video/x-flv": ".flv", + "video/3gpp": ".3gp", + "video/mp4": ".mp4", + "video/ogg": ".ogv", + "video/mp2t": ".ts", + } + + if extension, ok := canonicals[mediaType]; ok { + return extension + } + + // Our last resort is to ask the operating system, but these give multiple results and are rarely canonical. + extensions, err := mime.ExtensionsByType(mediaType) + if err != nil || extensions == nil { + return defaultExtension + } + + return extensions[0] +} + func sanitizeFilename(fileName string) string { // Characters not allowed on mac // :/ @@ -148,14 +183,8 @@ func sanitizeFilename(fileName string) string { // Ref https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions - reg, err := regexp.Compile(`[:/<>\:"\\|?*]`) - if err != nil { - log.Fatal(err) - } - - fileName = reg.ReplaceAllString(fileName, "") - fileName = strings.ReplaceAll(fileName, " ", " ") - fileName = strings.ReplaceAll(fileName, " ", " ") + fileName = regexp.MustCompile(`[:/<>\:"\\|?*]`).ReplaceAllString(fileName, "") + fileName = regexp.MustCompile(`\s+`).ReplaceAllString(fileName, " ") return fileName } From e8eb1fcfe1f0e28bb19c8ee07711e7753163573f Mon Sep 17 00:00:00 2001 From: Alex Belanger Date: Wed, 22 Apr 2020 20:10:08 -0400 Subject: [PATCH 3/3] Added ignore rules for common IDEs --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3c75998f..8d79a67b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ +/.idea +/.vscode youtubedr -.vscode/launch.json