Skip to content

Commit

Permalink
Merge pull request #29 from nitrix/fix/issue-28
Browse files Browse the repository at this point in the history
Fixes issue #28: invalid filenames on some OSes
  • Loading branch information
Julian-Chu committed Apr 23, 2020
2 parents 703507d + e8eb1fc commit 100792d
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 7 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/.idea
/.vscode
youtubedr
.vscode/launch.json
75 changes: 69 additions & 6 deletions youtube.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"io"
"io/ioutil"
"log"
"mime"
"net/http"
"net/url"
"os"
Expand Down Expand Up @@ -109,23 +110,85 @@ func (y *Youtube) StartDownloadWithQuality(destFile string, quality string) erro
// StartDownloadFile walks y.StreamList in order and tries to download each
// stream until one attempt succeeds.
//
// The destination is <home>/Movies/youtubedr/<file name>, where <home> is the
// HomeDir reported by user.Current() and the file name is derived from the
// stream's "title" entry.
//
// It returns nil once a download succeeds. Otherwise it returns the error of
// the last attempted stream, or "Empty stream list" when y.StreamList has no
// entries.
func (y *Youtube) StartDownloadFile() error {
//download highest resolution on [0]
// Seed err so that an empty stream list is reported as a failure.
err := errors.New("Empty stream list")
for _, v := range y.StreamList {
url := v["url"]
y.log(fmt.Sprintln("Download url=", url))
for _, stream := range y.StreamList {
streamUrl := stream["url"]
y.log(fmt.Sprintln("Download url=", streamUrl))

// Find out what the file name should be.
fileName := sanitizeFilename(stream["title"])
fileName += pickIdealFileExtension(stream["type"])

// NOTE(review): the error from user.Current() is discarded, yet usr.HomeDir
// is read right below — confirm this cannot fail on supported platforms.
usr, _ := user.Current()
fileName := fmt.Sprintf("%s.mov", v["title"])
destFile := filepath.Join(filepath.Join(usr.HomeDir, "Movies", "youtubedr"), fileName)
y.log(fmt.Sprintln("Download to file=", destFile))

err = y.videoDLWorker(destFile, url)
err = y.videoDLWorker(destFile, streamUrl)
// Stop at the first stream that downloads successfully.
if err == nil {
break
return nil
}
}
return err
}

// pickIdealFileExtension maps a MIME type string, for example
// `video/mp4; codecs="avc1.64001F, mp4a.40.2"`, to a file extension that
// includes the leading dot.
//
// Resolution order:
//  1. a hardcoded list of canonical video extensions;
//  2. the first extension reported by mime.ExtensionsByType;
//  3. ".mov" when the type cannot be parsed or no extension is known for it.
func pickIdealFileExtension(mediaType string) string {
	const defaultExtension = ".mov"

	// ParseMediaType still returns the media type when only the optional
	// parameters are malformed (ErrInvalidMediaParameter). The parameters are
	// not used here, so a valid type must not be thrown away in that case.
	parsed, _, err := mime.ParseMediaType(mediaType)
	if err != nil && err != mime.ErrInvalidMediaParameter {
		return defaultExtension
	}

	// Rely on hardcoded canonical mime types, as the ones provided by Go aren't exhaustive [1].
	// This seems to be a recurring problem for youtube downloaders, see [2].
	// The implementation is based on mozilla's list [3], IANA [4] and Youtube's support [5].
	// [1] https://github.com/golang/go/blob/ed7888aea6021e25b0ea58bcad3f26da2b139432/src/mime/type.go#L60
	// [2] https://github.com/ZiTAL/youtube-dl/blob/master/mime.types
	// [3] https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
	// [4] https://www.iana.org/assignments/media-types/media-types.xhtml#video
	// [5] https://support.google.com/youtube/troubleshooter/2888402?hl=en
	switch parsed {
	case "video/quicktime":
		return ".mov"
	case "video/x-msvideo":
		return ".avi"
	case "video/x-matroska":
		return ".mkv"
	case "video/mpeg":
		return ".mpeg"
	case "video/webm":
		return ".webm"
	case "video/3gpp2":
		return ".3g2"
	case "video/x-flv":
		return ".flv"
	case "video/3gpp":
		return ".3gp"
	case "video/mp4":
		return ".mp4"
	case "video/ogg":
		return ".ogv"
	case "video/mp2t":
		return ".ts"
	}

	// Our last resort is to ask the operating system, but these give multiple results and are rarely canonical.
	extensions, err := mime.ExtensionsByType(parsed)
	if err != nil || len(extensions) == 0 {
		return defaultExtension
	}

	return extensions[0]
}

// Patterns used by sanitizeFilename, compiled once at package initialisation
// instead of on every call.
var (
	// filenameForbiddenChars matches characters that cannot appear in a file
	// name on at least one supported OS:
	//   mac:     : /
	//   linux:   /
	//   windows: < > : " / \ | ? * and control characters
	// Tab, newline, form feed and carriage return are deliberately left out so
	// that filenameWhitespaceRun turns them into a single space instead.
	//
	// Ref https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
	filenameForbiddenChars = regexp.MustCompile(`[<>:"/\\|?*\x00-\x08\x0b\x0e-\x1f]`)

	// filenameWhitespaceRun matches one or more consecutive whitespace characters.
	filenameWhitespaceRun = regexp.MustCompile(`\s+`)
)

// sanitizeFilename returns fileName with every forbidden character removed
// and every run of whitespace collapsed into a single space. Casing and all
// other characters are preserved.
func sanitizeFilename(fileName string) string {
	// Strip first, collapse second: removing a character between two spaces
	// leaves a double space that the second pass then folds into one.
	fileName = filenameForbiddenChars.ReplaceAllString(fileName, "")
	fileName = filenameWhitespaceRun.ReplaceAllString(fileName, " ")

	return fileName
}

func (y *Youtube) parseVideoInfo() error {
answer, err := url.ParseQuery(y.videoInfo)
if err != nil {
Expand Down
20 changes: 20 additions & 0 deletions youtube_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,23 @@ func TestParseVideo(t *testing.T) {
return
}
}

// TestSanitizeFilename checks that sanitizeFilename strips the characters that
// are invalid in file names while leaving casing, single spaces and common
// harmless symbols untouched.
func TestSanitizeFilename(t *testing.T) {
	checks := []struct {
		input    string
		expected string
		failure  string
	}{
		{
			input:    "a<b>c:d\\e\"f/g\\h|i?j*k",
			expected: "abcdefghijk",
			failure:  "Invalid characters must get stripped",
		},
		{
			input:    "aB Cd",
			expected: "aB Cd",
			failure:  "Casing and whitespaces must be preserved",
		},
		{
			input:    "~!@#$%^&()[].,",
			expected: "~!@#$%^&()[].,",
			failure:  "The common harmless symbols should remain valid",
		},
	}

	for _, check := range checks {
		if sanitizeFilename(check.input) != check.expected {
			t.Error(check.failure)
		}
	}
}

0 comments on commit 100792d

Please sign in to comment.