adds support for the "new" tistory URLs

Seklfreak · Dec 5, 2018 · 2cfba01 · 2cfba01
1 parent 2bcf036
commit 2cfba01
Show file tree

Hide file tree

Showing 6 changed files with 94 additions and 61 deletions.
diff --git a/extract.go b/extract.go
@@ -30,6 +30,14 @@ func getDownloadLinks(inputURL string, channelID string, interactive bool) map[s
 			return skipDuplicateLinks(links, channelID, interactive)
 		}
 	}
+	if RegexpUrlTistoryLegacy.MatchString(inputURL) {
+		links, err := getLegacyTistoryUrls(inputURL)
+		if err != nil {
+			fmt.Println("legacy tistory URL failed,", inputURL, ",", err)
+		} else if len(links) > 0 {
+			return skipDuplicateLinks(links, channelID, interactive)
+		}
+	}
 	if RegexpUrlGfycat.MatchString(inputURL) {
 		links, err := getGfycatUrls(inputURL)
 		if err != nil {

diff --git a/main.go b/main.go
@@ -669,23 +669,6 @@ func getTwitterStatusUrls(url string, channelID string) (map[string]string, erro
 	return links, nil
 }
 
-func getTistoryUrls(url string) (map[string]string, error) {
-	url = strings.Replace(url, "/image/", "/original/", -1)
-	return map[string]string{url: ""}, nil
-}
-
-func getTistoryWithCDNUrls(urlI string) (map[string]string, error) {
-	parameters, _ := url.ParseQuery(urlI)
-	if val, ok := parameters["fname"]; ok {
-		if len(val) > 0 {
-			if RegexpUrlTistory.MatchString(val[0]) {
-				return getTistoryUrls(val[0])
-			}
-		}
-	}
-	return nil, nil
-}
-
 func getGfycatUrls(url string) (map[string]string, error) {
 	parts := strings.Split(url, "/")
 	if len(parts) < 3 {
@@ -1051,8 +1034,8 @@ func getPossibleTistorySiteUrls(url string) (map[string]string, error) {
 	doc.Find(".article img, #content img, div[role=main] img, .section_blogview img").Each(func(i int, s *goquery.Selection) {
 		foundUrl, exists := s.Attr("src")
 		if exists {
-			isTistoryCdnUrl := RegexpUrlTistoryWithCDN.MatchString(foundUrl)
-			isTistoryUrl := RegexpUrlTistory.MatchString(foundUrl)
+			isTistoryCdnUrl := RegexpUrlTistoryLegacyWithCDN.MatchString(foundUrl)
+			isTistoryUrl := RegexpUrlTistoryLegacy.MatchString(foundUrl)
 			if isTistoryCdnUrl == true {
 				finalTistoryUrls, _ := getTistoryWithCDNUrls(foundUrl)
 				if len(finalTistoryUrls) > 0 {
@@ -1062,7 +1045,7 @@ func getPossibleTistorySiteUrls(url string) (map[string]string, error) {
 					}
 				}
 			} else if isTistoryUrl == true {
-				finalTistoryUrls, _ := getTistoryUrls(foundUrl)
+				finalTistoryUrls, _ := getLegacyTistoryUrls(foundUrl)
 				if len(finalTistoryUrls) > 0 {
 					for finalTistoryUrl := range finalTistoryUrls {
 						foundFilename := s.AttrOr("filename", "")

diff --git a/main_test.go b/main_test.go
@@ -12,8 +12,8 @@ import (
 
 func init() {
 	RegexpUrlTwitter, _ = regexp.Compile(REGEXP_URL_TWITTER)
-	RegexpUrlTistory, _ = regexp.Compile(REGEXP_URL_TISTORY)
-	RegexpUrlTistoryWithCDN, _ = regexp.Compile(REGEXP_URL_TISTORY_WITH_CDN)
+	RegexpUrlTistoryLegacy, _ = regexp.Compile(REGEXP_URL_TISTORY_LEGACY)
+	RegexpUrlTistoryLegacyWithCDN, _ = regexp.Compile(REGEXP_URL_TISTORY_LEGACY_WITH_CDN)
 	RegexpUrlGfycat, _ = regexp.Compile(REGEXP_URL_GFYCAT)
 	RegexpUrlInstagram, _ = regexp.Compile(REGEXP_URL_INSTAGRAM)
 	RegexpUrlImgurSingle, _ = regexp.Compile(REGEXP_URL_IMGUR_SINGLE)
@@ -77,7 +77,7 @@ var getTistoryUrlsTests = []urlsTestpair{
 
 func TestGetTistoryUrls(t *testing.T) {
 	for _, pair := range getTistoryUrlsTests {
-		v, err := getTistoryUrls(pair.value)
+		v, err := getLegacyTistoryUrls(pair.value)
 		if err != nil {
 			t.Errorf("For %v, expected %v, got %v", pair.value, nil, err)
 		}

diff --git a/regex.go b/regex.go
@@ -4,22 +4,44 @@ import (
 	"regexp"
 )
 
+const ( // regular-expression source strings for URL and filename matching
+	REGEXP_URL_TWITTER                 = `^http(s?):\/\/pbs(-[0-9]+)?\.twimg\.com\/media\/[^\./]+\.(jpg|png)((\:[a-z]+)?)$`
+	REGEXP_URL_TWITTER_STATUS          = `^http(s?):\/\/(www\.)?twitter\.com\/([A-Za-z0-9-_\.]+\/status\/|statuses\/|i\/web\/status\/)([0-9]+)$`
+	REGEXP_URL_TISTORY                 = `^http(s?):\/\/t[0-9]+\.daumcdn\.net\/cfile\/tistory\/([A-Z0-9]+?)(\?original)?$`
+	REGEXP_URL_TISTORY_LEGACY          = `^http(s?):\/\/[a-z0-9]+\.uf\.tistory\.com\/(image|original)\/[A-Z0-9]+$`
+	REGEXP_URL_TISTORY_LEGACY_WITH_CDN = `^http(s)?:\/\/[0-9a-z]+.daumcdn.net\/[a-z]+\/[a-zA-Z0-9\.]+\/\?scode=mtistory&fname=http(s?)%3A%2F%2F[a-z0-9]+\.uf\.tistory\.com%2F(image|original)%2F[A-Z0-9]+$` // NOTE(review): the dots in ".daumcdn.net" are unescaped, so each one matches any character
+	REGEXP_URL_GFYCAT                  = `^http(s?):\/\/gfycat\.com\/(gifs\/detail\/)?[A-Za-z]+$`
+	REGEXP_URL_INSTAGRAM               = `^http(s?):\/\/(www\.)?instagram\.com\/p\/[^/]+\/(\?[^/]+)?$`
+	REGEXP_URL_IMGUR_SINGLE            = `^http(s?):\/\/(i\.)?imgur\.com\/[A-Za-z0-9]+(\.gifv)?$`
+	REGEXP_URL_IMGUR_ALBUM             = `^http(s?):\/\/imgur\.com\/(a\/|gallery\/|r\/[^\/]+\/)[A-Za-z0-9]+(#[A-Za-z0-9]+)?$`
+	REGEXP_URL_GOOGLEDRIVE             = `^http(s?):\/\/drive\.google\.com\/file\/d\/[^/]+\/view$`
+	REGEXP_URL_GOOGLEDRIVE_FOLDER      = `^http(s?):\/\/drive\.google\.com\/(drive\/folders\/|open\?id=)([^/]+)$`
+	REGEXP_URL_POSSIBLE_TISTORY_SITE   = `^http(s)?:\/\/[0-9a-zA-Z\.-]+\/(m\/)?(photo\/)?[0-9]+$`
+	REGEXP_URL_FLICKR_PHOTO            = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/([0-9]+)@([A-Z0-9]+)\/([0-9]+)(\/)?(\/in\/album-([0-9]+)(\/)?)?$`
+	REGEXP_URL_FLICKR_ALBUM            = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/(([0-9]+)@([A-Z0-9]+)|[A-Za-z0-9]+)\/(albums\/(with\/)?|(sets\/)?)([0-9]+)(\/)?$`
+	REGEXP_URL_FLICKR_ALBUM_SHORT      = `^http(s)?:\/\/((www\.)?flickr\.com\/gp\/[0-9]+@[A-Z0-9]+\/[A-Za-z0-9]+|flic\.kr\/s\/[a-zA-Z0-9]+)$`
+	REGEXP_URL_STREAMABLE              = `^http(s?):\/\/(www\.)?streamable\.com\/([0-9a-z]+)$`
+
+	REGEXP_FILENAME = `^^[^/\\:*?"<>|]{1,150}\.[A-Za-z0-9]{2,4}$$` // NOTE(review): "^^" and "$$" repeat the start/end anchors
+)
+
 var (
-	RegexpUrlTwitter             *regexp.Regexp
-	RegexpUrlTwitterStatus       *regexp.Regexp
-	RegexpUrlTistory             *regexp.Regexp
-	RegexpUrlTistoryWithCDN      *regexp.Regexp
-	RegexpUrlGfycat              *regexp.Regexp
-	RegexpUrlInstagram           *regexp.Regexp
-	RegexpUrlImgurSingle         *regexp.Regexp
-	RegexpUrlImgurAlbum          *regexp.Regexp
-	RegexpUrlGoogleDrive         *regexp.Regexp
-	RegexpUrlGoogleDriveFolder   *regexp.Regexp
-	RegexpUrlPossibleTistorySite *regexp.Regexp
-	RegexpUrlFlickrPhoto         *regexp.Regexp
-	RegexpUrlFlickrAlbum         *regexp.Regexp
-	RegexpUrlFlickrAlbumShort    *regexp.Regexp
-	RegexpUrlStreamable          *regexp.Regexp
+	RegexpUrlTwitter              *regexp.Regexp
+	RegexpUrlTwitterStatus        *regexp.Regexp
+	RegexpUrlTistory              *regexp.Regexp
+	RegexpUrlTistoryLegacy        *regexp.Regexp
+	RegexpUrlTistoryLegacyWithCDN *regexp.Regexp
+	RegexpUrlGfycat               *regexp.Regexp
+	RegexpUrlInstagram            *regexp.Regexp
+	RegexpUrlImgurSingle          *regexp.Regexp
+	RegexpUrlImgurAlbum           *regexp.Regexp
+	RegexpUrlGoogleDrive          *regexp.Regexp
+	RegexpUrlGoogleDriveFolder    *regexp.Regexp
+	RegexpUrlPossibleTistorySite  *regexp.Regexp
+	RegexpUrlFlickrPhoto          *regexp.Regexp
+	RegexpUrlFlickrAlbum          *regexp.Regexp
+	RegexpUrlFlickrAlbumShort     *regexp.Regexp
+	RegexpUrlStreamable           *regexp.Regexp
 )
 
 func initRegex() error {
@@ -36,7 +58,11 @@ func initRegex() error {
 	if err != nil {
 		return err
 	}
-	RegexpUrlTistoryWithCDN, err = regexp.Compile(REGEXP_URL_TISTORY_WITH_CDN)
+	RegexpUrlTistoryLegacy, err = regexp.Compile(REGEXP_URL_TISTORY_LEGACY)
+	if err != nil {
+		return err
+	}
+	RegexpUrlTistoryLegacyWithCDN, err = regexp.Compile(REGEXP_URL_TISTORY_LEGACY_WITH_CDN)
 	if err != nil {
 		return err
 	}

diff --git a/tistory.go b/tistory.go
@@ -0,0 +1,33 @@
+package main
+
+import (
+	"net/url"
+	"strings"
+)
+
+// getTistoryUrls maps a tistory image link to its "?original" variant; it only builds the URL and performs no download.
+// http://t1.daumcdn.net/cfile/tistory/[…] => http://t1.daumcdn.net/cfile/tistory/[…]?original
+// http://t1.daumcdn.net/cfile/tistory/[…]?original => as is
+func getTistoryUrls(link string) (map[string]string, error) {
+	if !strings.HasSuffix(link, "?original") { // links that already end in "?original" are left untouched
+		link += "?original"
+	}
+	return map[string]string{link: ""}, nil // single entry: URL as key, empty string as value; the error is always nil
+}
+
+func getLegacyTistoryUrls(link string) (map[string]string, error) { // rewrites a legacy tistory link to its /original/ form; builds the URL only
+	link = strings.Replace(link, "/image/", "/original/", -1) // -1: replace every "/image/" occurrence, not just the first
+	return map[string]string{link: ""}, nil // single entry: URL as key, empty string as value; the error is always nil
+}
+
+func getTistoryWithCDNUrls(urlI string) (map[string]string, error) { // resolves the legacy tistory link carried in the "fname" parameter of urlI
+	parameters, _ := url.ParseQuery(urlI) // NOTE(review): the full urlI string is parsed as a query string and the parse error is discarded — confirm this is intended
+	if val, ok := parameters["fname"]; ok {
+		if len(val) > 0 {
+			if RegexpUrlTistoryLegacy.MatchString(val[0]) { // only the first fname value is considered
+				return getLegacyTistoryUrls(val[0])
+			}
+		}
+	}
+	return nil, nil // no fname value matching the legacy pattern: nil map and nil error
+}
diff --git a/vars.go b/vars.go
@@ -1,28 +1,11 @@
 package main
 
 const (
-	VERSION                          = "1.32"
-	DATABASE_DIR                     = "database"
-	RELEASE_URL                      = "https://github.com/Seklfreak/discord-image-downloader-go/releases/latest"
-	RELEASE_API_URL                  = "https://api.github.com/repos/Seklfreak/discord-image-downloader-go/releases/latest"
-	IMGUR_CLIENT_ID                  = "a39473314df3f59"
-	REGEXP_URL_TWITTER               = `^http(s?):\/\/pbs(-[0-9]+)?\.twimg\.com\/media\/[^\./]+\.(jpg|png)((\:[a-z]+)?)$`
-	REGEXP_URL_TWITTER_STATUS        = `^http(s?):\/\/(www\.)?twitter\.com\/([A-Za-z0-9-_\.]+\/status\/|statuses\/|i\/web\/status\/)([0-9]+)$`
-	REGEXP_URL_TISTORY               = `^http(s?):\/\/[a-z0-9]+\.uf\.tistory\.com\/(image|original)\/[A-Z0-9]+$`
-	REGEXP_URL_TISTORY_WITH_CDN      = `^http(s)?:\/\/[0-9a-z]+.daumcdn.net\/[a-z]+\/[a-zA-Z0-9\.]+\/\?scode=mtistory&fname=http(s?)%3A%2F%2F[a-z0-9]+\.uf\.tistory\.com%2F(image|original)%2F[A-Z0-9]+$`
-	REGEXP_URL_GFYCAT                = `^http(s?):\/\/gfycat\.com\/(gifs\/detail\/)?[A-Za-z]+$`
-	REGEXP_URL_INSTAGRAM             = `^http(s?):\/\/(www\.)?instagram\.com\/p\/[^/]+\/(\?[^/]+)?$`
-	REGEXP_URL_IMGUR_SINGLE          = `^http(s?):\/\/(i\.)?imgur\.com\/[A-Za-z0-9]+(\.gifv)?$`
-	REGEXP_URL_IMGUR_ALBUM           = `^http(s?):\/\/imgur\.com\/(a\/|gallery\/|r\/[^\/]+\/)[A-Za-z0-9]+(#[A-Za-z0-9]+)?$`
-	REGEXP_URL_GOOGLEDRIVE           = `^http(s?):\/\/drive\.google\.com\/file\/d\/[^/]+\/view$`
-	REGEXP_URL_GOOGLEDRIVE_FOLDER    = `^http(s?):\/\/drive\.google\.com\/(drive\/folders\/|open\?id=)([^/]+)$`
-	REGEXP_URL_POSSIBLE_TISTORY_SITE = `^http(s)?:\/\/[0-9a-zA-Z\.-]+\/(m\/)?(photo\/)?[0-9]+$`
-	REGEXP_URL_FLICKR_PHOTO          = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/([0-9]+)@([A-Z0-9]+)\/([0-9]+)(\/)?(\/in\/album-([0-9]+)(\/)?)?$`
-	REGEXP_URL_FLICKR_ALBUM          = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/(([0-9]+)@([A-Z0-9]+)|[A-Za-z0-9]+)\/(albums\/(with\/)?|(sets\/)?)([0-9]+)(\/)?$`
-	REGEXP_URL_FLICKR_ALBUM_SHORT    = `^http(s)?:\/\/((www\.)?flickr\.com\/gp\/[0-9]+@[A-Z0-9]+\/[A-Za-z0-9]+|flic\.kr\/s\/[a-zA-Z0-9]+)$`
-	REGEXP_URL_STREAMABLE            = `^http(s?):\/\/(www\.)?streamable\.com\/([0-9a-z]+)$`
-
-	REGEXP_FILENAME = `^^[^/\\:*?"<>|]{1,150}\.[A-Za-z0-9]{2,4}$$`
+	VERSION         = "1.33"
+	DATABASE_DIR    = "database"
+	RELEASE_URL     = "https://github.com/Seklfreak/discord-image-downloader-go/releases/latest"
+	RELEASE_API_URL = "https://api.github.com/repos/Seklfreak/discord-image-downloader-go/releases/latest"
+	IMGUR_CLIENT_ID = "a39473314df3f59"
 
 	DEFAULT_CONFIG_FILE = "config.ini"