Skip to content
This repository has been archived by the owner on Nov 18, 2024. It is now read-only.

Commit

Permalink
adds support for the "new" tistory URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
Seklfreak committed Dec 5, 2018
1 parent 2bcf036 commit 2cfba01
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 61 deletions.
8 changes: 8 additions & 0 deletions extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ func getDownloadLinks(inputURL string, channelID string, interactive bool) map[s
return skipDuplicateLinks(links, channelID, interactive)
}
}
if RegexpUrlTistoryLegacy.MatchString(inputURL) {
links, err := getLegacyTistoryUrls(inputURL)
if err != nil {
fmt.Println("legacy tistory URL failed,", inputURL, ",", err)
} else if len(links) > 0 {
return skipDuplicateLinks(links, channelID, interactive)
}
}
if RegexpUrlGfycat.MatchString(inputURL) {
links, err := getGfycatUrls(inputURL)
if err != nil {
Expand Down
23 changes: 3 additions & 20 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -669,23 +669,6 @@ func getTwitterStatusUrls(url string, channelID string) (map[string]string, erro
return links, nil
}

// getTistoryUrls maps a tistory image URL to its full-size variant by
// rewriting every "/image/" path segment to "/original/".
//
// The result is a single-entry map keyed by the rewritten URL with an empty
// string as its value; the returned error is always nil.
func getTistoryUrls(url string) (map[string]string, error) {
	fullSize := strings.Replace(url, "/image/", "/original/", -1)
	links := make(map[string]string, 1)
	links[fullSize] = ""
	return links, nil
}

// getTistoryWithCDNUrls unwraps a CDN URL that carries a tistory image URL in
// its "fname" query parameter and resolves the wrapped URL via getTistoryUrls.
//
// It returns (nil, nil) when urlI has no fname parameter or when the wrapped
// value does not match RegexpUrlTistory. A urlI that cannot be parsed as a URL
// yields a non-nil error.
func getTistoryWithCDNUrls(urlI string) (map[string]string, error) {
	// Parse the URL first so only its query component is decoded. Feeding the
	// whole URL to url.ParseQuery folds "scheme://host/path?" into the first
	// key, which hides fname whenever it is the first parameter.
	parsed, err := url.Parse(urlI)
	if err != nil {
		return nil, err
	}
	// Query() percent-decodes values and skips malformed pairs, keeping the
	// lookup best-effort; Get returns "" when fname is absent.
	wrapped := parsed.Query().Get("fname")
	if !RegexpUrlTistory.MatchString(wrapped) {
		return nil, nil
	}
	return getTistoryUrls(wrapped)
}

func getGfycatUrls(url string) (map[string]string, error) {
parts := strings.Split(url, "/")
if len(parts) < 3 {
Expand Down Expand Up @@ -1051,8 +1034,8 @@ func getPossibleTistorySiteUrls(url string) (map[string]string, error) {
doc.Find(".article img, #content img, div[role=main] img, .section_blogview img").Each(func(i int, s *goquery.Selection) {
foundUrl, exists := s.Attr("src")
if exists {
isTistoryCdnUrl := RegexpUrlTistoryWithCDN.MatchString(foundUrl)
isTistoryUrl := RegexpUrlTistory.MatchString(foundUrl)
isTistoryCdnUrl := RegexpUrlTistoryLegacyWithCDN.MatchString(foundUrl)
isTistoryUrl := RegexpUrlTistoryLegacy.MatchString(foundUrl)
if isTistoryCdnUrl == true {
finalTistoryUrls, _ := getTistoryWithCDNUrls(foundUrl)
if len(finalTistoryUrls) > 0 {
Expand All @@ -1062,7 +1045,7 @@ func getPossibleTistorySiteUrls(url string) (map[string]string, error) {
}
}
} else if isTistoryUrl == true {
finalTistoryUrls, _ := getTistoryUrls(foundUrl)
finalTistoryUrls, _ := getLegacyTistoryUrls(foundUrl)
if len(finalTistoryUrls) > 0 {
for finalTistoryUrl := range finalTistoryUrls {
foundFilename := s.AttrOr("filename", "")
Expand Down
6 changes: 3 additions & 3 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ import (

func init() {
RegexpUrlTwitter, _ = regexp.Compile(REGEXP_URL_TWITTER)
RegexpUrlTistory, _ = regexp.Compile(REGEXP_URL_TISTORY)
RegexpUrlTistoryWithCDN, _ = regexp.Compile(REGEXP_URL_TISTORY_WITH_CDN)
RegexpUrlTistoryLegacy, _ = regexp.Compile(REGEXP_URL_TISTORY_LEGACY)
RegexpUrlTistoryLegacyWithCDN, _ = regexp.Compile(REGEXP_URL_TISTORY_LEGACY_WITH_CDN)
RegexpUrlGfycat, _ = regexp.Compile(REGEXP_URL_GFYCAT)
RegexpUrlInstagram, _ = regexp.Compile(REGEXP_URL_INSTAGRAM)
RegexpUrlImgurSingle, _ = regexp.Compile(REGEXP_URL_IMGUR_SINGLE)
Expand Down Expand Up @@ -77,7 +77,7 @@ var getTistoryUrlsTests = []urlsTestpair{

func TestGetTistoryUrls(t *testing.T) {
for _, pair := range getTistoryUrlsTests {
v, err := getTistoryUrls(pair.value)
v, err := getLegacyTistoryUrls(pair.value)
if err != nil {
t.Errorf("For %v, expected %v, got %v", pair.value, nil, err)
}
Expand Down
58 changes: 42 additions & 16 deletions regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,44 @@ import (
"regexp"
)

// URL patterns used to classify input links. Every pattern is anchored with
// ^ and $, so it has to match the entire URL.
//
// REGEXP_URL_TISTORY covers daumcdn-hosted tistory files (optionally ending in
// "?original"), REGEXP_URL_TISTORY_LEGACY covers the older *.uf.tistory.com
// image/original URLs, and REGEXP_URL_TISTORY_LEGACY_WITH_CDN covers daumcdn
// URLs that wrap a percent-encoded legacy URL in their fname parameter. The
// dots of the daumcdn.net host in the latter are escaped so they only match a
// literal "." instead of any character.
//
// REGEXP_FILENAME is not a URL pattern; its doubled ^^ and $$ anchors are
// redundant but kept as they were.
const (
	REGEXP_URL_TWITTER                 = `^http(s?):\/\/pbs(-[0-9]+)?\.twimg\.com\/media\/[^\./]+\.(jpg|png)((\:[a-z]+)?)$`
	REGEXP_URL_TWITTER_STATUS          = `^http(s?):\/\/(www\.)?twitter\.com\/([A-Za-z0-9-_\.]+\/status\/|statuses\/|i\/web\/status\/)([0-9]+)$`
	REGEXP_URL_TISTORY                 = `^http(s?):\/\/t[0-9]+\.daumcdn\.net\/cfile\/tistory\/([A-Z0-9]+?)(\?original)?$`
	REGEXP_URL_TISTORY_LEGACY          = `^http(s?):\/\/[a-z0-9]+\.uf\.tistory\.com\/(image|original)\/[A-Z0-9]+$`
	REGEXP_URL_TISTORY_LEGACY_WITH_CDN = `^http(s)?:\/\/[0-9a-z]+\.daumcdn\.net\/[a-z]+\/[a-zA-Z0-9\.]+\/\?scode=mtistory&fname=http(s?)%3A%2F%2F[a-z0-9]+\.uf\.tistory\.com%2F(image|original)%2F[A-Z0-9]+$`
	REGEXP_URL_GFYCAT                  = `^http(s?):\/\/gfycat\.com\/(gifs\/detail\/)?[A-Za-z]+$`
	REGEXP_URL_INSTAGRAM               = `^http(s?):\/\/(www\.)?instagram\.com\/p\/[^/]+\/(\?[^/]+)?$`
	REGEXP_URL_IMGUR_SINGLE            = `^http(s?):\/\/(i\.)?imgur\.com\/[A-Za-z0-9]+(\.gifv)?$`
	REGEXP_URL_IMGUR_ALBUM             = `^http(s?):\/\/imgur\.com\/(a\/|gallery\/|r\/[^\/]+\/)[A-Za-z0-9]+(#[A-Za-z0-9]+)?$`
	REGEXP_URL_GOOGLEDRIVE             = `^http(s?):\/\/drive\.google\.com\/file\/d\/[^/]+\/view$`
	REGEXP_URL_GOOGLEDRIVE_FOLDER      = `^http(s?):\/\/drive\.google\.com\/(drive\/folders\/|open\?id=)([^/]+)$`
	REGEXP_URL_POSSIBLE_TISTORY_SITE   = `^http(s)?:\/\/[0-9a-zA-Z\.-]+\/(m\/)?(photo\/)?[0-9]+$`
	REGEXP_URL_FLICKR_PHOTO            = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/([0-9]+)@([A-Z0-9]+)\/([0-9]+)(\/)?(\/in\/album-([0-9]+)(\/)?)?$`
	REGEXP_URL_FLICKR_ALBUM            = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/(([0-9]+)@([A-Z0-9]+)|[A-Za-z0-9]+)\/(albums\/(with\/)?|(sets\/)?)([0-9]+)(\/)?$`
	REGEXP_URL_FLICKR_ALBUM_SHORT      = `^http(s)?:\/\/((www\.)?flickr\.com\/gp\/[0-9]+@[A-Z0-9]+\/[A-Za-z0-9]+|flic\.kr\/s\/[a-zA-Z0-9]+)$`
	REGEXP_URL_STREAMABLE              = `^http(s?):\/\/(www\.)?streamable\.com\/([0-9a-z]+)$`

	REGEXP_FILENAME = `^^[^/\\:*?"<>|]{1,150}\.[A-Za-z0-9]{2,4}$$`
)

// Package-level compiled matchers, one per REGEXP_URL_* pattern. The visible
// part of initRegex assigns them with regexp.Compile.
// NOTE(review): this span appears to interleave both sides of the diff — the
// first run of declarations (with RegexpUrlTistoryWithCDN) looks like the
// removed list and the second run (with RegexpUrlTistoryLegacy and
// RegexpUrlTistoryLegacyWithCDN) the added one — confirm against regex.go.
var (
RegexpUrlTwitter *regexp.Regexp
RegexpUrlTwitterStatus *regexp.Regexp
RegexpUrlTistory *regexp.Regexp
RegexpUrlTistoryWithCDN *regexp.Regexp
RegexpUrlGfycat *regexp.Regexp
RegexpUrlInstagram *regexp.Regexp
RegexpUrlImgurSingle *regexp.Regexp
RegexpUrlImgurAlbum *regexp.Regexp
RegexpUrlGoogleDrive *regexp.Regexp
RegexpUrlGoogleDriveFolder *regexp.Regexp
RegexpUrlPossibleTistorySite *regexp.Regexp
RegexpUrlFlickrPhoto *regexp.Regexp
RegexpUrlFlickrAlbum *regexp.Regexp
RegexpUrlFlickrAlbumShort *regexp.Regexp
RegexpUrlStreamable *regexp.Regexp
RegexpUrlTwitter *regexp.Regexp
RegexpUrlTwitterStatus *regexp.Regexp
RegexpUrlTistory *regexp.Regexp
RegexpUrlTistoryLegacy *regexp.Regexp
RegexpUrlTistoryLegacyWithCDN *regexp.Regexp
RegexpUrlGfycat *regexp.Regexp
RegexpUrlInstagram *regexp.Regexp
RegexpUrlImgurSingle *regexp.Regexp
RegexpUrlImgurAlbum *regexp.Regexp
RegexpUrlGoogleDrive *regexp.Regexp
RegexpUrlGoogleDriveFolder *regexp.Regexp
RegexpUrlPossibleTistorySite *regexp.Regexp
RegexpUrlFlickrPhoto *regexp.Regexp
RegexpUrlFlickrAlbum *regexp.Regexp
RegexpUrlFlickrAlbumShort *regexp.Regexp
RegexpUrlStreamable *regexp.Regexp
)

func initRegex() error {
Expand All @@ -36,7 +58,11 @@ func initRegex() error {
if err != nil {
return err
}
RegexpUrlTistoryWithCDN, err = regexp.Compile(REGEXP_URL_TISTORY_WITH_CDN)
RegexpUrlTistoryLegacy, err = regexp.Compile(REGEXP_URL_TISTORY_LEGACY)
if err != nil {
return err
}
RegexpUrlTistoryLegacyWithCDN, err = regexp.Compile(REGEXP_URL_TISTORY_LEGACY_WITH_CDN)
if err != nil {
return err
}
Expand Down
33 changes: 33 additions & 0 deletions tistory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package main

import (
"net/url"
"strings"
)

// getTistoryUrls turns a tistory CDN link into the link of the original-size
// file by making sure it ends in "?original". Nothing is fetched here.
//
//	http://t1.daumcdn.net/cfile/tistory/[…]          => http://t1.daumcdn.net/cfile/tistory/[…]?original
//	http://t1.daumcdn.net/cfile/tistory/[…]?original => returned as is
//
// The result is a single-entry map keyed by the resulting link with an empty
// string as its value; the returned error is always nil.
func getTistoryUrls(link string) (map[string]string, error) {
	const originalSuffix = "?original"

	// Stripping an existing suffix before appending it yields the same link
	// for already-suffixed input and the suffixed link otherwise.
	target := strings.TrimSuffix(link, originalSuffix) + originalSuffix

	links := make(map[string]string, 1)
	links[target] = ""
	return links, nil
}

// getLegacyTistoryUrls maps a legacy tistory image link to its full-size
// variant by rewriting every "/image/" path segment to "/original/".
//
// The result is a single-entry map keyed by the rewritten link with an empty
// string as its value; the returned error is always nil.
func getLegacyTistoryUrls(link string) (map[string]string, error) {
	fullSize := strings.Replace(link, "/image/", "/original/", -1)
	links := make(map[string]string, 1)
	links[fullSize] = ""
	return links, nil
}

// getTistoryWithCDNUrls unwraps a CDN URL that carries a legacy tistory image
// URL in its "fname" query parameter and resolves the wrapped URL via
// getLegacyTistoryUrls.
//
// It returns (nil, nil) when urlI has no fname parameter or when the wrapped
// value does not match RegexpUrlTistoryLegacy. A urlI that cannot be parsed as
// a URL yields a non-nil error.
func getTistoryWithCDNUrls(urlI string) (map[string]string, error) {
	// Parse the URL first so only its query component is decoded. Feeding the
	// whole URL to url.ParseQuery folds "scheme://host/path?" into the first
	// key, which hides fname whenever it is the first parameter.
	parsed, err := url.Parse(urlI)
	if err != nil {
		return nil, err
	}
	// Query() percent-decodes values and skips malformed pairs, keeping the
	// lookup best-effort; Get returns "" when fname is absent.
	wrapped := parsed.Query().Get("fname")
	if !RegexpUrlTistoryLegacy.MatchString(wrapped) {
		return nil, nil
	}
	return getLegacyTistoryUrls(wrapped)
}
27 changes: 5 additions & 22 deletions vars.go
Original file line number Diff line number Diff line change
@@ -1,28 +1,11 @@
package main

const (
VERSION = "1.32"
DATABASE_DIR = "database"
RELEASE_URL = "https://github.com/Seklfreak/discord-image-downloader-go/releases/latest"
RELEASE_API_URL = "https://api.github.com/repos/Seklfreak/discord-image-downloader-go/releases/latest"
IMGUR_CLIENT_ID = "a39473314df3f59"
REGEXP_URL_TWITTER = `^http(s?):\/\/pbs(-[0-9]+)?\.twimg\.com\/media\/[^\./]+\.(jpg|png)((\:[a-z]+)?)$`
REGEXP_URL_TWITTER_STATUS = `^http(s?):\/\/(www\.)?twitter\.com\/([A-Za-z0-9-_\.]+\/status\/|statuses\/|i\/web\/status\/)([0-9]+)$`
REGEXP_URL_TISTORY = `^http(s?):\/\/[a-z0-9]+\.uf\.tistory\.com\/(image|original)\/[A-Z0-9]+$`
REGEXP_URL_TISTORY_WITH_CDN = `^http(s)?:\/\/[0-9a-z]+.daumcdn.net\/[a-z]+\/[a-zA-Z0-9\.]+\/\?scode=mtistory&fname=http(s?)%3A%2F%2F[a-z0-9]+\.uf\.tistory\.com%2F(image|original)%2F[A-Z0-9]+$`
REGEXP_URL_GFYCAT = `^http(s?):\/\/gfycat\.com\/(gifs\/detail\/)?[A-Za-z]+$`
REGEXP_URL_INSTAGRAM = `^http(s?):\/\/(www\.)?instagram\.com\/p\/[^/]+\/(\?[^/]+)?$`
REGEXP_URL_IMGUR_SINGLE = `^http(s?):\/\/(i\.)?imgur\.com\/[A-Za-z0-9]+(\.gifv)?$`
REGEXP_URL_IMGUR_ALBUM = `^http(s?):\/\/imgur\.com\/(a\/|gallery\/|r\/[^\/]+\/)[A-Za-z0-9]+(#[A-Za-z0-9]+)?$`
REGEXP_URL_GOOGLEDRIVE = `^http(s?):\/\/drive\.google\.com\/file\/d\/[^/]+\/view$`
REGEXP_URL_GOOGLEDRIVE_FOLDER = `^http(s?):\/\/drive\.google\.com\/(drive\/folders\/|open\?id=)([^/]+)$`
REGEXP_URL_POSSIBLE_TISTORY_SITE = `^http(s)?:\/\/[0-9a-zA-Z\.-]+\/(m\/)?(photo\/)?[0-9]+$`
REGEXP_URL_FLICKR_PHOTO = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/([0-9]+)@([A-Z0-9]+)\/([0-9]+)(\/)?(\/in\/album-([0-9]+)(\/)?)?$`
REGEXP_URL_FLICKR_ALBUM = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/(([0-9]+)@([A-Z0-9]+)|[A-Za-z0-9]+)\/(albums\/(with\/)?|(sets\/)?)([0-9]+)(\/)?$`
REGEXP_URL_FLICKR_ALBUM_SHORT = `^http(s)?:\/\/((www\.)?flickr\.com\/gp\/[0-9]+@[A-Z0-9]+\/[A-Za-z0-9]+|flic\.kr\/s\/[a-zA-Z0-9]+)$`
REGEXP_URL_STREAMABLE = `^http(s?):\/\/(www\.)?streamable\.com\/([0-9a-z]+)$`

REGEXP_FILENAME = `^^[^/\\:*?"<>|]{1,150}\.[A-Za-z0-9]{2,4}$$`
VERSION = "1.33"
DATABASE_DIR = "database"
RELEASE_URL = "https://github.com/Seklfreak/discord-image-downloader-go/releases/latest"
RELEASE_API_URL = "https://api.github.com/repos/Seklfreak/discord-image-downloader-go/releases/latest"
IMGUR_CLIENT_ID = "a39473314df3f59"

DEFAULT_CONFIG_FILE = "config.ini"

Expand Down

0 comments on commit 2cfba01

Please sign in to comment.