From 480e073fa9be184b6839619c38795af582247c19 Mon Sep 17 00:00:00 2001 From: syeopite Date: Fri, 8 Dec 2023 18:20:17 -0800 Subject: [PATCH 1/6] Use HTTP pools for image requests to YouTube --- src/invidious.cr | 8 ++++++++ src/invidious/routes/images.cr | 12 +++++------- src/invidious/yt_backend/connection_pool.cr | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/invidious.cr b/src/invidious.cr index 3804197e..81db2c6c 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -92,6 +92,14 @@ SOFTWARE = { YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size) +# Image request pool + +GGPHT_POOL = YoutubeConnectionPool.new(URI.parse("https://yt3.ggpht.com"), capacity: CONFIG.pool_size) + +# Mapping of subdomain => YoutubeConnectionPool +# This is needed as we may need to access arbitrary subdomains of ytimg +YTIMG_POOLS = {} of String => YoutubeConnectionPool + # CLI Kemal.config.extra_options do |parser| parser.banner = "Usage: invidious [arguments]" diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index b6a2e110..1964d597 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -32,7 +32,7 @@ module Invidious::Routes::Images } begin - HTTP::Client.get("https://yt3.ggpht.com#{url}") do |resp| + GGPHT_POOL.client &.get(url) do |resp| return request_proc.call(resp) end rescue ex @@ -80,7 +80,7 @@ module Invidious::Routes::Images } begin - HTTP::Client.get("https://#{authority}.ytimg.com#{url}") do |resp| + get_ytimg_pool(authority).client &.get(url) do |resp| return request_proc.call(resp) end rescue ex @@ -119,7 +119,7 @@ module Invidious::Routes::Images } begin - HTTP::Client.get("https://i9.ytimg.com#{url}") do |resp| + get_ytimg_pool("i9").client &.get(url) do |resp| return request_proc.call(resp) end rescue ex @@ -165,8 +165,7 @@ module Invidious::Routes::Images if name == "maxres.jpg" build_thumbnails(id).each do |thumb| thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg" - # This can likely be optimized into a (small) pool sometime in the future. - if HTTP::Client.head("https://i.ytimg.com#{thumbnail_resource_path}").status_code == 200 + if get_ytimg_pool("i9").client &.head(thumbnail_resource_path).status_code == 200 name = thumb[:url] + ".jpg" break end @@ -199,8 +198,7 @@ module Invidious::Routes::Images } begin - # This can likely be optimized into a (small) pool sometime in the future. - HTTP::Client.get("https://i.ytimg.com#{url}") do |resp| + get_ytimg_pool("i").client &.get(url) do |resp| return request_proc.call(resp) end rescue ex diff --git a/src/invidious/yt_backend/connection_pool.cr b/src/invidious/yt_backend/connection_pool.cr index ca612083..26bf2773 100644 --- a/src/invidious/yt_backend/connection_pool.cr +++ b/src/invidious/yt_backend/connection_pool.cr @@ -77,3 +77,18 @@ def make_client(url : URI, region = nil, force_resolve : Bool = false, &) client.close end end + +# Fetches a HTTP pool for the specified subdomain of ytimg.com +# +# Creates a new one when the specified pool for the subdomain does not exist +def get_ytimg_pool(subdomain) + if pool = YTIMG_POOLS[subdomain]? + return pool + else + LOGGER.info("ytimg_pool: Creating a new HTTP pool for \"https://#{subdomain}.ytimg.com\"") + pool = YoutubeConnectionPool.new(URI.parse("https://#{subdomain}.ytimg.com"), capacity: CONFIG.pool_size) + YTIMG_POOLS[subdomain] = pool + + return pool + end +end From 52bc9aa328e44ff32bb1d7f2e05625e4080459c7 Mon Sep 17 00:00:00 2001 From: syeopite Date: Fri, 8 Dec 2023 18:42:40 -0800 Subject: [PATCH 2/6] Refactor duplicate logic in image routes --- src/invidious/routes/images.cr | 97 ++++++++-------------------------- 1 file changed, 21 insertions(+), 76 deletions(-) diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index 1964d597..7fdd33b0 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -11,29 +11,9 @@ module Invidious::Routes::Images end end - # We're encapsulating this into a proc in order to easily reuse this - # portion of the code for each request block below. - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end - end - - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 - env.response.headers.delete("Transfer-Encoding") - return - end - - proxy_file(response, env) - } - begin GGPHT_POOL.client &.get(url) do |resp| - return request_proc.call(resp) + return self.proxy_image(env, resp) end rescue ex end @@ -61,27 +41,9 @@ module Invidious::Routes::Images end end - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end - end - - env.response.headers["Connection"] = "close" - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 - return env.response.headers.delete("Transfer-Encoding") - end - - proxy_file(response, env) - } - begin get_ytimg_pool(authority).client &.get(url) do |resp| - return request_proc.call(resp) + return self.proxy_image(env, resp) end rescue ex end @@ -101,26 +63,9 @@ module Invidious::Routes::Images end end - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end - end - - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 && response.status_code != 404 - return env.response.headers.delete("Transfer-Encoding") - end - - proxy_file(response, env) - } - begin get_ytimg_pool("i9").client &.get(url) do |resp| - return request_proc.call(resp) + return self.proxy_image(env, resp) end rescue ex end @@ -180,28 +125,28 @@ module Invidious::Routes::Images end end - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end - end - - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 && response.status_code != 404 - return env.response.headers.delete("Transfer-Encoding") - end - - proxy_file(response, env) - } - begin get_ytimg_pool("i").client &.get(url) do |resp| - return request_proc.call(resp) + return self.proxy_image(env, resp) end rescue ex end end + + private def self.proxy_image(env, response) + env.response.status_code = response.status_code + response.headers.each do |key, value| + if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) + env.response.headers[key] = value + end + end + + env.response.headers["Access-Control-Allow-Origin"] = "*" + + if response.status_code >= 300 + return env.response.headers.delete("Transfer-Encoding") + end + + return proxy_file(response, env) + end end From 06e1a508e8dc5417a61a02ad1eb08e94fb24ae99 Mon Sep 17 00:00:00 2001 From: syeopite Date: Fri, 8 Dec 2023 18:52:11 -0800 Subject: [PATCH 3/6] Fix headers not being added in image requests Regression from #2364 --- src/invidious/routes/images.cr | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index 7fdd33b0..c4197746 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -12,7 +12,7 @@ module Invidious::Routes::Images end begin - GGPHT_POOL.client &.get(url) do |resp| + GGPHT_POOL.client &.get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -42,7 +42,7 @@ module Invidious::Routes::Images end begin - get_ytimg_pool(authority).client &.get(url) do |resp| + get_ytimg_pool(authority).client &.get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -64,7 +64,7 @@ module Invidious::Routes::Images end begin - get_ytimg_pool("i9").client &.get(url) do |resp| + get_ytimg_pool("i9").client &.get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -110,7 +110,7 @@ module Invidious::Routes::Images if name == "maxres.jpg" build_thumbnails(id).each do |thumb| thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg" - if get_ytimg_pool("i9").client &.head(thumbnail_resource_path).status_code == 200 + if get_ytimg_pool("i9").client &.head(thumbnail_resource_path, headers).status_code == 200 name = thumb[:url] + ".jpg" break end @@ -126,7 +126,7 @@ module Invidious::Routes::Images end begin - get_ytimg_pool("i").client &.get(url) do |resp| + get_ytimg_pool("i").client &.get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex From 4bc77b81bf994336e324d84ab82a362b330c827d Mon Sep 17 00:00:00 2001 From: syeopite Date: Sat, 23 Dec 2023 13:47:47 -0800 Subject: [PATCH 4/6] Move YTIMG_POOLS to connection_pool.cr --- src/invidious.cr | 4 --- src/invidious/yt_backend/connection_pool.cr | 32 ++++++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/invidious.cr b/src/invidious.cr index 81db2c6c..e0e72415 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -96,10 +96,6 @@ YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size) GGPHT_POOL = YoutubeConnectionPool.new(URI.parse("https://yt3.ggpht.com"), capacity: CONFIG.pool_size) -# Mapping of subdomain => YoutubeConnectionPool -# This is needed as we may need to access arbitrary subdomains of ytimg -YTIMG_POOLS = {} of String => YoutubeConnectionPool - # CLI Kemal.config.extra_options do |parser| parser.banner = "Usage: invidious [arguments]" diff --git a/src/invidious/yt_backend/connection_pool.cr b/src/invidious/yt_backend/connection_pool.cr index 26bf2773..646d0d1a 100644 --- a/src/invidious/yt_backend/connection_pool.cr +++ b/src/invidious/yt_backend/connection_pool.cr @@ -1,17 +1,6 @@ -def add_yt_headers(request) - request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal" - request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36" - - request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" - request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - request.headers["Accept-Language"] ||= "en-us,en;q=0.5" - - # Preserve original cookies and add new YT consent cookie for EU servers - request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}" - if !CONFIG.cookies.empty? - request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" - end -end +# Mapping of subdomain => YoutubeConnectionPool +# This is needed as we may need to access arbitrary subdomains of ytimg +private YTIMG_POOLS = {} of String => YoutubeConnectionPool struct YoutubeConnectionPool property! url : URI @@ -54,6 +43,21 @@ struct YoutubeConnectionPool end end +def add_yt_headers(request) + request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal" + request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36" + + request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" + request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + request.headers["Accept-Language"] ||= "en-us,en;q=0.5" + + # Preserve original cookies and add new YT consent cookie for EU servers + request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}" + if !CONFIG.cookies.empty? + request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" + end +end + def make_client(url : URI, region = nil, force_resolve : Bool = false) client = HTTP::Client.new(url) From 003c6f81dcf6399d1fa808866d0806b915a713ee Mon Sep 17 00:00:00 2001 From: syeopite Date: Mon, 8 Jan 2024 14:13:38 -0800 Subject: [PATCH 5/6] Preserve connection close header of get_storyboard --- src/invidious/routes/images.cr | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index c4197746..251258ec 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -41,9 +41,14 @@ module Invidious::Routes::Images end end + # A callable proc to be used inside #proxy_image + callable_proc = ->(env : HTTP::Server::Context) { + env.response.headers["Connection"] = "close" + } + begin get_ytimg_pool(authority).client &.get(url, headers) do |resp| - return self.proxy_image(env, resp) + return self.proxy_image(env, resp, callable_proc: callable_proc) end rescue ex end @@ -133,7 +138,7 @@ module Invidious::Routes::Images end end - private def self.proxy_image(env, response) + private def self.proxy_image(env, response, callable_proc = nil) env.response.status_code = response.status_code response.headers.each do |key, value| if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) @@ -143,6 +148,10 @@ module Invidious::Routes::Images env.response.headers["Access-Control-Allow-Origin"] = "*" + if callable_proc + callable_proc.call(env) + end + if response.status_code >= 300 return env.response.headers.delete("Transfer-Encoding") end From 75b68618ab14a9f884ee7215a467bc510e8bd2c2 Mon Sep 17 00:00:00 2001 From: syeopite Date: Thu, 25 Apr 2024 13:28:58 -0700 Subject: [PATCH 6/6] Remove useless proc usage in images.cr --- src/invidious/routes/images.cr | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index 251258ec..639697db 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -41,14 +41,10 @@ module Invidious::Routes::Images end end - # A callable proc to be used inside #proxy_image - callable_proc = ->(env : HTTP::Server::Context) { - env.response.headers["Connection"] = "close" - } - begin get_ytimg_pool(authority).client &.get(url, headers) do |resp| - return self.proxy_image(env, resp, callable_proc: callable_proc) + env.response.headers["Connection"] = "close" + return self.proxy_image(env, resp) end rescue ex end @@ -138,7 +134,7 @@ module Invidious::Routes::Images end end - private def self.proxy_image(env, response, callable_proc = nil) + private def self.proxy_image(env, response) env.response.status_code = response.status_code response.headers.each do |key, value| if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) @@ -148,10 +144,6 @@ module Invidious::Routes::Images env.response.headers["Access-Control-Allow-Origin"] = "*" - if callable_proc - callable_proc.call(env) - end - if response.status_code >= 300 return env.response.headers.delete("Transfer-Encoding") end