From e9b2db4d802d8714a37101851e4325c585c613cf Mon Sep 17 00:00:00 2001 From: syeopite Date: Thu, 14 Nov 2024 16:20:23 -0800 Subject: [PATCH] Connection pool: ensure response is fully read The streaming API of HTTP::Client has an internal buffer that will continue to persist onto the next request unless the response is fully read. This commit privatizes the #client method of Pool and instead exposes various HTTP request methods that will call and yield the underlying request and response. This way, we can ensure that the response is fully read before the client is passed back into the pool for another request. --- src/invidious/channels/channels.cr | 2 +- src/invidious/connection/pool.cr | 29 +++++++++++++++++++++++-- src/invidious/mixes.cr | 2 +- src/invidious/routes/api/manifest.cr | 6 ++--- src/invidious/routes/api/v1/videos.cr | 6 ++--- src/invidious/routes/channels.cr | 2 +- src/invidious/routes/embed.cr | 2 +- src/invidious/routes/errors.cr | 6 ++--- src/invidious/routes/feeds.cr | 5 +++-- src/invidious/routes/images.cr | 12 +++++----- src/invidious/routes/playlists.cr | 2 +- src/invidious/search/processors.cr | 6 ++--- src/invidious/yt_backend/youtube_api.cr | 16 ++++++-------- 13 files changed, 60 insertions(+), 36 deletions(-) diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 65982325..f71c4293 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -166,7 +166,7 @@ def fetch_channel(ucid, pull_all_videos : Bool) } LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed") - rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body + rss = YT_POOL.get("/feeds/videos.xml?channel_id=#{ucid}").body LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed") rss = XML.parse(rss) diff --git a/src/invidious/connection/pool.cr b/src/invidious/connection/pool.cr index 9bb0f5e0..139c7e77 100644 --- a/src/invidious/connection/pool.cr +++ b/src/invidious/connection/pool.cr @@ 
-33,8 +33,33 @@ module Invidious::ConnectionPool # Returns the underlying `DB::Pool` object abstract def pool : DB::Pool(PoolClient) - # Checks out a client from the pool - def client(&) + {% for method in %w[get post put patch delete head options] %} + def {{method.id}}(*args, **kwargs) + self.client do | client | + client.{{method.id}}(*args, **kwargs) do | response | + + result = yield response + return result + + ensure + response.body_io?.try &. skip_to_end + end + end + end + + def {{method.id}}(*args, **kwargs) + self.client do | client | + return response = client.{{method.id}}(*args, **kwargs) + ensure + if response + response.body_io?.try &. skip_to_end + end + end + end + {% end %} + + # Checks out a client in the pool + private def client(&) pool.checkout do |http_client| # Proxy needs to be reinstated every time we get a client from the pool http_client.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy diff --git a/src/invidious/mixes.cr b/src/invidious/mixes.cr index 28ff0ff6..6728ff47 100644 --- a/src/invidious/mixes.cr +++ b/src/invidious/mixes.cr @@ -26,7 +26,7 @@ def fetch_mix(rdid, video_id, cookies = nil, locale = nil) end video_id = "CvFH_6DNRCY" if rdid.starts_with? "OLAK5uy_" - response = YT_POOL.client &.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en", headers) + response = YT_POOL.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en", headers) initial_data = extract_initial_data(response.body) if !initial_data["contents"]["twoColumnWatchNextResults"]["playlist"]? 
diff --git a/src/invidious/routes/api/manifest.cr b/src/invidious/routes/api/manifest.cr index c27caad7..bc1258f5 100644 --- a/src/invidious/routes/api/manifest.cr +++ b/src/invidious/routes/api/manifest.cr @@ -26,7 +26,7 @@ module Invidious::Routes::API::Manifest end if dashmpd = video.dash_manifest_url - response = YT_POOL.client &.get(URI.parse(dashmpd).request_target) + response = YT_POOL.get(URI.parse(dashmpd).request_target) if response.status_code != 200 haltf env, status_code: response.status_code @@ -167,7 +167,7 @@ module Invidious::Routes::API::Manifest # /api/manifest/hls_playlist/* def self.get_hls_playlist(env) - response = YT_POOL.client &.get(env.request.path) + response = YT_POOL.get(env.request.path) if response.status_code != 200 haltf env, status_code: response.status_code @@ -223,7 +223,7 @@ module Invidious::Routes::API::Manifest # /api/manifest/hls_variant/* def self.get_hls_variant(env) - response = YT_POOL.client &.get(env.request.path) + response = YT_POOL.get(env.request.path) if response.status_code != 200 haltf env, status_code: response.status_code diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index 6a3eb8ae..9d10b0e1 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -106,7 +106,7 @@ module Invidious::Routes::API::V1::Videos # Auto-generated captions often have cues that aren't aligned properly with the video, # as well as some other markup that makes it cumbersome, so we try to fix that here if caption.name.includes? 
"auto-generated" - caption_xml = YT_POOL.client &.get(url).body + caption_xml = YT_POOL.get(url).body settings_field = { "Kind" => "captions", @@ -147,7 +147,7 @@ module Invidious::Routes::API::V1::Videos query_params = uri.query_params query_params["fmt"] = "vtt" uri.query_params = query_params - webvtt = YT_POOL.client &.get(uri.request_target).body + webvtt = YT_POOL.get(uri.request_target).body if webvtt.starts_with?("[a-zA-Z0-9_-]{11})"/).try &.["video_id"] env.params.query.delete_all("channel") diff --git a/src/invidious/routes/errors.cr b/src/invidious/routes/errors.cr index 1e9ab44e..2f35f050 100644 --- a/src/invidious/routes/errors.cr +++ b/src/invidious/routes/errors.cr @@ -9,10 +9,10 @@ module Invidious::Routes::ErrorRoutes item = md["id"] # Check if item is branding URL e.g. https://youtube.com/gaming - response = YT_POOL.client &.get("/#{item}") + response = YT_POOL.get("/#{item}") if response.status_code == 301 - response = YT_POOL.client &.get(URI.parse(response.headers["Location"]).request_target) + response = YT_POOL.get(URI.parse(response.headers["Location"]).request_target) end if response.body.empty? 
@@ -40,7 +40,7 @@ module Invidious::Routes::ErrorRoutes end # Check if item is video ID - if item.match(/^[a-zA-Z0-9_-]{11}$/) && YT_POOL.client &.head("/watch?v=#{item}").status_code != 404 + if item.match(/^[a-zA-Z0-9_-]{11}$/) && YT_POOL.head("/watch?v=#{item}").status_code != 404 env.response.headers["Location"] = url haltf env, status_code: 302 end diff --git a/src/invidious/routes/feeds.cr b/src/invidious/routes/feeds.cr index 7f9a0edb..cd0fd2de 100644 --- a/src/invidious/routes/feeds.cr +++ b/src/invidious/routes/feeds.cr @@ -160,8 +160,9 @@ module Invidious::Routes::Feeds "default" => "http://www.w3.org/2005/Atom", } - response = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}") + response = YT_POOL.get("/feeds/videos.xml?channel_id=#{ucid}") return error_atom(404, NotFoundException.new("Channel does not exist.")) if response.status_code == 404 + rss = XML.parse(response.body) videos = rss.xpath_nodes("//default:feed/default:entry", namespaces).map do |entry| @@ -304,7 +305,7 @@ module Invidious::Routes::Feeds end end - response = YT_POOL.client &.get("/feeds/videos.xml?playlist_id=#{plid}") + response = YT_POOL.get("/feeds/videos.xml?playlist_id=#{plid}") return error_atom(404, NotFoundException.new("Playlist does not exist.")) if response.status_code == 404 document = XML.parse(response.body) diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index 88720b38..e79a7432 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -12,7 +12,7 @@ module Invidious::Routes::Images end begin - GGPHT_POOL.client &.get(url, headers) do |resp| + GGPHT_POOL.get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -42,7 +42,7 @@ module Invidious::Routes::Images end begin - Invidious::ConnectionPool.get_ytimg_pool(authority).client &.get(url, headers) do |resp| + Invidious::ConnectionPool.get_ytimg_pool(authority).get(url, headers) do |resp| env.response.headers["Connection"] = "close" 
return self.proxy_image(env, resp) end @@ -65,7 +65,7 @@ module Invidious::Routes::Images end begin - Invidious::ConnectionPool.get_ytimg_pool("i9").client &.get(url, headers) do |resp| + Invidious::ConnectionPool.get_ytimg_pool("i9").get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -81,7 +81,7 @@ module Invidious::Routes::Images end begin - YT_POOL.client &.get(env.request.resource, headers) do |response| + YT_POOL.get(env.request.resource, headers) do |response| env.response.status_code = response.status_code response.headers.each do |key, value| if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) @@ -111,7 +111,7 @@ module Invidious::Routes::Images if name == "maxres.jpg" build_thumbnails(id).each do |thumb| thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg" - if Invidious::ConnectionPool.get_ytimg_pool("i").client &.head(thumbnail_resource_path, headers).status_code == 200 + if Invidious::ConnectionPool.get_ytimg_pool("i").head(thumbnail_resource_path, headers).status_code == 200 name = thumb[:url] + ".jpg" break end @@ -127,7 +127,7 @@ module Invidious::Routes::Images end begin - Invidious::ConnectionPool.get_ytimg_pool("i").client &.get(url, headers) do |resp| + Invidious::ConnectionPool.get_ytimg_pool("i").get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex diff --git a/src/invidious/routes/playlists.cr b/src/invidious/routes/playlists.cr index f2213da4..cb24648f 100644 --- a/src/invidious/routes/playlists.cr +++ b/src/invidious/routes/playlists.cr @@ -464,7 +464,7 @@ module Invidious::Routes::Playlists # Undocumented, creates anonymous playlist with specified 'video_ids', max 50 videos def self.watch_videos(env) - response = YT_POOL.client &.get(env.request.resource) + response = YT_POOL.get(env.request.resource) if url = response.headers["Location"]? 
url = URI.parse(url).request_target return env.redirect url diff --git a/src/invidious/search/processors.cr b/src/invidious/search/processors.cr index 25edb936..4c635ab2 100644 --- a/src/invidious/search/processors.cr +++ b/src/invidious/search/processors.cr @@ -16,11 +16,11 @@ module Invidious::Search # Search a youtube channel # TODO: clean code, and rely more on YoutubeAPI def channel(query : Query) : Array(SearchItem) - response = YT_POOL.client &.get("/channel/#{query.channel}") + response = YT_POOL.get("/channel/#{query.channel}") if response.status_code == 404 - response = YT_POOL.client &.get("/user/#{query.channel}") - response = YT_POOL.client &.get("/c/#{query.channel}") if response.status_code == 404 + response = YT_POOL.get("/user/#{query.channel}") + response = YT_POOL.get("/c/#{query.channel}") if response.status_code == 404 initial_data = extract_initial_data(response.body) ucid = initial_data.dig?("header", "c4TabbedHeaderRenderer", "channelId").try(&.as_s?) raise ChannelSearchException.new(query.channel) if !ucid diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index b40092a1..9a0c1cf8 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -639,15 +639,13 @@ module YoutubeAPI LOGGER.trace("YoutubeAPI: POST data: #{data}") # Send the POST request - body = YT_POOL.client() do |client| - client.post(url, headers: headers, body: data.to_json) do |response| - if response.status_code != 200 - raise InfoException.new("Error: non 200 status code. Youtube API returned \ - status code #{response.status_code}. See \ - https://docs.invidious.io/youtube-errors-explained/ for troubleshooting.") - end - self._decompress(response.body_io, response.headers["Content-Encoding"]?) + body = YT_POOL.post(url, headers: headers, body: data.to_json) do |response| + if response.status_code != 200 + raise InfoException.new("Error: non 200 status code. 
Youtube API returned \ + status code #{response.status_code}. See \ + https://docs.invidious.io/youtube-errors-explained/ for troubleshooting.") end + self._decompress(response.body_io, response.headers["Content-Encoding"]?) end # Convert result to Hash @@ -695,7 +693,7 @@ module YoutubeAPI # Send the POST request begin - response = COMPANION_POOL.client &.post(endpoint, headers: headers, body: data.to_json) + response = COMPANION_POOL.post(endpoint, headers: headers, body: data.to_json) body = response.body if (response.status_code != 200) raise Exception.new(