mirror of
				https://github.com/iv-org/invidious.git
				synced 2025-10-31 20:51:56 +00:00 
			
		
		
		
	Performance: Improve speed of automatic instance redirection (#4193)
The automatic instance redirection implemented in #1940 fetches a new list of instances each time someone queries the /redirect endpoint. This is extremely inefficient... This PR moves that work into a background job that fetches a single list only once every 30 minutes. This should improve performance quite a bit. No related issue was opened.
This commit is contained in:
		| @@ -189,6 +189,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL | ||||
|  | ||||
| Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new | ||||
|  | ||||
| Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new | ||||
|  | ||||
| Invidious::Jobs.start_all | ||||
|  | ||||
| def popular_videos | ||||
|   | ||||
| @@ -323,68 +323,6 @@ def parse_range(range) | ||||
|   return 0_i64, nil | ||||
| end | ||||
|  | ||||
# Picks a random clearnet ("https") instance to redirect to.
#
# The instance list is downloaded from api.invidious.io on every call. An
# instance is kept only when:
# - its type is "https",
# - it publishes stats containing a software version with a `YYYY.MM.DD` date,
# - that date is within 30 days of CURRENT_VERSION,
# - and its most recent daily uptime ratio is above 90. When no usable
#   monitoring data is available the uptime check is skipped.
#
# Returns the domain of one randomly chosen instance, or
# "redirect.invidious.io" when no instance qualified (including when the
# list could not be fetched).
def fetch_random_instance
  instance_list = [] of JSON::Any
  instance_api_client = nil

  begin
    instance_api_client = make_client(URI.parse("https://api.invidious.io"))

    # Timeouts
    instance_api_client.connect_timeout = 10.seconds
    instance_api_client.dns_timeout = 10.seconds

    instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a
  rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException
    instance_list = [] of JSON::Any
  ensure
    # Close the client on the failure paths too, not only after a successful fetch.
    instance_api_client.try &.close
  end

  filtered_instance_list = [] of String

  instance_list.each do |data|
    info = data[1]

    # TODO Check if current URL is onion instance and use .onion types if so.
    next unless info["type"]? == "https"

    # Instances can have statistics disabled (null stats) or lack the software
    # dict; both make version validation impossible, so they are skipped.
    version = info["stats"]?
      .try(&.as_h?)
      .try(&.["software"]?)
      .try(&.as_h?)
      .try(&.["version"]?)
      .try(&.as_s?)
    next unless version

    # Makes sure the instance isn't too outdated.
    date_match = version.match(/\d{4}\.\d{2}\.\d{2}/)
    next unless date_match

    remote_commit_date = Time.parse(date_match[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)
    next if (remote_commit_date - local_commit_date).abs.days > 30

    # Uptime check. The previous implementation called `as_nil` on the monitor
    # data, which raises whenever monitoring data IS present, so the ratio was
    # never actually inspected and every instance was accepted. Navigate the
    # structure with nil-returning accessors instead.
    daily_ratios = info["monitor"]?.try(&.as_h?).try(&.["dailyRatios"]?).try(&.as_a?)
    latest_ratio = daily_ratios.try(&.first?).try(&.as_h?).try(&.["ratio"]?)
    health = latest_ratio.try(&.to_s.to_f?)

    if health
      filtered_instance_list << data[0].as_s if health > 90
    else
      # We can't check the health if the monitoring is broken. Thus we'll just add it to the list
      # and move on. Ideally we'd ignore any instance that has broken health monitoring, but since
      # this error often occurs with all the instances at the same time, we have to skip the check.
      filtered_instance_list << data[0].as_s
    end
  end

  # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io
  return "redirect.invidious.io" if filtered_instance_list.empty?

  return filtered_instance_list.sample
end
|  | ||||
| def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String | ||||
|   str = uri.to_s.sub(/^https?:\/\//, "") | ||||
|   if str.size > max_length | ||||
|   | ||||
							
								
								
									
										97
									
								
								src/invidious/jobs/instance_refresh_job.cr
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								src/invidious/jobs/instance_refresh_job.cr
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | ||||
# Background job that periodically rebuilds the list of instances that the
# cross-instance redirect can pick from.
class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob
  # The constant itself is never reassigned; only the array stored under the
  # "INSTANCES" key is swapped out, so that the current list can be read from
  # anywhere within the codebase.
  #
  # "INSTANCES" => Array(Tuple(String, String))  # region, instance

  INSTANCES = {"INSTANCES" => [] of Tuple(String, String)}

  def initialize
  end

  # Job entry point: refreshes the instance list, then sleeps for 30 minutes,
  # forever.
  def begin
    loop do
      refresh_instances
      LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes")
      sleep 30.minutes
      Fiber.yield
    end
  end

  # Refreshes the list of instances used for redirects.
  #
  # Does the following three checks for each instance
  # -  Is it a clear-net instance?
  # -  Is it an instance with a good uptime?
  # -  Is it an updated instance?
  #
  # An instance whose data cannot be parsed is logged and left out. The
  # published list is only replaced when at least one instance passed, so a
  # failed fetch keeps the previous list in place.
  private def refresh_instances
    filtered_instance_list = [] of Tuple(String, String)

    fetch_instances.each do |instance_data|
      # TODO allow Tor hidden service instances when the current instance
      # is also a hidden service. Same for i2p and any other non-clearnet instances.
      begin
        domain = instance_data[0]
        info = instance_data[1]

        # Check the type first so that non-clearnet instances are skipped
        # before their stats are read, instead of being logged as failures.
        next unless info["type"] == "https"
        next if bad_uptime?(info["monitor"])
        next if outdated?(info["stats"]["software"]["version"])

        filtered_instance_list << {info["region"].as_s, domain.as_s}
      rescue ex
        source = domain ? "'#{domain}'" : "an instance"
        LOGGER.info("InstanceListRefreshJob: failed to parse information from #{source} because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\"  ")
      end
    end

    if !filtered_instance_list.empty?
      INSTANCES["INSTANCES"] = filtered_instance_list
    end
  end

  # Fetches information regarding instances from api.invidious.io.
  #
  # Returns the parsed top-level JSON array, or an empty array when the
  # request or parsing fails for any reason.
  private def fetch_instances : Array(JSON::Any)
    instance_api_client = nil

    begin
      # We directly call the stdlib HTTP::Client here as it allows us to negate the effects
      # of the force_resolve config option. This is needed as api.invidious.io does not support ipv6
      # and as such the following request raises if we were to use force_resolve with the ipv6 value.
      instance_api_client = HTTP::Client.new(URI.parse("https://api.invidious.io"))

      # Timeouts
      instance_api_client.connect_timeout = 10.seconds
      instance_api_client.dns_timeout = 10.seconds

      JSON.parse(instance_api_client.get("/instances.json").body).as_a
    rescue ex
      # Rescue everything: an exception escaping from here would propagate out
      # of the `loop` in `begin` and no further refresh would be attempted.
      LOGGER.info("InstanceListRefreshJob: failed to fetch the instance list because \"#{ex}\"")
      [] of JSON::Any
    ensure
      # Close the client on the failure paths too.
      instance_api_client.try &.close
    end
  end

  # Checks if the given target instance is outdated, i.e. whether the date in
  # its version string is more than 30 days away from CURRENT_VERSION.
  #
  # A version without a `YYYY.MM.DD` date is treated as not outdated.
  private def outdated?(target_instance_version) : Bool
    date_match = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
    return false if !date_match

    remote_commit_date = Time.parse(date_match[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

    return (remote_commit_date - local_commit_date).abs.days > 30
  end

  # Checks the health monitor data of the target instance.
  #
  # Returns true when the instance is currently reported as down or when its
  # reported uptime is below 90. Raises when the monitor data does not have
  # the expected shape; the caller treats that as a parse failure.
  private def bad_uptime?(target_instance_health_monitor) : Bool
    return true if target_instance_health_monitor["down"].as_bool

    # The JSON parser yields an Int64 for integral literals such as `100`, and
    # a strict Float64 cast would reject those, so accept both number kinds.
    uptime = target_instance_health_monitor["uptime"]
    uptime_percentage = uptime.as_f? || uptime.as_i64.to_f

    return uptime_percentage < 90
  end
end
| @@ -40,7 +40,16 @@ module Invidious::Routes::Misc | ||||
|  | ||||
|   def self.cross_instance_redirect(env) | ||||
|     referer = get_referer(env) | ||||
|     instance_url = fetch_random_instance | ||||
|  | ||||
|     instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"] | ||||
|     if instance_list.empty? | ||||
|       instance_url = "redirect.invidious.io" | ||||
|     else | ||||
|       # Sample returns an array | ||||
|       # Instances are packaged as {region, domain} in the instance list | ||||
|       instance_url = instance_list.sample(1)[0][1] | ||||
|     end | ||||
|  | ||||
|     env.redirect "https://#{instance_url}#{referer}" | ||||
|   end | ||||
| end | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Samantaz Fox
					Samantaz Fox