mirror of
				https://github.com/iv-org/invidious.git
				synced 2025-11-03 22:21:55 +00:00 
			
		
		
		
	Add a youtube URL sanitizer
This commit is contained in:
		
							
								
								
									
										121
									
								
								src/invidious/yt_backend/url_sanitizer.cr
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								src/invidious/yt_backend/url_sanitizer.cr
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,121 @@
 | 
			
		||||
require "uri"
 | 
			
		||||
 | 
			
		||||
# Turns user-supplied YouTube URLs into host-less URIs (path + whitelisted
# query parameters only) that can be trusted by the rest of the code.
module UrlSanitizer
  extend self

  # Query parameters that are kept during sanitization, grouped by the
  # kind of page the URL points to (see `determine_allowed`). Anything
  # not listed here is dropped.
  ALLOWED_QUERY_PARAMS = {
    channel:  ["u", "user", "lb"],
    playlist: ["list"],
    search:   ["q", "search_query", "sp"],
    watch:    [
      "v",                                  # Video ID
      "list", "index",                      # Playlist-related
      "playlist",                           # Unnamed playlist (id,id,id,...) (embed-only?)
      "t", "time_continue", "start", "end", # Timestamp
      "lc",                                 # Highlighted comment (watch page only)
    ],
  }

  # Returns whether the given string only contains ASCII word characters
  # (letters, digits, '_') or dashes. For non-empty strings this is the same
  # as running the following regex in US-ASCII locale: /^[\w-]+$/
  #
  # NOTE: an empty string is accepted too. `process` never passes one,
  # because it splits the path with `remove_empty: true`.
  private def ascii_word?(str : String) : Bool
    # A byte count different from the character count means that the
    # string contains at least one multi-byte (non-ASCII) character.
    return false if str.bytesize != str.size

    str.each_byte do |byte|
      next if 'a'.ord <= byte <= 'z'.ord
      next if 'A'.ord <= byte <= 'Z'.ord
      next if '0'.ord <= byte <= '9'.ord
      next if byte == '-'.ord || byte == '_'.ord

      return false
    end

    return true
  end

  # Returns which kind of parameters are allowed (a key of
  # `ALLOWED_QUERY_PARAMS`) based on the first path component
  # (breadcrumb 0), or `nil` if no query parameter should be kept.
  private def determine_allowed(path_root : String)
    case path_root
    when "watch", "w", "v", "embed", "e", "shorts", "clip"
      return :watch
    when .starts_with?("@"), "c", "channel", "user", "profile", "attribution_link"
      return :channel
    when "playlist", "mix"
      return :playlist
    when "results", "search"
      return :search
    else # hashtag, post, trending, brand URLs, etc..
      return nil
    end
  end

  # Creates a new URI::Params containing only the parameters allowed for
  # *allowed_type* (a key of `ALLOWED_QUERY_PARAMS`).
  private def copy_params(unsafe_params : URI::Params, allowed_type) : URI::Params
    new_params = URI::Params.new

    ALLOWED_QUERY_PARAMS[allowed_type].each do |name|
      if unsafe_params[name]?
        # Only copy the last parameter, in case there is more than one
        new_params[name] = unsafe_params.fetch_all(name)[-1]
      end
    end

    return new_params
  end

  # Transforms any user-supplied youtube URL into something we can trust
  # and use across the code.
  #
  # The returned URI never carries a host: it is made of a sanitized path
  # and of the whitelisted query parameters only. Bogus URLs and URLs whose
  # host is neither `youtube.com` (or one of its subdomains) nor `youtu.be`
  # result in a URI pointing to the homepage ("/").
  def process(str : String) : URI
    # Because URI follows RFC3986 specifications, URL without a scheme
    # will be parsed as a relative path. So we have to add a scheme ourselves.
    str = "https://#{str}" if !str.starts_with?(/https?:\/\//)

    unsafe_uri = URI.parse(str)
    new_uri = URI.new(path: "/")

    # Redirect to homepage for bogus URLs
    host = unsafe_uri.host
    return new_uri if (host.nil? || unsafe_uri.path.nil?)

    breadcrumbs = unsafe_uri.path
      .split('/', remove_empty: true)
      .compact_map do |bc|
        # Exclude attempts at path traversal
        next if bc == "." || bc == ".."

        # Non-alnum characters are unlikely in a genuine URL
        next if !ascii_word?(bc)

        bc
      end

    # If nothing remains, it's either a legit URL to the homepage
    # (who does that!?) or because we filtered some junk earlier.
    return new_uri if breadcrumbs.empty?

    # Replace the original query parameters with the sanitized ones
    case host
    when "youtube.com", .ends_with?(".youtube.com")
      # Only the bare domain and its subdomains are accepted. A plain
      # suffix check would also match hosts like "notyoutube.com".

      # Use our sanitized path (not forgetting the leading '/')
      new_uri.path = "/#{breadcrumbs.join('/')}"

      # Then determine which params are allowed, and copy them over
      if allowed = determine_allowed(breadcrumbs[0])
        new_uri.query_params = copy_params(unsafe_uri.query_params, allowed)
      end
    when "youtu.be"
      # Always redirect to the watch page
      new_uri.path = "/watch"

      new_params = copy_params(unsafe_uri.query_params, :watch)

      # The video ID is the first path component. It is stored under "v"
      # (the video ID parameter of the watch page), which also overrides
      # any "v" that was supplied in the query string.
      new_params["v"] = breadcrumbs[0]

      new_uri.query_params = new_params
    end

    new_uri.host = nil # Safety measure
    return new_uri
  end
end
 | 
			
		||||
		Reference in New Issue
	
	Block a user