From d93d19520e24a2fe498d25eb6f98b441764e27e2 Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Thu, 7 Sep 2023 00:54:21 -0400 Subject: [PATCH] Use `SearchHashtag` for parsing the header of hashtag pages --- src/invidious/hashtag.cr | 59 +++----------------------- src/invidious/yt_backend/extractors.cr | 14 ++++-- 2 files changed, 16 insertions(+), 57 deletions(-) diff --git a/src/invidious/hashtag.cr b/src/invidious/hashtag.cr index 15faba7c..a0f7f718 100644 --- a/src/invidious/hashtag.cr +++ b/src/invidious/hashtag.cr @@ -5,15 +5,15 @@ module Invidious::Hashtag include DB::Serializable property videos : Array(SearchItem) | Array(Video) - property header : HashtagHeader? + property header : SearchHashtag? property has_next_continuation : Bool def to_json(locale : String?, json : JSON::Builder) json.object do - json.field "type", "hashtag" + json.field "type", "hashtagPage" if self.header != nil json.field "header" do - self.header.to_json(json) + self.header.try &.as(SearchHashtag).to_json(locale, json) end end json.field "results" do @@ -26,39 +26,6 @@ module Invidious::Hashtag json.field "hasNextPage", self.has_next_continuation end end - - # TODO: remove the locale and follow the crystal convention - def to_json(locale : String?, _json : Nil) - JSON.build do |json| - to_json(locale, json) - end - end - - def to_json(json : JSON::Builder) - to_json(nil, json) - end - end - - struct HashtagHeader - include DB::Serializable - - property tag : String - property channel_count : Int64 - property video_count : Int64 - - def to_json(json : JSON::Builder) - json.object do - json.field "hashtag", self.tag - json.field "channelCount", self.channel_count - json.field "videoCount", self.video_count - end - end - - def to_json(_json : Nil) - JSON.build do |json| - to_json(json) - end - end end def fetch(hashtag : String, page : Int, region : String? 
= nil) : HashtagPage @@ -72,8 +39,8 @@ module Invidious::Hashtag else # item browses the first page (including metadata) response = YoutubeAPI.browse("FEhashtag", params: item, client_config: client_config) - if item_contents = response.dig?("header", "hashtagHeaderRenderer") - header = parse_hashtag_renderer(item_contents) + if item_contents = response.dig?("header") + header = parse_item(item_contents).try &.as(SearchHashtag) end end @@ -119,20 +86,4 @@ module Invidious::Hashtag .try { |i| Base64.urlsafe_encode(i) } .try { |i| URI.encode_www_form(i) } end - - def parse_hashtag_renderer(item_contents) - info = extract_text(item_contents.dig?("hashtagInfoText")) || "" - - regex_match = /(?<videos>\d+\S)\D+(?<channels>\d+\S)/.match(info) - - hashtag = extract_text(item_contents.dig?("hashtag")) || "" - videos = short_text_to_number(regex_match.try &.["videos"]?.try &.to_s || "0") - channels = short_text_to_number(regex_match.try &.["channels"]?.try &.to_s || "0") - - return HashtagHeader.new({ - tag: hashtag, - channel_count: channels, - video_count: videos, - }) - end end diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 2631b62a..c3823894 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -226,9 +226,11 @@ private module Parsers # # A `hashtagTileRenderer` is a kind of search result. # It can be found when searching for any hashtag (e.g "#hi" or "#shorts") + # + # A `hashtagHeaderRenderer` is displayed on the first page of the hashtag page. module HashtagRendererParser def self.process(item : JSON::Any, author_fallback : AuthorFallback) - if item_contents = item["hashtagTileRenderer"]? + if item_contents = (item["hashtagTileRenderer"]? || item["hashtagHeaderRenderer"]?) 
return self.parse(item_contents) end end @@ -240,8 +242,14 @@ private module Parsers url = item_contents.dig?("onTapCommand", "commandMetadata", "webCommandMetadata", "url").try &.as_s url ||= URI.encode_path("/hashtag/#{title.lchop('#')}") - video_count_txt = extract_text(item_contents["hashtagVideoCount"]?) # E.g "203K videos" - channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels" + if info = extract_text(item_contents.dig?("hashtagInfoText")) + regex_match = /(?<videos>\d+\S)\D+(?<channels>\d+\S)/.match(info) + videos = regex_match.try &.["videos"]?.try &.to_s + channels = regex_match.try &.["channels"]?.try &.to_s + else + video_count_txt = extract_text(item_contents["hashtagVideoCount"]?) # E.g "203K videos" + channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels" + end # Fallback for video/channel counts if channel_count_txt.nil? || video_count_txt.nil?