fix: fix channel videos and playlists on searches (#5736)

* fix: fix channel videos and playlists on searches

Channel videos are now encapsulated in a `lockupViewModel`.

There are 3 types of content that can be inside a `lockupViewModel`:

- LOCKUP_CONTENT_TYPE_VIDEO
- LOCKUP_CONTENT_TYPE_PLAYLIST
- LOCKUP_CONTENT_TYPE_PODCAST

This commit parses the `LOCKUP_CONTENT_TYPE_VIDEO`, `LOCKUP_CONTENT_TYPE_PLAYLIST`, and `LOCKUP_CONTENT_TYPE_PODCAST` types
to fix videos, playlists, and podcasts in channels, as well as other parts of Invidious where playlists and videos are displayed.

* remove unused variable `author_verified`

* fix parsing for podcasts

For some reason, podcasts contain an empty JSON Object that we have to
skip. Therefore, we iterate metadataRows until we find metadataParts,
since metadataRows will not always contain a single Object.

* fix length_seconds for channel videos

* fix playlists parsing for playlists without metadataParts

On some channels like MrBeast, metadataParts is absent, so the author
information is missing. This is intended behaviour by YouTube, since there is
no author information attached to those playlists.

Example URL: https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA/playlists

* restore author_verified functionality

* more robust metadata_parts parsing

Videos that have two or more authors (collaborations) have their
author information in JSON Objects inside metadataParts, alongside
their view count and published date. Therefore, we need to iterate
the metadataParts array and filter on the content of
each JSON Object to find the view count and published date for such
videos.

Example:

```json
"metadataParts": [
    {
        "text": {
            "content": "Veritasium"
        },
        "icon": {
            "name": "CHECK_CIRCLE_FILLED",
            "height": 14,
            "width": 14,
            "accessibilityLabel": "Verified"
        }
    },
    {
        "text": {
            "content": "and Linus Tech Tips"
        },
        "icon": {
            "name": "CHECK_CIRCLE_FILLED",
            "height": 14,
            "width": 14,
            "accessibilityLabel": "Verified"
        }
    },
    {
        "text": {
            "content": "10M"
        },
        "accessibilityLabel": "10 million views"
    },
    {
        "text": {
            "content": "1y ago"
        }
    }
]
```

* improve playlist metadataRows and metadataParts parsing for channel playlists

* apply ameba suggestion

* Also parse lessons for playlists that are a course
This commit is contained in:
Fijxu
2026-05-26 17:46:32 -04:00
committed by GitHub
parent 99390d065d
commit 6659cbbbd8

View File

@@ -630,13 +630,14 @@ private module Parsers
end
end
# Parses an InnerTube lockupViewModel into a SearchPlaylist.
# Parses an InnerTube lockupViewModel into a SearchPlaylist or a SearchVideo
# Returns nil when the given object is not a lockupViewModel.
#
# This structure is present since November 2024 on the "podcasts" and
# "playlists" tabs of the channel page. It is usually encapsulated in either
# a richItemRenderer or a richGridRenderer.
#
# Since 2026-05-21, channel videos are also encapsulated in a lockupViewModel.
module LockupViewModelParser
extend self
include BaseParser
@@ -648,25 +649,90 @@ private module Parsers
end
private def parse_internal(item_contents, author_fallback)
playlist_id = item_contents["contentId"].as_s
content_type = item_contents["contentType"].as_s
if content_type == "LOCKUP_CONTENT_TYPE_VIDEO"
thumbnail_view_model = item_contents.dig(
"contentImage", "thumbnailViewModel"
)
thumbnail = thumbnail_view_model.dig("image", "sources", 0, "url").as_s
video_id = item_contents["contentId"].as_s
metadata = item_contents.dig("metadata", "lockupMetadataViewModel")
title = metadata.dig("title", "content").as_s
# Contains the views of the video and the published time of the video.
metadata_parts = metadata.dig("metadata", "contentMetadataViewModel", "metadataRows", 0, "metadataParts").try &.as_a
view_count_text = metadata_parts.try &.find { |item| item["icon"]?.nil? && item.dig?("text", "content").try &.as_s.includes?("views") }
.try &.dig("text", "content").as_s
published = metadata_parts.try &.find { |item| item["icon"]?.nil? && item.dig?("text", "content").try &.as_s.includes?("ago") }
.try { |item| decode_date(item.dig("text", "content").as_s) } || Time.local
view_count = short_text_to_number(view_count_text || "0")
length = thumbnail_view_model.dig("overlays", 0, "thumbnailBottomOverlayViewModel", "badges", 0, "thumbnailBadgeViewModel", "text").try &.as_s
length_seconds = decode_length_seconds(length) if length
return SearchVideo.new({
title: title,
id: video_id,
author: author_fallback.name,
ucid: author_fallback.id,
published: published,
views: view_count,
description_html: "",
length_seconds: length_seconds || 0,
premiere_timestamp: Time.unix(0),
author_verified: false,
author_thumbnail: nil,
badges: VideoBadges::None,
})
# If it's a playlist, its content_type would be "LOCKUP_CONTENT_TYPE_PLAYLIST"
# If it's a podcast, its content_type would be "LOCKUP_CONTENT_TYPE_PODCAST"
# Playlist and Podcast structures are quite similar, so we can use the same logic
# we use to parse Playlist data for Podcasts.
else
thumbnail_view_model = item_contents.dig(
"contentImage", "collectionThumbnailViewModel",
"primaryThumbnail", "thumbnailViewModel"
)
thumbnail = thumbnail_view_model.dig("image", "sources", 0, "url").as_s
playlist_id = item_contents["contentId"].as_s
# This complicated sequence tries to extract the following data structure:
# "overlays": [{
#
# "overlays": [
# {
# "thumbnailOverlayBadgeViewModel": {
# "thumbnailBadges": [{
# "thumbnailBadges": [
# {
# "thumbnailBadgeViewModel": {
# "text": "430 episodes",
# "badgeStyle": "THUMBNAIL_OVERLAY_BADGE_STYLE_DEFAULT"
# "icon": {
# "sources": [
# {
# "clientResource": {
# "imageName": "BROADCAST"
# }
# }]
# }
# }]
# ]
# },
# "text": "5 episodes",
# "badgeStyle": "THUMBNAIL_OVERLAY_BADGE_STYLE_DEFAULT",
# "backgroundColor": {
# "lightTheme": 991526,
# "darkTheme": 991526
# }
# }
# }
# ],
# "position": "THUMBNAIL_OVERLAY_BADGE_POSITION_BOTTOM_END"
# }
# },
# ... <-- There is another item below the Object we use to extract episodes/videos
# ]
#
# NOTE: this simplistic `.to_i` conversion might not work on larger
# playlists and hasn't been tested.
@@ -674,13 +740,74 @@ private module Parsers
.compact_map(&.dig?("thumbnailOverlayBadgeViewModel", "thumbnailBadges").try &.as_a)
.flatten
.find(nil, &.dig?("thumbnailBadgeViewModel", "text").try { |node|
{"episodes", "videos"}.any? { |str| node.as_s.ends_with?(str) }
{"episodes", "videos", "lessons"}.any? { |str| node.as_s.ends_with?(str) }
})
.try &.dig("thumbnailBadgeViewModel", "text").as_s.to_i(strict: false)
metadata = item_contents.dig("metadata", "lockupMetadataViewModel")
title = metadata.dig("title", "content").as_s
# metadataParts is not always in the first element of the metadataRows array; therefore,
# we search for it by iterating the array. We have only seen metadataRows with at least
# 2 items inside it.
#
# It looks like this:
# "metadataRows": [
# {}, <-- empty Object
# {
# "metadataParts": [ ... ] <-- metadataParts with the information we are searching for.
# }
# ]
#
# Playlists on channels also contain metadataRows, but not with the type of data we are searching
# for (the channel name and channel ID). Instead, they have one or two fields, depending on the
# playlist's updated date:
#
# It looks like this:
# "metadataRows": [
# {
# "metadataParts": [
# {
# "text": {
# "content": "Updated 4 days ago"
# }
# } <-- This object is missing if the playlist has not been updated in around 7
# days
# ]
# },
# {
# "metadataParts": [
# {
# "text": {
# "content": "View full playlist",
# "commandRuns": [ ... ],
# "styleRuns": [ ... ].
# }
# } <-- This object is always present, so we use this to determine if the
# metadataParts can be used or not.
# ]
# }
# ]
#
metadata_rows = metadata.dig?("metadata", "contentMetadataViewModel", "metadataRows").try &.as_a
metadata_parts = metadata_rows.try &.find { |row|
parts = row["metadataParts"]?.try &.as_a
parts && !parts.any? { |item| item.dig?("text", "content").try &.as_s == "View full playlist" }
}.try &.["metadataParts"].as_a
if author_info = metadata_parts.try &.find(&.dig?("text", "commandRuns"))
.try &.["text"]
author = author_info["content"].as_s
author_id = author_info.dig?("commandRuns", 0, "onTap", "innertubeCommand", "browseEndpoint", "browseId")
.try &.as_s || author_fallback.id
author_verified = (author_info.dig?("attachmentRuns", 0, "element", "type", "imageType", "image", "sources", 0, "clientResource", "imageName")
.try &.as_s) == "CHECK_CIRCLE_FILLED" || false
else
author = author_fallback.name
author_id = author_fallback.id
author_verified = false
end
# TODO: Retrieve "updated" info from metadata parts
# rows = metadata.dig("metadata", "contentMetadataViewModel", "metadataRows").as_a
# parts_text = rows.map(&.dig?("metadataParts", "text", "content").try &.as_s)
@@ -693,14 +820,15 @@ private module Parsers
return SearchPlaylist.new({
title: title,
id: playlist_id,
author: author_fallback.name,
ucid: author_fallback.id,
author: author,
ucid: author_id,
video_count: video_count || -1,
videos: [] of SearchPlaylistVideo,
thumbnail: thumbnail,
author_verified: false,
author_verified: author_verified,
})
end
end
def self.parser_name
return {{@type.name}}