Cleanup videos (#3238)

This commit is contained in:
Samantaz Fox
2022-11-17 01:03:23 +01:00
25 changed files with 1544 additions and 1099 deletions

View File

@@ -0,0 +1,168 @@
require "../../parsers_helper.cr"
Spectator.describe "parse_video_info" do
it "parses a regular video" do
# Enable mock
_player = load_mock("video/regular_mrbeast.player")
_next = load_mock("video/regular_mrbeast.next")
raw_data = _player.merge!(_next)
info = parse_video_info("2isYuQZMbdU", raw_data)
# Some basic verifications
expect(typeof(info)).to eq(Hash(String, JSON::Any))
expect(info["videoType"].as_s).to eq("Video")
# Basic video infos
expect(info["title"].as_s).to eq("I Gave My 100,000,000th Subscriber An Island")
expect(info["views"].as_i).to eq(32_846_329)
expect(info["likes"].as_i).to eq(2_611_650)
# For some reason the video length from VideoDetails and the
# one from microformat differs by 1s...
expect(info["lengthSeconds"].as_i).to be_between(930_i64, 931_i64)
expect(info["published"].as_s).to eq("2022-08-04T00:00:00Z")
# Extra video infos
expect(info["allowedRegions"].as_a).to_not be_empty
expect(info["allowedRegions"].as_a.size).to eq(249)
expect(info["allowedRegions"].as_a).to contain(
"AD", "BA", "BB", "BW", "BY", "EG", "GG", "HN", "NP", "NR", "TR",
"TT", "TV", "TW", "TZ", "VA", "VC", "VE", "VG", "VI", "VN", "VU",
"WF", "WS", "YE", "YT", "ZA", "ZM", "ZW"
)
expect(info["keywords"].as_a).to be_empty
expect(info["allowRatings"].as_bool).to be_true
expect(info["isFamilyFriendly"].as_bool).to be_true
expect(info["isListed"].as_bool).to be_true
expect(info["isUpcoming"].as_bool).to be_false
# Related videos
expect(info["relatedVideos"].as_a.size).to eq(19)
expect(info["relatedVideos"][0]["id"]).to eq("tVWWp1PqDus")
expect(info["relatedVideos"][0]["title"]).to eq("100 Girls Vs 100 Boys For $500,000")
expect(info["relatedVideos"][0]["author"]).to eq("MrBeast")
expect(info["relatedVideos"][0]["ucid"]).to eq("UCX6OQ3DkcsbYNE6H8uQQuVA")
expect(info["relatedVideos"][0]["view_count"]).to eq("49702799")
expect(info["relatedVideos"][0]["short_view_count"]).to eq("49M")
expect(info["relatedVideos"][0]["author_verified"]).to eq("true")
# Description
description = "🚀Launch a store on Shopify, Ill buy from 100 random stores that do ▸ "
expect(info["description"].as_s).to start_with(description)
expect(info["shortDescription"].as_s).to start_with(description)
expect(info["descriptionHtml"].as_s).to start_with(description)
# Video metadata
expect(info["genre"].as_s).to eq("Entertainment")
expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty
# Author infos
expect(info["author"].as_s).to eq("MrBeast")
expect(info["ucid"].as_s).to eq("UCX6OQ3DkcsbYNE6H8uQQuVA")
expect(info["authorThumbnail"].as_s).to eq(
"https://yt3.ggpht.com/ytc/AMLnZu84dsnlYtuUFBMC8imQs0IUcTKA9khWAmUOgQZltw=s48-c-k-c0x00ffffff-no-rj"
)
expect(info["authorVerified"].as_bool).to be_true
expect(info["subCountText"].as_s).to eq("101M")
end
it "parses a regular video with no descrition/comments" do
# Enable mock
_player = load_mock("video/regular_no-description.player")
_next = load_mock("video/regular_no-description.next")
raw_data = _player.merge!(_next)
info = parse_video_info("iuevw6218F0", raw_data)
# Some basic verifications
expect(typeof(info)).to eq(Hash(String, JSON::Any))
expect(info["videoType"].as_s).to eq("Video")
# Basic video infos
expect(info["title"].as_s).to eq("Chris Rea - Auberge")
expect(info["views"].as_i).to eq(10_356_197)
expect(info["likes"].as_i).to eq(0)
expect(info["lengthSeconds"].as_i).to eq(283_i64)
expect(info["published"].as_s).to eq("2012-05-21T00:00:00Z")
# Extra video infos
expect(info["allowedRegions"].as_a).to_not be_empty
expect(info["allowedRegions"].as_a.size).to eq(249)
expect(info["allowedRegions"].as_a).to contain(
"AD", "BA", "BB", "BW", "BY", "EG", "GG", "HN", "NP", "NR", "TR",
"TT", "TV", "TW", "TZ", "VA", "VC", "VE", "VG", "VI", "VN", "VU",
"WF", "WS", "YE", "YT", "ZA", "ZM", "ZW"
)
expect(info["keywords"].as_a).to_not be_empty
expect(info["keywords"].as_a.size).to eq(4)
expect(info["keywords"].as_a).to contain_exactly(
"Chris",
"Rea",
"Auberge",
"1991"
).in_any_order
expect(info["allowRatings"].as_bool).to be_true
expect(info["isFamilyFriendly"].as_bool).to be_true
expect(info["isListed"].as_bool).to be_true
expect(info["isUpcoming"].as_bool).to be_false
# Related videos
expect(info["relatedVideos"].as_a.size).to eq(19)
expect(info["relatedVideos"][0]["id"]).to eq("0bkrY_V0yZg")
expect(info["relatedVideos"][0]["title"]).to eq(
"Chris Rea Best Songs Collection - Chris Rea Greatest Hits Full Album 2022"
)
expect(info["relatedVideos"][0]["author"]).to eq("Rock Ultimate")
expect(info["relatedVideos"][0]["ucid"]).to eq("UCekSc2A19di9koUIpj8gxlQ")
expect(info["relatedVideos"][0]["view_count"]).to eq("1992412")
expect(info["relatedVideos"][0]["short_view_count"]).to eq("1.9M")
expect(info["relatedVideos"][0]["author_verified"]).to eq("false")
# Description
expect(info["description"].as_s).to eq(" ")
expect(info["shortDescription"].as_s).to be_empty
expect(info["descriptionHtml"].as_s).to eq("<p></p>")
# Video metadata
expect(info["genre"].as_s).to eq("Music")
expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty
# Author infos
expect(info["author"].as_s).to eq("ChrisReaOfficial")
expect(info["ucid"].as_s).to eq("UC_5q6nWPbD30-y6oiWF_oNA")
expect(info["authorThumbnail"].as_s).to be_empty
expect(info["authorVerified"].as_bool).to be_false
expect(info["subCountText"].as_s).to eq("-")
end
end

View File

@@ -1,6 +1,6 @@
require "../../parsers_helper.cr"
Spectator.describe Invidious::Hashtag do
Spectator.describe "parse_video_info" do
it "parses scheduled livestreams data (test 1)" do
# Enable mock
_player = load_mock("video/scheduled_live_nintendo.player")
@@ -12,26 +12,50 @@ Spectator.describe Invidious::Hashtag do
# Some basic verifications
expect(typeof(info)).to eq(Hash(String, JSON::Any))
expect(info["shortDescription"].as_s).to eq(
"Tune in on 6/22 at 7 a.m. PT for a livestreamed Xenoblade Chronicles 3 Direct presentation featuring roughly 20 minutes of information about the upcoming RPG adventure for Nintendo Switch."
)
expect(info["descriptionHtml"].as_s).to eq(
"Tune in on 6/22 at 7 a.m. PT for a livestreamed Xenoblade Chronicles 3 Direct presentation featuring roughly 20 minutes of information about the upcoming RPG adventure for Nintendo Switch."
)
expect(info["videoType"].as_s).to eq("Scheduled")
# Basic video infos
expect(info["title"].as_s).to eq("Xenoblade Chronicles 3 Nintendo Direct")
expect(info["views"].as_i).to eq(160)
expect(info["likes"].as_i).to eq(2_283)
expect(info["lengthSeconds"].as_i).to eq(0_i64)
expect(info["published"].as_s).to eq("2022-06-22T14:00:00Z") # Unix 1655906400
expect(info["genre"].as_s).to eq("Gaming")
expect(info["genreUrl"].raw).to be_nil
expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty
# Extra video infos
expect(info["authorThumbnail"].as_s).to eq(
"https://yt3.ggpht.com/ytc/AKedOLTt4vtjREUUNdHlyu9c4gtJjG90M9jQheRlLKy44A=s48-c-k-c0x00ffffff-no-rj"
expect(info["allowedRegions"].as_a).to_not be_empty
expect(info["allowedRegions"].as_a.size).to eq(249)
expect(info["allowedRegions"].as_a).to contain(
"AD", "BA", "BB", "BW", "BY", "EG", "GG", "HN", "NP", "NR", "TR",
"TT", "TV", "TW", "TZ", "VA", "VC", "VE", "VG", "VI", "VN", "VU",
"WF", "WS", "YE", "YT", "ZA", "ZM", "ZW"
)
expect(info["authorVerified"].as_bool).to be_true
expect(info["subCountText"].as_s).to eq("8.5M")
expect(info["keywords"].as_a).to_not be_empty
expect(info["keywords"].as_a.size).to eq(11)
expect(info["keywords"].as_a).to contain_exactly(
"nintendo",
"game",
"gameplay",
"fun",
"video game",
"action",
"adventure",
"rpg",
"play",
"switch",
"nintendo switch"
).in_any_order
expect(info["allowRatings"].as_bool).to be_true
expect(info["isFamilyFriendly"].as_bool).to be_true
expect(info["isListed"].as_bool).to be_true
expect(info["isUpcoming"].as_bool).to be_true
# Related videos
expect(info["relatedVideos"].as_a.size).to eq(20)
@@ -50,6 +74,32 @@ Spectator.describe Invidious::Hashtag do
expect(info["relatedVideos"][16]["view_count"].as_s).to eq("53510")
expect(info["relatedVideos"][16]["short_view_count"].as_s).to eq("53K")
expect(info["relatedVideos"][16]["author_verified"].as_s).to eq("true")
# Description
description = "Tune in on 6/22 at 7 a.m. PT for a livestreamed Xenoblade Chronicles 3 Direct presentation featuring roughly 20 minutes of information about the upcoming RPG adventure for Nintendo Switch."
expect(info["description"].as_s).to eq(description)
expect(info["shortDescription"].as_s).to eq(description)
expect(info["descriptionHtml"].as_s).to eq(description)
# Video metadata
expect(info["genre"].as_s).to eq("Gaming")
expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty
# Author infos
expect(info["author"].as_s).to eq("Nintendo")
expect(info["ucid"].as_s).to eq("UCGIY_O-8vW4rfX98KlMkvRg")
expect(info["authorThumbnail"].as_s).to eq(
"https://yt3.ggpht.com/ytc/AKedOLTt4vtjREUUNdHlyu9c4gtJjG90M9jQheRlLKy44A=s48-c-k-c0x00ffffff-no-rj"
)
expect(info["authorVerified"].as_bool).to be_true
expect(info["subCountText"].as_s).to eq("8.5M")
end
it "parses scheduled livestreams data (test 2)" do
@@ -63,34 +113,63 @@ Spectator.describe Invidious::Hashtag do
# Some basic verifications
expect(typeof(info)).to eq(Hash(String, JSON::Any))
expect(info["shortDescription"].as_s).to start_with(
<<-TXT
PBD Podcast Episode 171. In this episode, Patrick Bet-David is joined by Dr. Patrick Moore and Adam Sosnick.
expect(info["videoType"].as_s).to eq("Scheduled")
Join the channel to get exclusive access to perks: https://bit.ly/3Q9rSQL
TXT
)
expect(info["descriptionHtml"].as_s).to start_with(
<<-TXT
PBD Podcast Episode 171. In this episode, Patrick Bet-David is joined by Dr. Patrick Moore and Adam Sosnick.
Join the channel to get exclusive access to perks: <a href="https://bit.ly/3Q9rSQL">bit.ly/3Q9rSQL</a>
TXT
)
# Basic video infos
expect(info["title"].as_s).to eq("The Truth About Greenpeace w/ Dr. Patrick Moore | PBD Podcast | Ep. 171")
expect(info["views"].as_i).to eq(24)
expect(info["likes"].as_i).to eq(22)
expect(info["lengthSeconds"].as_i).to eq(0_i64)
expect(info["published"].as_s).to eq("2022-07-14T13:00:00Z") # Unix 1657803600
expect(info["genre"].as_s).to eq("Entertainment")
expect(info["genreUrl"].raw).to be_nil
expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty
# Extra video infos
expect(info["authorThumbnail"].as_s).to eq(
"https://yt3.ggpht.com/61ArDiQshJrvSXcGLhpFfIO3hlMabe2fksitcf6oGob0Mdr5gztdkXxRljICUodL4iuTSrtxW4A=s48-c-k-c0x00ffffff-no-rj"
expect(info["allowedRegions"].as_a).to_not be_empty
expect(info["allowedRegions"].as_a.size).to eq(249)
expect(info["allowedRegions"].as_a).to contain(
"AD", "AR", "BA", "BT", "CZ", "FO", "GL", "IO", "KE", "KH", "LS",
"LT", "MP", "NO", "PR", "RO", "SE", "SK", "SS", "SX", "SZ", "ZW"
)
expect(info["authorVerified"].as_bool).to be_false
expect(info["subCountText"].as_s).to eq("227K")
expect(info["keywords"].as_a).to_not be_empty
expect(info["keywords"].as_a.size).to eq(25)
expect(info["keywords"].as_a).to contain_exactly(
"Patrick Bet-David",
"Valeutainment",
"The BetDavid Podcast",
"The BetDavid Show",
"Betdavid",
"PBD",
"BetDavid show",
"Betdavid podcast",
"podcast betdavid",
"podcast patrick",
"patrick bet david podcast",
"Valuetainment podcast",
"Entrepreneurs",
"Entrepreneurship",
"Entrepreneur Motivation",
"Entrepreneur Advice",
"Startup Entrepreneurs",
"valuetainment",
"patrick bet david",
"PBD podcast",
"Betdavid show",
"Betdavid Podcast",
"Podcast Betdavid",
"Show Betdavid",
"PBDPodcast"
).in_any_order
expect(info["allowRatings"].as_bool).to be_true
expect(info["isFamilyFriendly"].as_bool).to be_true
expect(info["isListed"].as_bool).to be_true
expect(info["isUpcoming"].as_bool).to be_true
# Related videos
expect(info["relatedVideos"].as_a.size).to eq(20)
@@ -109,5 +188,41 @@ Spectator.describe Invidious::Hashtag do
expect(info["relatedVideos"][9]["view_count"]).to eq("26432")
expect(info["relatedVideos"][9]["short_view_count"]).to eq("26K")
expect(info["relatedVideos"][9]["author_verified"]).to eq("true")
# Description
description_start_text = <<-TXT
PBD Podcast Episode 171. In this episode, Patrick Bet-David is joined by Dr. Patrick Moore and Adam Sosnick.
Join the channel to get exclusive access to perks: https://bit.ly/3Q9rSQL
TXT
expect(info["description"].as_s).to start_with(description_start_text)
expect(info["shortDescription"].as_s).to start_with(description_start_text)
expect(info["descriptionHtml"].as_s).to start_with(
<<-TXT
PBD Podcast Episode 171. In this episode, Patrick Bet-David is joined by Dr. Patrick Moore and Adam Sosnick.
Join the channel to get exclusive access to perks: <a href="https://bit.ly/3Q9rSQL">bit.ly/3Q9rSQL</a>
TXT
)
# Video metadata
expect(info["genre"].as_s).to eq("Entertainment")
expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty
# Author infos
expect(info["author"].as_s).to eq("PBD Podcast")
expect(info["ucid"].as_s).to eq("UCGX7nGXpz-CmO_Arg-cgJ7A")
expect(info["authorThumbnail"].as_s).to eq(
"https://yt3.ggpht.com/61ArDiQshJrvSXcGLhpFfIO3hlMabe2fksitcf6oGob0Mdr5gztdkXxRljICUodL4iuTSrtxW4A=s48-c-k-c0x00ffffff-no-rj"
)
expect(info["authorVerified"].as_bool).to be_false
expect(info["subCountText"].as_s).to eq("227K")
end
end

View File

@@ -12,6 +12,7 @@ require "../src/invidious/helpers/logger"
require "../src/invidious/helpers/utils"
require "../src/invidious/videos"
require "../src/invidious/videos/*"
require "../src/invidious/comments"
require "../src/invidious/helpers/serialized_yt_data"

View File

@@ -5,6 +5,7 @@ require "protodec/utils"
require "yaml"
require "../src/invidious/helpers/*"
require "../src/invidious/channels/*"
require "../src/invidious/videos/caption"
require "../src/invidious/videos"
require "../src/invidious/comments"
require "../src/invidious/playlists"