1
0
Fork 0
unweb/youtube.com/search.pl
2021-06-07 22:24:54 +02:00

244 lines
7.1 KiB
Perl
Executable file

#!/usr/bin/perl
# Copyright (c) 2021 smpl <smpl@slamkode.ml>
# SPDX-License-Identifier: Zlib
use Encode;
use JSON::XS;
use HTML::TreeBuilder 5 -weak;
use LWP;
use LWP::UserAgent;
use MIME::Base64;
use Getopt::Long;
use strict;
# Print the command-line usage summary (program name, accepted options and
# the values each option understands) to standard output.
sub print_help {
    my @usage_lines = (
        "Usage: " . $0 . " [--sort][--uploaded][--type][--duration][--features] QUERY",
        "--sort upload|views|rating|relevance",
        "--uploaded hour|day|week|month|year",
        "--type video|channel|playlist|movie",
        "--duration short|long",
        "--features live|4k|hd|sub|cc|360|vr180|3d|hdr|location|purchased",
    );

    # One line per entry, followed by a single blank line.
    print join("\n", @usage_lines) . "\n\n";
}
# Parse the filter options from @ARGV and build the value of the "sp"
# search parameter.
#
# Recognised options: --sort, --uploaded, --type, --duration, --features.
# A value that is not present in the lookup table below is ignored.
#
# Side effects:
#   * GetOptions removes the options it processes from @ARGV.
#   * Dies when GetOptions reports an error.
#   * Prints a message plus the usage text and exits with status 1 when a
#     recognised --duration, --features or --uploaded value is given while
#     --type is anything other than "video" or "movie" (including omitted).
#
# Returns the base64 text of the encoded options, without any line ending,
# in which every '+', '/' and '=' is replaced by "%25" followed by the
# character's two-digit hex code (so '=' becomes "%253D"). Returns an empty
# string when no recognised option value was supplied.
sub parse_cli {
    # Byte sequences contributed by each option value.
    my %options = (
        "sort" => {
            "upload"    => "\x08\x02",
            "views"     => "\x08\x03",
            "rating"    => "\x08\x01",
            "relevance" => "\x08\x00"
        },
        "uploaded" => {
            "hour"  => "\x08\x01",
            "day"   => "\x08\x02",
            "week"  => "\x08\x03",
            "month" => "\x08\x04",
            "year"  => "\x08\x05"
        },
        "type" => {
            "video"    => "\x10\x01",
            "channel"  => "\x10\x02",
            "playlist" => "\x10\x03",
            "movie"    => "\x10\x04"
        },
        "duration" => {
            "short" => "\x18\x01",
            "long"  => "\x18\x02"
        },
        "features" => {
            "live"      => "\x40\x01",
            "4k"        => "\x70\x01",
            "hd"        => "\x20\x01",
            "sub"       => "\x28\x01",
            "cc"        => "\x30\x01",
            "360"       => "\x78\x01",
            "vr180"     => "\xd0\x01\x01",
            "3d"        => "\x38\x01",
            "hdr"       => "\xc8\x01\x01",
            "location"  => "\xb8\x01\x01",
            "purchased" => "\x48\x01"
        }
    );

    my %given;
    GetOptions(
        "sort:s"     => \$given{'sort'},
        "uploaded:s" => \$given{'uploaded'},
        "type:s"     => \$given{'type'},
        "duration:s" => \$given{'duration'},
        "features:s" => \$given{'features'}
    ) or die("Error in command line arguments\n");

    # Encoded bytes for an option, or undef when the option is absent, empty
    # or carries a value that is not in %options.
    my $encoded_for = sub {
        my ($name) = @_;
        my $value = $given{$name};
        return unless $value;
        return $options{$name}{$value};
    };

    # These three filters are rejected unless --type is video or movie.
    my $type = $given{'type'};
    my $type_allows_filters =
        defined($type) && ($type eq "video" || $type eq "movie");
    if (!$type_allows_filters) {
        for my $name (qw(duration features uploaded)) {
            next unless defined $encoded_for->($name);
            print "\n--" . $name . " can only be combined with type video or movie.\n\n";
            print_help();
            exit 1;
        }
    }

    # The sort bytes come first; the filter bytes are concatenated in a
    # fixed order behind them.
    my $payload = $encoded_for->('sort') // "";
    my $filter  = "";
    for my $name (qw(uploaded type duration features)) {
        my $bytes = $encoded_for->($name);
        $filter .= $bytes if defined $bytes;
    }
    if (length($filter) > 0) {
        # The filter bytes are prefixed with a 0x12 byte and a one-byte length.
        $payload .= "\x12" . chr(length($filter)) . $filter;
    }

    # An explicit empty line ending keeps encode_base64 from appending "\n",
    # which would otherwise end up inside the URL parameter.
    my $sp = encode_base64($payload, "");
    $sp =~ s/([+\/=])/sprintf("%%25%02X", ord($1))/eg;
    return $sp;
}
# --- Main script: build the search URL, fetch the page, extract results ---

# A single argument is used as the query as-is. With more arguments the
# options are parsed first and exactly one non-option argument must remain;
# otherwise the usage text is printed and the script exits.
my $sp = "";
if ($#ARGV != 0) {
    if ($#ARGV < 0) {
        print_help();
        exit;
    }
    $sp = parse_cli();
    if ($#ARGV != 0) {
        print_help();
        exit;
    }
}

# NOTE(review): the query is appended to the URL without percent-encoding,
# so characters such as '&' or '#' in it will change the URL's meaning.
my $params = $ARGV[0];
if (length($sp) > 0) {
    $params .= "&sp=" . $sp;
}

my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0");
$ua->cookie_jar( {} );
# Pre-set the CONSENT cookie for .youtube.com before issuing the request.
$ua->cookie_jar()->set_cookie(0, 'CONSENT', 'YES+cb.20210530-19-p0.en+FX+027', '/', '.youtube.com');

my $response = $ua->get('https://www.youtube.com/results?search_query=' . $params);
# Stop with a readable message instead of trying to parse an error page.
die "Search request failed: " . $response->status_line . "\n"
    unless $response->is_success;

my $jsonobject;
my $coder = JSON::XS->new->pretty->allow_nonref;
my $tree = HTML::TreeBuilder->new;
$tree->parse_content($response->content);

my $body = $tree->find_by_tag_name('body');
die "No <body> element found in the search page\n" unless $body;

# The result data is embedded in a <script> element mentioning ytInitialData.
my $ytInitialData_tag = $body->look_down(
    _tag => "script",
    sub { $_[0]->as_HTML =~ /ytInitialData/ }
);
die "No script containing ytInitialData found in the search page\n"
    unless $ytInitialData_tag;

# Capture everything between "ytInitialData ... = " and the last "};".
my ($ytInitialData) = $ytInitialData_tag->as_HTML =~ /.*ytInitialData[^=]*= (.*});.*/;
die "Could not extract the ytInitialData object from the search page\n"
    unless defined $ytInitialData;

$jsonobject = $coder->decode($ytInitialData);
# NOTE(review): this pretty-printed copy is not used by any code visible here.
my $utf8_encoded_json_text = $coder->encode($jsonobject);

# Items of the first section of the primary results column; an absent list
# is treated as "no results" instead of dying on an undefined dereference.
my @results = @{
    $jsonobject->{"contents"}->{'twoColumnSearchResultsRenderer'}->{'primaryContents'}->{'sectionListRenderer'}->{'contents'}->[0]->{'itemSectionRenderer'}->{'contents'}
    || []
};
# Concatenate the 'text' values of a list of hashes.
#
# Takes a reference to an array of hash references and returns a single
# string made of each element's 'text' value, in array order. An empty
# array yields the empty string.
sub get_textarray
{
    my ($runs) = @_;
    return join '', map { $_->{'text'} } @{ $runs // [] };
}
# Print one video search result to standard output.
#
# Takes a hash reference with a 'videoRenderer' entry and prints the title,
# length, publication time, view count, owner name, the owner's feed URL and
# the watch URL, followed by a blank line.
#
# The feed URL is derived from the owner's page URL: a "/user/NAME" URL
# yields a "user=NAME" feed, anything else is treated as a channel URL and a
# leading "/channel/" is stripped to form the "channel_id=" feed. Both feed
# forms are emitted with the https scheme.
sub print_video
{
    my $video = shift @_;
    my $renderer = $video->{'videoRenderer'};
    my $owner = $renderer->{'ownerText'}->{'runs'}->[0];

    print "Title: " . $renderer->{'title'}->{'runs'}->[0]->{'text'} . "\n";
    print "Length: " . $renderer->{'lengthText'}->{'simpleText'} . "\n";
    print "Published: " . $renderer->{'publishedTimeText'}->{'simpleText'} . "\n";
    print "Views: " . $renderer->{'viewCountText'}->{'simpleText'} . "\n";
    print "By: " . $owner->{'text'} . "\n";

    my $feeduri = $owner->{'navigationEndpoint'}->{'commandMetadata'}->{'webCommandMetadata'}->{'url'};
    $feeduri = "" unless defined $feeduri;
    my $feed;
    if ($feeduri =~ m{^/user/}) {
        $feed = 'https://www.youtube.com/feeds/videos.xml?user=' . ($feeduri =~ s!^/user/!!r);
    } else {
        $feed = 'https://www.youtube.com/feeds/videos.xml?channel_id=' . ($feeduri =~ s!^/channel/!!r);
    }
    print "Feed: " . $feed . "\n";

    print "Url: https://www.youtube.com/watch?v=" . $renderer->{'videoId'} . "\n\n";
}
# Print one playlist search result to standard output.
#
# Takes a hash reference with a 'playlistRenderer' entry and prints the
# playlist title, its video count, the playlist URL and the playlist feed
# URL, followed by a blank line.
sub print_playlist
{
    my ($playlist) = @_;
    my $renderer = $playlist->{'playlistRenderer'};
    my $list_id  = $renderer->{'playlistId'};

    my @fields = (
        "Title: " . $renderer->{'title'}->{'simpleText'},
        "Videos: " . $renderer->{'videoCount'},
        "Url: https://www.youtube.com/playlist?list=" . $list_id,
        "Feed: https://www.youtube.com/feeds/videos.xml?playlist_id=" . $list_id,
    );
    print join("\n", @fields) . "\n\n";
}
# Print one channel search result to standard output.
#
# Takes a hash reference with a 'channelRenderer' entry and prints the
# channel title, the description and subscriber count when present, the
# video count text, the channel URL and the channel feed URL (https, like
# the other URLs this script prints), followed by a blank line.
sub print_channel
{
    my $channel = shift @_;
    my $renderer = $channel->{'channelRenderer'};
    my $channel_id = $renderer->{'channelId'};

    print "Title: " . $renderer->{'title'}->{'simpleText'} . "\n";
    # Description and subscriber count are only printed when the entry has them.
    if ($renderer->{'descriptionSnippet'}) {
        print "Description: " . get_textarray($renderer->{'descriptionSnippet'}->{'runs'}) . "\n";
    }
    if ($renderer->{'subscriberCountText'}) {
        print "Subscribers: " . $renderer->{'subscriberCountText'}->{'simpleText'} . "\n";
    }
    print "Videos: " . get_textarray($renderer->{'videoCountText'}->{'runs'}) . "\n";
    print "Url: https://www.youtube.com/channel/" . $channel_id . "\n";
    print "Feed: https://www.youtube.com/feeds/videos.xml?channel_id=" . $channel_id . "\n\n";
}
# Print every search result of a supported kind, each preceded by a tag line
# naming the kind. Items that carry none of the three renderer keys are
# skipped. The kind is decided by testing for each key directly, so the
# outcome does not depend on hash iteration order.
foreach my $item (@results) {
    if (exists $item->{'videoRenderer'}) {
        print "[video]\n";
        print_video($item);
    } elsif (exists $item->{'playlistRenderer'}) {
        print "[playlist]\n";
        print_playlist($item);
    } elsif (exists $item->{'channelRenderer'}) {
        print "[channel]\n";
        print_channel($item);
    }
}