From 4c366c89eb9471807e503c6f5f9698834d25bd16 Mon Sep 17 00:00:00 2001
From: Ze'ev Schurmann <thisiszeev@gmail.com>
Date: Thu, 28 Dec 2023 18:59:50 +0200
Subject: [PATCH] Commit all files

---
 .gitignore            |  11 ++
 README.md             |  37 ++++-
 fetchyts.sh           | 377 ++++++++++++++++++++++++++++++++++++++++++
 imdb/README.md        |  59 +++++++
 imdb/fetchimdbdata.sh |  12 ++
 job.sh                |   6 +
 6 files changed, 500 insertions(+), 2 deletions(-)
 create mode 100644 .gitignore
 create mode 100755 fetchyts.sh
 create mode 100644 imdb/README.md
 create mode 100755 imdb/fetchimdbdata.sh
 create mode 100755 job.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d2fc261
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+*.tsv
+*.jpeg
+*.jpg
+*.png
+*.html
+*.torrent
+*.pdf
+*.json
+*.old
+*.list
+*.log
diff --git a/README.md b/README.md
index dd429ce..376bc2d 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,36 @@
-# moviecataloger
+# Movie Cataloger
 
-Create PDF pages for a movie catalog. Also downloads the torrent files from YTS.MX
\ No newline at end of file
+This Bash script creates PDF files of movies.
+
+Simply create a text file of IMDB codes (one code per line), and call it movies.list
+
+```
+$ bash job.sh
+```
+
+The script will then work through your list and generate PDFs for each of the movies. You can then print the PDFs and use them as an easy reference for your collection.
+
+# LEGAL
+
+Piracy is illegal. I trust you own legitimate copies of the movies you catalog using this script.
+
+I am not responsible for how you use this script; it is provided as is, with no warranty whatsoever.
+
+# DONATIONS
+
+Please consider making a small donation. Even though my scripts are open source and free to use, I still need to eat. And the occasional bottle of wine also goes down well.
+
+- $5 buys me a cup of coffee
+- $10 buys me a nice burger
+- $20 buys me a bottle of wine
+- Anything above that will be awesome as well.
+
+You can send me a donation via Paypal https://www.paypal.com/paypalme/thisiszeev
+
+Drop me a message on Reddit if you do make a donation. u/thisiszeev
+
+Support is only offered freely to those who donate $20 or more.
+
+Your donation contributes to further development.
+
+If you need a custom script, contact me on Reddit for pricing.
\ No newline at end of file
diff --git a/fetchyts.sh b/fetchyts.sh
new file mode 100755
index 0000000..a6cfd24
--- /dev/null
+++ b/fetchyts.sh
@@ -0,0 +1,377 @@
+#!/bin/bash
+# TMDB API key; it is appended as api_key=... to every TMDB request below. Empty by default.
+tmdbkey=""
+
+# Needs wkhtmltopdf
+# apt install wkhtmltopdf
+# ln -s /usr/bin/wkhtmltopdf /usr/local/bin/html2pdf
+
+# Needs qrencode
+# apt install qrencode
+
+# TMDB API
+# wget "https://api.themoviedb.org/3/movie/tt0448134?external_source=imdb_id&api_key=apikeygoeshere"
+# wget "https://api.themoviedb.org/3/find/tt0448134?external_source=imdb_id&api_key=apikeygoeshere"
+
+#curl --request GET --url 'https://api.themoviedb.org/3/movie/1272/content_rating?language=en-US' --header 'Authorization: Bearer tokengoeshere' --header 'accept: application/json'
+
+#curl --request GET --url https://api.themoviedb.org/3/certification/movie/1272 --header 'Authorization: Bearer tokengoeshere' --header 'accept: application/json'
+
+#This one returns the Audience Restriction data under certification.
+#curl --request GET --url https://api.themoviedb.org/3/movie/1272/release_dates --header 'Authorization: Bearer tokengoeshere' --header 'accept: application/json'
+
+#Use this to get a legend of the certifications
+#curl --request GET --url https://api.themoviedb.org/3/certification/movie/list --header 'Authorization: Bearer tokengoeshere' --header 'accept: application/json'
+
+# Build ./json/lang.json once: an object keyed by ISO 639-1 language code
+# whose values hold the English ("anglo") and native names that TMDB's
+# configuration/languages endpoint reports. The file is only created here;
+# an existing one is never refreshed.
+if [[ ! -f ./json/lang.json ]]
+then
+	echo "Language JSON Data not available... Downloading now..."
+
+	# wget -O cannot create missing directories, so make sure ./json exists.
+	mkdir -p ./json
+
+	# Let jq build the object instead of echoing JSON text line by line.
+	# -e makes jq fail when the download produced no usable document, and
+	# the result is written under a temporary name first, so a failed run
+	# never leaves a broken lang.json behind that would later be mistaken
+	# for a finished download.
+	if wget -O "./json/lang.temp" "https://api.themoviedb.org/3/configuration/languages?api_key=$tmdbkey" \
+		&& jq -e 'map({(.iso_639_1): {anglo: .english_name, native: .name}}) | add // {}' \
+			"./json/lang.temp" > "./json/lang.new"
+	then
+		mv -f "./json/lang.new" "./json/lang.json"
+	else
+		# Carry on as before; the next run will try the download again.
+		echo "Could not build ./json/lang.json from the TMDB response." >&2
+	fi
+
+	# Remove the scratch files whether or not the build worked.
+	rm -f "./json/lang.temp" "./json/lang.new"
+fi
+
+if [[ ! $1 =~ ^tt[0-9]+$ ]]	# IMDb codes are "tt" plus digits; $1 ends up in URLs and file names
+then
+	echo "Usage:"
+	echo
+	echo "$ ./fetchyts.sh {imdbcode}"
+	echo
+	echo "{imdbcode} is the code starting with tt found in the title's IMDB page URL."
+	exit 1
+fi
+
+if [[ ! -f "./json/$1.yts.json" ]]
+then
+	echo
+	echo "Downloading YTS JSON Data..."
+	# Cache the response only when both the download and jq's parse succeed.
+	wget -O "./json/$1.yts.temp" "https://yts.mx/api/v2/movie_details.json?imdb_id=$1&with_images=true&with_cast=true" \
+		&& jq '.' "./json/$1.yts.temp" > "./json/$1.yts.json" || rm -f "./json/$1.yts.json"
+	rm -f "./json/$1.yts.temp"
+else
+	printf '\n%s\n' "YTS JSON Data exists for this title."
+fi
+
+echo
+echo "Verifying Metadata..."
+exists=$( jq '.data.movie.id' "./json/$1.yts.json" )
+# Id 0 is what this check has always treated as "not on YTS"; an empty or null result means the cached JSON is missing or unusable.
+if [[ -z $exists || $exists == 0 || $exists == null ]]
+then
+	echo
+	echo "Title does not exist on YTS!"
+	echo "Removing JSON file and adding to error.log"
+	rm -f "./json/$1.yts.json"
+	echo "$1" >> error.log
+	exit 1
+fi
+
+# Fetch the TMDB data once per title and cache it as ./json/<id>.tmdb.json:
+# the movie details extended with a "cert" object that maps each
+# iso_3166_1 country code to the certification of the first release date
+# listed for that country.
+if [[ ! -f "./json/$1.tmdb.json" ]]
+then
+	echo
+	echo "Downloading TMDB JSON Data..."
+
+	wget -O "./json/$1.tmdb.temp" "https://api.themoviedb.org/3/movie/$1?external_source=imdb_id&api_key=$tmdbkey"
+	wget -O "./json/$1.cert.temp" "https://api.themoviedb.org/3/movie/$1/release_dates?external_source=imdb_id&api_key=$tmdbkey"
+
+	# NOTE(review): the videos response is downloaded but not read anywhere
+	# in this script; <id>.vids.temp is left on disk exactly as before.
+	wget -O "./json/$1.vids.temp" "https://api.themoviedb.org/3/movie/$1/videos?external_source=imdb_id&api_key=$tmdbkey"
+
+	# Merge the two documents with jq rather than splicing pretty-printed
+	# text with head/tail, so the result is valid JSON whatever the last
+	# property of the details looks like. -e makes jq fail when the details
+	# download came back empty, and the cache file only appears after a
+	# successful merge, so a failed run is retried next time instead of
+	# leaving a broken file behind.
+	if jq -e --slurpfile cert "./json/$1.cert.temp" \
+		'. + {cert: (($cert[0].results // []) | map({(.iso_3166_1): .release_dates[0].certification}) | add // {})}' \
+		"./json/$1.tmdb.temp" > "./json/$1.tmdb.new"
+	then
+		mv -f "./json/$1.tmdb.new" "./json/$1.tmdb.json"
+	else
+		# Nothing is cached; the script carries on as it always did.
+		echo "Could not build ./json/$1.tmdb.json from the TMDB responses." >&2
+	fi
+
+	# Remove the scratch files whether or not the merge worked.
+	rm -f "./json/$1.tmdb.temp" "./json/$1.cert.temp" "./json/$1.tmdb.new"
+fi
+
+printf '\n%s\n' "Extracting Metadata..."
+# Values are stored exactly as jq prints them (JSON-encoded, strings still in double quotes); later code removes the quotes.
+title=$( jq '.data.movie.title_long' "./json/$1.yts.json" )
+runtime=$( jq '.data.movie.runtime' "./json/$1.yts.json" )
+rating=$( jq '.data.movie.rating' "./json/$1.yts.json" )
+description=$( jq '.data.movie.description_full' "./json/$1.yts.json" )
+poster=$( jq '.data.movie.large_cover_image' "./json/$1.yts.json" )
+torrentcount=$( jq '.data.movie.torrents | length' "./json/$1.yts.json" )
+trailer=$( jq '.data.movie.yt_trailer_code' "./json/$1.yts.json" )
+language=$( jq '.data.movie.language' "./json/$1.yts.json" )
+restriction=$( jq '.data.movie.mpa_rating' "./json/$1.yts.json" )
+for ((n=0; n<torrentcount; n++))
+do
+	torrentquality[$n]=$( jq ".data.movie.torrents[$n].quality" "./json/$1.yts.json" )
+	#torrentseeds[$n]=$( jq ".data.movie.torrents[$n].seeds" ./json/$1.yts.json )
+	torrenturl[$n]=$( jq ".data.movie.torrents[$n].url" "./json/$1.yts.json" )
+	torrenttype[$n]=$( jq ".data.movie.torrents[$n].type" "./json/$1.yts.json" )
+done
+actor=$( jq ".data.movie.cast[].name" "./json/$1.yts.json" )
+genre=$( jq '.data.movie.genres' "./json/$1.yts.json" )
+
+# jq prints mpa_rating JSON-encoded: "" when it is empty, null when the field
+# is missing, otherwise the value wrapped in double quotes.
+if [[ -z $restriction || $restriction == '""' || $restriction == null ]]
+then
+	echo "No MPA Rating!"
+	restriction="Not Rated"
+else
+	# Strip the surrounding double quotes.
+	restriction=${restriction#\"}
+	restriction=${restriction%\"}
+fi
+
+# Check whether IMDb flags the title as adult content (needs fetchimdbdata.sh
+# to have been run in the imdb folder). Per imdb/README.md, column 1 of
+# title.basics.tsv is tconst and column 5 is isAdult.
+isadult=""
+if [[ -e "./imdb/title.basics.tsv" ]]
+then
+	# awk compares the id with the first column only and stops at the first
+	# hit. It also does the field splitting itself, so IFS is no longer
+	# changed here (it used to stay set to a single space afterwards).
+	isadult=$( awk -F '\t' -v id="$1" '$1 == id { print $5; exit }' ./imdb/title.basics.tsv )
+fi
+
+if [[ $isadult == 1 ]]
+then
+	restriction="$restriction [ADULTS ONLY]"
+	echo "Title is for ADULTS ONLY!"
+fi
+
+# jq printed description and title JSON-encoded: wrapped in double quotes
+# and with backslash escapes (\" \n \uXXXX) still in place. Decode them
+# with jq instead of cutting off the first and last character, so escapes
+# become the characters they stand for. A JSON null turns into an empty
+# string rather than the leftover "ul".
+description=$( jq -r '. // ""' <<< "$description" )
+
+title=$( jq -r '. // ""' <<< "$title" )
+
+# NOTE(review): both values are later written into the HTML page as they
+# are; nothing HTML-escapes characters such as & or <.
+
+# jq prints yt_trailer_code JSON-encoded: "" when it is empty, null when missing.
+if [[ -z $trailer || $trailer == '""' || $trailer == null ]]
+then
+	echo "Trailer not found!"
+	trailerexist=0
+else
+	# Strip the surrounding double quotes before building the watch URL.
+	trailer=${trailer#\"}
+	trailer=${trailer%\"}
+	trailer="https://www.youtube.com/watch?v=$trailer"
+	trailerexist=1
+fi
+
+# Replace the language code with the line of languages.data that contains "(code)".
+# NOTE(review): languages.data is not created anywhere in this script - confirm it ships alongside.
+language=${language#\"}
+language=${language%\"}
+language=$( grep -F -- "(${language})" languages.data )
+
+# $actor holds jq's output for .data.movie.cast[].name: one JSON-encoded
+# name per line. Turn it into a list with a comma after every name but
+# the last, e.g.
+#
+#   "First Actor"
+#   "Second Actor"
+#
+# becomes
+#
+#   First Actor,
+#   Second Actor
+#
+# jq -s reads all the names into one array and join() inserts the
+# separators; -r prints the decoded text. Compared with toggling on every
+# double quote character, this keeps names that contain an escaped quote
+# intact and yields an empty string (instead of a substring error) when
+# the title has no cast.
+actors=$(
+	jq -r -s 'map(select(type == "string")) | join(",\n")' <<< "$actor"
+)
+
+# With the default IFS, the line breaks left between names collapse to
+# spaces wherever $actors is expanded unquoted further down.
+
+# $genre holds jq's pretty-printed .data.movie.genres array, for example
+#
+#   [
+#     "Action",
+#     "Drama"
+#   ]
+#
+# Build the same text the character loop used to produce from it: the
+# entries without quotes, each indented by two spaces and separated by a
+# comma and a line break. Letting jq do it also copes with an empty array
+# or null (both give an empty string) and with escaped characters.
+genres=$(
+	jq -r 'if type == "array" and length > 0
+		then "  " + join(",\n  ")
+		else ""
+		end' <<< "$genre"
+)
+
+# x was only reset by the old loop; keep that so later code sees the same value.
+x=0
+
+size=${#poster}
+((size--))
+((size--))
+temp=${poster:1:$size}
+poster=$temp
+# Print a summary of the extracted metadata to the terminal, one field per paragraph.
+echo "Movie: $title"
+echo
+echo "Restriction: $restriction"
+echo
+echo "Runtime: $runtime minutes"
+echo
+echo "IMDB Rating: $rating/10"
+echo
+echo "$description"
+echo
+echo "Starring: "$actors	# unquoted: the value is word-split before echo prints it
+echo
+echo "Genres: "$genres	# unquoted: the value is word-split before echo prints it
+echo
+echo "Poster: "$poster
+echo
+# The trailer line is only printed when trailerexist was set to 1 above.
+if [[ $trailerexist == 1 ]]
+then
+	echo "Trailer: $trailer"
+	echo
+fi
+
+echo "Language: $language"
+echo
+echo
+echo
+
+if [ ! -e "./html/posters/$1.jpg" ]
+then
+	echo "Downloading Poster..."
+	mkdir -p ./html/posters && wget -O "./html/posters/$1.jpg" "$poster" || rm -f "./html/posters/$1.jpg"	# a failed download must not leave a file that blocks the retry
+else
+	echo "Already have Poster: $1.jpg"
+fi
+
+if [[ $trailerexist == 1 ]]
+then
+	# The QR code encodes the trailer URL and is generated once per title; qrencode -o needs the folder to exist.
+	if [ ! -e "./html/qrcodes/$1.png" ]
+	then
+		echo "Generating QR Code for the Trailer..."
+		mkdir -p ./html/qrcodes && qrencode -s 6 -l H -o "./html/qrcodes/$1.png" "$trailer"
+	else
+		echo "Already have QR Code for the Trailer: $1.png"
+	fi
+
+else
+	echo "Skipping Trailer..."
+fi
+
+if [[ ! -e "./html/$1.html" ]]
+then
+	echo
+	echo "Generating $1.html..."
+	mkdir -p ./html
+
+	# Both page layouts are identical except for the trailer QR block under the poster, so that part is built on its own.
+	qrblock=""
+	[[ $trailerexist == 1 ]] && qrblock="<hr><center>Scan to watch a preview...</center><center><img src=\"qrcodes/$1.png\" width=\"300px\" height=\"300px\"></center><center>Data rates may apply.</center>"
+	# Attribute quotes are escaped so they reach the file, every table row keeps its three cells inside <tr>, and the footer <center> is closed.
+	printf '%s\n' "<html><title>$title</title><body><table><tr><td width=\"10%\">&nbsp;</td><td><h1>$title</h1></td><td><u>Code</u>: $1</td></tr><tr><td></td><td><u>Language</u>: $language</td><td><u>Audience Restriction</u>: $restriction</td></tr><tr><td></td><td><u>Runtime</u>: $runtime minutes</td><td><u>IMDB Rating</u>: $rating/10</td></tr><tr><td></td><td valign=\"top\"><u>Genres</u>: $genres<br><u>Starring</u>: $actors<br><br><br><table><tr><td width=\"95%\">$description</td><td width=\"5%\">&nbsp;</td></tr></table></td><td><img src=\"posters/$1.jpg\" width=\"400px\" height=\"600px\">$qrblock</td></tr></table><hr><center><b>THE CONTENTS OF THIS DOCUMENT ARE NOT GUARANTEED. ALL DATA IS OBTAINED FROM PUBLICALLY MAINTAINED DATABASES. ERRORS MAY EXIST.</b></center><hr></body></html>" > "./html/$1.html"
+
+else
+	echo
+	echo "$1.html already exists..."
+fi
+	
+if [[ ! -e "./pdfs/$1.pdf" ]]
+then
+	printf '\n%s\n' "Generating $1.pdf..."
+	# html2pdf is the symlink to wkhtmltopdf suggested in the notes at the top of this script; use the real binary when the link is missing.
+	pdftool=$( command -v html2pdf || command -v wkhtmltopdf )
+	mkdir -p ./pdfs && "${pdftool:-html2pdf}" --allow ./html "./html/$1.html" "./pdfs/$1.pdf"
+else
+	printf '\n%s\n' "$1.pdf already exists..."
+fi
+	
+echo
+echo "Saving Torrent Files..."
+# wget -O cannot create missing directories.
+mkdir -p ./torrents
+
+# A "/" in the title would be read as a directory separator in the file
+# name, so it is replaced for the name on disk only.
+safetitle=${title//\//-}
+
+for (( n=0; n<torrentcount; n++ ))
+do
+	# Strip the double quotes jq printed around each value.
+	torrenturl[$n]=${torrenturl[$n]#\"}
+	torrenturl[$n]=${torrenturl[$n]%\"}
+	torrentquality[$n]=${torrentquality[$n]#\"}
+	torrentquality[$n]=${torrentquality[$n]%\"}
+	torrenttype[$n]=${torrenttype[$n]#\"}
+	torrenttype[$n]=${torrenttype[$n]%\"}
+	torrentfile="./torrents/$safetitle.$1.${torrentquality[$n]}.${torrenttype[$n]}.torrent"
+
+	echo
+	echo "URL: ${torrenturl[$n]}"
+	echo "Quality: ${torrentquality[$n]}"
+	echo "Type: ${torrenttype[$n]}"
+	echo
+
+	if [[ ! -e $torrentfile ]]
+	then
+		# Remove the file again if the download fails, so that an empty
+		# file does not make later runs skip this torrent.
+		wget -O "$torrentfile" "${torrenturl[$n]}" || rm -f "$torrentfile"
+	else
+		echo "${torrentfile##*/} already exists..."
+	fi
+done
+
+echo
+echo "Done!"
+echo
diff --git a/imdb/README.md b/imdb/README.md
new file mode 100644
index 0000000..0ab241f
--- /dev/null
+++ b/imdb/README.md
@@ -0,0 +1,59 @@
+# title.akas.tsv.gz
+
++ titleId (string) - a tconst, an alphanumeric unique identifier of the title
++ ordering (integer) - a number to uniquely identify rows for a given titleId
++ title (string) - the localized title
++ region (string) - the region for this version of the title
++ language (string) - the language of the title
++ types (array) - Enumerated set of attributes for this alternative title. One or more of the following: "alternative", "dvd", "festival", "tv", "video", "working", "original", "imdbDisplay". New values may be added in the future without warning
++ attributes (array) - Additional terms to describe this alternative title, not enumerated
++ isOriginalTitle (boolean) - 0: not original title; 1: original title
+
+# title.basics.tsv.gz
+
++ tconst (string) - alphanumeric unique identifier of the title
++ titleType (string) - the type/format of the title (e.g. movie, short, tvseries, tvepisode, video, etc)
++ primaryTitle (string) - the more popular title / the title used by the filmmakers on promotional materials at the point of release
++ originalTitle (string) - original title, in the original language
++ isAdult (boolean) - 0: non-adult title; 1: adult title
++ startYear (YYYY) - represents the release year of a title. In the case of TV Series, it is the series start year
++ endYear (YYYY) - TV Series end year. "\N" for all other title types
++ runtimeMinutes - primary runtime of the title, in minutes
++ genres (string array) - includes up to three genres associated with the title
+
+# title.crew.tsv.gz
+
++ tconst (string) - alphanumeric unique identifier of the title
++ directors (array of nconsts) - director(s) of the given title
++ writers (array of nconsts) - writer(s) of the given title
+
+# title.episode.tsv.gz
+
++ tconst (string) - alphanumeric identifier of episode
++ parentTconst (string) - alphanumeric identifier of the parent TV Series
++ seasonNumber (integer) - season number the episode belongs to
++ episodeNumber (integer) - episode number of the tconst in the TV series
+
+# title.principals.tsv.gz
+
++ tconst (string) - alphanumeric unique identifier of the title
++ ordering (integer) - a number to uniquely identify rows for a given titleId
++ nconst (string) - alphanumeric unique identifier of the name/person
++ category (string) - the category of job that person was in
++ job (string) - the specific job title if applicable, else '\N'
++ characters (string) - the name of the character played if applicable, else '\N'
+
+# title.ratings.tsv.gz
+
++ tconst (string) - alphanumeric unique identifier of the title
++ averageRating - weighted average of all the individual user ratings
++ numVotes - number of votes the title has received
+
+# name.basics.tsv.gz
+
++ nconst (string) - alphanumeric unique identifier of the name/person
++ primaryName (string) - name by which the person is most often credited
++ birthYear - in YYYY format
++ deathYear - in YYYY format if applicable, else '\N'
++ primaryProfession (array of strings) - the top-3 professions of the person
++ knownForTitles (array of tconsts) - titles the person is known for
diff --git a/imdb/fetchimdbdata.sh b/imdb/fetchimdbdata.sh
new file mode 100755
index 0000000..8e444e6
--- /dev/null
+++ b/imdb/fetchimdbdata.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+rm *.tsv
+
+
+wget -O - https://datasets.imdbws.com/name.basics.tsv.gz | gunzip > name.basics.tsv
+wget -O - https://datasets.imdbws.com/title.akas.tsv.gz | gunzip > title.akas.tsv
+wget -O - https://datasets.imdbws.com/title.basics.tsv.gz | gunzip > title.basics.tsv
+wget -O - https://datasets.imdbws.com/title.crew.tsv.gz | gunzip > title.crew.tsv
+wget -O - https://datasets.imdbws.com/title.episode.tsv.gz | gunzip > title.episode.tsv
+wget -O - https://datasets.imdbws.com/title.principals.tsv.gz | gunzip > title.principals.tsv
+wget -O - https://datasets.imdbws.com/title.ratings.tsv.gz | gunzip > title.ratings.tsv
diff --git a/job.sh b/job.sh
new file mode 100755
index 0000000..ebda8db
--- /dev/null
+++ b/job.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+while read -r imdbid
+do
+	./fetchyts.sh $imdbid
+done < movies.list