
# ------------------------------------------------------------
# 🎬 MOVIE DATA ANALYSIS PROGRAM
# Class 12 Computer Science Project
# ------------------------------------------------------------
# Made by: Bhavika Kokane & Madina Sidi
# School: Sundaram Central School
# Subject: Informatics Practices
# Year: 2025-26
# ------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt

# Raw string: the backslash is a literal Windows path separator. Without the
# r-prefix, "\m" is an invalid escape sequence that newer Python versions
# warn about.
DATA_FILE = r"C:\mydata.csv"

# The banner emoji is written as an escape sequence so the source stays pure
# ASCII and cannot be corrupted by saving the file in the wrong encoding.
# NOTE(review): the original banner emoji was unreadable; the clapper board
# is used here to match the exit message - confirm.
BANNER_EMOJI = "\U0001F3AC"

# ------------------------------------------------------------
# STEP 1: Banner and data loading
# ------------------------------------------------------------
print("======================================================")
print(f" {BANNER_EMOJI} MOVIE DATA ANALYSIS PROGRAM {BANNER_EMOJI}")
print("======================================================")
print("Loading movie data from CSV file...\n")

try:
    df = pd.read_csv(DATA_FILE)
except FileNotFoundError:
    # Exit with a readable message instead of a traceback when the data
    # file is missing.
    raise SystemExit(f"Could not find the movie data file: {DATA_FILE}") from None

print(df)

# Show a sample of the data
print("Sample movie records:")
print(df.head())
print("\nTotal movies loaded:", len(df))
print("======================================================\n")

# Opening histogram of the ratings. Missing ratings are dropped first
# because plt.hist cannot compute bin edges when the data contains NaN.
plt.figure(figsize=(8, 5))
plt.hist(df["Rating"].dropna(), bins=10, color="purple", edgecolor="black")
plt.title("Distribution of Movie Ratings")
plt.xlabel("Rating")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()
# -------------------------------
# STEP 2: Menu Loop
# -------------------------------
# Emoji are written as escape sequences so the source stays pure ASCII and
# cannot be corrupted (or have string literals split across lines) by saving
# the file in the wrong encoding.
OK_MARK = "\u2705"        # check mark
ERROR_MARK = "\u274c"     # cross mark
SPARKLES = "\u2728"
CLAPPER = "\U0001F3AC"

# File written by option 19; the success message reports this same name.
SAVE_PATH = "updated_movies__new.csv"

MENU_WIDTH = 67
MENU_LABELS = (
    "Sort movies by rating",
    "Filter movies by minimum rating",
    "Add a new movie",
    "Delete a movie",
    "Update a movie's rating",
    "Search for a movie",
    "Display movies by genre",
    "Display movies by director",
    "Average rating by genre",
    "Average rating by director",
    "Compare ratings of two movies",
    "Highest rated movie in a genre",
    "Lowest rated movie in a genre",
    "Bar Chart of Ratings",
    "Histogram of Ratings",
    "Line Chart of Ratings over Release Date",
    "Show total number of movies",
    "Show top N movies by rating",
    "Save updated movie list to CSV",
    "Exit",
)

# Prompts for option 3, in the same order as the values are written into the
# new row, paired with the type each answer is converted to.
# NOTE(review): this order is assumed to match the CSV's column order -
# confirm against the data file.
NEW_MOVIE_FIELDS = (
    ("Enter title: ", str),
    ("Enter genre: ", str),
    ("Enter year: ", int),
    ("Enter rating: ", float),
    ("Enter revenue (Billion $): ", float),
    ("Enter country: ", str),
    ("Enter actor name: ", str),
    ("Enter actress name: ", str),
    ("Enter actor's social media likes (M): ", int),
    ("Enter runtime (min): ", int),
    ("Enter gross ($M): ", int),
    ("Enter release date: ", str),
    ("Enter genre count: ", int),
    ("Enter average genre rating: ", float),
    ("Enter director name: ", str),
    ("Enter year trend score: ", int),
    ("Enter famous song: ", str),
)


def ask_value(prompt, cast):
    """Prompt until the reply can be converted with ``cast`` and return it.

    A reply that ``cast`` rejects with ValueError (for example letters where
    a number is expected) prints an error and asks again instead of crashing
    the program.
    """
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            print(f"{ERROR_MARK} Invalid value. Please try again.")


def find_rows(movies, column, text):
    """Return the rows of ``movies`` whose ``column`` equals ``text``, ignoring case."""
    return movies[movies[column].str.lower() == text.lower()]


def add_movie(movies, values):
    """Return a copy of ``movies`` with one row appended.

    ``values`` must hold one entry per column, in column order; otherwise a
    ValueError is raised. The result is re-indexed 0..n-1, so the new row can
    never land on the label of an existing row (which would overwrite it
    when the index has gaps after a deletion).
    """
    values = list(values)
    if len(values) != len(movies.columns):
        raise ValueError(
            f"expected {len(movies.columns)} values, got {len(values)}"
        )
    new_row = pd.DataFrame([values], columns=movies.columns)
    return pd.concat([movies, new_row], ignore_index=True)


def delete_movie(movies, title):
    """Return ``movies`` without the rows titled exactly ``title``, re-indexed 0..n-1."""
    return movies[movies["Title"] != title].reset_index(drop=True)


def print_menu():
    """Print the boxed list of menu options."""
    border = "-" * MENU_WIDTH
    print(f"\n{SPARKLES} MOVIE DATA MENU {SPARKLES}")
    print(border)
    for number, label in enumerate(MENU_LABELS, start=1):
        entry = f"{number:>2}. {label}"
        print(f"| {entry:<{MENU_WIDTH - 4}} |")
    print(border)


def run_menu(df):
    """Run the interactive menu until the user picks Exit.

    Returns the DataFrame including any additions, deletions and rating
    updates made during the session.
    """
    while True:
        print_menu()
        choice = input(f"Enter your choice (1-{len(MENU_LABELS)}): ").strip()

        # ------------------ OPTION 1: sort by rating ------------------
        if choice == "1":
            print("\nMovies sorted by rating (highest first):\n")
            print(df.sort_values(by="Rating", ascending=False))

        # ------------------ OPTION 2: filter by minimum rating --------
        elif choice == "2":
            min_rating = ask_value("Enter minimum rating: ", float)
            print(f"\nMovies with rating >= {min_rating}:\n")
            print(df[df["Rating"] >= min_rating])

        # ------------------ OPTION 3: add a movie ---------------------
        elif choice == "3":
            # Check the column count first so the user is not asked every
            # question only to have the insert fail afterwards.
            if len(df.columns) != len(NEW_MOVIE_FIELDS):
                print(
                    f"{ERROR_MARK} Cannot add a movie: the data has "
                    f"{len(df.columns)} columns but this form fills "
                    f"{len(NEW_MOVIE_FIELDS)}."
                )
                continue
            values = [ask_value(prompt, cast) for prompt, cast in NEW_MOVIE_FIELDS]
            df = add_movie(df, values)
            print(f"{OK_MARK} Movie added successfully!")

        # ------------------ OPTION 4: delete a movie ------------------
        elif choice == "4":
            title = input("Enter exact movie title to delete: ")
            if title in df["Title"].values:
                df = delete_movie(df, title)
                print(f"{OK_MARK} Movie deleted successfully!")
            else:
                print(f"{ERROR_MARK} Movie not found.")

        # ------------------ OPTION 5: update a rating -----------------
        elif choice == "5":
            title = input("Enter exact movie title to update: ")
            if title in df["Title"].values:
                new_rating = ask_value("Enter new rating: ", float)
                df.loc[df["Title"] == title, "Rating"] = new_rating
                print(f"{OK_MARK} Rating updated successfully!")
            else:
                print(f"{ERROR_MARK} Movie not found.")

        # ------------------ OPTION 6: search by title -----------------
        elif choice == "6":
            title = input("Enter movie title to search: ")
            result = find_rows(df, "Title", title)
            if not result.empty:
                print(result)
            else:
                print(f"{ERROR_MARK} Movie not found.")

        # ------------------ OPTION 7: movies by genre -----------------
        elif choice == "7":
            genre = input("Enter genre: ")
            result = find_rows(df, "Genre", genre)
            if not result.empty:
                print(result)
            else:
                print(f"{ERROR_MARK} No movies found for this genre.")

        # ------------------ OPTION 8: movies by director --------------
        elif choice == "8":
            director = input("Enter director name: ")
            result = find_rows(df, "Director Name", director)
            if not result.empty:
                print(result)
            else:
                print(f"{ERROR_MARK} No movies found for this director.")

        # ------------------ OPTION 9: average by genre ----------------
        elif choice == "9":
            print("\nAverage rating by genre:")
            print(df.groupby("Genre")["Rating"].mean())

        # ------------------ OPTION 10: average by director ------------
        elif choice == "10":
            print("\nAverage rating by director:")
            print(df.groupby("Director Name")["Rating"].mean())

        # ------------------ OPTION 11: compare two movies -------------
        elif choice == "11":
            m1 = input("Enter first movie: ")
            m2 = input("Enter second movie: ")
            r1 = find_rows(df, "Title", m1)["Rating"].values
            r2 = find_rows(df, "Title", m2)["Rating"].values
            if r1.size and r2.size:
                print(f"{m1}: {r1[0]} | {m2}: {r2[0]}")
            else:
                print(f"{ERROR_MARK} One or both movies not found.")

        # ------------------ OPTIONS 12 / 13: best / worst in genre ----
        elif choice in ("12", "13"):
            genre = input("Enter genre: ")
            g_df = find_rows(df, "Genre", genre)
            if g_df.empty:
                print(f"{ERROR_MARK} No movies found in this genre.")
            elif choice == "12":
                print("\nHighest rated movie in this genre:\n")
                print(g_df.loc[g_df["Rating"].idxmax()])
            else:
                print("\nLowest rated movie in this genre:\n")
                print(g_df.loc[g_df["Rating"].idxmin()])

        # ------------------ OPTION 14: bar chart ----------------------
        elif choice == "14":
            df_sorted = df.sort_values(by="Rating", ascending=False)
            plt.figure(figsize=(10, 6))
            plt.bar(df_sorted["Title"], df_sorted["Rating"], color="skyblue")
            plt.xticks(rotation=90)
            plt.xlabel("Movie Title")
            plt.ylabel("Rating")
            plt.title("Movie Ratings (Bar Chart)")
            plt.tight_layout()
            plt.show()

        # ------------------ OPTION 15: histogram ----------------------
        elif choice == "15":
            plt.figure(figsize=(8, 6))
            # Missing ratings are dropped because plt.hist cannot compute
            # bin edges when the data contains NaN.
            plt.hist(df["Rating"].dropna(), bins=10, color="lightgreen",
                     edgecolor="black")
            plt.xlabel("Rating")
            plt.ylabel("Frequency")
            plt.title("Distribution of Movie Ratings")
            plt.grid(axis="y")
            plt.show()

        # ------------------ OPTION 16: line chart over time -----------
        elif choice == "16":
            # NOTE(review): "Release Date" is sorted as it is stored. If the
            # column holds text that is not in YYYY-MM-DD form, this order
            # is alphabetical rather than chronological - confirm the date
            # format in the CSV before converting with pd.to_datetime.
            df_sorted = df.sort_values(by="Release Date")
            plt.figure(figsize=(10, 6))
            plt.plot(df_sorted["Release Date"], df_sorted["Rating"],
                     marker="o", color="orange")
            plt.xticks(rotation=45)
            plt.xlabel("Release Date")
            plt.ylabel("Rating")
            plt.title("Ratings Over Time (Line Chart)")
            plt.tight_layout()
            plt.show()

        # ------------------ OPTION 17: total count --------------------
        elif choice == "17":
            print(f"\nTotal number of movies in database: {len(df)}")

        # ------------------ OPTION 18: top N --------------------------
        elif choice == "18":
            n = ask_value("Enter how many top movies to display: ", int)
            if n <= 0:
                # head() with a negative count means "all but the last n",
                # which is not what the user asked for.
                print(f"{ERROR_MARK} Please enter a number greater than 0.")
            else:
                print(df.sort_values(by="Rating", ascending=False).head(n))

        # ------------------ OPTION 19: save to CSV --------------------
        elif choice == "19":
            df.to_csv(SAVE_PATH, index=False)
            print(f"{OK_MARK} Updated movie list saved to '{SAVE_PATH}'!")

        # ------------------ OPTION 20: exit ---------------------------
        elif choice == "20":
            print(f"\nThank you for using the Movie Data Analysis Program! {CLAPPER}")
            break

        else:
            print(f"{ERROR_MARK} Invalid choice. Please try again.")

    return df


if __name__ == "__main__":
    # `df` is the DataFrame loaded from the CSV earlier in this script.
    df = run_menu(df)