# ------------------------------------------------------------
# 🎬 MOVIE DATA ANALYSIS PROGRAM
# Class 12 Computer Science Project 
# ------------------------------------------------------------
# Made by: Bhavika Kokane & Madina Sidi
# School: Sundaram Central School
# Subject: Informatics Practices
# Year: 2025-26
# ------------------------------------------------------------


# Start-up banner, printed once before the data file is read.
print("======================================================")
print("      📊 MOVIE DATA ANALYSIS PROGRAM 📊")
print("======================================================")
print("Loading movie data from CSV file...\n")

import pandas as pd
import matplotlib.pyplot as plt

# Load the movie table; the rest of the script reads the module-level `df`.
# Raw string: "\m" is not a valid escape sequence, and recent Python versions
# warn about it. The path itself is unchanged.
CSV_PATH = r"C:\mydata.csv"
try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError:
    # Nothing below can work without the data, so stop with a readable message
    # instead of a traceback.
    raise SystemExit(f"❌ Could not find the data file: {CSV_PATH}") from None
print(df)

# Show a sample of the data
print("Sample movie records:")
print(df.head())
print("\nTotal movies loaded:", len(df))
print("======================================================\n")

# Opening chart: distribution of the "Rating" column in 10 bins.
plt.figure(figsize=(8, 5))
plt.hist(df["Rating"], bins=10, color="purple", edgecolor="black")
plt.title("Histogram")
plt.xlabel("Rating")  # label the axis with the column that is actually plotted
plt.ylabel("Frequency")
plt.grid(True)
plt.show()



# -------------------------------
# STEP 2: Menu Loop
# -------------------------------
def _read_number(prompt, cast):
    """Keep asking *prompt* until the reply converts with *cast* (int or float).

    A reply that does not convert raises ValueError inside ``cast``; instead of
    letting that end the program (and lose unsaved edits), ask again.
    """
    while True:
        try:
            return cast(input(prompt))
        except ValueError:
            print("❌ Please enter a valid number.")


# Show the menu, read one choice, run it, and repeat until the user picks 20.
# `df` is the movie table loaded earlier in the script. Options 3, 4 and 5
# change it in memory only; option 19 writes it to disk.
while True:
    print("\n✨ MOVIE DATA MENU ✨")
    print("-------------------------------------------------------------------")
    print("| 1.  Sort movies by rating                                       |")
    print("| 2.  Filter movies by minimum rating                             |")
    print("| 3.  Add a new movie                                             |")
    print("| 4.  Delete a movie                                              |")
    print("| 5.  Update a movie's rating                                     |")
    print("| 6.  Search for a movie                                          |")
    print("| 7.  Display movies by genre                                     |")
    print("| 8.  Display movies by director                                  |")
    print("| 9.  Average rating by genre                                     |")
    print("| 10. Average rating by director                                  |")
    print("| 11. Compare ratings of two movies                               |")
    print("| 12. Highest rated movie in a genre                              |")
    print("| 13. Lowest rated movie in a genre                               |")
    print("| 14. Bar Chart of Ratings                                        |")
    print("| 15. Histogram of Ratings                                        |")
    print("| 16. Line Chart of Ratings over Release Date                     |")
    print("| 17. Show total number of movies                                 |")
    print("| 18. Show top N movies by rating                                 |")
    print("| 19. Save updated movie list to CSV                              |")
    print("| 20. Exit                                                        |")
    print("-------------------------------------------------------------------")

    # The menu has 20 entries, so the prompt advertises 1-20.
    choice = input("Enter your choice (1-20): ").strip()

    # ------------------ OPTION 1: sort by rating ------------------
    if choice == "1":
        print("\nMovies sorted by rating (highest first):\n")
        print(df.sort_values(by="Rating", ascending=False))

    # ------------------ OPTION 2: filter by minimum rating ------------------
    elif choice == "2":
        min_rating = _read_number("Enter minimum rating: ", float)
        filtered_df = df[df["Rating"] >= min_rating]
        print(f"\nMovies with rating >= {min_rating}:\n")
        print(filtered_df)

    # ------------------ OPTION 3: add a movie ------------------
    elif choice == "3":
        title = input("Enter title: ")
        genre = input("Enter genre: ")
        year = _read_number("Enter year: ", int)
        rating = _read_number("Enter rating: ", float)
        revenue = _read_number("Enter revenue (Billion $): ", float)
        country = input("Enter country: ")
        actor_name = input("Enter actor name: ")
        actress_name = input("Enter actress name: ")
        actor_likes = _read_number("Enter actor's social media likes (M): ", int)
        runtime = _read_number("Enter runtime (min): ", int)
        gross = _read_number("Enter gross ($M): ", int)
        release_date = input("Enter release date: ")
        genre_count = _read_number("Enter genre count: ", int)
        avg_genre_rating = _read_number("Enter average genre rating: ", float)
        director_name = input("Enter director name: ")
        year_trend_score = _read_number("Enter year trend score: ", int)
        famous_song = input("Enter famous song: ")

        # Values are positional: they must line up with the table's column
        # order. NOTE(review): assumes the CSV columns are in exactly this
        # order - confirm against the data file.
        new_row = [
            title, genre, year, rating, revenue, country,
            actor_name, actress_name, actor_likes, runtime,
            gross, release_date, genre_count, avg_genre_rating,
            director_name, year_trend_score, famous_song,
        ]
        if len(new_row) != len(df.columns):
            # Assigning a row of the wrong width would raise; report it instead.
            print(
                f"❌ Movie not added: entered {len(new_row)} values "
                f"but the table has {len(df.columns)} columns."
            )
        else:
            # len(df) is an unused row label as long as rows are numbered
            # 0..len-1; option 4 renumbers after every delete to keep it so.
            df.loc[len(df)] = new_row
            print("✅ Movie added successfully!")

    # ------------------ OPTION 4: delete a movie ------------------
    elif choice == "4":
        title = input("Enter exact movie title to delete: ")
        if title in df["Title"].values:
            # Renumber the remaining rows; otherwise a later "add" at label
            # len(df) could land on, and overwrite, an existing row.
            df = df[df["Title"] != title].reset_index(drop=True)
            print("✅ Movie deleted successfully!")
        else:
            print("❌ Movie not found.")

    # ------------------ OPTION 5: update a rating ------------------
    elif choice == "5":
        title = input("Enter exact movie title to update: ")
        if title in df["Title"].values:
            new_rating = _read_number("Enter new rating: ", float)
            df.loc[df["Title"] == title, "Rating"] = new_rating
            print("✅ Rating updated successfully!")
        else:
            print("❌ Movie not found.")

    # ------------------ OPTION 6: search by title (case-insensitive) ------------------
    elif choice == "6":
        title = input("Enter movie title to search: ")
        result = df[df["Title"].str.lower() == title.lower()]
        if not result.empty:
            print(result)
        else:
            print("❌ Movie not found.")

    # ------------------ OPTION 7: list by genre ------------------
    elif choice == "7":
        genre = input("Enter genre: ")
        result = df[df["Genre"].str.lower() == genre.lower()]
        if not result.empty:
            print(result)
        else:
            print("❌ No movies found for this genre.")

    # ------------------ OPTION 8: list by director ------------------
    elif choice == "8":
        director = input("Enter director name: ")
        result = df[df["Director Name"].str.lower() == director.lower()]
        if not result.empty:
            print(result)
        else:
            print("❌ No movies found for this director.")

    # ------------------ OPTION 9: average rating per genre ------------------
    elif choice == "9":
        print("\nAverage rating by genre:")
        print(df.groupby("Genre")["Rating"].mean())

    # ------------------ OPTION 10: average rating per director ------------------
    elif choice == "10":
        print("\nAverage rating by director:")
        print(df.groupby("Director Name")["Rating"].mean())

    # ------------------ OPTION 11: compare two movies ------------------
    elif choice == "11":
        m1 = input("Enter first movie: ")
        m2 = input("Enter second movie: ")
        r1 = df.loc[df["Title"].str.lower() == m1.lower(), "Rating"].values
        r2 = df.loc[df["Title"].str.lower() == m2.lower(), "Rating"].values
        if r1.size and r2.size:
            # If a title occurs more than once, the first match is shown.
            print(f"{m1}: {r1[0]} | {m2}: {r2[0]}")
        else:
            print("❌ One or both movies not found.")

    # ------------------ OPTION 12: best movie in a genre ------------------
    elif choice == "12":
        genre = input("Enter genre: ")
        g_df = df[df["Genre"].str.lower() == genre.lower()]
        if not g_df.empty:
            print("\nHighest rated movie in this genre:\n")
            print(g_df.loc[g_df["Rating"].idxmax()])
        else:
            print("❌ No movies found in this genre.")

    # ------------------ OPTION 13: worst movie in a genre ------------------
    elif choice == "13":
        genre = input("Enter genre: ")
        g_df = df[df["Genre"].str.lower() == genre.lower()]
        if not g_df.empty:
            print("\nLowest rated movie in this genre:\n")
            print(g_df.loc[g_df["Rating"].idxmin()])
        else:
            print("❌ No movies found in this genre.")

    # ------------------ OPTION 14: bar chart ------------------
    elif choice == "14":
        df_sorted = df.sort_values(by="Rating", ascending=False)
        plt.figure(figsize=(10, 6))
        plt.bar(df_sorted["Title"], df_sorted["Rating"], color='skyblue')
        plt.xticks(rotation=90)
        plt.xlabel("Movie Title")
        plt.ylabel("Rating")
        plt.title("Movie Ratings (Bar Chart)")
        plt.tight_layout()
        plt.show()

    # ------------------ OPTION 15: histogram ------------------
    elif choice == "15":
        plt.figure(figsize=(8, 6))
        plt.hist(df["Rating"], bins=10, color='lightgreen', edgecolor='black')
        plt.xlabel("Rating")
        plt.ylabel("Frequency")
        plt.title("Distribution of Movie Ratings")
        plt.grid(axis='y')
        plt.show()

    # ------------------ OPTION 16: line chart over release date ------------------
    elif choice == "16":
        # NOTE(review): "Release Date" is sorted as stored. If the column holds
        # text dates that are not in year-month-day form, this order is not
        # chronological - confirm the CSV's date format.
        df_sorted = df.sort_values(by="Release Date")
        plt.figure(figsize=(10, 6))
        plt.plot(df_sorted["Release Date"], df_sorted["Rating"], marker='o', color='orange')
        plt.xticks(rotation=45)
        plt.xlabel("Release Date")
        plt.ylabel("Rating")
        plt.title("Ratings Over Time (Line Chart)")
        plt.tight_layout()
        plt.show()

    # ------------------ OPTION 17: row count ------------------
    elif choice == "17":
        print(f"\nTotal number of movies in database: {len(df)}")

    # ------------------ OPTION 18: top N by rating ------------------
    elif choice == "18":
        n = _read_number("Enter how many top movies to display: ", int)
        if n <= 0:
            # head() with a negative count drops rows from the end instead of
            # taking a top-N, which is never what the user meant here.
            print("❌ Please enter a number greater than zero.")
        else:
            print(df.sort_values(by="Rating", ascending=False).head(n))

    # ------------------ OPTION 19: save to CSV ------------------
    elif choice == "19":
        out_file = "updated_movies__new.csv"
        df.to_csv(out_file, index=False)
        # Report the name of the file that was actually written.
        print(f"✅ Updated movie list saved to '{out_file}'!")

    # ------------------ OPTION 20: exit ------------------
    elif choice == "20":
        print("\nThank you for using the Movie Data Analysis Program! 🎬")
        break

    else:
        print("❌ Invalid choice. Please try again.")
# ------------------------------------------------------------
# 🎬 MOVIE DATA ANALYSIS PROGRAM
# Class 12 Computer Science Project
# ------------------------------------------------------------
# Made by: Bhavika Kokane & Madina Sidi
# School: Sundaram Central School
# Subject: Informatics Practices
# Year: 2025-26
# ------------------------------------------------------------
# Start-up banner, printed once before the data file is read.
# The emoji and the centring spaces are restored from the intact copy of this
# banner earlier in the file; here they had been mis-encoded and collapsed.
print("======================================================")
print("      📊 MOVIE DATA ANALYSIS PROGRAM 📊")
print("======================================================")
print("Loading movie data from CSV file...\n")
import pandas as pd
import matplotlib.pyplot as plt
# Load the movie table; the rest of the script reads the module-level `df`.
# Raw string: "\m" is not a valid escape sequence, and recent Python versions
# warn about it. The path itself is unchanged.
CSV_PATH = r"C:\mydata.csv"
try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError:
    # Nothing below can work without the data, so stop with a readable message
    # instead of a traceback.
    raise SystemExit(f"❌ Could not find the data file: {CSV_PATH}") from None
print(df)
# Show a sample of the data
print("Sample movie records:")
print(df.head())
print("\nTotal movies loaded:", len(df))
print("======================================================\n")
# Opening chart: distribution of the "Rating" column in 10 bins.
plt.figure(figsize=(8, 5))
plt.hist(df["Rating"], bins=10, color="purple", edgecolor="black")
plt.title("Histogram")
plt.xlabel("Rating")  # label the axis with the column that is actually plotted
plt.ylabel("Frequency")
plt.grid(True)
plt.show()
# -------------------------------
# STEP 2: Menu Loop
# -------------------------------
def _read_number(prompt, cast):
    """Keep asking *prompt* until the reply converts with *cast* (int or float).

    A reply that does not convert raises ValueError inside ``cast``; instead of
    letting that end the program (and lose unsaved edits), ask again.
    """
    while True:
        try:
            return cast(input(prompt))
        except ValueError:
            print("❌ Please enter a valid number.")


# Show the menu, read one choice, run it, and repeat until the user picks 20.
# `df` is the movie table loaded earlier in the script. Options 3, 4 and 5
# change it in memory only; option 19 writes it to disk.
# Indentation, emoji and menu padding are restored to match the intact copy of
# this loop earlier in the file.
while True:
    print("\n✨ MOVIE DATA MENU ✨")
    print("-------------------------------------------------------------------")
    print("| 1.  Sort movies by rating                                       |")
    print("| 2.  Filter movies by minimum rating                             |")
    print("| 3.  Add a new movie                                             |")
    print("| 4.  Delete a movie                                              |")
    print("| 5.  Update a movie's rating                                     |")
    print("| 6.  Search for a movie                                          |")
    print("| 7.  Display movies by genre                                     |")
    print("| 8.  Display movies by director                                  |")
    print("| 9.  Average rating by genre                                     |")
    print("| 10. Average rating by director                                  |")
    print("| 11. Compare ratings of two movies                               |")
    print("| 12. Highest rated movie in a genre                              |")
    print("| 13. Lowest rated movie in a genre                               |")
    print("| 14. Bar Chart of Ratings                                        |")
    print("| 15. Histogram of Ratings                                        |")
    print("| 16. Line Chart of Ratings over Release Date                     |")
    print("| 17. Show total number of movies                                 |")
    print("| 18. Show top N movies by rating                                 |")
    print("| 19. Save updated movie list to CSV                              |")
    print("| 20. Exit                                                        |")
    print("-------------------------------------------------------------------")

    # The menu has 20 entries, so the prompt advertises 1-20.
    choice = input("Enter your choice (1-20): ").strip()

    # ------------------ OPTION 1: sort by rating ------------------
    if choice == "1":
        print("\nMovies sorted by rating (highest first):\n")
        print(df.sort_values(by="Rating", ascending=False))

    # ------------------ OPTION 2: filter by minimum rating ------------------
    elif choice == "2":
        min_rating = _read_number("Enter minimum rating: ", float)
        filtered_df = df[df["Rating"] >= min_rating]
        print(f"\nMovies with rating >= {min_rating}:\n")
        print(filtered_df)

    # ------------------ OPTION 3: add a movie ------------------
    elif choice == "3":
        title = input("Enter title: ")
        genre = input("Enter genre: ")
        year = _read_number("Enter year: ", int)
        rating = _read_number("Enter rating: ", float)
        revenue = _read_number("Enter revenue (Billion $): ", float)
        country = input("Enter country: ")
        actor_name = input("Enter actor name: ")
        actress_name = input("Enter actress name: ")
        actor_likes = _read_number("Enter actor's social media likes (M): ", int)
        runtime = _read_number("Enter runtime (min): ", int)
        gross = _read_number("Enter gross ($M): ", int)
        release_date = input("Enter release date: ")
        genre_count = _read_number("Enter genre count: ", int)
        avg_genre_rating = _read_number("Enter average genre rating: ", float)
        director_name = input("Enter director name: ")
        year_trend_score = _read_number("Enter year trend score: ", int)
        famous_song = input("Enter famous song: ")

        # Values are positional: they must line up with the table's column
        # order. NOTE(review): assumes the CSV columns are in exactly this
        # order - confirm against the data file.
        new_row = [
            title, genre, year, rating, revenue, country,
            actor_name, actress_name, actor_likes, runtime,
            gross, release_date, genre_count, avg_genre_rating,
            director_name, year_trend_score, famous_song,
        ]
        if len(new_row) != len(df.columns):
            # Assigning a row of the wrong width would raise; report it instead.
            print(
                f"❌ Movie not added: entered {len(new_row)} values "
                f"but the table has {len(df.columns)} columns."
            )
        else:
            # len(df) is an unused row label as long as rows are numbered
            # 0..len-1; option 4 renumbers after every delete to keep it so.
            df.loc[len(df)] = new_row
            print("✅ Movie added successfully!")

    # ------------------ OPTION 4: delete a movie ------------------
    elif choice == "4":
        title = input("Enter exact movie title to delete: ")
        if title in df["Title"].values:
            # Renumber the remaining rows; otherwise a later "add" at label
            # len(df) could land on, and overwrite, an existing row.
            df = df[df["Title"] != title].reset_index(drop=True)
            print("✅ Movie deleted successfully!")
        else:
            print("❌ Movie not found.")

    # ------------------ OPTION 5: update a rating ------------------
    elif choice == "5":
        title = input("Enter exact movie title to update: ")
        if title in df["Title"].values:
            new_rating = _read_number("Enter new rating: ", float)
            df.loc[df["Title"] == title, "Rating"] = new_rating
            print("✅ Rating updated successfully!")
        else:
            print("❌ Movie not found.")

    # ------------------ OPTION 6: search by title (case-insensitive) ------------------
    elif choice == "6":
        title = input("Enter movie title to search: ")
        result = df[df["Title"].str.lower() == title.lower()]
        if not result.empty:
            print(result)
        else:
            print("❌ Movie not found.")

    # ------------------ OPTION 7: list by genre ------------------
    elif choice == "7":
        genre = input("Enter genre: ")
        result = df[df["Genre"].str.lower() == genre.lower()]
        if not result.empty:
            print(result)
        else:
            print("❌ No movies found for this genre.")

    # ------------------ OPTION 8: list by director ------------------
    elif choice == "8":
        director = input("Enter director name: ")
        result = df[df["Director Name"].str.lower() == director.lower()]
        if not result.empty:
            print(result)
        else:
            print("❌ No movies found for this director.")

    # ------------------ OPTION 9: average rating per genre ------------------
    elif choice == "9":
        print("\nAverage rating by genre:")
        print(df.groupby("Genre")["Rating"].mean())

    # ------------------ OPTION 10: average rating per director ------------------
    elif choice == "10":
        print("\nAverage rating by director:")
        print(df.groupby("Director Name")["Rating"].mean())

    # ------------------ OPTION 11: compare two movies ------------------
    elif choice == "11":
        m1 = input("Enter first movie: ")
        m2 = input("Enter second movie: ")
        r1 = df.loc[df["Title"].str.lower() == m1.lower(), "Rating"].values
        r2 = df.loc[df["Title"].str.lower() == m2.lower(), "Rating"].values
        if r1.size and r2.size:
            # If a title occurs more than once, the first match is shown.
            print(f"{m1}: {r1[0]} | {m2}: {r2[0]}")
        else:
            print("❌ One or both movies not found.")

    # ------------------ OPTION 12: best movie in a genre ------------------
    elif choice == "12":
        genre = input("Enter genre: ")
        g_df = df[df["Genre"].str.lower() == genre.lower()]
        if not g_df.empty:
            print("\nHighest rated movie in this genre:\n")
            print(g_df.loc[g_df["Rating"].idxmax()])
        else:
            print("❌ No movies found in this genre.")

    # ------------------ OPTION 13: worst movie in a genre ------------------
    elif choice == "13":
        genre = input("Enter genre: ")
        g_df = df[df["Genre"].str.lower() == genre.lower()]
        if not g_df.empty:
            print("\nLowest rated movie in this genre:\n")
            print(g_df.loc[g_df["Rating"].idxmin()])
        else:
            print("❌ No movies found in this genre.")

    # ------------------ OPTION 14: bar chart ------------------
    elif choice == "14":
        df_sorted = df.sort_values(by="Rating", ascending=False)
        plt.figure(figsize=(10, 6))
        plt.bar(df_sorted["Title"], df_sorted["Rating"], color='skyblue')
        plt.xticks(rotation=90)
        plt.xlabel("Movie Title")
        plt.ylabel("Rating")
        plt.title("Movie Ratings (Bar Chart)")
        plt.tight_layout()
        plt.show()

    # ------------------ OPTION 15: histogram ------------------
    elif choice == "15":
        plt.figure(figsize=(8, 6))
        plt.hist(df["Rating"], bins=10, color='lightgreen', edgecolor='black')
        plt.xlabel("Rating")
        plt.ylabel("Frequency")
        plt.title("Distribution of Movie Ratings")
        plt.grid(axis='y')
        plt.show()

    # ------------------ OPTION 16: line chart over release date ------------------
    elif choice == "16":
        # NOTE(review): "Release Date" is sorted as stored. If the column holds
        # text dates that are not in year-month-day form, this order is not
        # chronological - confirm the CSV's date format.
        df_sorted = df.sort_values(by="Release Date")
        plt.figure(figsize=(10, 6))
        plt.plot(df_sorted["Release Date"], df_sorted["Rating"], marker='o', color='orange')
        plt.xticks(rotation=45)
        plt.xlabel("Release Date")
        plt.ylabel("Rating")
        plt.title("Ratings Over Time (Line Chart)")
        plt.tight_layout()
        plt.show()

    # ------------------ OPTION 17: row count ------------------
    elif choice == "17":
        print(f"\nTotal number of movies in database: {len(df)}")

    # ------------------ OPTION 18: top N by rating ------------------
    elif choice == "18":
        n = _read_number("Enter how many top movies to display: ", int)
        if n <= 0:
            # head() with a negative count drops rows from the end instead of
            # taking a top-N, which is never what the user meant here.
            print("❌ Please enter a number greater than zero.")
        else:
            print(df.sort_values(by="Rating", ascending=False).head(n))

    # ------------------ OPTION 19: save to CSV ------------------
    elif choice == "19":
        out_file = "updated_movies__new.csv"
        df.to_csv(out_file, index=False)
        # Report the name of the file that was actually written.
        print(f"✅ Updated movie list saved to '{out_file}'!")

    # ------------------ OPTION 20: exit ------------------
    elif choice == "20":
        print("\nThank you for using the Movie Data Analysis Program! 🎬")
        break

    else:
        print("❌ Invalid choice. Please try again.")