fork download
  1. import pandas as pd
  2.  
  3. # Create a DataFrame from the sample data
  4. data = {
  5. 'Duration': [30, 45, 30, 30, 45],
  6. 'Date': ['2024-11-01', '2024-11-01', '2024-11-02', '2024-11-01', '2024-11-03'],
  7. 'Pulse': [75, 80, 75, 75, 80],
  8. 'MaxPulse': [150, 155, 150, 150, 155],
  9. 'Calories': [250, 300, 250, 250, 310]
  10. }
  11.  
  12. df = pd.DataFrame(data)
  13.  
  14. # Check for duplicate rows in the entire DataFrame
  15. duplicates = df[df.duplicated()]
  16.  
  17. # Display duplicate entries
  18. print("Duplicate entries in the dataset:")
  19. print(duplicates)
  20.  
  21. # If you want to find duplicates based on specific columns (e.g., 'Date', 'Pulse', 'MaxPulse'):
  22. duplicates_specific_columns = df[df.duplicated(subset=['Date', 'Pulse', 'MaxPulse', 'Calories'])]
  23.  
  24. print("\nDuplicates based on specific columns:")
  25. print(duplicates_specific_columns)
  26.  
  27. # Optionally, show all duplicates (including the first occurrence)
  28. all_duplicates = df[df.duplicated(keep=False)]
  29.  
  30. print("\nAll duplicate entries (including first occurrence):")
  31. print(all_duplicates)
  32.  
Success #stdin #stdout 0.46s 60908KB
stdin
mushroom
stdout
Duplicate entries in the dataset:
   Calories        Date  Duration  MaxPulse  Pulse
3       250  2024-11-01        30       150     75

Duplicates based on specific columns:
   Calories        Date  Duration  MaxPulse  Pulse
3       250  2024-11-01        30       150     75

All duplicate entries (including first occurrence):
   Calories        Date  Duration  MaxPulse  Pulse
0       250  2024-11-01        30       150     75
3       250  2024-11-01        30       150     75