# Challenge 1 billion rows (c) 2024 Baltasar MIT License <baltasarq@gmail.com>
# Dataset: https://github.com/gunnarmorling/1brc/raw/main/data/weather_stations.csv


import pandas as pd
from datetime import datetime


# Canonical location of the 1BRC sample dataset (the original line was
# garbled by extraction; restored from the header comment's intent).
url = "https://github.com/gunnarmorling/1brc/raw/main/data/weather_stations.csv"


def load_temperatures_by_city(source):
    """Read the ';'-separated station file and summarize it per city.

    Parameters
    ----------
    source : str | file-like
        URL, path, or buffer accepted by ``pandas.read_csv``.

    Returns
    -------
    dict[str, str]
        Maps each city to a ``"min/mean/max"`` string with one decimal
        per value (the 1BRC output format, e.g. ``"-23.0/18.0/59.2"``).
    """
    stats = (pd.read_csv(source,
                         sep=';',
                         names=("city", "temperature"),
                         dtype={"city": str, "temperature": float},
                         comment='#',
                         skip_blank_lines=True)
             # Vectorized aggregation instead of a Python loop over groups.
             .groupby("city")["temperature"]
             .agg(["min", "mean", "max"]))
    # ":.1f" (no space flag) — the original " 3.1f" padded positive
    # values with a leading space, which is not the 1BRC format.
    return {
        city: f"{row['min']:.1f}/{row['mean']:.1f}/{row['max']:.1f}"
        for city, row in stats.iterrows()
    }


def main(source=url):
    """Fetch the dataset, print all city summaries, and report timing."""
    t1 = datetime.now()

    temperatures_by_city = load_temperatures_by_city(source)

    # Show: "{CityA=min/mean/max, CityB=..., ...}" in alphabetical order.
    print("{",
          ", ".join(f"{city}={temperatures_by_city[city]}"
                    for city in sorted(temperatures_by_city)),
          "}",
          sep="")

    t2 = datetime.now()

    print("Paris:", temperatures_by_city["Paris"])
    print("Elapsed time:", t2 - t1, "num cities:", len(temperatures_by_city))


# Guard so importing this module does not trigger the network fetch.
if __name__ == "__main__":
    main()
