fork download
  1. # Challenge 1 billion rows (c) 2024 Baltasar MIT License <baltasarq@gmail.com>
  2. # Dataset: https://github.com/gunnarmorling/1brc/raw/main/data/weather_stations.csv
  3.  
  4.  
  5. import pandas as pd
  6. from datetime import datetime
  7.  
  8.  
  # Remote sample data for the challenge: one "city;temperature" record per line.
url = "https://github.com/gunnarmorling/1brc/raw/main/data/weather_stations.csv"


def load_temperatures(source) -> pd.DataFrame:
    """Read ``city;temperature`` records into a DataFrame.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path or
            file-like object). The data must have no header row; text
            after a ``#`` is treated as a comment and blank lines are
            skipped.

    Returns:
        A DataFrame with a string column ``city`` and a float column
        ``temperature``.
    """
    return pd.read_csv(
        source,
        sep=';',
        names=("city", "temperature"),
        dtype={"city": str, "temperature": float},
        comment='#',
        skip_blank_lines=True,
    )


def summarize_temperatures(df: pd.DataFrame) -> dict[str, str]:
    """Map every city to its ``"min/mean/max"`` summary string.

    Each value is rendered with exactly one decimal and no padding
    (e.g. ``"-5.5/3.2/20.0"``).
    """
    # One vectorized aggregation instead of three reductions per group
    # executed from a Python-level loop.
    stats = df.groupby("city")["temperature"].agg(["min", "mean", "max"])
    return {
        city: f"{lowest:.1f}/{mean:.1f}/{highest:.1f}"
        for city, lowest, mean, highest in stats.itertuples(name=None)
    }


def format_summary(temperatures_by_city: dict[str, str]) -> str:
    """Render the summaries as ``{city=min/mean/max, ...}`` sorted by city."""
    entries = ", ".join(
        f"{city}={temperatures_by_city[city]}"
        for city in sorted(temperatures_by_city)
    )
    return "{" + entries + "}"


def main() -> None:
    """Download the dataset, print the per-city summary and the elapsed time.

    Exits with a one-line message (non-zero status) when the dataset cannot
    be read, e.g. because the host cannot be resolved.
    """
    t1 = datetime.now()

    try:
        df_temperatures = load_temperatures(url)
    except OSError as err:
        # urllib's URLError/HTTPError derive from OSError, so this covers
        # DNS, connection and HTTP failures without a long traceback.
        raise SystemExit(f"Could not read dataset from {url}: {err}") from err

    temperatures_by_city = summarize_temperatures(df_temperatures)
    print(format_summary(temperatures_by_city))

    # The timer deliberately includes printing the full summary.
    t2 = datetime.now()

    # .get() keeps the report working on data sets that lack this city.
    print("Paris:", temperatures_by_city.get("Paris", "n/a"))
    print("Elapsed time:", t2 - t1, "num cities:", len(temperatures_by_city))


if __name__ == "__main__":
    main()
  40.  
Runtime error #stdin #stdout #stderr 0.49s 68344KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "/usr/lib/python3.9/urllib/request.py", line 1346, in do_open
    h.request(req.get_method(), req.selector, req.data, headers,
  File "/usr/lib/python3.9/http/client.py", line 1257, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/usr/lib/python3.9/http/client.py", line 1303, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/usr/lib/python3.9/http/client.py", line 1252, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/usr/lib/python3.9/http/client.py", line 1012, in _send_output
    self.send(msg)
  File "/usr/lib/python3.9/http/client.py", line 952, in send
    self.connect()
  File "/usr/lib/python3.9/http/client.py", line 1419, in connect
    super().connect()
  File "/usr/lib/python3.9/http/client.py", line 923, in connect
    self.sock = self._create_connection(
  File "/usr/lib/python3.9/socket.py", line 822, in create_connection
    for res in getaddrinfo(host, port, 0, SOCK_STREAM):
  File "/usr/lib/python3.9/socket.py", line 953, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "./prog.py", line 13, in <module>
  File "/usr/local/lib/python3.9/dist-packages/pandas/util/_decorators.py", line 311, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/parsers/readers.py", line 586, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/parsers/readers.py", line 482, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/parsers/readers.py", line 811, in __init__
    self._engine = self._make_engine(self.engine)
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/parsers/readers.py", line 1040, in _make_engine
    return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/parsers/c_parser_wrapper.py", line 51, in __init__
    self._open_handles(src, kwds)
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/parsers/base_parser.py", line 222, in _open_handles
    self.handles = get_handle(
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/common.py", line 609, in get_handle
    ioargs = _get_filepath_or_buffer(
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/common.py", line 312, in _get_filepath_or_buffer
    with urlopen(req_info) as req:
  File "/usr/local/lib/python3.9/dist-packages/pandas/io/common.py", line 212, in urlopen
    return urllib.request.urlopen(*args, **kwargs)
  File "/usr/lib/python3.9/urllib/request.py", line 214, in urlopen
    return opener.open(url, data, timeout)
  File "/usr/lib/python3.9/urllib/request.py", line 517, in open
    response = self._open(req, data)
  File "/usr/lib/python3.9/urllib/request.py", line 534, in _open
    result = self._call_chain(self.handle_open, protocol, protocol +
  File "/usr/lib/python3.9/urllib/request.py", line 494, in _call_chain
    result = func(*args)
  File "/usr/lib/python3.9/urllib/request.py", line 1389, in https_open
    return self.do_open(http.client.HTTPSConnection, req,
  File "/usr/lib/python3.9/urllib/request.py", line 1349, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno -3] Temporary failure in name resolution>