Bike Share Toronto

Dataset visualization (Python)

# python
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

%cd -q "../dataset/data/weather/"
years = ["2017", "2018", "2019", "2020", "2021"]

for year in years:
    files_weather = os.listdir()

    files_weather_selected = []
    for file in files_weather:
        if year in file:
            files_weather_selected.append(file)
    
    df_weather = pd.DataFrame()
    for file in files_weather_selected:
        df_weather = df_weather.append(pd.read_csv(file))

    plt.figure()
    palette = sns.diverging_palette(250, 15, s=100, l=50, n=9, center="light")
    ax = sns.boxplot(x="Month", y="Temp (°C)", data=df_weather, palette=palette)
    ax.grid(linewidth= 0.2)
    ax.set_axisbelow(True)
    ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun", \
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
    plt.title(f"Monthly Weather in Toronto ({year})")
    plt.savefig(f"../../../project/weather_{year}.svg", dpi=1200, \
                format="svg", transparent=False, facecolor="white")
Saved figures from Python

Dataset visualization (Julia)

# Julia
# All paths below are built from the user's home directory.
home_dir = homedir()

# Build the environment path with `joinpath`: concatenating `home_dir` directly
# with ".julia/..." omits the separator and yields "<home>.julia/...", which is
# not the intended project directory.
using Pkg; Pkg.activate(joinpath(home_dir, ".julia", "environments", "DataFrames"))
import DataFrames as DF
using CSV, Gadfly

# Years for which one figure each is produced.
years = ["2017", "2018", "2019", "2020", "2021"]

# The trailing slash is kept on purpose: this string is used as a prefix that
# file names are appended to.
folder_weather = home_dir * "/github/BikeShareToronto/dataset/data/weather/"
files_weather = readdir(folder_weather)

# Month abbreviations indexed by month number (1 = "Jan").
month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# The default plot size is the same for every figure, so set it once.
set_default_plot_size(16cm, 9cm)

# Draw and save one temperature box plot per year.
for year in years
    # A file belongs to a year when the year string occurs in its name.
    files_weather_selected = filter(file -> occursin(year, file), files_weather)
    if isempty(files_weather_selected)
        # With no files there are no columns to select further down.
        @warn "No weather files found; skipping year" year
        continue
    end

    # Stack all files of the year; `cols = :union` tolerates files whose
    # column sets differ.
    df_all = DF.DataFrame()
    for file in files_weather_selected
        append!(df_all, DF.DataFrame(CSV.File(joinpath(folder_weather, file)));
                cols = :union)
    end

    df_weather = df_all[:, ["Temp (°C)", "Month"]]
    DF.rename!(df_weather, "Temp (°C)" => :temp, "Month" => :month)
    DF.dropmissing!(df_weather)

    # Sort by month number before relabelling so that months first appear in
    # calendar order. NOTE(review): the discrete x-axis presumably follows the
    # order of first appearance - confirm against Gadfly's scale docs.
    sort!(df_weather, :month)

    # Replace month numbers by their abbreviations in a single pass. Values
    # outside 1:12 are kept as their numeric string.
    df_weather[!, :month] = [get(month_labels, m, string(m)) for m in df_weather.month]

    p = plot(df_weather,
        x = :month,
        y = :temp,
        color = :month,
        Geom.boxplot,
        Guide.xlabel("Months"),
        Guide.ylabel("Temperature (°C)"),
        Guide.title("Monthly Weather in Toronto ($year)"),
        Guide.xticks(ticks=[0:13;]),
        Guide.colorkey(title = "Months"),
        Scale.y_continuous(format = :plain),
        Theme(background_color = "white")
    )

    # Write the figure next to the project sources as an SVG file.
    img_path = joinpath(home_dir, "github", "BikeShareToronto", "project",
                        "weather_$(year)_julia.svg")
    img = SVG(img_path, 16cm, 9cm)
    draw(img, p)
end
Saved figures from Julia