Temperature Analysis and Visualization

Temperature Analysis and Visualization

Preparation

First we need to import all the modules we need

import pandas as pd
import sqlite3
import numpy as np
from plotly import express as px
from sklearn.linear_model import LinearRegression
import calendar
import plotly.graph_objects as go
from plotly.io import write_html

Data Import

Import the temps, countries and stations as data frames

temps = pd.read_csv("temps_stacked.csv")
temps.head()
ID Year Month Temp
0 ACW00011604 1961 1 -0.89
1 ACW00011604 1961 2 2.36
2 ACW00011604 1961 3 4.72
3 ACW00011604 1961 4 7.73
4 ACW00011604 1961 5 11.28
countries = pd.read_csv('countries.csv')
countries.head()
FIPS 10-4 ISO 3166 Name
0 AF AF Afghanistan
1 AX - Akrotiri
2 AL AL Albania
3 AG DZ Algeria
4 AQ AS American Samoa
countries = countries.rename(columns= {"FIPS 10-4": "FIPS_10-4"})
countries.head()
FIPS_10-4 ISO 3166 Name
0 AF AF Afghanistan
1 AX - Akrotiri
2 AL AL Albania
3 AG DZ Algeria
4 AQ AS American Samoa
stations = pd.read_csv('station-metadata.csv')
stations.head()
ID LATITUDE LONGITUDE STNELEV NAME
0 ACW00011604 57.7667 11.8667 18.0 SAVE
1 AE000041196 25.3330 55.5170 34.0 SHARJAH_INTER_AIRP
2 AEM00041184 25.6170 55.9330 31.0 RAS_AL_KHAIMAH_INTE
3 AEM00041194 25.2550 55.3640 10.4 DUBAI_INTL
4 AEM00041216 24.4300 54.4700 3.0 ABU_DHABI_BATEEN_AIR

Create a Database

Create a database with the three data frames.

conn = sqlite3.connect("temps.db")
temps.to_sql("temperatures", conn, if_exists="replace", index=False)
countries.to_sql("countries", conn, if_exists="replace", index=False)
stations.to_sql("stations", conn, if_exists="replace", index=False)
# always close your connection
conn.close()
C:\Users\tfq21\anaconda3\lib\site-packages\pandas\core\generic.py:2872: UserWarning:

The spaces in these column names will not be changed. In pandas versions < 0.14, spaces were converted to underscores.

The Query Function

Define a query function of sqlite to get the data we need.

def query_climate_database(country, year_begin, year_end, month):
    #Input: three variables
    #Country: the country we want to investigate
    #year_begin,year_end: the year period starting at year_begin, ending at year_end
    #month: the month that we want to investigate
    #Output: the data frame with all the desired data
    conn = sqlite3.connect("temps.db")
    cmd = f"""SELECT S.name, S.latitude, S.longitude, C.name, T.year, T.month, T.temp\
              FROM temperatures T\
              LEFT JOIN stations S ON T.id=S.id\
              LEFT JOIN countries C ON C."FIPS_10-4" =substring(T.id,1,2)\
              WHERE C.Name = '{country}' AND T.year >= {year_begin} AND T.year<={year_end} AND T.month ={month}"""
    
    df = pd.read_sql(cmd, conn)
    conn.close()
    return df    

Get the data of Inida from 1980 to 2020 on Janurary

df = query_climate_database("India",1980,2020,1)
df.head()
NAME LATITUDE LONGITUDE Name Year Month Temp
0 PBO_ANANTAPUR 14.583 77.633 India 1980 1 23.48
1 PBO_ANANTAPUR 14.583 77.633 India 1981 1 24.57
2 PBO_ANANTAPUR 14.583 77.633 India 1982 1 24.19
3 PBO_ANANTAPUR 14.583 77.633 India 1983 1 23.51
4 PBO_ANANTAPUR 14.583 77.633 India 1984 1 24.81

Geographic Scatter Function

First we need a helper function coef to compute the slope

def coef(data_group):
    #Input:a data frame
    #output: the slope of the temperature
    X = data_group[["Year"]]
    y = data_group["Temp"]
    LR = LinearRegression()
    LR.fit(X, y)
    slope = LR.coef_[0]
    return slope

Define the plot function.

def temperature_coefficient_plot(country, year_begin, year_end, month, min_obs, **kwargs):
    #Input: six variables
    #Country: the country we want to investigate
    #year_begin,year_end: the year period starting at year_begin, ending at year_end
    #month: the month that we want to investigate
    #Output: the plot we generate
    df = query_climate_database(country, year_begin, year_end, month)
    df['len']=df.groupby(["NAME"])["Temp"].transform(len)
    df=df[df['len']>= min_obs]
    
    coefs = df.groupby(["NAME"]).apply(coef)
    coefs = coefs.reset_index()
    coefs[0]=coefs[0].round(4)
    df =pd.merge(df,coefs,how='right')
    df.rename(columns={0:'Estimated Yearly Increase (°C)'},inplace = True)
    fig = px.scatter_mapbox(df, # data for the points you want to plot
                            lat = "LATITUDE", # column name for latitude informataion
                            lon = "LONGITUDE", # column name for longitude information
                            hover_name = "NAME", # what's the bold text that appears when you hover over
                            zoom = 1, # how much you want to zoom into the map
                            height = 500, # control aspect ratio
                            mapbox_style="carto-positron", # map style
                            opacity=0.2,# opacity for each data point
                            color = "Estimated Yearly Increase (°C)",# represent temp increase using color
                            color_continuous_midpoint = 0,
                            title = f"Estimated Yearly Increase in temperature in {calendar.month_name[month]} for stations in {country}, years {year_begin}-{year_end}"
                           )
    return fig

Draw the graph of India from 1980 to 2020 Janurary.

color_map = px.colors.diverging.RdGy_r # choose a colormap

fig_india = temperature_coefficient_plot("India", 1980, 2020, 1, 
                                   min_obs = 10,
                                   zoom = 2,
                                   mapbox_style="carto-positron",
                                   color_continuous_scale=color_map)

write_html(fig_india, "fig_india.html")

Draw the graph of China from 1985 to 2015, Janurary.

fig_China = temperature_coefficient_plot("China", 1985, 2015, 1, 
                                   min_obs = 10,
                                   zoom = 2,
                                   mapbox_style="carto-positron")
write_html(fig_China, "fig_China.html")

The Query Function 2

Extract the temperature data, given a longtitude range and time period

def query_climate_database2(year_begin,year_end, month, long_begin, long_end):
    #Input: five variables
    #year_begin,year_end: the year period starting at year_begin, ending at year_end
    #month: the month that we want to investigate
    #long_begin, long_end: the longitude range starting at long_begin, ending at long_end
    #Output: the data frame with all the desired data
    conn = sqlite3.connect("temps.db")
    cmd = f"""SELECT S.name, S.latitude, S.longitude, C.name, T.year, T.month, T.temp\
              FROM temperatures T\
              LEFT JOIN stations S ON T.id=S.id\
              LEFT JOIN countries C ON C."FIPS_10-4" =substring(T.id,1,2)\
              WHERE S.longitude >={long_begin} AND S.longitude <={long_end} \
              AND T.year >= {year_begin} AND T.year <= {year_end} AND T.month ={month}"""
    
    df = pd.read_sql(cmd, conn)
    conn.close()
    return df

Extract the temperature for longitude 30 to 32, from 2018 to 2020.

df2 = query_climate_database2(2018,2020, 1, 30, 32)
df2.head()
NAME LATITUDE LONGITUDE Name Year Month Temp
0 VITEBSK 55.167 30.217 Belarus 2018 1 -3.30
1 VITEBSK 55.167 30.217 Belarus 2019 1 -6.30
2 VITEBSK 55.167 30.217 Belarus 2020 1 1.05
3 ORSHA 54.500 30.417 Belarus 2018 1 -3.58
4 ORSHA 54.500 30.417 Belarus 2019 1 -6.01

Scatter plot function

We want to analyze the relation between temperature and latitude

def scatter_plot(year_begin,year_end, month, long_begin, long_end, **kwags):
    #Input: five variables
    #year_begin,year_end: the year period starting at year_begin, ending at year_end
    #month: the month that we want to investigate
    #long_begin, long_end: the longitude range starting at long_begin, ending at long_end
    #Kwags: optional argument for graph
    #Output: the scatter plot
    df = query_climate_database2(year_begin,year_end, month, long_begin, long_end)


    fig = px.scatter(data_frame = df,
                      x = "LATITUDE",
                      y = "Temp",
                      hover_name = "NAME",
                      hover_data =["Name","LONGITUDE"],
                      color ="Name",
                      facet_row = "Year"
                      )
    return fig

Visualize the realtion between temperatures and latitude for longtitude 30 to 32, from 2018 to 2020

fig_scatter = scatter_plot(2018,2020, 1, 30, 32)
write_html(fig_scatter, "fig_scatter.html")

We can see from the plot that as the latitude increases, the temperature drops.

The Query Function 3

Extract the temperature data, given a location with latitude range and longitude, for a year

def query_climate_database3(year,long_begin, long_end, lat_begin,lat_end):
    #Input: five variables
    #year: the year to be analyzed
    #long_begin, long_end: the longitude range starting at long_begin, ending at long_end
    #lat_begin, lat_end: the latitude range starting at lat_begin, ending at lat_end
    #Output: the data frame with all the desired data
    conn = sqlite3.connect("temps.db")
    cmd = f"""SELECT S.name, S.latitude, S.longitude, C.name, T.year, T.month, T.temp\
              FROM temperatures T\
              LEFT JOIN stations S ON T.id=S.id\
              LEFT JOIN countries C ON C."FIPS_10-4" =substring(T.id,1,2)\
              WHERE T.year={year} AND (T.month=1 or T.month=7) AND S.longitude >={long_begin} \
              AND S.longitude <={long_end} AND S.latitude >={lat_begin} AND S.latitude <={lat_end}
              """
    
    df = pd.read_sql(cmd, conn)
    conn.close()
    return df

Density_heat plot function

Visualize the temperature difference between Janurary and July

Define a plot function with the given data

def heat_density_plot(year,long_begin, long_end, lat_begin,lat_end,**kwags):
    #Input: five variables
    #year: the year to be analyzed
    #long_begin, long_end: the longitude range starting at long_begin, ending at long_end
    #lat_begin, lat_end: the latitude range starting at lat_begin, ending at lat_end
    #Kwags: optional argument for graph
    #Output: the heat density plot
    df = query_climate_database3(year,long_begin, long_end, lat_begin,lat_end)
    fig = px.density_heatmap(df, 
                             x = "LATITUDE",
                             y = "LONGITUDE",
                             z = "Temp",
                             facet_row = "Month",
                             hover_name = "NAME",
                             nbinsx = 25,
                             nbinsy = 25)
    return fig

Draw the density graph for the location with latitude from 30 to 50, longitude from 80 to 110 of 2019

fig_heat = heat_density_plot(2019,80,110,30,50)
write_html(fig_heat, "fig_heat.html")
Written on April 17, 2022