Temperature Analysis and Visualization
Temperature Analysis and Visualization
Preparation
First we need to import all the modules we need
import pandas as pd
import sqlite3
import numpy as np
from plotly import express as px
from sklearn.linear_model import LinearRegression
import calendar
import plotly.graph_objects as go
from plotly.io import write_html
Data Import
Import the temps, countries and stations as data frames
temps = pd.read_csv("temps_stacked.csv")
temps.head()
| ID | Year | Month | Temp | |
|---|---|---|---|---|
| 0 | ACW00011604 | 1961 | 1 | -0.89 |
| 1 | ACW00011604 | 1961 | 2 | 2.36 |
| 2 | ACW00011604 | 1961 | 3 | 4.72 |
| 3 | ACW00011604 | 1961 | 4 | 7.73 |
| 4 | ACW00011604 | 1961 | 5 | 11.28 |
countries = pd.read_csv('countries.csv')
countries.head()
| FIPS 10-4 | ISO 3166 | Name | |
|---|---|---|---|
| 0 | AF | AF | Afghanistan |
| 1 | AX | - | Akrotiri |
| 2 | AL | AL | Albania |
| 3 | AG | DZ | Algeria |
| 4 | AQ | AS | American Samoa |
countries = countries.rename(columns= {"FIPS 10-4": "FIPS_10-4"})
countries.head()
| FIPS_10-4 | ISO 3166 | Name | |
|---|---|---|---|
| 0 | AF | AF | Afghanistan |
| 1 | AX | - | Akrotiri |
| 2 | AL | AL | Albania |
| 3 | AG | DZ | Algeria |
| 4 | AQ | AS | American Samoa |
stations = pd.read_csv('station-metadata.csv')
stations.head()
| ID | LATITUDE | LONGITUDE | STNELEV | NAME | |
|---|---|---|---|---|---|
| 0 | ACW00011604 | 57.7667 | 11.8667 | 18.0 | SAVE |
| 1 | AE000041196 | 25.3330 | 55.5170 | 34.0 | SHARJAH_INTER_AIRP |
| 2 | AEM00041184 | 25.6170 | 55.9330 | 31.0 | RAS_AL_KHAIMAH_INTE |
| 3 | AEM00041194 | 25.2550 | 55.3640 | 10.4 | DUBAI_INTL |
| 4 | AEM00041216 | 24.4300 | 54.4700 | 3.0 | ABU_DHABI_BATEEN_AIR |
Create a Database
Create a database with the three data frames.
conn = sqlite3.connect("temps.db")
temps.to_sql("temperatures", conn, if_exists="replace", index=False)
countries.to_sql("countries", conn, if_exists="replace", index=False)
stations.to_sql("stations", conn, if_exists="replace", index=False)
# always close your connection
conn.close()
C:\Users\tfq21\anaconda3\lib\site-packages\pandas\core\generic.py:2872: UserWarning:
The spaces in these column names will not be changed. In pandas versions < 0.14, spaces were converted to underscores.
The Query Function
Define a query function of sqlite to get the data we need.
def query_climate_database(country, year_begin, year_end, month):
#Input: three variables
#Country: the country we want to investigate
#year_begin,year_end: the year period starting at year_begin, ending at year_end
#month: the month that we want to investigate
#Output: the data frame with all the desired data
conn = sqlite3.connect("temps.db")
cmd = f"""SELECT S.name, S.latitude, S.longitude, C.name, T.year, T.month, T.temp\
FROM temperatures T\
LEFT JOIN stations S ON T.id=S.id\
LEFT JOIN countries C ON C."FIPS_10-4" =substring(T.id,1,2)\
WHERE C.Name = '{country}' AND T.year >= {year_begin} AND T.year<={year_end} AND T.month ={month}"""
df = pd.read_sql(cmd, conn)
conn.close()
return df
Get the data of Inida from 1980 to 2020 on Janurary
df = query_climate_database("India",1980,2020,1)
df.head()
| NAME | LATITUDE | LONGITUDE | Name | Year | Month | Temp | |
|---|---|---|---|---|---|---|---|
| 0 | PBO_ANANTAPUR | 14.583 | 77.633 | India | 1980 | 1 | 23.48 |
| 1 | PBO_ANANTAPUR | 14.583 | 77.633 | India | 1981 | 1 | 24.57 |
| 2 | PBO_ANANTAPUR | 14.583 | 77.633 | India | 1982 | 1 | 24.19 |
| 3 | PBO_ANANTAPUR | 14.583 | 77.633 | India | 1983 | 1 | 23.51 |
| 4 | PBO_ANANTAPUR | 14.583 | 77.633 | India | 1984 | 1 | 24.81 |
Geographic Scatter Function
First we need a helper function coef to compute the slope
def coef(data_group):
#Input:a data frame
#output: the slope of the temperature
X = data_group[["Year"]]
y = data_group["Temp"]
LR = LinearRegression()
LR.fit(X, y)
slope = LR.coef_[0]
return slope
Define the plot function.
def temperature_coefficient_plot(country, year_begin, year_end, month, min_obs, **kwargs):
#Input: six variables
#Country: the country we want to investigate
#year_begin,year_end: the year period starting at year_begin, ending at year_end
#month: the month that we want to investigate
#Output: the plot we generate
df = query_climate_database(country, year_begin, year_end, month)
df['len']=df.groupby(["NAME"])["Temp"].transform(len)
df=df[df['len']>= min_obs]
coefs = df.groupby(["NAME"]).apply(coef)
coefs = coefs.reset_index()
coefs[0]=coefs[0].round(4)
df =pd.merge(df,coefs,how='right')
df.rename(columns={0:'Estimated Yearly Increase (°C)'},inplace = True)
fig = px.scatter_mapbox(df, # data for the points you want to plot
lat = "LATITUDE", # column name for latitude informataion
lon = "LONGITUDE", # column name for longitude information
hover_name = "NAME", # what's the bold text that appears when you hover over
zoom = 1, # how much you want to zoom into the map
height = 500, # control aspect ratio
mapbox_style="carto-positron", # map style
opacity=0.2,# opacity for each data point
color = "Estimated Yearly Increase (°C)",# represent temp increase using color
color_continuous_midpoint = 0,
title = f"Estimated Yearly Increase in temperature in {calendar.month_name[month]} for stations in {country}, years {year_begin}-{year_end}"
)
return fig
Draw the graph of India from 1980 to 2020 Janurary.
color_map = px.colors.diverging.RdGy_r # choose a colormap
fig_india = temperature_coefficient_plot("India", 1980, 2020, 1,
min_obs = 10,
zoom = 2,
mapbox_style="carto-positron",
color_continuous_scale=color_map)
write_html(fig_india, "fig_india.html")
Draw the graph of China from 1985 to 2015, Janurary.
fig_China = temperature_coefficient_plot("China", 1985, 2015, 1,
min_obs = 10,
zoom = 2,
mapbox_style="carto-positron")
write_html(fig_China, "fig_China.html")
The Query Function 2
Extract the temperature data, given a longtitude range and time period
def query_climate_database2(year_begin,year_end, month, long_begin, long_end):
#Input: five variables
#year_begin,year_end: the year period starting at year_begin, ending at year_end
#month: the month that we want to investigate
#long_begin, long_end: the longitude range starting at long_begin, ending at long_end
#Output: the data frame with all the desired data
conn = sqlite3.connect("temps.db")
cmd = f"""SELECT S.name, S.latitude, S.longitude, C.name, T.year, T.month, T.temp\
FROM temperatures T\
LEFT JOIN stations S ON T.id=S.id\
LEFT JOIN countries C ON C."FIPS_10-4" =substring(T.id,1,2)\
WHERE S.longitude >={long_begin} AND S.longitude <={long_end} \
AND T.year >= {year_begin} AND T.year <= {year_end} AND T.month ={month}"""
df = pd.read_sql(cmd, conn)
conn.close()
return df
Extract the temperature for longitude 30 to 32, from 2018 to 2020.
df2 = query_climate_database2(2018,2020, 1, 30, 32)
df2.head()
| NAME | LATITUDE | LONGITUDE | Name | Year | Month | Temp | |
|---|---|---|---|---|---|---|---|
| 0 | VITEBSK | 55.167 | 30.217 | Belarus | 2018 | 1 | -3.30 |
| 1 | VITEBSK | 55.167 | 30.217 | Belarus | 2019 | 1 | -6.30 |
| 2 | VITEBSK | 55.167 | 30.217 | Belarus | 2020 | 1 | 1.05 |
| 3 | ORSHA | 54.500 | 30.417 | Belarus | 2018 | 1 | -3.58 |
| 4 | ORSHA | 54.500 | 30.417 | Belarus | 2019 | 1 | -6.01 |
Scatter plot function
We want to analyze the relation between temperature and latitude
def scatter_plot(year_begin,year_end, month, long_begin, long_end, **kwags):
#Input: five variables
#year_begin,year_end: the year period starting at year_begin, ending at year_end
#month: the month that we want to investigate
#long_begin, long_end: the longitude range starting at long_begin, ending at long_end
#Kwags: optional argument for graph
#Output: the scatter plot
df = query_climate_database2(year_begin,year_end, month, long_begin, long_end)
fig = px.scatter(data_frame = df,
x = "LATITUDE",
y = "Temp",
hover_name = "NAME",
hover_data =["Name","LONGITUDE"],
color ="Name",
facet_row = "Year"
)
return fig
Visualize the realtion between temperatures and latitude for longtitude 30 to 32, from 2018 to 2020
fig_scatter = scatter_plot(2018,2020, 1, 30, 32)
write_html(fig_scatter, "fig_scatter.html")
We can see from the plot that as the latitude increases, the temperature drops.
The Query Function 3
Extract the temperature data, given a location with latitude range and longitude, for a year
def query_climate_database3(year,long_begin, long_end, lat_begin,lat_end):
#Input: five variables
#year: the year to be analyzed
#long_begin, long_end: the longitude range starting at long_begin, ending at long_end
#lat_begin, lat_end: the latitude range starting at lat_begin, ending at lat_end
#Output: the data frame with all the desired data
conn = sqlite3.connect("temps.db")
cmd = f"""SELECT S.name, S.latitude, S.longitude, C.name, T.year, T.month, T.temp\
FROM temperatures T\
LEFT JOIN stations S ON T.id=S.id\
LEFT JOIN countries C ON C."FIPS_10-4" =substring(T.id,1,2)\
WHERE T.year={year} AND (T.month=1 or T.month=7) AND S.longitude >={long_begin} \
AND S.longitude <={long_end} AND S.latitude >={lat_begin} AND S.latitude <={lat_end}
"""
df = pd.read_sql(cmd, conn)
conn.close()
return df
Density_heat plot function
Visualize the temperature difference between Janurary and July
Define a plot function with the given data
def heat_density_plot(year,long_begin, long_end, lat_begin,lat_end,**kwags):
#Input: five variables
#year: the year to be analyzed
#long_begin, long_end: the longitude range starting at long_begin, ending at long_end
#lat_begin, lat_end: the latitude range starting at lat_begin, ending at lat_end
#Kwags: optional argument for graph
#Output: the heat density plot
df = query_climate_database3(year,long_begin, long_end, lat_begin,lat_end)
fig = px.density_heatmap(df,
x = "LATITUDE",
y = "LONGITUDE",
z = "Temp",
facet_row = "Month",
hover_name = "NAME",
nbinsx = 25,
nbinsy = 25)
return fig
Draw the density graph for the location with latitude from 30 to 50, longitude from 80 to 110 of 2019
fig_heat = heat_density_plot(2019,80,110,30,50)
write_html(fig_heat, "fig_heat.html")
