Data Collection
# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------
# Location of the project source directory, relative to the working directory.
HIT_PATH = '../../../../src/'
# Identifier handed to hedera.mfi() below to pick the institution.
institution_id = 7
# Language code (not referenced in the cells shown here).
lang = 'en'

import os
import sys
import folium

# The project modules are not installed as a package: resolve HIT_PATH against
# the current working directory and put it first on the module search path so
# the two imports below can be found.
sys.path.insert(
    0,
    os.path.normpath(
        os.path.join(os.path.abspath(''), HIT_PATH)
    ),
)

import hedera_types as hedera
import odk_interface as odk

# Build the institution object, read the survey it points to, and attach the
# table returned by odk.households() as mfi.HH for the cells that follow.
mfi = hedera.mfi(institution_id, setPathBook=True)
data = mfi.read_survey(mfi.odk_data_name)
mfi.HH = odk.households(data)
import matplotlib.pyplot as plt

# Keep only households with usable coordinates.
# A latitude of exactly 0 is treated as "no GPS fix" (as in the original
# filter). Missing values must be excluded explicitly: NaN != 0 evaluates to
# True, so NaN rows would otherwise slip through and folium refuses to place a
# marker at a NaN location.
select = (
    (mfi.HH['GPS_Latitude'] != 0)
    & mfi.HH['GPS_Latitude'].notna()
    & mfi.HH['GPS_Longitude'].notna()
)
HH_with_GPS = mfi.HH[select]

# change plot layout
plt.rcParams["font.family"] = "TW Cen MT"
plt.rcParams.update({'font.size': 20})

# Define initial geolocation: centre the map on the mean coordinate.
lat_center = HH_with_GPS['GPS_Latitude'].mean()
lon_center = HH_with_GPS['GPS_Longitude'].mean()

# Zoom out one step per threshold the coordinate spread exceeds, so that widely
# scattered households still fit in the initial view.
max_var = max(HH_with_GPS['GPS_Latitude'].var(),
              HH_with_GPS['GPS_Longitude'].var())
zoom_start = 8
if max_var > 0.1:
    zoom_start -= 1
if max_var > 1:
    zoom_start -= 1
initial_location = [lat_center, lon_center]

# create map
# With no geolocated household the mean is NaN; fall back to folium's default
# location instead of failing on an invalid centre.
map_osm = folium.Map(
    None if HH_with_GPS.empty else initial_location,
    zoom_start=zoom_start,
)

# Colour per tier 0..5 as provided by hedera.tier_color (not used by the
# markers below, which share one fixed fill colour).
colors = {tier: hedera.tier_color(tier) for tier in range(6)}

# One circle marker per household; the popup carries latitude, longitude and
# locality. A plain loop is used because only the side effect (adding the
# marker to the map) matters, not a return value.
for _, row in HH_with_GPS.iterrows():
    folium.CircleMarker(
        location=[row["GPS_Latitude"], row["GPS_Longitude"]],
        radius=10,
        fill_color="#FF5733",
        popup=(row["GPS_Latitude"], row["GPS_Longitude"], row["locality"]),
    ).add_to(map_osm)

# Last expression of the cell: renders the map in a notebook.
map_osm
import numpy as np

# Per-survey durations with 'electricity', 'cooking' and 'total' columns.
# NOTE(review): S is indexed with masks built from mfi.HH, so it is assumed to
# share mfi.HH's index - confirm against odk.get_survey_duration.
S = odk.get_survey_duration(data)

# Sorted distinct survey dates and their bar positions on the x axis.
dates = np.unique(np.array(mfi.HH['date']))
ind = np.arange(len(dates))

dates_plot = []    # number of surveys per date
dates_labels = []  # the dates themselves, in the same order
mean_e = []        # mean duration of the electricity part per date
mean_c = []        # mean duration of the cooking part per date
mean_tot = []      # mean duration of the whole interview per date

for d in dates:
    select = mfi.HH['date'] == d
    dates_plot.append(sum(select))
    dates_labels.append(d)

    # get surveys data on a given date
    surveys = S[select]

    # Only strictly positive durations enter each mean; a date without any
    # such value yields NaN for that mean.
    selectE = surveys['electricity'] > 0
    surveysE = surveys[selectE]
    mean_e.append(surveysE['electricity'].mean())

    selectC = surveys['cooking'] > 0
    surveysC = surveys[selectC]
    mean_c.append(surveysC['cooking'].mean())

    # Use the filtered frame here as well: previously surveysT was computed
    # but the mean was taken over the unfiltered surveys, so non-positive
    # totals dragged the average down, unlike the two sections above.
    selectT = surveys['total'] > 0
    surveysT = surveys[selectT]
    mean_tot.append(surveysT['total'].mean())
import matplotlib.pyplot as plt

# Global plot styling for this figure.
plt.rcParams.update({"font.family": "TW Cen MT", 'font.size': 20})

# Number of surveys collected on each date, one bar per date.
fig, ax = plt.subplots(figsize=(10, 8))
ax.bar(ind, dates_plot, width=0.95, edgecolor='white')

# Label every bar with its date, rotated so the labels do not overlap.
ax.set_xticks(ind)
ax.set_xticklabels(dates, rotation=90)

# Dashed grid lines at the y-axis ticks.
ax.yaxis.grid(color='grey', linestyle='--', linewidth=0.5)
plt.show()
import matplotlib.pyplot as plt

# Global plot styling for this figure.
plt.rcParams.update({"font.family": "TW Cen MT", 'font.size': 14})

# This is needed if the surveys do not cover all states/offices: collect the
# offices that no household lists as its locality ...
empty = [
    office
    for office in mfi.offices
    if not (mfi.HH['locality'] == office).any()
]
# ... and drop them from mfi.offices in place. Removal happens in a second
# pass so the list is never modified while it is being iterated.
for office in empty:
    mfi.offices.remove(office)

mfi.plot_collection_barh()
import matplotlib.pyplot as plt
import numpy as np

# change plot layout
plt.rcParams["font.family"] = "TW Cen MT"
plt.rcParams.update({'font.size': 20})

# A date without any positive duration has a NaN mean. Used as `bottom`, a NaN
# would make the stacked cooking bar for that date vanish, so NaN is drawn as
# a zero-height bar instead. The mean_e / mean_c lists are left untouched.
heights_e = np.asarray(mean_e, dtype=float)
heights_e = np.where(np.isnan(heights_e), 0.0, heights_e)
heights_c = np.asarray(mean_c, dtype=float)
heights_c = np.where(np.isnan(heights_c), 0.0, heights_c)

# survey duration: mean electricity part with the mean cooking part stacked
# on top, one bar per survey date
fig, ax = plt.subplots(figsize=(10, 8))
plt.bar(ind, heights_e, width=0.95, edgecolor='white', color='#5DADE2',
        label='Electricity')
plt.bar(ind, heights_c, bottom=heights_e, width=0.95, edgecolor='white',
        color='#DC7633', label='Cooking')
plt.xticks(ind, dates, rotation=90)

# Legend placed to the right of the axes; draggable in interactive backends.
plt.legend(framealpha=1, frameon=False, bbox_to_anchor=(1.25, 1.0),
           loc='upper center').set_draggable(True)
ax.yaxis.grid(color='grey', linestyle='--', linewidth=0.85)  # horizontal grid lines
plt.show()
Duration of the entire interview.
Note: Some interviews only covered the household roster and are therefore much shorter.
import matplotlib.pyplot as plt

# Global plot styling for this figure.
plt.rcParams.update({"font.family": "TW Cen MT", 'font.size': 20})

# Mean duration of the whole interview, one bar per survey date.
fig, ax = plt.subplots(figsize=(10, 8))
ax.bar(
    ind,
    mean_tot,
    width=0.95,
    edgecolor='white',
    color='blue',
    label='Total',
)

# Label every bar with its date, rotated so the labels do not overlap.
ax.set_xticks(ind)
ax.set_xticklabels(dates, rotation=90)

# Legend placed to the right of the axes; draggable in interactive backends.
ax.legend(
    framealpha=1,
    frameon=False,
    bbox_to_anchor=(1.25, 1.0),
    loc='upper center',
).set_draggable(True)

# Dashed horizontal grid lines at the y-axis ticks.
ax.yaxis.grid(color='grey', linestyle='--', linewidth=0.85)
plt.show()
#from plotly.offline import iplot
#from plotly.offline import init_notebook_mode, plot
#from IPython.core.display import display, HTML
#import plotly as py
#import plotly.tools as tls
##print({'plotly version'+' '+py.__version__})
##py.offline.init_notebook_mode(connected=True)
#init_notebook_mode(connected=True)
##import cufflinks as cf
##cf.go_offline()
#df = data.groupby(['internal_version']).size().reset_index(name='count')
#fig = {
# "data": [{"type": "bar",
# "x": curps,
# "y": surveys}],
# "layout": {"title": {"text": "Encuestas por usuario"}}
#}
#
##df.plot(kind = 'bar', x ='internal_version', y ='count', filename = 'figure.html')
#plot(fig, filename = 'figure.html')
#display(HTML('figure.html'))