#!/usr/bin/env python
# coding: utf-8

# ### Telemetry Analysis

# In[140]:


import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client

# Run the IPython line magic `pylab` with the argument `inline`.
# NOTE(review): `get_ipython` is not imported in this file; presumably it is
# supplied by the IPython/Jupyter runtime, in which case this line would fail
# under a plain `python` interpreter -- confirm how this script is meant to run.
get_ipython().run_line_magic('pylab', 'inline')


# How many parallel workers do we have?

# In[141]:


# Bare expression: when run as a plain script its value is discarded;
# presumably it was the displayed output of a notebook cell.
# NOTE(review): `sc` is not defined in this file; presumably the SparkContext
# injected by the notebook environment -- confirm.
sc.defaultParallelism


# Fetch all the submissions for nightly builds made from 2015-07-15 to 2015-07-25:

# In[149]:


# Request Firefox nightly pings whose build ID falls inside the inclusive
# timestamp range below. fraction=1.0 presumably means "no sampling", in line
# with the "all the submissions" wording above -- confirm against moztelemetry.
pings = get_pings(
    sc,
    app="Firefox",
    channel="nightly",
    build_id=("20150715000000", "20150725999999"),
    fraction=1.0
)


# Extract the device pixels per virtual pixel setting from each ping (one per user) and filter out the unspecified values:

# In[150]:


# Keep only the client ID and the devPixelsPerPx user pref from each ping,
# reduce the result with get_one_ping_per_client, then drop the records whose
# pref value is missing.
subset = get_pings_properties(
    pings,
    ["clientID", "environment/settings/userPrefs/layout.css.devPixelsPerPx"]
)
subset = get_one_ping_per_client(subset)
# Use an identity test for "missing": `!= None` goes through the value's own
# comparison methods and is discouraged by PEP 8.
valid_subset = subset.filter(
    lambda ping: ping['environment/settings/userPrefs/layout.css.devPixelsPerPx'] is not None
)


# What percentage of users have this setting specified?

# In[153]:


# Look at one record of the filtered set as a quick sanity check.
# As a plain script the returned value is discarded; presumably it was the
# displayed output of a notebook cell.
valid_subset.first()


# In[152]:


# Count the records that carry a value for the pref, then all records, and
# express the former as a percentage of the latter.
valid_count = valid_subset.count()
total_count = subset.count()
# The 100.0 factor keeps the division in floating point.
"{}% ({} of {})".format(100.0 * valid_count / total_count, valid_count, total_count)


# Caching is fundamental as it allows for an iterative, real-time development workflow:

# In[154]:


# Keep a handle on the cached form of the filtered records; the aggregation
# further down reads from `cached` rather than from `valid_subset`.
cached = valid_subset.cache()


# Aggregate the settings by their value:

# In[159]:


# Tally how many records carry each distinct pref value: emit (value, 1) per
# record, sum the ones per value, and collect the result as a mapping.
settings = (
    cached
    .map(lambda ping: (ping['environment/settings/userPrefs/layout.css.devPixelsPerPx'], 1))
    .reduceByKey(lambda left, right: left + right)
    .collectAsMap()
)
settings


# And finally plot the data:

# In[161]:


# Bar chart of the per-value counts, one bar per distinct raw pref value,
# ordered by that value.
pairs = sorted(settings.items(), key=lambda pair: pair[0])
# Build real lists: on Python 3 `map` returns a lazy iterator, which
# matplotlib cannot use as bar heights or tick labels.
labels = [pair[0] for pair in pairs]
counts = [pair[1] for pair in pairs]
positions = np.arange(len(pairs))
width = 0.8

plt.figure(figsize=(15, 7))
# Centre the bars on their positions explicitly and put the ticks at those
# same positions. The previous `+ width/2` tick offset only centred the labels
# when bars were left-aligned, which is not the default in every matplotlib
# release.
plt.bar(positions, counts, width=width, align="center")
ax = plt.gca()
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=90)
plt.xlabel("layout.css.devPixelsPerPx Value")
plt.ylabel("Number of client IDs")
plt.show()


# Some of these values are duplicated. To group settings by their numerical values, we parse the labels:

# In[164]:


from collections import defaultdict

# Merge labels that parse to the same number (float("1") == float("1.0"), for
# instance) by keying the counts on the parsed float.
numerical_settings = defaultdict(int)
for setting, count in settings.items():
    try:
        value = float(setting)
    except ValueError:
        # Not parseable as a number: leave it out of the numeric chart.
        continue
    if np.isnan(value):
        # float() also accepts "nan"; NaN never compares equal to itself, so
        # it can neither be merged with other entries nor sorted reliably.
        continue
    numerical_settings[value] += count

pairs = sorted(numerical_settings.items(), key=lambda pair: pair[0])
# Build real lists: on Python 3 `map` returns a lazy iterator, which
# matplotlib cannot use as bar heights or tick labels.
labels = [pair[0] for pair in pairs]
counts = [pair[1] for pair in pairs]
positions = np.arange(len(pairs))
width = 0.8

plt.figure(figsize=(15, 7))
# Centre the bars on their positions explicitly and put the ticks at those
# same positions. The previous `+ width/2` tick offset only centred the labels
# when bars were left-aligned, which is not the default in every matplotlib
# release.
plt.bar(positions, counts, width=width, align="center")
ax = plt.gca()
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=90)
plt.xlabel("layout.css.devPixelsPerPx Value")
plt.ylabel("Number of client IDs")
plt.show()


# In[ ]: