# Load the Matomo API token from a local file, dropping surrounding whitespace
# (such as a trailing newline).
with open("matomo-token") as token_file:
    token = token_file.read().strip()

# Matomo endpoint that the API requests are posted to.
url = "https://mybinder.org/matomo/index.php"
import requests
def api_request(method, **params):
    """Make a Matomo API request and return the decoded JSON response.

    Args:
        method: Matomo API method name, e.g. ``"API.getMatomoVersion"``.
        **params: Extra form fields. They are merged over the default
            fields below, so they can also override them.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status
            below 500. Statuses of 500 and above are retried (after a
            one-second pause) until a request succeeds.
    """
    # Imported here because the retry path needs `time` and no top-level
    # `import time` is visible in this file.
    import time

    data = {
        "token_auth": token,
        "format": "JSON",
        "module": "API",
        "method": method,
        "idSite": "1",
    }
    data.update(params)

    # Retry in a loop rather than by recursion, so a long run of server
    # errors cannot exhaust the interpreter's recursion limit.
    while True:
        r = requests.post(url, data=data)
        try:
            r.raise_for_status()
        except requests.HTTPError as e:
            print(e)
            if r.status_code < 500:
                raise
            # monthly queries seem to time out a lot
            # eventually, the result is cached and it responds quickly
            print("retrying")
            time.sleep(1)
        else:
            return r.json()
# Ask the server for its Matomo version through the api_request helper.
api_request("API.getMatomoVersion")
{'value': '3.14.1'}
import pandas as pd
# Fetched records keyed by month number; the fetch loop skips months that are
# already present here, so re-running it only requests what is still missing.
months = {}
Retry a bunch, because requests seem to time out the first time we ask for each month, but they eventually succeed.
# Fetch per-continent visit counts for January through October 2021,
# one request per month.
for month in range(1, 11):
    if month in months:
        # already fetched on an earlier run
        continue

    date = f"2021-{month:02}-01"
    print(f"Fetching {date}")
    records = api_request(
        "UserCountry.getContinent",
        period="month",
        date=date,
        showColumns="label,nb_visits,sum_daily_nb_uniq_visitors",
    )
    # Tag each record with its month so the rows can be told apart once
    # all months are combined.
    for record in records:
        record["date"] = date
    months[month] = records
# NOTE(review): `df` is only assigned further down in this file, so this line
# raises NameError if the file is run top to bottom — confirm the intended order.
df.tail(10)
label | nb_visits | sum_daily_nb_uniq_visitors | code | date | |
---|---|---|---|---|---|
70 | Africa | 664 | 529 | Africa | 2021-09-01 |
71 | Central America | 129 | 101 | Central America | 2021-09-01 |
72 | North America | 248606 | 204204 | North America | 2021-10-01 |
73 | Europe | 160872 | 131755 | Europe | 2021-10-01 |
74 | Asia | 83917 | 68437 | Asia | 2021-10-01 |
75 | South America | 15423 | 12659 | South America | 2021-10-01 |
76 | Unknown | 7102 | 5686 | Unknown | 2021-10-01 |
77 | Oceania | 1743 | 1447 | Oceania | 2021-10-01 |
78 | Africa | 829 | 665 | Africa | 2021-10-01 |
79 | Central America | 99 | 76 | Central America | 2021-10-01 |
from itertools import chain
import altair as alt
# Flatten the per-month record lists into a single table, one row per record.
df = pd.DataFrame(chain.from_iterable(months.values()))
df.head()
# Interactive line chart of monthly visits, one line per continent,
# with a log-scaled y axis.
alt.Chart(df).mark_line().encode(
    color=alt.Color(
        "label", title="Continent", sort={"encoding": "y", "order": "descending"}
    ),
    # The x channel takes an alt.X definition (alt.Y is for the y channel).
    x=alt.X("date", title="Month"),
    y=alt.Y(
        "nb_visits",
        title="Monthly visits",
        scale=alt.Scale(type="log"),
    ),
).interactive()
And again, this time summing every region other than North America and Europe together, and ignoring "Unknown":
# Drop the rows labelled "Unknown". Take an explicit copy so that adding a
# column below modifies an independent frame, not a slice of `df`
# (which is what triggers pandas' SettingWithCopyWarning).
gross = df[df.label != "Unknown"].copy()

# North America and Europe keep their own label; every other continent is
# folded into a single "Rest of World" bucket.
gross["Gross Region"] = gross.label.where(
    gross.label.isin(["North America", "Europe"]), "Rest of World"
)

# regroup sum by gross region
gross = gross.groupby(["date", "Gross Region"]).nb_visits.sum().reset_index()
gross
/Users/minrk/conda/lib/python3.9/site-packages/pandas/core/frame.py:3607: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self._set_item(key, value) /Users/minrk/conda/lib/python3.9/site-packages/pandas/core/indexing.py:1817: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self._setitem_single_column(loc, value, pi)
date | Gross Region | nb_visits | |
---|---|---|---|
0 | 2021-01-01 | Europe | 131855 |
1 | 2021-01-01 | North America | 194257 |
2 | 2021-01-01 | Rest of World | 58754 |
3 | 2021-02-01 | Europe | 143334 |
4 | 2021-02-01 | North America | 221450 |
5 | 2021-02-01 | Rest of World | 53489 |
6 | 2021-03-01 | Europe | 186893 |
7 | 2021-03-01 | North America | 266684 |
8 | 2021-03-01 | Rest of World | 85011 |
9 | 2021-04-01 | Europe | 165829 |
10 | 2021-04-01 | North America | 243889 |
11 | 2021-04-01 | Rest of World | 85013 |
12 | 2021-05-01 | Europe | 147872 |
13 | 2021-05-01 | North America | 225208 |
14 | 2021-05-01 | Rest of World | 81959 |
15 | 2021-06-01 | Europe | 119218 |
16 | 2021-06-01 | North America | 213772 |
17 | 2021-06-01 | Rest of World | 81045 |
18 | 2021-07-01 | Europe | 93324 |
19 | 2021-07-01 | North America | 199248 |
20 | 2021-07-01 | Rest of World | 73201 |
21 | 2021-08-01 | Europe | 94277 |
22 | 2021-08-01 | North America | 213809 |
23 | 2021-08-01 | Rest of World | 75746 |
24 | 2021-09-01 | Europe | 135679 |
25 | 2021-09-01 | North America | 260938 |
26 | 2021-09-01 | Rest of World | 94296 |
27 | 2021-10-01 | Europe | 160872 |
28 | 2021-10-01 | North America | 248606 |
29 | 2021-10-01 | Rest of World | 102011 |
# Interactive line chart of monthly visits, one line per gross region,
# with a log-scaled y axis and a tooltip showing region, month and visit count.
alt.Chart(gross).mark_line().encode(
    color=alt.Color(
        "Gross Region", sort={"encoding": "y", "order": "descending"}
    ),
    # The x channel takes an alt.X definition (alt.Y is for the y channel).
    x=alt.X("date", title="Month"),
    y=alt.Y(
        "nb_visits",
        title="Monthly visits",
        scale=alt.Scale(type="log"),
    ),
    tooltip=[
        "Gross Region",
        "date",
        "nb_visits",
    ],
).interactive()