#!/usr/bin/env python
# coding: utf-8

# In[1]:


import keyring


# In[2]:


from getpass import getpass

# load the API token from the system keyring, prompting (and saving) on first run
token = keyring.get_password("readthedocs.org", "_api")
if not token:
    token = getpass("readthedocs.org token: ")
    keyring.set_password("readthedocs.org", "_api", token)


# In[3]:


import requests
import requests_cache
from yarl import URL


# In[4]:


# cached session, so re-running the notebook doesn't re-fetch every API page
s = requests_cache.CachedSession()
s.headers["Authorization"] = f"Token {token}"
readthedocs_api = URL("https://readthedocs.org/api/v3/")


# In[5]:


def list_projects(url=readthedocs_api / "projects"):
    """Yield all projects, following pagination via the 'next' link"""
    r = s.get(url)
    r.raise_for_status()
    resp = r.json()
    for project in resp["results"]:
        yield project
    if resp["next"]:
        yield from list_projects(resp["next"])


projects = list(list_projects())


# In[6]:


len(projects)


# In[16]:


projects[0]


# In[17]:


project_names = [p["slug"] for p in projects]


# In[9]:


from pathlib import Path
import json

from playwright.async_api import async_playwright

cookie_path = Path("cookies.json")
login_url = "https://readthedocs.org/accounts/login/?next=/dashboard/"
dashboard_url = "https://readthedocs.org/dashboard/"


async def login() -> list[dict]:
    """Login to readthedocs.org and save cookies"""
    if cookie_path.exists():
        with cookie_path.open() as f:
            # TODO: check if still valid (see the cookies_look_valid sketch below)
            return json.load(f)
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=False)
        page = await browser.new_page()
        await page.goto(login_url)
        print("Login to readthedocs.org ...")
        # wait (up to 2 minutes) for the user to finish logging in interactively
        await page.wait_for_url(dashboard_url + "*", timeout=120_000)
        cookies = await browser.contexts[0].cookies()
    print(f"Saving cookies to {cookie_path}")
    with cookie_path.open("w") as f:
        json.dump(cookies, f)
    return cookies


# In[10]:


cookies = await login()


# In[11]:


project_list = [
    "jupyterhub",
    "oauthenticator",
    "zero-to-jupyterhub",
    "jupyterhub-kubespawner",
]


# In[22]:


stats_dir = Path("stats")
stats_dir.mkdir(exist_ok=True)


async def download_stats(project_name: str):
    """Download the traffic and search analytics data for one project"""
    cookies = await login()
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=False)
        page = await browser.new_page()
        await browser.contexts[0].add_cookies(cookies)
        for kind in ("traffic", "search"):
            url = URL(dashboard_url) / project_name / f"{kind}-analytics"
            await page.goto(str(url))
            btn = page.get_by_text("Download all data", exact=True)
            async with page.expect_download() as download_info:
                await btn.click()
            download = await download_info.value
            dest = stats_dir / download.suggested_filename
            print(f"Downloading {dest}")
            await download.save_as(dest)


# In[23]:


# manually derived from the project list above
project_names = [
    "binderhub",
    "ipykernel",
    "ipyparallel",
    "ipython",
    "ipywidgets",
    "jupyter",
    "jupyter-client",
    "jupyter-console",
    "jupyter-core",
    "jupyter-docker-stacks",
    "jupyterhub",
    "jupyterhub-deploy-teaching",
    "jupyterhub-dockerspawner",
    "jupyterhub-grafana",
    "jupyterhub-kubespawner",
    "jupyterhub-python-repo-template",
    "jupyterhub-team-compass",
    "jupyterhub-traefik-proxy",
    "jupyterhub-tutorial",
    "jupyter-notebook",
    "jupyter-server",
    "jupyter-server-proxy",
    "jupyter-software-steering-council-team-compass",
    "ltiauthenticator",
    "mybinder-sre",
    "nbconvert",
    "nbdime",
    "nbformat",
    "nbgitpuller",
    "oauthenticator",
    "pytest-jupyterhub",
    "qtconsole",
    "repo2docker",
    "the-littlest-jupyterhub",
    "traitlets",
    "zero-to-jupyterhub",
]


# In[24]:


import asyncio

concurrency = 5


async def concurrent_call(semaphore, f, *args, **kwargs):
    """Limit concurrency, because too many playwrights can crash"""
    async with semaphore:
        return await f(*args, **kwargs)


# for project_name in project_names:
#     await download_stats(project_name)

# semaphore = asyncio.Semaphore(concurrency)
# await asyncio.gather(*[concurrent_call(semaphore, download_stats, project_name) for project_name in project_names])
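# In[ ]:


# A sketch for the "check if valid" TODO in login() above, not part of the
# original flow: a hypothetical helper that treats saved cookies as stale once
# any persistent cookie has passed its expiry. Playwright cookie dicts carry
# an "expires" unix timestamp, with -1 meaning a session cookie.

import time


def cookies_look_valid(cookies: list[dict]) -> bool:
    """Best-effort check: False if any persistent cookie has expired"""
    now = time.time()
    return all(c.get("expires", -1) == -1 or c["expires"] > now for c in cookies)

# login() could re-run the browser flow instead of returning the saved
# cookies whenever cookies_look_valid(saved) is False.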
# In[25]:


# download serially
for project_name in project_names:
    await download_stats(project_name)


# In[26]:


get_ipython().system('open stats')
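# In[ ]:


# A rough sketch of a next step: load everything just downloaded into one
# DataFrame. This assumes the downloads are CSV files (the notebook doesn't
# show their schema, so inspect a file before relying on particular columns).

import pandas as pd

frames = []
for path in sorted(stats_dir.glob("*.csv")):
    df = pd.read_csv(path)
    # remember which download each row came from
    df["source_file"] = path.name
    frames.append(df)

if frames:
    all_stats = pd.concat(frames, ignore_index=True)
    print(all_stats.head())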