#!/usr/bin/env python
# coding: utf-8
"""Scrape fullerene isomer .xyz structure files from nanotube.msu.edu.

Walks the isomer index page, follows every ``fullerene.php?C...`` link,
downloads each ``.xyz`` structure file once, then moves the downloaded
files into one folder per formula (the part of the filename before the
first ``-``).

Based on: https://towardsdatascience.com/how-to-web-scrape-with-python-in-4-minutes-bc49186a8460
"""

import os
import shutil
import time
import urllib.request
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "http://www.nanotube.msu.edu/fullerene/"
INDEX_URL = BASE_URL + "fullerene-isomers.html"


def fetch_soup(url):
    """Fetch *url* and return its parsed HTML.

    Raises requests.HTTPError on a non-2xx response instead of silently
    parsing an error page.
    """
    response = requests.get(url)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def download_structures():
    """Follow every isomer link on the index page and download its .xyz files.

    Prints each link it visits and each file it downloads; sleeps between
    requests to stay polite to the server.
    """
    index_soup = fetch_soup(INDEX_URL)
    for link in index_soup.find_all("a"):
        time.sleep(1)  # throttle: one index link per second
        # .get() returns None for anchors without an href attribute,
        # where link["href"] would raise KeyError.
        href = link.get("href")
        print("\n--->", href)
        if href is None or not href.startswith("fullerene.php?C"):
            continue
        page_soup = fetch_soup(BASE_URL + href)
        seen = set()  # filenames already fetched from this page
        for structure in page_soup.find_all("a"):
            s_href = structure.get("href")
            if not s_href or not s_href.endswith(".xyz"):
                continue
            filename = s_href.split("/")[-1]
            if filename in seen:
                continue  # same file linked more than once on the page
            seen.add(filename)
            print("Download: ", filename)
            # urljoin resolves "./..."-style relative links correctly;
            # the original `baseurl + s_href[1:]` assumed a leading ".".
            urllib.request.urlretrieve(urljoin(BASE_URL, s_href), filename)
            time.sleep(1)  # throttle downloads


def sort_into_folders():
    """Move each downloaded ``<formula>-*.xyz`` file into a ``<formula>/`` folder."""
    for name in os.listdir():
        if name.endswith(".xyz"):
            folder = name.split("-")[0]
            # exist_ok avoids the check-then-create race of exists()+mkdir().
            os.makedirs(folder, exist_ok=True)
            shutil.move(name, folder)


if __name__ == "__main__":
    download_structures()
    sort_into_folders()