#!/usr/bin/env python # coding: utf-8 # In[1]: import requests, pandas as pd, numpy as np from requests import session from bs4 import BeautifulSoup # In[2]: url='http://www.omnibus.ro/index.php/hu/szekelyfoldi-top-listak/arbevetel/also-haromszek-2015-2018#oldal' r = requests.get(url) for c in r.cookies: print(c.name, c.value) # In[23]: url='http://www.omnibus.ro/index.php/hu/szekelyfoldi-top-listak/kozbirtokossag/top25-kozbirtokossag-2019#oldal' with session() as c: response = c.get(url) #print(response.headers) #print(response.text) df=pd.read_html(response.text)[1] df.columns=list(df.loc[0]) df=df.loc[2:] #df=df[df.columns[1:-1]] df=df.loc[list(df.index)[:-1]] #df['nr']=df.index soup = BeautifulSoup(response.content) links=soup.findAll('table')[3].findAll('a') # In[25]: coords=[] kws=[] cms=[] for i in range(len(links)): print(i,) r=requests.get(links[i]['href']) g=repr(r.content) coord_start=g.find('GLatLng') coord_end=coord_start+g[coord_start:].find(')') coord=g[coord_start+len('GLatLng')+1:coord_end].split(',') kw_start=g.find('