Each date directory on the server holds four files for a given tile: the HDF data file itself, two JPEG browse images, and an XML metadata file, e.g.:

http://e4ftl01.cr.usgs.gov/MODIS_Composites/MOTA/MCD15A2.005/2013.01.17/MCD15A2.A2013017.h18v03.005.2013026065052.hdf
http://e4ftl01.cr.usgs.gov/WORKING/BRWS/Browse.001/2013.01.26/BROWSE.MCD15A2.A2013017.h18v03.005.2013026065052.1.jpg
http://e4ftl01.cr.usgs.gov/WORKING/BRWS/Browse.001/2013.01.26/BROWSE.MCD15A2.A2013017.h18v03.005.2013026065052.2.jpg
http://e4ftl01.cr.usgs.gov/MODIS_Composites/MOTA/MCD15A2.005/2013.01.17/MCD15A2.A2013017.h18v03.005.2013026065052.hdf.xml

First, pull the directory listing for the product and have a look at it:

import urllib2

url_base = 'http://e4ftl01.cr.usgs.gov/MODIS_Composites/MOTA/MCD15A2.005'
response = urllib2.urlopen(url_base)
html = response.read()

# print the first 30 lines
html.split('\n')[:30]

The lines we want, the date sub-directories, all contain the string [DIR]. Note that str.find() returns -1 when the substring is not found (and -1 is truthy), so we must test against -1 explicitly:

dirs = []
for line in html.split('\n'):
    if line.find('[DIR]') != -1:
        dirs.append(line)

# or more succinctly
dirs = [line for line in html.split('\n') if line.find('[DIR]') != -1]
dirs[:3]

The first [DIR] entry is the 'Parent Directory' link, so we drop it with a [1:] slice:

dirs = [line for line in html.split('\n') if line.find('[DIR]') != -1][1:]
dirs[:3]

Each remaining line contains an href with the date sub-directory name, which we can pull out with a couple of split() calls (a more defensive regular-expression version is sketched after the download loop below):

print dirs[1]
print dirs[1].split('href="')[1]
print dirs[1].split('href="')[1].split('/">')[0]

dirs = [line.split('href="')[1].split('/">')[0] \
        for line in html.split('\n') if line.find('[DIR]') != -1][1:]
# print the first 10
dirs[:10]

The directory names have the form YYYY.MM.DD, so a further split on '.' gives year, month and day fields. Loading these into a numpy array lets us examine and mask them conveniently:

import numpy as np

dirs = np.array([line.split('href="')[1].split('/">')[0].split('.') \
                 for line in html.split('\n') if line.find('[DIR]') != -1][1:])
dirs[:10]

all_years = np.sort(np.unique(dirs[:,0]))
all_months = np.sort(np.unique(dirs[:,1]))
all_days = np.sort(np.unique(dirs[:,2]))
all_years, all_months, all_days

To select all dates for a given year, build a mask on the year field:

import urllib2

url_base = 'http://e4ftl01.cr.usgs.gov/MODIS_Composites/MOTA/MCD15A2.005'
response = urllib2.urlopen(url_base)
html = response.read()

dirs = np.array([line.split('href="')[1].split('/">')[0] \
                 for line in html.split('\n') if line.find('[DIR]') != -1][1:])
years = np.array([i.split('.')[0] for i in dirs])

# year mask
year = '2012'
mask = (year == years)
sub_dirs = dirs[mask]
print sub_dirs

# test with the first one
this_date = sub_dirs[0]
url_date = url_base + '/' + this_date
print url_date

response1 = urllib2.urlopen(url_date)
html1 = response1.read()
# print the first 21 lines
html1.split('\n')[:21]

Within a date directory, we want only the lines that mention our tile:

tile = 'h18v03'
lines = [line for line in html1.split('\n') if line.find(tile) != -1]
lines

tile = 'h18v03'
hdf_lines = [i for i in [line for line in html1.split('\n') \
             if line.find(tile) != -1] if i.find('.hdf"') != -1]
hdf_lines

The filename sits between <a href=" and "> in the matching line:

hdf_lines[0].split('<a href="')[1].split('">')[0]

tile = 'h18v03'
hdf_lines = [i for i in [line for line in html1.split('\n') \
             if line.find(tile) != -1] if i.find('.hdf"') != -1]
hdf_file = hdf_lines[0].split('<a href="')[1].split('">')[0]

Putting this all together, we can build the full list of file URLs for a given year and tile and save it to a text file:

year = '2012'
tile = 'h17v03'
hdf_files = []

import urllib2

# base URL for the product
url_base = 'http://e4ftl01.cr.usgs.gov/MODIS_Composites/MOTA/MCD15A2.005'
response = urllib2.urlopen(url_base)
html = response.read()

dirs = np.array([line.split('href="')[1].split('/">')[0] \
                 for line in html.split('\n') if line.find('[DIR]') != -1][1:])

# identify years
years = np.array([i.split('.')[0] for i in dirs])

# year mask
mask = (year == years)
sub_dirs = dirs[mask]

for this_date in sub_dirs:
    url_date = url_base + '/' + this_date
    print url_date
    response1 = urllib2.urlopen(url_date)
    html1 = response1.read()
    hdf_lines = [i for i in [line for line in html1.split('\n') \
                 if line.find(tile) != -1] if i.find('.hdf"') != -1]
    hdf_file = url_date + '/' + hdf_lines[0].split('<a href="')[1].split('">')[0]
    hdf_files.append(hdf_file + '\n')

f = open('files/data/lai_list.txt','w')
f.writelines(hdf_files)
f.close()

Finally, read the list back and download each file. The HDF files are binary, so the output file must be opened in 'wb' mode:

import urllib2

f = open('files/data/lai_list.txt','r')
hdf_files = f.readlines()
f.close()

for url in hdf_files:
    url = url.strip()
    print url
    response = urllib2.urlopen(url)
    ofile = 'files/data/' + url.split('/')[-1]
    # binary data, so write in 'wb' mode
    f = open(ofile,'wb')
    f.write(response.read())
    f.close()
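As flagged above, the chained split() calls are brittle: they raise an IndexError if the server reformats its listing even slightly. A sketch of a more defensive version using a regular expression, assuming the listing links keep the href="YYYY.MM.DD/" form seen in the output above; because the pattern only matches date-like names, the 'Parent Directory' entry never needs special-casing:

import re
import urllib2

url_base = 'http://e4ftl01.cr.usgs.gov/MODIS_Composites/MOTA/MCD15A2.005'
html = urllib2.urlopen(url_base).read()

# match links such as href="2013.01.17/": the trailing slash marks a directory
date_dirs = re.findall(r'href="(\d{4}\.\d{2}\.\d{2})/"', html)
date_dirs[:10]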
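Note also that urllib2 exists only in Python 2; in Python 3 it was folded into urllib.request. A minimal sketch of the same download loop for Python 3 (with the caveat that these NASA servers may now sit behind Earthdata login, in which case a plain urlopen() will no longer be sufficient):

from urllib.request import urlopen

with open('files/data/lai_list.txt') as f:
    for url in f:
        url = url.strip()
        print(url)
        ofile = 'files/data/' + url.split('/')[-1]
        # read() returns bytes in Python 3, so write in binary mode
        with open(ofile, 'wb') as out:
            out.write(urlopen(url).read())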
Now use gdalinfo to look at the metadata of one of the downloaded files:

!gdalinfo files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf | head -20

%%bash
# keep only the lines that contain BOUNDINGCOORDINATE
file=files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf
gdalinfo $file | grep BOUNDINGCOORDINATE

The script below reprojects the Lai_1km subdataset to an Albers equal-area projection at 1000 m resolution, then converts the result to a GIF for easy viewing (how to discover the subdataset name is shown at the end of this section):

%%bash
# a bash script
# set the variable file to the filename for convenience
file=files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf

# delete the output file if it exists
rm -f files/data/output_file.tif

# reproject the data
gdalwarp -of GTiff \
    -t_srs '+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=clrk66 +units=m +no_defs' \
    -tr 1000 1000 \
    'HDF4_EOS:EOS_GRID:'${file}':MOD_Grid_MOD15A2:Lai_1km' files/data/output_file.tif

# convert to gif for viewing
gdal_translate -outsize 30% 30% -of GIF \
    files/data/output_file.tif files/data/output_file.gif
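The subdataset string passed to gdalwarp above ('HDF4_EOS:EOS_GRID:...') does not have to be guessed: gdalinfo on the HDF container prints a SUBDATASET_n_NAME line for each grid it holds, so a grep is enough to list the valid names:

%%bash
# list the subdatasets in the HDF container; the NAME strings are
# exactly what gdalwarp and gdal_translate expect as input
file=files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf
gdalinfo $file | grep SUBDATASET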
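The same reprojection can also be driven from Python through the GDAL bindings rather than by shelling out. This is a minimal sketch, assuming GDAL 2.1 or later (where the gdal.Warp wrapper was introduced):

from osgeo import gdal

fname = 'files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf'
sds = 'HDF4_EOS:EOS_GRID:' + fname + ':MOD_Grid_MOD15A2:Lai_1km'

aea = ('+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 '
       '+x_0=0 +y_0=0 +ellps=clrk66 +units=m +no_defs')

# equivalent of the gdalwarp call above: reproject the LAI subdataset
# to Albers equal area at 1000 m resolution
gdal.Warp('files/data/output_file.tif', sds,
          format='GTiff', dstSRS=aea, xRes=1000, yRes=1000)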