#!/usr/bin/env python
# coding: utf-8

# This notebook was prepared by [Algorithmia](algorithmia.com). Source and license info is on [GitHub](https://github.com/donnemartin/data-science-ipython-notebooks).

# # Algorithmia
#
# Reference: [Algorithmia Documentation](http://docs.algorithmia.com/)
#
# Table of Contents:
# 1. Installation
# 2. Authentication
# 3. Face Detection
# 4. Content Summarizer
# 5. Latent Dirichlet Allocation
# 6. Optical Character Recognition

# # 1. Installation
#
# You need to have the `algorithmia` package (version 0.9.3) installed for this notebook.
#
# You can install the package using the pip package manager:

# In[ ]:

# Shell command, not Python: run it in a terminal before executing this script.
# pip install algorithmia==0.9.3

# In[1]:

import pprint

import Algorithmia

# Shared pretty-printer used to display the nested results returned by the
# LDA and OCR sections below.
pp = pprint.PrettyPrinter(indent=2)

# # 2. Authentication
#
# You only need your Algorithmia API Key to run the following commands.

# In[2]:

API_KEY = 'YOUR_API_KEY'

# Create a single client instance; every section below reuses it.
client = Algorithmia.client(API_KEY)

# # 3. Face Detection
#
# Uses a pretrained model to detect faces in a given image.
#
# Read more about Face Detection [here](https://algorithmia.com/algorithms/opencv/FaceDetection)

# In[3]:

from IPython.display import Image

face_url = 'https://s3.amazonaws.com/algorithmia-assets/data-science-ipython-notebooks/face.jpg'

# Sample Face Image
Image(url=face_url)

# In[4]:

# The module-level key is what the Algorithmia.file(...) calls below use.
Algorithmia.apiKey = 'Simple ' + API_KEY

# Algorithm input: [source image URL, data API path for the result image].
face_input = [face_url, "data://.algo/temp/face_result.jpg"]

algo = client.algo('opencv/FaceDetection/0.1.8')
algo.pipe(face_input)

# The result image is stored under another algorithm's name because
# FaceDetection calls ObjectDetectionWithModels.
result_image_data_api_path = '.algo/opencv/ObjectDetectionWithModels/temp/face_result.jpg'

# Text file with the coordinates of the detected face region.
result_coord_data_api_path = '.algo/opencv/ObjectDetectionWithModels/temp/face_result.jpgrects.txt'

result_file = Algorithmia.file(result_image_data_api_path).getBytes()
result_coord = Algorithmia.file(result_coord_data_api_path).getString()

# Show Result Image
Image(data=result_file)

# In[5]:

# Show detected face region coordinates
print('Detected face region coordinates: ' + result_coord)

# # 4. Content Summarizer
#
# SummarAI is an advanced content summarizer with the option of generating context-controlled summaries. It is based on award-winning patented methods related to artificial intelligence and vector space developed at Lawrence Berkeley National Laboratory.

# In[6]:

# Get a Wikipedia article as content
wiki_article_name = 'Technological Singularity'
algo = client.algo('web/WikipediaParser/0.1.0')
wiki_page_content = algo.pipe(wiki_article_name)['content']
print('Wikipedia article length: ' + str(len(wiki_page_content)))

# In[7]:

# Summarize the Wikipedia article
algo = client.algo('SummarAI/Summarizer/0.1.2')
# NOTE(review): on Python 3, .encode('utf-8') yields bytes rather than str;
# confirm the client accepts that before running this cell on Python 3.
summary = algo.pipe(wiki_page_content.encode('utf-8'))
print('Wikipedia generated summary length: ' + str(len(summary['summarized_data'])))
print(summary['summarized_data'])

# # 5. Latent Dirichlet Allocation
#
# This algorithm takes a group of documents (anything that is made up of text), and returns a number of topics (which are made up of a number of words) most relevant to these documents.
#
# Read more about Latent Dirichlet Allocation [here](https://algorithmia.com/algorithms/nlp/LDA)

# In[8]:

# Get up to 20 random Wikipedia articles
algo = client.algo('web/WikipediaParser/0.1.0')
random_wiki_article_names = algo.pipe({"random": 20})

random_wiki_articles = []

for article_name in random_wiki_article_names:
    try:
        article_content = algo.pipe(article_name)['content']
    except Exception as err:
        # Best-effort scrape: an article that cannot be fetched or parsed is
        # skipped (which is why the result is "up to" 20), but the failure is
        # reported instead of being silently swallowed.
        print('Skipping article %r: %s' % (article_name, err))
    else:
        random_wiki_articles.append(article_content)

print('Number of Wikipedia articles scraped: ' + str(len(random_wiki_articles)))

# In[9]:

# Find topics from 20 random Wikipedia articles
algo = client.algo('nlp/LDA/0.1.0')

lda_input = {"docsList": random_wiki_articles, "mode": "quality"}

topics = algo.pipe(lda_input)

pp.pprint(topics)

# # 6. Optical Character Recognition
#
# Recognize text in your images.
#
# Read more about Optical Character Recognition [here](https://algorithmia.com/algorithms/tesseractocr/OCR)

# In[10]:

from IPython.display import Image

businesscard_url = 'https://s3.amazonaws.com/algorithmia-assets/data-science-ipython-notebooks/businesscard.jpg'

# Sample Image
Image(url=businesscard_url)

# In[11]:

# "src" is the image to read; the "hocr" entries are passed through as
# options to the OCR algorithm, including the whitelist of characters.
ocr_input = {
    "src": businesscard_url,
    "hocr": {
        "tessedit_create_hocr": 1,
        "tessedit_pageseg_mode": 1,
        "tessedit_char_whitelist": "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-@/.,:()",
    },
}

algo = client.algo('tesseractocr/OCR/0.1.0')
pp.pprint(algo.pipe(ocr_input))