#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import pandas as pd import numpy as np from __future__ import division import itertools import matplotlib.pyplot as plt import seaborn as sns import logging logger = logging.getLogger() # 16 Greedy Algorithms # =========== # # **intent**: it makes a locally optimal choice in the hope that the choice will lead to a globally optimal solution. # ### 16.1 An activity-selection problem # # **Given**: # an activity $a_i$ has a start and finish time $[s_i, f_i)$. # activities set $S = \{a_1, a_2, \dotsc, a_n\}$, ordered by $f_i$. # # $a_i$ and $a_j$ are compatible if their intervals don't overlap. # # **Problem**: # We wish to select a maximum-size subsets of mutually compatible activities. # In[2]: # eg raw_data = [ [1, 3, 0, 5, 3, 5, 6, 8, 8, 2, 12], [4, 5, 6, 7, 9, 9, 10, 11, 12, 14, 16] ] df = pd.DataFrame(raw_data, index=['s', 'f'], columns=xrange(1,12,1)) df # #### Dynamic Programming # denote: $S_{ij} = \{a_{i+1}, \dotsc, a_{j-1}\}$. # # Suppose $a_k$ in an optimal solution, then the left two subproblems: $S_{ik}$ and $S_{kj}$, whose optimal solution should be within the optimal solution of $S_{ij}$. # # \begin{equation} # c[i,j] = \begin{cases} # 0 & \quad \text{ if } S_{ij} = \emptyset \\ # max_{a_k \in S_{ij}} c[i,k] + c[k,j] + 1 & \quad \text{ otherwise} # \end{cases} # \end{equation} # In[3]: #todo # #### Making the greedy choice # Intuition: # we should choose an activity that leaves the resource available for as many other activities as possible. # Hence, the first one should be the activity in $S$ with the earliest finish time. # # ##### Is the greedy choice always part of some optimal solution? # # **Theorem 16.1**: # Let $S_k = \{a_i \in S: s_i \geq f_k\}$. # We have $a_m$ s.t. $f_m = min(f_i : a_i \in S)$ # Then $a_m$ must be included in some maximum-size subset of mutually compatible activities of $S_k$. 
# Proof: (by construction)
#
# Let $A_k$ be a maximum-size subset, and $a_j$ s.t. $f_j = \min(f_i: a_i \in A_k)$.
# Because $f_m \leq f_j$,
# we can replace $a_j$ with $a_m$, namely, $A'_k = (A_k - a_j) \cup a_m$.
# Then we get a new maximum-size subset $A'_k$ which includes $a_m$.
#
# ##### A recursive greedy algorithm
# tail recursive
#
# ##### An iterative greedy algorithm

# In[4]:

df

# In[5]:

def greedy_activity_selector(df):
    """Greedily select a maximum-size set of mutually compatible activities.

    df: DataFrame with rows 's' (start times) and 'f' (finish times), whose
        integer columns 1..n label activities sorted by finish time.
    Returns the list of selected activity labels, in selection order.
    """
    n = df.shape[1] + 1
    k = 1    # index of the most recently selected activity
    A = [k]  # the earliest-finishing activity is always a safe greedy choice
    # range() works on both Python 2 and 3; xrange was removed in Python 3.
    for m in range(2, n):
        # a_m is compatible with a_k iff it starts no earlier than a_k finishes
        if df.loc['s', m] >= df.loc['f', k]:
            A.append(m)
            k = m
    return A

greedy_activity_selector(df)


# ### 16.2 Elements of the greedy strategy
# ###### design
# 1. Cast the optimization problem as one in which we make a choice and are left with one subproblem to solve.
#
# 2. Prove that there is always an optimal solution to the original problem that makes the greedy choice, so that the greedy choice is always safe.
#
# 3. Demonstrate optimal substructure, and combine solutions.
#
# Beneath every greedy algorithm, there is almost always a more cumbersome dynamic-programming solution.
#
# **Two key ingredients**: greedy-choice property and optimal substructure.
#
#
# ##### Greedy-choice property
# idea: we can assemble a globally optimal solution by making locally optimal (greedy) choices.
#
# Dynamic programming: solves the subproblems before making the first choice. $\to$ bottom-up, iteration
# Greedy algorithm: makes its first choice before solving any subproblems. $\to$ top-down, recursive
#
# + usually more efficient without considering a wider set of choices.
#
#
# ##### Optimal substructure
# A problem exhibits optimal substructure if an optimal solution to the problem contains within it optimal solutions to subproblems.
#
# namely, an optimal solution to the subproblem, combined with the greedy choice already made, yields an optimal solution to the original problem.
#
#
# ##### Greedy versus Dynamic programming
# 1. 0-1 knapsack problem:
# Given: A thief robbing a store finds $n$ items.
# The $i$th item is worth $v_i$ dollars and weighs $w_i$ pounds.
# The thief can carry at most $W$ pounds in his knapsack.
# For an item, the thief must either take it or leave it behind.
#
# Problem: Which combination of items should the thief choose to take as valuable a load as possible?
#
# 2. fractional knapsack problem:
# diff: For an item, the thief can take fractions of items.

# ### 16.3 Huffman codes
# ##### Prefix codes
# **prefix codes**: codes in which no codeword is also a prefix of some other codeword.
# $\implies$ unambiguous.
#
# + A prefix code can always achieve the **optimal data compression** among any character code.
#
# + An optimal code is always represented by a **full** binary tree.
# The number of bits required to encode a file is thus: $$B(T) = \displaystyle\sum_{c \in C} c.\text{freq} \cdot d_T(c)$$ which we define as the **cost** of the tree $T$.
#
#
# #### Constructing a Huffman code
# The algorithm uses a min-priority queue $Q$, keyed on the _freq_ attribute, to identify the two least-frequent objects to merge together.
#
# running time: $O(n \lg n)$
#
#
# ##### Correctness of Huffman's algorithm
# ###### 1. greedy-choice property
# Proof by construction: transform an arbitrary optimal solution into an optimal solution of the desired form.
# ###### 2. optimal-substructure property
# Proof by contradiction.
#
# See the book and the penultimate notes for details.

# In[ ]: