def automatic_readability_index(n_chars, n_words, n_sents):
    """Return the Automated Readability Index (ARI) of a text.

    The score is computed as::

        4.71 * (n_chars / n_words) + 0.5 * (n_words / n_sents) - 21.43

    Args:
        n_chars: Total number of characters in the text.
        n_words: Total number of words in the text.
        n_sents: Total number of sentences in the text.

    Returns:
        The ARI score as a float.

    Raises:
        ZeroDivisionError: If ``n_words`` or ``n_sents`` is zero.
    """
    # float() forces true division even where "/" on two ints would truncate.
    chars_per_word = float(n_chars) / n_words
    words_per_sentence = float(n_words) / n_sents
    return 4.71 * chars_per_word + 0.5 * words_per_sentence - 21.43

# do not modify the code below, it is for testing your answer only!
# it should output True if you did well
# Check: 300 characters, 40 words and 10 sentences must score 15.895 (within 0.001).
print(abs(automatic_readability_index(300, 40, 10) - 15.895) < 0.001)

from pyhum.preprocessing import read_corpus

def extract_counts(sentences):
    """Count the characters, words and sentences in a tokenized text.

    Args:
        sentences: An iterable of sentences, each sentence being a sequence
            of word strings.

    Returns:
        A tuple ``(n_chars, n_words, n_sents)`` where ``n_chars`` is the sum
        of the lengths of all words (whitespace between words is not
        counted). An empty input yields ``(0, 0, 0)``.
    """
    n_chars = n_words = n_sents = 0
    for sentence in sentences:
        n_sents += 1
        n_words += len(sentence)
        n_chars += sum(len(word) for word in sentence)
    return n_chars, n_words, n_sents

# do not modify the code below, for testing only!
# Check: 2 sentences, 10 words and 53 word characters in total.
print(extract_counts(
    [["this", "was", "rather", "easy"], 
     ["please", "give", "me", "something", "more", "challenging"]]) == (53, 10, 2))

# Sample tokenized text (a list of sentences, each a list of words) reused by
# the checks further down.
sentences = [["this", "was", "rather", "easy"], 
             ["Please", "give", "me", "something", "more", "challenging"]]

# Chain the two helpers by hand: first the counts, then the ARI of those counts.
n_chars, n_words, n_sents = extract_counts(sentences)
print(automatic_readability_index(n_chars, n_words, n_sents))

def compute_ARI(sentences):
    """Return the Automated Readability Index of a tokenized text.

    Args:
        sentences: An iterable of sentences, each a sequence of word strings,
            as accepted by ``extract_counts``.

    Returns:
        The ARI score (float) of the text.

    Raises:
        ZeroDivisionError: If the text contains no words or no sentences.
    """
    n_chars, n_words, n_sents = extract_counts(sentences)
    return automatic_readability_index(n_chars, n_words, n_sents)
    
# do not modify the code below, it is for testing your answer only!
# it should output True if you did well
# NOTE(review): the `sentences` assigned above have counts (53, 10, 2); under
# the usual ARI formula (4.71 * chars/words + 0.5 * words/sents - 21.43) that
# scores about 6.033, not 4.442 -- presumably this check was written for a
# different `sentences` value; confirm.
print(abs(compute_ARI(sentences) - 4.442) < 0.001)

def compute_ARIs(directory):
    """Return a list with the ARI of every text found in *directory*.

    NOTE(review): assumes ``read_corpus(directory)`` yields one item per text
    and that each item is a list of tokenized sentences (the input that
    ``compute_ARI`` expects) -- TODO confirm against pyhum.preprocessing.

    Args:
        directory: Path of the directory handed to ``read_corpus``.

    Returns:
        A list of ARI scores, in the order ``read_corpus`` yields the texts.
    """
    return [compute_ARI(text) for text in read_corpus(directory)]

import matplotlib.pyplot as plt

# insert your code here

def predict_author(text, feature_database):
    """Predict who wrote this text.

    Runs the three pipeline stages in order: feature extraction, scoring
    against the database, and classification of the resulting scores.

    Args:
        text: Passed straight to ``extract_features``, which treats it as the
            name of the file to read.
        feature_database: Per-author feature counts handed to ``score``.

    Returns:
        Whatever ``classify`` returns for the computed scores.
    """
    features = extract_features(text)
    author_scores = score(features, feature_database)
    return classify(author_scores)

# Example author -> score mapping used to try out classify().
scores = {
    "Hermans": 0.15,
    "Voskuil": 0.55,
    "Reve": 0.2,
    "Mulisch": 0.18,
    "Claus": 0.02,
}

def classify(scores):
    """Return the author with the highest score.

    Args:
        scores: A mapping from author name to a numeric score (higher means
            more likely).

    Returns:
        The key of ``scores`` whose value is largest. On ties, the first such
        key in the mapping's iteration order is returned.

    Raises:
        ValueError: If ``scores`` is empty.
    """
    return max(scores, key=scores.get)
    
# Check: "Voskuil" has the highest score (0.55) in the example mapping.
print(classify(scores) == "Voskuil")

from pyhum.preprocessing import read_corpus_file, tokenize

def extract_features(filename):
    """Read the file *filename* and return its tokenised contents.

    NOTE(review): this calls `tokenise`, which is only imported (from
    `preprocess`) further down the file, while the import just above brings
    in `tokenize` from `pyhum.preprocessing` -- confirm which is intended.
    """
    raw_text = read_corpus_file(filename)
    return tokenise(raw_text)

from collections import defaultdict

# Nested counter: feature_database[author][feature] -> int. Both levels are
# created on first access, with counts starting at 0.
feature_database = defaultdict(lambda: defaultdict(int))

def extract_author(filename):
    """Return the author encoded in a corpus filename.

    Filenames are expected to look like ``<Author>-<title>.<ext>``, with or
    without leading directories.

    Args:
        filename: A bare filename or a path to a file.

    Returns:
        The part of the base name before the first hyphen. If the name has no
        hyphen, the base name without its extension is returned.
    """
    # Imported locally: the module-level `import os` sits further down the
    # file, after the first calls to this function.
    from os.path import basename, splitext

    stem = splitext(basename(filename))[0]
    return stem.split("-", 1)[0]

# do not modify the code below, it is for testing your answer only!
# it should output True if you did well
# The author must be found both for a bare filename and for a full path.
print(extract_author("Austen-emma.txt") == "Austen")
print(extract_author("/path/to/Austen-emma.txt") == "Austen")

from preprocess import tokenise

def update_counts(author, text, feature_database):
    """Add the features of *text* to the counts stored for *author*.

    Args:
        author: Key under which the counts are accumulated.
        text: Either a string, which is split on whitespace, or an iterable
            of already-extracted feature tokens.
        feature_database: Nested mapping ``author -> feature -> count``. It
            must create missing entries on access (for example
            ``defaultdict(lambda: defaultdict(int))``) and is updated in
            place.

    Returns:
        The same ``feature_database`` object, after the update.
    """
    # A plain string would otherwise be iterated character by character.
    tokens = text.split() if isinstance(text, str) else text
    author_counts = feature_database[author]
    for token in tokens:
        author_counts[token] += 1
    return feature_database

# do not modify the code below, for testing only!
# Start from an empty database and add one sentence for author "Anonymous".
feature_database = defaultdict(lambda: defaultdict(int))
feature_database = update_counts("Anonymous", "This was written with a lack of inspiration", 
                                 feature_database)
# Build the expected result by hand: one count per whitespace-separated word.
test_database = defaultdict(lambda: defaultdict(int))
for word in "This was written with a lack of inspiration".split():
    test_database["Anonymous"][word] += 1
# Prints True when both databases hold the same authors and counts.
print(sorted(feature_database.items()) == sorted(test_database.items()))

def add_file_to_database(filename, feature_database):
    """Add the features of one file to *feature_database*.

    The author is derived from *filename* via ``extract_author`` and the
    features via ``extract_features``; both are handed to ``update_counts``,
    whose return value is returned.
    """
    author = extract_author(filename)
    features = extract_features(filename)
    return update_counts(author, features, feature_database)

import os

def add_directory_to_database(directory, feature_database):
    """Add every file in *directory* to *feature_database*.

    Args:
        directory: Path of the directory holding the corpus files. Only
            regular files directly inside it are used; subdirectories are
            skipped.
        feature_database: Database passed to (and returned by)
            ``add_file_to_database`` for each file.

    Returns:
        The feature database after all files have been added.

    Raises:
        OSError: If *directory* cannot be listed.
    """
    # Sorted so the files are processed in a reproducible order.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if os.path.isfile(path):
            feature_database = add_file_to_database(path, feature_database)
    return feature_database

# Demonstration of floating-point underflow: keep multiplying a tiny number
# by another tiny number and print each result; the product ends up as 0.0.
x = 1e-17
for i in range(30):
    x *= 1e-15
    print(x)

from math import log

def log_probability(feature_counts, features_sum, n_features):
    """Return the natural log of an add-one smoothed relative frequency.

    Computes ``log((feature_counts + 1.0) / (features_sum + n_features))``.

    Args:
        feature_counts: Count of the feature of interest.
        features_sum: Sum of all feature counts.
        n_features: Number of features, added to the denominator.

    Returns:
        The log probability as a float.
    """
    smoothed_count = feature_counts + 1.0
    smoothed_total = features_sum + n_features
    return log(smoothed_count / smoothed_total)

def score(features, feature_database):
    """Predict who wrote the document on the basis of the corpus.

    For every author in the database, sums the add-one smoothed log
    probabilities (see ``log_probability``) of all items in *features*,
    repeated items included.

    Args:
        features: Iterable of features extracted from the document.
        feature_database: Nested mapping ``author -> feature -> count``. It
            is only read, never modified.

    Returns:
        A ``defaultdict(float)`` mapping each author to the summed log
        probability; higher (closer to zero) means more likely.
    """
    scores = defaultdict(float)
    # Materialise once so a generator can be reused for every author.
    features = list(features)
    # The number of features is the number of distinct features seen for any
    # author in the database.
    vocabulary = set()
    for author_counts in feature_database.values():
        vocabulary.update(author_counts)
    n_features = len(vocabulary)
    for author, author_counts in feature_database.items():
        features_sum = sum(author_counts.values())
        total = 0.0
        for feature in features:
            # .get() avoids inserting unseen features into a defaultdict.
            total += log_probability(
                author_counts.get(feature, 0), features_sum, n_features)
        scores[author] = total
    return scores

# do not modify the code below, for testing your answer only! 
# It should return True if you did well!
# Document features; "the" occurs twice and "be" is in neither author's counts.
features = ["the", "a", "the", "be", "book"]
# Two-author toy database with three distinct features: "the", "a", "book".
feature_database = defaultdict(lambda: defaultdict(int))
feature_database["A"]["the"] = 2
feature_database["A"]["a"] = 5
feature_database["A"]["book"]= 1
feature_database["B"]["the"] = 5
feature_database["B"]["a"] = 1
feature_database["B"]["book"] = 6
# The expected value corresponds to n_features = 3 and to summing
# log_probability over every item of `features`, repeats included.
print(abs(dict(score(features, feature_database))["A"] - -7.30734) < 0.001)

# first define the feature_database
feature_database = defaultdict(lambda: defaultdict(int))
# Fill it from the training directory, then predict the author of one test file.
feature_database = add_directory_to_database("data/gutenberg/training", feature_database)
print(predict_author("data/gutenberg/testing/milton-poetical.txt", feature_database))

def test_from_corpus(directory, feature_database):
    """Predict the author of every file in *directory*.

    Args:
        directory: Path of the directory holding the test files. Only regular
            files directly inside it are used; subdirectories are skipped.
        feature_database: Trained database passed to ``predict_author``.

    Returns:
        A list of ``(actual_author, predicted_author)`` tuples, one per file,
        where the actual author comes from ``extract_author`` applied to the
        file's path. The list is empty if the directory holds no files.

    Raises:
        OSError: If *directory* cannot be listed.
    """
    results = []
    # Sorted so the results come out in a reproducible order.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue
        actual = extract_author(path)
        predicted = predict_author(path, feature_database)
        results.append((actual, predicted))
    return results

def analyze_results(results):
    """Return the accuracy of a list of predictions.

    Args:
        results: Iterable of 2-item pairs; a pair counts as correct when its
            two items are equal.

    Returns:
        The fraction of correct pairs as a float between 0.0 and 1.0, or 0.0
        when there are no pairs at all.
    """
    # Materialise so a generator can be both counted and measured.
    results = list(results)
    if not results:
        return 0.0
    n_correct = sum(1 for first, second in results if first == second)
    return n_correct / float(len(results))

# do not modify the code below, for testing only!
# Three of the five pairs below have equal items, hence the expected 0.6.
print(analyze_results([("A", "A"), ("A", "B"), ("C", "C"), ("D", "C"), ("E", "E")]) == 0.6)

from IPython.core.display import HTML
def css_styling():
    """Load the custom stylesheet and wrap it in an IPython ``HTML`` object.

    Returns:
        ``HTML`` built from the contents of ``styles/custom.css``.

    Raises:
        IOError/OSError: If the stylesheet cannot be opened.
    """
    # `with` closes the file even if reading fails.
    with open("styles/custom.css", "r") as stylesheet:
        styles = stylesheet.read()
    return HTML(styles)
# Build the styling object; as a bare expression statement its return value
# is not stored anywhere.
css_styling()