import flame
import laff

def LU_unb_var5(A):

    ATL, ATR, \
    ABL, ABR  = flame.part_2x2(A, \
                               0, 0, 'TL')

    while ATL.shape[0] < A.shape[0]:

        A00,  a01,     A02,  \
        a10t, alpha11, a12t, \
        A20,  a21,     A22   = flame.repart_2x2_to_3x3(ATL, ATR, \
                                                       ABL, ABR, \
                                                       1, 1, 'BR')

        #------------------------------------------------------------#

        laff.invscal( alpha11, a21 )        #  a21 := a21 / alpha11
        laff.ger( -1.0, a21, a12t, A22 )    #  A22 := A22 - a21 * a12t

        #------------------------------------------------------------#

        ATL, ATR, \
        ABL, ABR  = flame.cont_with_3x3_to_2x2(A00,  a01,     A02,  \
                                               a10t, alpha11, a12t, \
                                               A20,  a21,     A22,  \
                                               'TL')

    flame.merge_2x2(ATL, ATR, \
                    ABL, ABR, A)


import numpy as np
import laff
import flame

# Form the matrices
m = 200

L = np.matrix( np.tril(np.random.rand(m,m), -1 ) + np.eye(m) )
U = np.matrix( np.triu(np.random.rand(m,m) ) + np.eye(m) * m )

A = L * U

# Create a large, random solution vector x
x = np.matrix( np.random.rand(m,1) )

# Store the original value of x
xold = np.matrix( np.copy( x ) )

# Create a solution vector b so that A x = b
b = A * x


# Later, we are also going to solve A x = b2.  Here we create that b2:
x2 = np.matrix( np.random.rand(m,1) )
b2 = A * x2

# Set the block size, we'll use it later. 
# Since we're just messing around with blocked algorithms,
# we set this totally arbitrarily
nb = 20

import flame
import laff

def LU_blk_var5(A, nb_alg):

    ATL, ATR, \
    ABL, ABR  = flame.part_2x2(A, \
                               0, 0, 'TL')

    while ATL.shape[0] < A.shape[0]:

        block_size = min(ABR.shape[0], nb_alg)

        A00, A01, A02, \
        A10, A11, A12, \
        A20, A21, A22  = flame.repart_2x2_to_3x3(ATL, ATR, \
                                                 ABL, ABR, \
                                                 block_size, block_size, 'BR')

        #------------------------------------------------------------#

        LU_unb_var5( A11 )
        laff.trsm( 'Lower triangular', 'No transpose', 'Unit diagonal', A11, A12 )
        laff.trsm( 'Upper triangular', 'Transpose', 'Nonunit diagonal', A11, A21 )
        laff.gemm( -1.0, A21, A12, 1.0, A22 )

        #------------------------------------------------------------#

        ATL, ATR, \
        ABL, ABR  = flame.cont_with_3x3_to_2x2(A00, A01, A02, \
                                               A10, A11, A12, \
                                               A20, A21, A22, \
                                               'TL')

    flame.merge_2x2(ATL, ATR, \
                    ABL, ABR, A)

# recreate matrix A
A = L * U

# recreate the right-hand side
b = A * xold

# apply blocked LU to matrix A
# remember nb holds our block size
LU_blk_var5( A, nb )

# Compare A to the original L and U matrices
print( 'Maximum value of (A - L - U) after factorization' )
print( np.max( np.abs( A - np.tril(L,-1) - U ) ) ) #The "-1" ignores the diagonal

def Solve( A, b ):
    
    # insert appropriate calls to routines you have written here!
    # remember the variable nb holds our block size
    LU_blk_var5( A, 1 )
    laff.trsv( 'Lower triangular', 'No transpose', 'Unit diagonal', A, b )
    laff.trsv( 'Upper triangular', 'No transpose', 'Nonunit diagonal', A, b )
    

# just to be sure, let's start over.  We'll recreate A, x, and b, run all the routines, and
# then compare the updated b to the original vector x.

A = L * U
b = A * x

Solve( A, b )

print( '2-Norm of Updated b - original x' )
print( np.linalg.norm(b - x) )


# insert appropriate calls here.

laff.trsv( 'Lower triangular', 'No transpose', 'Unit diagonal', A, b2 )
laff.trsv( 'Upper triangular', 'No transpose', 'Nonunit diagonal', A, b2 )

print( '2-Norm of updated b2 - original x2' )
print( np.linalg.norm(b2 - x2) )