Vectorisation in Python using NumPy

Published

October 11, 2019

import numpy as np 
import time 

# Random integer matrix: 50 rows, 1000 columns
a = np.random.randint(int(10e6), size=(50,1000))
print(np.shape(a))

# Random integer weights: a 50x1 column vector
w = np.random.randint(100, size=(50,1))
print(np.shape(w))
(50, 1000)
(50, 1)
#Vectorised version
t_start = time.time()
z = np.dot(w.T,a).T   # (1,50) x (50,1000) -> (1,1000), transposed to (1000,1)
t_stop = time.time()
print('Time taken: {} ms'.format(1000*(t_stop-t_start)))

#Non-vectorised version 
z_for = []
t_start = time.time()
for j in range(np.shape(a)[1]):
    _count = 0.0
    for i in range(np.shape(a)[0]):
        _count += w[i,0]*a[i,j]   # accumulate the weighted sum for column j
    z_for.append(_count)
t_stop = time.time()
print('Time taken for for-loop: {} ms'.format(1000*(t_stop-t_start)))

#Check the output 
print('Check sum: {}'.format(np.sum(np.asarray(z_for).reshape(np.shape(z))-z)))
Time taken: 0.3979206085205078 ms
Time taken for for-loop: 33.74624252319336 ms
Check sum: 0.0
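
As a side note, the same result can also be obtained with broadcasting plus a reduction instead of np.dot. This is just a sketch for comparison, and the name z_bcast is only illustrative:

#Broadcast-and-sum sketch of the same weighted column sums
z_bcast = (w * a).sum(axis=0, keepdims=True).T   # (50,1)*(50,1000) broadcasts to (50,1000)
print('Matches np.dot result: {}'.format(np.array_equal(z_bcast, z)))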
#Vectorised function evaluation 
#If I want to take the exponential of every value in the array
a = np.random.randint(10,size=(10,2))
#With for loops:
import math
exp_a = np.zeros(np.shape(a))
for j in range(np.shape(a)[1]):
    for i in range(np.shape(a)[0]):
        exp_a[i,j] = math.exp(a[i,j])
#Without a for loop 
exp_a_numpy = np.exp(a)   # np.exp applies the exponential element-wise to the whole array

#Other vectorized functions: 
# np.log(x)
# np.abs(x)
# np.maximum(x,0) -- computes element-wise maximum comparing to 0 
# x**2 for numpy array 
# 1/x for numpy array 
exp_a_numpy - exp_a
array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])
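
A quick sketch of the other element-wise functions listed above (x is just an illustrative array):

x = np.array([-2.0, -0.5, 1.0, 4.0])
print(np.abs(x))          # element-wise absolute value
print(np.maximum(x, 0))   # element-wise maximum against 0
print(x**2)               # element-wise square
print(1/x)                # element-wise reciprocal
print(np.log(np.abs(x)))  # element-wise log (of |x|, since log of a negative value is undefined)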
#Broadcasting 
food_cal = np.array([[56.0,0.0,4.4,68.0],
                     [1.2, 104, 52, 8.],
                     [1.8, 135.,99., 0.9]])
#Calculate % of calories from Carb, Protein, Fat for each food 
carb = np.array([food_cal[0,i]/np.sum(food_cal[:,i])*100 for i in range(4)])
protein = np.array([food_cal[1,i]/np.sum(food_cal[:,i])*100 for i in range(4)])
fat = np.array([food_cal[2,i]/np.sum(food_cal[:,i])*100 for i in range(4)])

cal = np.array([carb,protein,fat])
print(cal)
[[94.91525424  0.          2.83140283 88.42652796]
 [ 2.03389831 43.51464435 33.46203346 10.40312094]
 [ 3.05084746 56.48535565 63.70656371  1.17035111]]
#Andrew Ng's approach
cal = food_cal.sum(axis=0) 
#axis=0 sums vertically -- down each column
#axis=1 sums horizontally -- along each row 

print(cal)
[ 59.  239.  155.4  76.9]
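
For comparison, a quick sketch of summing along the other axis (row_totals is just an illustrative name):

row_totals = food_cal.sum(axis=1)    # one total per row: carb, protein, fat
print(row_totals, row_totals.shape)  # shape (3,)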
#Example of broadcasting here: 
#Here cal is broadcast from (1,4) to (3,4) to match food_cal
percentage = 100*food_cal/cal.reshape(1,4)
print(percentage)
[[94.91525424  0.          2.83140283 88.42652796]
 [ 2.03389831 43.51464435 33.46203346 10.40312094]
 [ 3.05084746 56.48535565 63.70656371  1.17035111]]
#More examples of broadcasting  
#Example 1 
A = np.linspace(1,5,5)
print(A.shape)
B = A+10.
print(A, B, B.shape)
# Here 10. was broadcast to match A's shape of (5,)
(5,)
[1. 2. 3. 4. 5.] [11. 12. 13. 14. 15.] (5,)
#Example 2
A = np.array([[1,2,3],
              [4,5,6]])
print(A.shape)
B = np.array([100,200,300])
print(B.shape)
C = A + B 
print(C.shape)
print(A,B)
print(C)
# Here B was broadcast from (3,) to (2,3)!
(2, 3)
(3,)
(2, 3)
[[1 2 3]
 [4 5 6]] [100 200 300]
[[101 202 303]
 [104 205 306]]

General principle

Applying an element-wise operation (+, -, *, /) between an (m,n) matrix and a (1,n) or (m,1) array leads to the smaller array being copied (broadcast) up to (m,n) before the computation is carried out.
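
A minimal sketch of the (m,1) case, which the examples above do not show directly (M and col are illustrative names):

M = np.arange(6).reshape(2,3)   # (2,3) matrix: [[0 1 2], [3 4 5]]
col = np.array([[10.],[20.]])   # (2,1) column vector
print(M + col)                  # col is copied across the 3 columns to (2,3) before adding
# Expected: [[10. 11. 12.]
#            [23. 24. 25.]]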

Good practices and tips


import numpy as np 
a = np.random.randn(5)
print(a)
[ 0.68281763 -1.3579685   0.99577659  0.31269709  0.595569  ]
print(a.shape)
(5,)

Here a is an array of rank 1. It is neither a row vector nor a column vector, which leads to some non-intuitive effects.

print(a.T)
[ 0.68281763 -1.3579685   0.99577659  0.31269709  0.595569  ]
print(np.dot(a,a.T))
3.7543713020122427

So, for consistency, it is recommended NOT to use rank-1 data structures like the one above, but instead to instantiate arrays with an explicit, known shape.

ALWAYS COMMIT TO MAKING DEFINED ROW AND COLUMN VECTORS

a1 = np.random.randn(5,1)
print(a1)
print(a1.shape)
[[-0.7474656 ]
 [-0.75790159]
 [ 0.30984002]
 [ 0.18874051]
 [-0.80470167]]
(5, 1)
print(a1.T)
[[-0.7474656  -0.75790159  0.30984002  0.18874051 -0.80470167]]

Here there are two square brackets, compared to the previous transpose of a, showing that in the case of a1 the transpose is a well-defined 1x5 row vector.

print(np.dot(a1,a1.T)) #Outer product 
[[ 0.55870482  0.56650536 -0.23159476 -0.14107704  0.60148682]
 [ 0.56650536  0.57441482 -0.23482825 -0.14304673  0.60988468]
 [-0.23159476 -0.23482825  0.09600084  0.05847936 -0.24932878]
 [-0.14107704 -0.14304673  0.05847936  0.03562298 -0.1518798 ]
 [ 0.60148682  0.60988468 -0.24932878 -0.1518798   0.64754478]]
assert(a1.shape==(5,1)) #Assertion statement to check the known size 
a = a.reshape((5,1))   # reshape the rank-1 array into an explicit column vector
print(a.shape)
(5, 1)
A = np.random.randn(4,3)
print(A)
[[ 0.22469294  0.78832742 -1.13148285]
 [-0.04070683 -0.74061401 -1.59838506]
 [ 0.12821164  0.72892812  0.4912876 ]
 [ 0.09323584  1.66090848  1.87905216]]
np.sum(A,axis=1,keepdims=True).shape
(4, 1)
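
As a final note, keepdims=True is one way to follow the advice above and avoid rank-1 arrays when reducing (the names below are illustrative):

row_sums = np.sum(A, axis=1)                       # shape (4,)  -- a rank-1 array
row_sums_kept = np.sum(A, axis=1, keepdims=True)   # shape (4,1) -- a well-defined column vector
print(row_sums.shape, row_sums_kept.shape)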