# Vectorisation in python using numpy

Published

October 11, 2019

``````import numpy as np
import time

a = np.random.randint(10E6,size=(50,1000))
print(np.shape(a))

w = np.random.randint(100,size=(50,1))
print(np.shape(w))``````
``````(50, 1000)
(50, 1)``````
``````#Vectorisation
t_start = time.time()
z = np.dot(w.T,a).T
t_stop = time.time()
print('Time take: {} ms'.format(1000*(t_stop-t_start)))

#Non vectorized version
z_for = []
t_start = time.time()
for j in range(np.shape(a)[1]):
    _count = 0.0
    for i in range(np.shape(a)[0]):
        _count+=w[i,0]*a[i,j]
    z_for.append(_count)
t_stop = time.time()
print('Time take for for-loop: {} ms'.format(1000*(t_stop-t_start)))

#Check the output
print('Check sum: {}'.format(np.sum(np.asarray(z_for).reshape(np.shape(z))-z)))``````
``````Time take: 0.3979206085205078 ms
Time take for for-loop: 33.74624252319336 ms
Check sum: 0.0``````
``````#Valued function evaluation
#If I want to have exponential of different values in the array
a = np.random.randint(10,size=(10,2))
#With for loops:
import math
exp_a = np.zeros(np.shape(a))
for j in range(np.shape(a)[1]):
    for i in range(np.shape(a)[0]):
        exp_a[i,j] = math.exp(a[i,j])``````
``````#without for loop
exp_a_numpy = np.exp(a) #Vector already setup -- element-wise exponential

#Other vectorized functions:
# np.log(x)
# np.abs(x)
# np.maximum(x,0) -- computes element-wise maximum comparing to 0
# x**2 for numpy array
# 1/x for numpy array ``````
``exp_a_numpy - exp_a``
``````array([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]])``````
``````#Broadcasting
food_cal = np.array([[56.0,0.0,4.4,68.0],
[1.2, 104, 52, 8.],
[1.8, 135.,99., 0.9]])``````
``````#Calculate % of calories from Carb, Protein, Fat for each food
carb = np.array([food_cal[0,i]/np.sum(food_cal[:,i])*100 for i in range(4)])
protein = np.array([food_cal[1,i]/np.sum(food_cal[:,i])*100 for i in range(4)])
fat = np.array([food_cal[2,i]/np.sum(food_cal[:,i])*100 for i in range(4)])

cal = np.array([carb,protein,fat])
print(cal)``````
``````[[94.91525424  0.          2.83140283 88.42652796]
[ 2.03389831 43.51464435 33.46203346 10.40312094]
[ 3.05084746 56.48535565 63.70656371  1.17035111]]``````
``````#Andrew Ng's
cal = food_cal.sum(axis=0)
#AXIS = 0 is sum vertically -- along column
#AXIS = 1 is sum horizontally -- along row

print(cal)``````
``[ 59.  239.  155.4  76.9]``
``````#Example of broadcasting here:
#Here cal is BROADCAST from shape (1,4) to (3,4) to match food_cal
percentage = 100*food_cal/cal.reshape(1,4)
print(percentage)``````
``````[[94.91525424  0.          2.83140283 88.42652796]
[ 2.03389831 43.51464435 33.46203346 10.40312094]
[ 3.05084746 56.48535565 63.70656371  1.17035111]]``````
``````#More examples of broadcasting
#Example 1
A = np.linspace(1,5,5)
print(A.shape)
B = A+10.
print(A, B, B.shape)
# Here the scalar 10. was broadcast to shape (5,) to match A ``````
``````(5,)
[1. 2. 3. 4. 5.] [11. 12. 13. 14. 15.] (5,)``````
``````#Example 2
A = np.array([[1,2,3],
[4,5,6]])
print(A.shape)
B = np.array([100,200,300])
print(B.shape)
C = A + B
print(C.shape)
print(A,B)
print(C)
# Here B was broadcasted from (3,) to 2x3!``````
``````(2, 3)
(3,)
(2, 3)
[[1 2 3]
[4 5 6]] [100 200 300]
[[101 202 303]
[104 205 306]]``````

## General principle

An element-wise operation (+, -, *, /) between an (m,n) matrix and a (1,n) or (m,1) array leads to the smaller array being copied (broadcast) up to (m,n) before the computation is carried out.

# Good practices and tips

``````
import numpy as np
a = np.random.randn(5)
print(a)``````
``[ 0.68281763 -1.3579685   0.99577659  0.31269709  0.595569  ]``
``print(a.shape)``
``(5,)``

Here `a` is an array of rank 1. It is neither a row nor a column vector, so it has some non-intuitive effects:

``print(a.T)``
``[ 0.68281763 -1.3579685   0.99577659  0.31269709  0.595569  ]``
``print(np.dot(a,a.T))``
``3.7543713020122427``

So, for consistency, it is recommended NOT to use rank-1 data structures like the one above, but instead to instantiate arrays with an explicitly defined shape of known size.

ALWAYS COMMIT TO MAKING DEFINED ROW AND COLUMN VECTORS

``````a1 = np.random.randn(5,1)
print(a1)
print(a1.shape)``````
``````[[-0.7474656 ]
[-0.75790159]
[ 0.30984002]
[ 0.18874051]
[-0.80470167]]
(5, 1)``````
``print(a1.T)``
``[[-0.7474656  -0.75790159  0.30984002  0.18874051 -0.80470167]]``

Here there are two square brackets, compared to the previous transpose of `a`, indicating that in the case of `a1` the result is a well-defined 1x5 row vector.

``print(np.dot(a1,a1.T)) #Outer product ``
``````[[ 0.55870482  0.56650536 -0.23159476 -0.14107704  0.60148682]
[ 0.56650536  0.57441482 -0.23482825 -0.14304673  0.60988468]
[-0.23159476 -0.23482825  0.09600084  0.05847936 -0.24932878]
[-0.14107704 -0.14304673  0.05847936  0.03562298 -0.1518798 ]
[ 0.60148682  0.60988468 -0.24932878 -0.1518798   0.64754478]]``````
``````assert(a1.shape==(5,1)) #Assertion statement to check the known size
a = a.reshape((5,1))
print(a.shape)``````
``(5, 1)``
``A = np.random.randn(4,3)``
``print(A)``
``````[[ 0.22469294  0.78832742 -1.13148285]
[-0.04070683 -0.74061401 -1.59838506]
[ 0.12821164  0.72892812  0.4912876 ]
[ 0.09323584  1.66090848  1.87905216]]``````
``np.sum(A,axis=1,keepdims=True).shape``
``(4, 1)``