"""
02_stats.py
"""

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats

# NumPy and statistics

# Matrix in NumPy: build a 4x4 matrix filled with ones.
four_by_four_matrix = np.matrix(np.ones((4, 4), dtype=float))
print(four_by_four_matrix)

# np.asarray() hands back a plain ndarray that shares memory with the matrix,
# so assigning through it overwrites the third row of the matrix itself.
np.asarray(four_by_four_matrix)[2] = 2
print(four_by_four_matrix)

print()

# Python range() versus numpy.arange()

# Build a list with range(start, stop, step); the stop value is excluded.
# range() on its own is lazy, so it is wrapped in list() to hold (and print)
# the actual values instead of the text "range(0, 11, 2)".
lst = list(range(0, 11, 2))
print(lst)

print()

for value in lst:
    print(value)

print()

# numpy.arange(start, stop, step) works like range() but returns an ndarray.
whole_numbers = np.arange(0, 20, 1)
print(whole_numbers)

print()

natural_numbers = np.arange(1, 20, 1)
print(natural_numbers)

print()

odd_numbers = np.arange(1, 20, 2)
print(odd_numbers)

print()

even_numbers = np.arange(2, 20, 2)
print(even_numbers)

print()

# Creating sequences of numbers with numpy.linspace() and numpy.logspace()

# numpy.linspace(start, stop, num, endpoint) returns `num` evenly spaced
# values; here 10 values from 1 to 5, both ends included.
evenly_spaced = np.linspace(1.0, 5.0, num=10)
print(evenly_spaced)

print()

# endpoint=False leaves the stop value (5.0) out of the interval.
half_open_spaced = np.linspace(1.0, 5.0, num=5, endpoint=False)
print(half_open_spaced)

print()

# numpy.logspace(start, stop, num, endpoint) returns numbers evenly spaced on
# a log scale and takes the same parameters as np.linspace. start and stop are
# exponents (base 10 by default), so this covers 10**2 up to 10**4.
log_spaced = np.logspace(2, 4.0, num=4)
print(log_spaced)

print()

# Size of one array element: itemsize is the number of bytes per element
# (16 for complex128), not the number of elements in the array.
x = np.array([1, 2, 3], dtype=np.complex128)
print(x)

print()

print(x.itemsize)

print()

# Indexing and slicing NumPy arrays: a 2x3 array built from two tuples.
np_list = np.array([(1, 2, 3), (4, 5, 6)])
print(np_list)

print()

# A single index selects a whole row.
print('First row: ', np_list[0])
print('Second row: ', np_list[1])

print()

# ":" keeps every row; the second index picks the column.
print('First column: ', np_list[:, 0])
print('Second column: ', np_list[:, 1])
print('Third column: ', np_list[:, 2])

print()

# NumPy statistical functions

# 100 samples from a normal distribution with mean 5 and standard deviation 0.5.
np_normal_dis = np.random.normal(5, 0.5, 100)
print(np_normal_dis)

print()

# min, max, mean, median, sd over every element of a 3x3 array
two_dimension_array = np.array([
    (1, 2, 3),
    (4, 5, 6),
    (7, 8, 9),
])
print('min: ', two_dimension_array.min())
print('max: ', two_dimension_array.max())
print('mean: ', two_dimension_array.mean())
# ndarray has no .median() method; the module-level np.median() is the way in.
print('median: ', np.median(two_dimension_array))
print('sd: ', two_dimension_array.std())

print()

print(two_dimension_array)
# axis=0 reduces down the rows, giving one result per column.
print('Column with minimum: ', np.amin(two_dimension_array, axis=0))
print('Column with maximum: ', np.amax(two_dimension_array, axis=0))
print('=== Row ==')
# axis=1 reduces across the columns, giving one result per row.
print('Row with minimum: ', np.amin(two_dimension_array, axis=1))
print('Row with maximum: ', np.amax(two_dimension_array, axis=1))

print()

# How to create repeating sequences?

a = [1, 2, 3]

# Repeat the whole of 'a' two times: 1 2 3 1 2 3
print('Tile: ', np.tile(a, 2))

# Repeat each element of 'a' two times: 1 1 2 2 3 3
print('Repeat: ', np.repeat(a, 2))

print()

# How to generate random numbers?

# One random number in [0, 1)
one_random_num = np.random.random()
# NOTE(review): this only aliases the np.random module and is never used in
# this script; it looks like an unfinished line. Kept so the name still
# exists. Confirm and delete.
one_random_in = np.random
print(one_random_num)

print()

# Random numbers in [0, 1) of shape 2,3
r = np.random.random(size=[2, 3])
print(r)

print()

# Ten random picks (with replacement) from the vowels
print(np.random.choice(['a', 'e', 'i', 'o', 'u'], size=10))

print()

# Random numbers in [0, 1) of shape 2,2 (the upper bound 1 is excluded)
rand = np.random.rand(2, 2)
print(rand)

print()

# Samples from the standard normal distribution, shape 2,2
rand2 = np.random.randn(2, 2)
print(rand2)

print()

# Random integers in [0, 10) of shape 5,3
rand_int = np.random.randint(0, 10, size=[5, 3])
print(rand_int)

print()

# Arguments are: mean, standard deviation, number of samples
np_normal_dis = np.random.normal(5, 0.5, 1000)

# min, max, mean, median, mode, sd of the samples
print('min: ', np.min(np_normal_dis))
print('max: ', np.max(np_normal_dis))
print('mean: ', np.mean(np_normal_dis))
print('median: ', np.median(np_normal_dis))
# stats.mode returns a result object (mode value and its count), printed as is.
print('mode: ', stats.mode(np_normal_dis))
print('sd: ', np.std(np_normal_dis))

print()

# Histogram of the normal samples, split into 21 bins.
plt.hist(np_normal_dis, color="grey", bins=21)
plt.show()

print()

# Linear algebra

# Dot product: sum of the element-wise products of two 1-D arrays
f = np.array([1, 2, 3])
g = np.array([4, 5, 3])
# 1*4 + 2*5 + 3*3 = 23
dot_product = np.dot(f, g)
print(dot_product)

print()

# Matmul: matrix product of two arrays
h = [[1, 2], [3, 4]]
i = [[5, 6], [7, 8]]
# Top-left entry: 1*5 + 2*7 = 19
matmul = np.matmul(h, i)
print(matmul)

print()

# Determinant of a 2x2 matrix
# 5*8 - 7*6 = -2 (computed in floating point, so expect tiny rounding error)
matri = np.linalg.det(i)
print(matri)

print()

# 8x8 checkerboard: start from zeros, then set alternating cells to 1.
Z = np.zeros((8, 8))
Z[1::2, ::2] = 1   # odd rows, even columns
Z[::2, 1::2] = 1   # even rows, odd columns
print(Z)

print()

# Adding 2 to every element of a plain list needs an explicit loop...
new_list = [x + 2 for x in range(0, 11)]
print(new_list)

print()

# ...while an ndarray does it in one vectorized expression. The sum is a new
# array, so it has to be bound to a name; a bare "np_arr + 2" is thrown away.
np_arr = np.arange(0, 11)
np_arr = np_arr + 2
print(np_arr)

print()

# Derive pressure from temperature with one vectorized expression.
temp = np.array([1, 2, 3, 4, 5])
pressure = temp * 2 + 5
print(pressure)

# Line plot of pressure against temperature, with a tick every 0.5 on the x axis.
plt.plot(temp, pressure)
plt.xlabel('Temperature in oC')
plt.ylabel('Pressure in atm')
plt.title('Temperature vs Pressure')
plt.xticks(np.arange(0, 6, step=0.5))
plt.show()

# Draw samples from a normal distribution and plot their density.
mu = 28           # mean passed to np.random.normal
sigma = 15        # standard deviation passed to np.random.normal
samples = 100000  # number of values to draw

x = np.random.normal(mu, sigma, samples)
# sns.distplot is deprecated; histplot with a KDE overlay on a density scale
# is the documented replacement and also returns the Axes it drew on.
ax = sns.histplot(x, kde=True, stat="density", kde_kws=dict(cut=3))
ax.set(xlabel="x", ylabel='y')
plt.show()