Some issues related with Unified State Examination in Informatics in Russian Federation: April 2022

Friday, April 29, 2022

PyTorch neural networks to predict match results in soccer championships (Python)

(.env) [boris@fedora35server SOCCER]$ cat soccerTorchPredict.py

import pandas

filepath = "./training_2010.csv"
df = pandas.read_csv(filepath)
print(df)  # print to check

# Positional indices of the CSV columns that are kept. The last three kept
# columns are split off further down as the outputs; the rest are features.
extract = [5, 6, 7, 8, 9, 10, 13, 14, 16, 17, 18, 20, 21, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 37, 38, 47, 48, 49]
df = df.iloc[:, extract]  # df with the desired columns (features)
print(df)  # print to check

# Take explicit copies: both frames are modified below, and writing into a
# slice of ``df`` is chained assignment (pandas may warn or silently not write).
training = df.iloc[:-20].copy()  # all the rows except for the last 20
test = df.iloc[-20:].copy()  # the last 20 rows
print(training)  # print to check
print(test)  # print to check

# Normalize every feature column into [0, 1) by dividing by the power of ten
# just above the column maximum (taken over training and test together).
for column in training.columns[:-3]:
    num = max(training[column].max(), test[column].max())
    if num < 10:
        scale = 10
    elif num < 100:
        scale = 100
    elif num < 1000:
        scale = 1000
    else:
        # Values of 1000 or more are not handled; the column is left as is.
        print("Error in normalization! Please check!")
        continue
    # Reassign the whole column instead of dividing in place so that integer
    # columns are replaced by float columns rather than written into.
    training[column] = training[column] / scale
    test[column] = test[column] / scale

print(training)  # print to check
print(test)  # print to check

training = training.sample(frac=1)  # shuffle the training data
test = test.sample(frac=1)  # shuffle the test data
print(training)  # print to check
print(test)  # print to check

# Inputs: all rows, all columns except for the last 3.
# Outputs: all rows, the last 3 columns.
training_input = training.iloc[:, :-3]
training_output = training.iloc[:, -3:]
test_input = test.iloc[:, :-3]
test_output = test.iloc[:, -3:]
print(test_input)  # print to check
print(test_output)  # print to check

# separating the output into two classes: win or draw-defeat

# for the winners convert the output:

# from (1, 0, 0) to 1

# from (0, 1, 0) to 0

# from (0, 0, 1) to 0

def convert_output_win(source):
    """Collapse one-hot (win, draw, defeat) rows into a single binary label.

    Args:
        source: DataFrame with the columns ``'win'``, ``'draw'`` and
            ``'defeat'``. It is not modified.

    Returns:
        The last column of a copy of ``source`` to which a ``'new'`` column
        was added: 1 where ``win == 1``, 0 where ``draw == 1`` or
        ``defeat == 1`` (these take precedence over ``win``), and the
        placeholder 2 where none of the three flags is set.
    """
    target = source.copy()  # never touch the caller's frame
    target['new'] = 2  # placeholder for rows with no flag set

    # Assign through boolean masks on the frame itself. Writing to the row
    # objects yielded by iterrows() is not guaranteed to reach the frame.
    target.loc[target['win'] == 1, 'new'] = 1
    target.loc[(target['draw'] == 1) | (target['defeat'] == 1), 'new'] = 0

    return target.iloc[:, -1]  # all rows, the last column

# Replace each three-column output frame with the single column returned by
# convert_output_win.
training_output = convert_output_win(training_output)

test_output = convert_output_win(test_output)

import torch

class Net(torch.nn.Module):
    """Feed-forward binary classifier: Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the single hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(self.hidden_size, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for ``x``, one value per input row."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))

#convert to tensors

training_input = torch.FloatTensor(training_input.values)
training_output = torch.FloatTensor(training_output.values)
test_input = torch.FloatTensor(test_input.values)
test_output = torch.FloatTensor(test_output.values)

input_size = training_input.size()[1]  # number of features selected
hidden_size = 30  # number of nodes/neurons in the hidden layer
model = Net(input_size, hidden_size)  # create the model
criterion = torch.nn.BCELoss()  # works for binary classification

# SGD with a momentum term. The variant without momentum would be
#     torch.optim.SGD(model.parameters(), lr=0.9)
# Only one optimizer is built; a second assignment would just replace it.
optimizer = torch.optim.SGD(model.parameters(), lr=0.9, momentum=0.2)

# Baseline loss on the test set before any training. No gradients are needed
# for evaluation, so autograd tracking is switched off.
model.eval()
with torch.no_grad():
    y_pred = model(test_input)
    # squeeze(1) drops only the feature axis, so a one-sample batch keeps its
    # batch dimension and still matches the shape of test_output.
    before_train = criterion(y_pred.squeeze(1), test_output)
print('Test loss before training' , before_train.item())

# Training model (full-batch gradient descent)
model.train()
epochs = 5000
errors = []  # training loss per epoch, plotted later
for epoch in range(epochs):
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(training_input)
    # Compute Loss
    loss = criterion(y_pred.squeeze(1), training_output)
    errors.append(loss.item())
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()

# Loss on the test set after training; y_pred is kept for plotting.
model.eval()
with torch.no_grad():
    y_pred = model(test_input)
    after_train = criterion(y_pred.squeeze(1), test_output)
print('Test loss after Training' , after_train.item())

import matplotlib.pyplot as plt

import numpy as np

def plotcharts(errors, actual=None, predicted=None):
    """Show the training-loss curve next to real vs. predicted test labels.

    Args:
        errors: sequence of per-epoch loss values.
        actual: tensor of true labels; defaults to the module-level
            ``test_output``.
        predicted: tensor of model outputs; defaults to the module-level
            ``y_pred``.
    """
    # Fall back to the script's globals so plotcharts(errors) keeps working.
    if actual is None:
        actual = test_output
    if predicted is None:
        predicted = y_pred

    errors = np.array(errors)
    plt.figure(figsize=(12, 5))

    # Left panel: loss per epoch.
    graf02 = plt.subplot(1, 2, 1)  # nrows, ncols, index
    graf02.set_title('Errors')
    graf02.plot(errors, '-')
    graf02.set_xlabel('Epochs')

    # Right panel: real labels (yellow circles) and predictions (blue plus signs).
    graf03 = plt.subplot(1, 2, 2)
    graf03.set_title('Tests')
    real_markers = graf03.plot(actual.detach().numpy(), 'yo', label='Real')
    plt.setp(real_markers, markersize=10)
    predicted_markers = graf03.plot(predicted.detach().numpy(), 'b+', label='Predicted')
    plt.setp(predicted_markers, markersize=10)
    graf03.legend(loc=7)

    plt.show()

plotcharts(errors)

Эти диаграммы очень просты и предназначены только для облегчения визуализации результатов. Мы не собираемся использовать диаграммы для подтверждения или опровержения эффективности модели.

Первая диаграмма показывает, как меняется ошибка от эпохи к эпохе: посмотрите, как она уменьшается в процессе обучения. Вторая диаграмма показывает реальные значения (победа — y = 1, ничья или поражение — y = 0) жёлтыми кружками, а прогнозируемые значения — синими крестиками. Чем ближе синий крестик к жёлтому кружку, тем точнее прогноз. Если синий крестик находится внутри жёлтого кружка или очень близко к нему, значит, модель правильно предсказала результат. И наоборот: если синий крестик находится далеко от жёлтого кружка, модель сделала плохой прогноз для этого матча.

References

https://medium.com/@andreluiz_4916/pytorch-neural-networks-to-predict-matches-results-in-soccer-championships-part-i-a6d0eefeca51

Wednesday, April 27, 2022

Cross-Entropy, Negative Log-Likelihood, and All That Jazz

Численный эксперимент

Чтобы понять разницу между CrossEntropyLoss и NLLLoss (и BCELoss и т. д.), я разработал небольшой численный эксперимент следующим образом.

В бинарном случае я сначала генерирую случайный вектор (z) размера пять из нормального распределения и вручную создаю вектор меток (y) той же формы, состоящий из нулей и единиц. Затем я вычисляю предсказанные вероятности (yhat) по z с помощью сигмоиды (строка 8). В строке 13 я применяю формулу отрицательного логарифмического правдоподобия, полученную в предыдущем разделе, чтобы вычислить ожидаемые значения функции потерь. Используя BCELoss с yhat в качестве входных данных и BCEWithLogitsLoss с z в качестве входных данных, я получаю те же результаты, что и вычисленные по формуле.

В многоклассовом случае я генерирую z2 и y2 и вычисляю yhat2 с помощью функции softmax. На этот раз NLLLoss с логарифмами вероятностей (log от yhat2) в качестве входных данных и CrossEntropyLoss с необработанными значениями прогноза (z2) в качестве входных данных дают те же результаты, что и формула, полученная ранее.

(.env) [boris@fedora35server LOSS]$ cat lossNegative1.py

import torch

torch.manual_seed(0)

# ---------------------------------------------------------------- binary case
print(f"{'Setting up binary case':-^80}")
z = torch.randn(5)  # raw scores (logits)
yhat = torch.sigmoid(z)  # predicted probabilities
y = torch.tensor([0.0, 1.0, 1.0, 0.0, 1.0])  # hand-made 0/1 labels
print(f"{z=}\n{yhat=}\n{y=}\n{'':-^80}")

# Negative log likelihood written out from the derived formula.
l = -(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
print(f"{l}")

# BCELoss takes probabilities, BCEWithLogitsLoss takes the raw scores; both
# are evaluated per element (no reduction) to compare against the formula.
bce = torch.nn.BCELoss(reduction="none")
bce_with_logits = torch.nn.BCEWithLogitsLoss(reduction="none")
l_BCELoss_nored = bce(yhat, y)
l_BCEWithLogitsLoss_nored = bce_with_logits(z, y)
print(f"{l_BCELoss_nored}\n{l_BCEWithLogitsLoss_nored}\n{'':=^80}")

# ------------------------------------------------------------ multiclass case
print(f"{'Setting up multiclass case':-^80}")
z2 = torch.randn(5, 3)  # raw scores: 5 samples, 3 classes
yhat2 = torch.softmax(z2, dim=-1)  # per-sample class probabilities
y2 = torch.tensor([0, 2, 1, 1, 0], dtype=torch.long)  # class index per sample
print(f"{z2=}\n{yhat2=}\n{y2=}\n{'':-^80}")

# Negative log likelihood from the derived formula: pick, for every sample,
# the log-probability of its true class.
rows = torch.arange(5)
log_yhat2 = yhat2.log()
l2 = -log_yhat2[rows, y2]
print(f"{l2}")
print(-torch.log_softmax(z2, dim=-1)[rows, y2])

# NLLLoss takes log-probabilities, CrossEntropyLoss takes the raw scores.
nll = torch.nn.NLLLoss(reduction="none")
cross_entropy = torch.nn.CrossEntropyLoss(reduction="none")
l2_NLLLoss_nored = nll(log_yhat2, y2)
l2_CrossEntropyLoss_nored = cross_entropy(z2, y2)
print(f"{l2_NLLLoss_nored}\n{l2_CrossEntropyLoss_nored}\n{'':=^80}")

**********

Runtime

***********

(.env) [boris@fedora35server LOSS]$ python lossNegative1.py

-----------------------------Setting up binary case-----------------------------

z=tensor([ 1.5410, -0.2934, -2.1788, 0.5684, -1.0845])

yhat=tensor([0.8236, 0.4272, 0.1017, 0.6384, 0.2527])

y=tensor([0., 1., 1., 0., 1.])

--------------------------------------------------------------------------------

tensor([1.7351, 0.8506, 2.2860, 1.0172, 1.3757])

===============================================

---------------------------Setting up multiclass case---------------------------

z2=tensor([[-1.3986, 0.4033, 0.8380],

[-0.7193, -0.4033, -0.5966],

[ 0.1820, -0.8567, 1.1006],

[-1.0712, 0.1227, -0.5663],

[ 0.3731, -0.8920, -1.5091]])

yhat2=tensor([[0.0609, 0.3691, 0.5700],

[0.2856, 0.3916, 0.3228],

[0.2591, 0.0917, 0.6492],

[0.1679, 0.5540, 0.2781],

[0.6971, 0.1967, 0.1061]])

y2=tensor([0, 2, 1, 1, 0])

--------------------------------------------------------------------------------

tensor([2.7987, 1.1307, 2.3893, 0.5906, 0.3608])

================================================

References

https://towardsdatascience.com/cross-entropy-negative-log-likelihood-and-all-that-jazz-47a95bd2e81#

Sunday, April 24, 2022

Upgrade Ubuntu 20.04 to 22.04 right after release

Unofficial hack

$ sudo apt update

$ sudo apt upgrade

$ sudo reboot

$ sudo apt dist-upgrade

$ sudo update-manager -d

On bare metal

Source Lxer.com

Wednesday, April 20, 2022

Fast ALTER TABLE ADD COLUMN with a non-NULL default

https://googleweblight.com/sp?hl=ru-RU&geid=NSTN&u=https://www.depesz.com/2018/04/04/waiting-for-postgresql-11-fast-alter-table-add-column-with-a-non-null-default/

https://googleweblight.com/sp?hl=ru-RU&geid=NSTN&u=https://git.postgresql.org/pg/commitdiff/16828d5c0273b4fe5f10f42588005f16b415b2d8

Tuesday, April 19, 2022

How To Delete Duplicate Rows in MySQL (MariaDB)

Тестировано на Fedora 35 Server

Case 1

1. Пример удаления дублей на Mariadb Server version: 10.5.13-MariaDB MariaDB Server

=====================

MariaDB [testcte]> CREATE TABLE contacts (

-> id INT PRIMARY KEY AUTO_INCREMENT,

-> first_name VARCHAR(50) NOT NULL,

-> last_name VARCHAR(50) NOT NULL,

-> email VARCHAR(255) NOT NULL

-> );

Query OK, 0 rows affected (0.010 sec)

MariaDB [testcte]> INSERT INTO contacts (first_name,last_name,email)

-> VALUES ('Carine ','Schmitt','carine.schmitt@verizon.net'),

-> ('Jean','King','jean.king@me.com'),

-> ('Peter','Ferguson','peter.ferguson@google.com'),

-> ('Janine ','Labrune','janine.labrune@aol.com'),

-> ('Jonas ','Bergulfsen','jonas.bergulfsen@mac.com'),

-> ('Janine ','Labrune','janine.labrune@aol.com'),

-> ('Susan','Nelson','susan.nelson@comcast.net'),

-> ('Zbyszek ','Piestrzeniewicz','zbyszek.piestrzeniewicz@att.net'),

-> ('Roland','Keitel','roland.keitel@yahoo.com'),

-> ('Julie','Murphy','julie.murphy@yahoo.com'),

-> ('Kwai','Lee','kwai.lee@google.com'),

-> ('Jean','King','jean.king@me.com'),

-> ('Susan','Nelson','susan.nelson@comcast.net'),

-> ('Roland','Keitel','roland.keitel@yahoo.com');

Query OK, 14 rows affected (0.002 sec)

Records: 14 Duplicates: 0 Warnings: 0

MariaDB [testcte]> SELECT * FROM contacts

-> ORDER BY email;

+----+------------+-----------------+---------------------------------+

| id | first_name | last_name | email |

+----+------------+-----------------+---------------------------------+

| 1 | Carine | Schmitt | carine.schmitt@verizon.net |

| 4 | Janine | Labrune | janine.labrune@aol.com |

| 6 | Janine | Labrune | janine.labrune@aol.com |

| 2 | Jean | King | jean.king@me.com |

| 12 | Jean | King | jean.king@me.com |

| 5 | Jonas | Bergulfsen | jonas.bergulfsen@mac.com |

| 10 | Julie | Murphy | julie.murphy@yahoo.com |

| 11 | Kwai | Lee | kwai.lee@google.com |

| 3 | Peter | Ferguson | peter.ferguson@google.com |

| 9 | Roland | Keitel | roland.keitel@yahoo.com |

| 14 | Roland | Keitel | roland.keitel@yahoo.com |

| 7 | Susan | Nelson | susan.nelson@comcast.net |

| 13 | Susan | Nelson | susan.nelson@comcast.net |

| 8 | Zbyszek | Piestrzeniewicz | zbyszek.piestrzeniewicz@att.net |

+----+------------+-----------------+---------------------------------+

14 rows in set (0.001 sec)

MariaDB [testcte]> SELECT

-> email, COUNT(email)

-> FROM

-> contacts

-> GROUP BY

-> email

-> HAVING

-> COUNT(email) > 1;

+--------------------------+--------------+

| email | COUNT(email) |

+--------------------------+--------------+

| janine.labrune@aol.com | 2 |

| jean.king@me.com | 2 |

| roland.keitel@yahoo.com | 2 |

| susan.nelson@comcast.net | 2 |

+--------------------------+--------------+

4 rows in set (0.001 sec)

======================

Duplicates delete SQL

======================

MariaDB [testcte]> DELETE t1 FROM contacts t1

-> INNER JOIN contacts t2

-> WHERE

-> t1.id < t2.id AND

-> t1.email = t2.email;

Query OK, 4 rows affected (0.003 sec)

MariaDB [testcte]> commit;

Query OK, 0 rows affected (0.000 sec)

MariaDB [testcte]> SELECT

-> email,

-> COUNT(email)

-> FROM

-> contacts

-> GROUP BY

-> email

-> HAVING

-> COUNT(email) > 1;

Empty set (0.001 sec)

MariaDB [testcte]> SELECT * FROM contacts;

+----+------------+-----------------+---------------------------------+

| id | first_name | last_name | email |

+----+------------+-----------------+---------------------------------+

| 1 | Carine | Schmitt | carine.schmitt@verizon.net |

| 3 | Peter | Ferguson | peter.ferguson@google.com |

| 5 | Jonas | Bergulfsen | jonas.bergulfsen@mac.com |

| 6 | Janine | Labrune | janine.labrune@aol.com |

| 8 | Zbyszek | Piestrzeniewicz | zbyszek.piestrzeniewicz@att.net |

| 10 | Julie | Murphy | julie.murphy@yahoo.com |

| 11 | Kwai | Lee | kwai.lee@google.com |

| 12 | Jean | King | jean.king@me.com |

| 13 | Susan | Nelson | susan.nelson@comcast.net |

| 14 | Roland | Keitel | roland.keitel@yahoo.com |

+----+------------+-----------------+---------------------------------+

10 rows in set (0.000 sec)

*********

Case 2

*********

MariaDB [testcte]> use testdups ;

Database changed

MariaDB [testdups]> DROP TABLE IF EXISTS contacts;

Query OK, 0 rows affected, 1 warning (0.000 sec)

MariaDB [testdups]>

MariaDB [testdups]> CREATE TABLE contacts (

-> id INT PRIMARY KEY AUTO_INCREMENT,

-> first_name VARCHAR(50) NOT NULL,

-> last_name VARCHAR(50) NOT NULL,

-> email VARCHAR(255) NOT NULL

-> );

Query OK, 0 rows affected (0.006 sec)

MariaDB [testdups]> INSERT INTO contacts (first_name,last_name,email)

-> VALUES ('Carine ','Schmitt','carine.schmitt@verizon.net'),

-> ('Jean','King','jean.king@me.com'),

-> ('Peter','Ferguson','peter.ferguson@google.com'),

-> ('Janine ','Labrune','janine.labrune@aol.com'),

-> ('Jonas ','Bergulfsen','jonas.bergulfsen@mac.com'),

-> ('Janine ','Labrune','janine.labrune@aol.com'),

-> ('Susan','Nelson','susan.nelson@comcast.net'),

-> ('Zbyszek ','Piestrzeniewicz','zbyszek.piestrzeniewicz@att.net'),

-> ('Roland','Keitel','roland.keitel@yahoo.com'),

-> ('Julie','Murphy','julie.murphy@yahoo.com'),

-> ('Kwai','Lee','kwai.lee@google.com'),

-> ('Jean','King','jean.king@me.com'),

-> ('Susan','Nelson','susan.nelson@comcast.net'),

-> ('Roland','Keitel','roland.keitel@yahoo.com');

Query OK, 14 rows affected (0.002 sec)

Records: 14 Duplicates: 0 Warnings: 0

MariaDB [testdups]> SELECT id, email, ROW_NUMBER() OVER (PARTITION BY email ORDER BY email ) AS row_num

-> FROM contacts;

+----+---------------------------------+---------+

| id | email | row_num |

+----+---------------------------------+---------+

| 1 | carine.schmitt@verizon.net | 1 |

| 4 | janine.labrune@aol.com | 1 |

| 6 | janine.labrune@aol.com | 2 |

| 2 | jean.king@me.com | 1 |

| 12 | jean.king@me.com | 2 |

| 5 | jonas.bergulfsen@mac.com | 1 |

| 10 | julie.murphy@yahoo.com | 1 |

| 11 | kwai.lee@google.com | 1 |

| 3 | peter.ferguson@google.com | 1 |

| 9 | roland.keitel@yahoo.com | 1 |

| 14 | roland.keitel@yahoo.com | 2 |

| 7 | susan.nelson@comcast.net | 1 |

| 13 | susan.nelson@comcast.net | 2 |

| 8 | zbyszek.piestrzeniewicz@att.net | 1 |

+----+---------------------------------+---------+

14 rows in set (0.001 sec)

MariaDB [testdups]> SELECT id FROM (SELECT id, ROW_NUMBER() OVER ( PARTITION BY email ORDER BY email) AS row_num FROM contacts ) t WHERE row_num > 1;

+----+

| id |

+----+

| 6 |

| 12 |

| 14 |

| 13 |

+----+

4 rows in set (0.001 sec)

MariaDB [testdups]> DELETE FROM contacts

-> WHERE id IN (SELECT id FROM (SELECT id, ROW_NUMBER() OVER ( PARTITION BY email ORDER BY email) AS row_num FROM contacts) t

-> WHERE row_num > 1 );

Query OK, 4 rows affected (0.002 sec)

MariaDB [testdups]> select * from contacts ;

+----+------------+-----------------+---------------------------------+

| id | first_name | last_name | email |

+----+------------+-----------------+---------------------------------+

| 1 | Carine | Schmitt | carine.schmitt@verizon.net |

| 2 | Jean | King | jean.king@me.com |

| 3 | Peter | Ferguson | peter.ferguson@google.com |

| 4 | Janine | Labrune | janine.labrune@aol.com |

| 5 | Jonas | Bergulfsen | jonas.bergulfsen@mac.com |

| 7 | Susan | Nelson | susan.nelson@comcast.net |

| 8 | Zbyszek | Piestrzeniewicz | zbyszek.piestrzeniewicz@att.net |

| 9 | Roland | Keitel | roland.keitel@yahoo.com |

| 10 | Julie | Murphy | julie.murphy@yahoo.com |

| 11 | Kwai | Lee | kwai.lee@google.com |

+----+------------+-----------------+---------------------------------+

10 rows in set (0.001 sec)

References

https://www.mysqltutorial.org/mysql-delete-duplicate-rows/

Monday, April 18, 2022

Implementing Gradient Descent for multilinear regression from scratch

Code 1

(.env) [boris@fedora35server MULTITARGET]$ cat lossMultiBoston.py

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from tqdm import tqdm

import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

# Load the data set: X holds the feature matrix, Y the target values.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and, as far as
# I know, removed in 1.2 - confirm against the installed version.
from sklearn import datasets

boston = datasets.load_boston()

X = boston.data

Y = boston.target

print(X.shape)

# Standardize the features with StandardScaler before gradient descent.
from sklearn.preprocessing import StandardScaler

sc=StandardScaler()

X_transform=sc.fit_transform(X)

def predicted_y(weight,x,intercept):
    """Return the linear-model prediction ``x[i] @ weight + intercept`` per row.

    Args:
        weight: 1-D weight vector with one entry per feature.
        x: 2-D array-like of samples, one row per sample.
        intercept: scalar bias added to every prediction.

    Returns:
        1-D ``numpy.ndarray`` with one prediction per row of ``x``.
    """
    if len(x) == 0:
        # Same result as the row-by-row loop for no samples.
        return np.array([])
    # One matrix-vector product instead of a Python-level loop over the rows.
    return np.asarray(x) @ np.asarray(weight) + intercept

# linear loss

def loss(y,y_predicted):
    """Return the mean squared error between ``y`` and ``y_predicted``.

    Args:
        y: 1-D array-like of target values.
        y_predicted: 1-D array-like of predictions, same length as ``y``.

    Returns:
        ``(1/n) * sum((y - y_predicted) ** 2)`` with ``n = len(y)``.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
    """
    n=len(y)
    residual = np.asarray(y) - np.asarray(y_predicted)
    # (1/n) is evaluated first, so empty input still raises ZeroDivisionError.
    return (1/n)*np.sum(residual**2)

#derivative of loss w.r.t weight

def dldw(x,y,y_predicted):
    """Return the gradient of the mean squared error w.r.t. the weights.

    Args:
        x: 2-D array-like of samples, one row per sample.
        y: 1-D array-like of target values.
        y_predicted: 1-D array-like of predictions, same length as ``y``.

    Returns:
        1-D array ``(2/n) * sum_i(-x[i] * (y[i] - y_predicted[i]))`` with one
        entry per feature.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
    """
    n=len(y)
    residual = np.asarray(y) - np.asarray(y_predicted)
    # residual @ x sums x[i] * residual[i] over all samples in one product.
    return (2/n)*-(residual @ np.asarray(x))

# derivative of loss w.r.t bias

def dldb(y,y_predicted):
    """Return the gradient of the mean squared error w.r.t. the bias.

    Args:
        y: 1-D array-like of target values.
        y_predicted: 1-D array-like of predictions, same length as ``y``.

    Returns:
        ``(2/n) * sum_i(-(y[i] - y_predicted[i]))`` with ``n = len(y)``.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
    """
    n=len(y)
    residual = np.asarray(y) - np.asarray(y_predicted)
    return (2/n) * -np.sum(residual)

def gradient_descent(x,y,epoch=2000,learning_rate=0.001):
    """Fit a linear model by full-batch gradient descent and plot the loss.

    Args:
        x: 2-D array of samples, one row per sample (``x.shape[1]`` features).
        y: 1-D array of target values.
        epoch: number of gradient steps (default 2000).
        learning_rate: step size for both updates (default 0.001).

    Returns:
        Tuple ``(weight_vector, intercept)`` after the last update.
    """
    # Weights start from a random normal draw, the intercept from zero.
    weight_vector=np.random.randn(x.shape[1])
    intercept=0
    linear_loss=[]

    for i in range(epoch):
        y_predicted = predicted_y(weight_vector,x,intercept)
        weight_vector = weight_vector - learning_rate *dldw(x,y,y_predicted)
        intercept = intercept - learning_rate * dldb(y,y_predicted)
        # The recorded loss belongs to the prediction made before this update.
        linear_loss.append(loss(y,y_predicted))
        print(i)

    # The first loss value is left out of the plot.
    plt.plot(np.arange(1,epoch),linear_loss[1:])
    plt.xlabel("number of epoch")
    plt.ylabel("loss")
    plt.show()

    return weight_vector,intercept

# Fit on the standardized features; w and b are read as globals by predict().
w,b=gradient_descent(X_transform,Y)

print("weight:",w)

print("bias:",b)

def predict(inp):
    """Return predictions for ``inp`` using the fitted globals ``w`` and ``b``.

    Args:
        inp: 2-D array-like of samples, one row per sample.

    Returns:
        1-D ``numpy.ndarray`` with ``w @ inp[i] + b`` for every row.
    """
    # Same computation as predicted_y, so reuse it instead of repeating the loop.
    return predicted_y(w,inp,b)

# Predict on the same standardized data the model was fitted on and print the
# actual targets next to the predictions rounded to one decimal place.
y_pred=predict(X_transform)

df_pred=pd.DataFrame()

df_pred["y_actual"]=Y

df_pred["y_predicted"]=np.round(y_pred,1)

print(df_pred)

Вышеприведенный рисунок представляет собой график между потерями и номером эпохи.

После каждой эпохи потери уменьшаются. Первоначально потери резко уменьшаются — примерно до эпохи 1000. После эпохи 1000 потери снижаются лишь незначительно. Это показывает, что мы достигли глобального минимума.

References

https://medium.com/analytics-vidhya/implementing-gradient-descent-for-multi-linear-regression-from-scratch-3e31c114ae12

How to convert images to dataset Python

Code 1

(.env) [boris@fedora35server PILLOW]$ cat imageShow.py

# load and display an image with Matplotlib

from matplotlib import image
from matplotlib import pyplot

# Load the image as a pixel array. The array gets its own name so that the
# imported ``image`` module is not rebound and stays usable afterwards.
pixels = image.imread('coala.jpeg')

# summarize the pixel array
print(pixels.dtype)
print(pixels.shape)

# display the array of pixels as an image
pyplot.imshow(pixels)
pyplot.show()

Code 2

(.env) [boris@fedora35server PILLOW]$ cat imageToNDArray.py

from PIL import Image
from numpy import savetxt
from numpy import savez_compressed
from numpy import asarray

# load the image
image = Image.open('coala.jpeg')

# convert image to numpy array
data = asarray(image)
print(type(data))
print(data)

# Text dump: savetxt only writes 1-D or 2-D arrays, so the 3-D array is
# reshaped to three rows and its original shape is recorded in the header
# line (reshape back with that shape to restore the array).
savetxt("data.txt", data.reshape((3,-1)), fmt="%s", header=str(data.shape))

# Binary dump: .npz keeps shape and dtype, so the array is stored as is.
# ``fmt`` and ``header`` are savetxt options; savez_compressed would store
# them as two extra arrays in the archive, so they are not passed here.
savez_compressed("data.npz", data)
print("Both Unloads data done")

# summarize shape
print(data.shape)

# create Pillow image
image2 = Image.fromarray(data)
print(type(image2))

# summarize image details
print(image2.mode)
print(image2.size)

(.env) [boris@fedora35server PILLOW]$ python imageToNDArray.py

<class 'numpy.ndarray'>

[[[ 25 26 12]

[ 25 26 12]

...

[113 106 52]

[114 107 53]]

[[ 25 26 12]

[ 25 26 12]

...

[113 106 52]

[114 107 53]]

[[ 25 26 12]

[ 25 26 12]

...

[112 105 51]

[113 106 52]

[113 106 52]]

...

[[139 118 71]

[136 115 68]

[130 109 62]

...

[ 67 91 91]

[ 68 89 90]

[ 66 88 86]]

[[150 128 81]

[143 121 74]

[134 111 67]

...

[ 73 93 91]

[ 69 89 87]

[ 65 85 83]]

[[151 127 81]

[144 120 74]

[135 111 67]

...

[ 74 93 91]

[ 70 89 85]

[ 66 85 81]]]

(450, 800, 3)

<class 'PIL.Image.Image'>

RGB

(800, 450)

Можно получить CSV сразу, преобразовав изображение в оттенки серого (режим 'L')

(.env) [boris@fedora35server PILLOW]$ cat imageToCSV.py

from PIL import Image

import numpy as np

import sys

import os

import csv

#Useful function

def createFileList(myDir, format='.jpeg'):
    """Return the paths of all files under ``myDir`` with a given suffix.

    The directory is printed and then walked recursively, bottom-up.

    Args:
        myDir: root directory to search.
        format: suffix a file name must end with (default ``'.jpeg'``). The
            name shadows the builtin; it is kept so keyword callers still work.

    Returns:
        List of ``os.path.join(root, name)`` paths in walk order; empty when
        nothing matches.
    """
    fileList = []
    print(myDir)
    for root, _dirs, files in os.walk(myDir, topdown=False):
        fileList.extend(
            os.path.join(root, name) for name in files if name.endswith(format)
        )
    return fileList

# load the original image

myFileList = createFileList('./')

for file in myFileList:
    print(file)

    # Open inside a context manager so the file handle is released, and
    # convert to 8-bit greyscale (mode 'L') while the file is still open.
    with Image.open(file) as img_file:
        img_grey = img_file.convert('L')

    # NOTE: result.png is overwritten on every iteration, so only the last
    # image's greyscale version remains on disk.
    img_grey.save('result.png')
    img_grey.show()

    # Flatten the greyscale pixels row by row into one long vector.
    value = np.asarray(img_grey, dtype=int).flatten()
    print(value)

    # Append one CSV row per image. newline='' is what the csv module expects
    # for files it writes; without it extra blank lines can appear.
    with open("img_pixels.csv", 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(value)

(.env) [boris@fedora35server PILLOW]$ python imageToCSV.py

./coala.jpeg

[24 24 24 ... 87 83 79]

(.env) [boris@fedora35server PILLOW]$ ll

total 4680

-rw-r--r--. 1 boris boris 44769 Apr 18 10:42 coala.jpeg

-rw-rw-r--. 1 boris boris 318 Apr 18 10:49 imageShow.py

-rw-rw-r--. 1 boris boris 1071 Apr 18 13:44 imageToCSV.py

-rw-rw-r--. 1 boris boris 691 Apr 18 13:33 imageToNDArray.py

-rw-rw-r--. 1 boris boris 2514008 Apr 18 13:52 img_pixels.csv

-rw-rw-r--. 1 boris boris 144709 Apr 18 13:52 result.png

Преобразование изображения с помощью Keras API

(.env) [boris@fedora35server PILLOW]$ cat imageConvTF.py

from tensorflow.keras.preprocessing.image import (
    array_to_img,
    img_to_array,
    load_img,
    save_img,
)

# Load the picture; the prints below show what kind of object came back.
img = load_img('coala.jpeg')
print("Orignal:" ,type(img))
for detail in (type(img), img.format, img.mode, img.size):
    print(detail)
img.show()

# Image -> NumPy array, then report its type, dtype, shape and contents.
img_array = img_to_array(img)
print("NumPy array info:")
print(type(img_array))
print("type:",img_array.dtype)
print("shape:",img_array.shape)
print(img_array)

# NumPy array -> image again.
img_pil = array_to_img(img_array)
print("converting NumPy array:",type(img_pil))

References

https://www.pluralsight.com/guides/importing-image-data-into-numpy-arrays