Introduction
Python is often criticized for its slow execution speed. Some people refuse to use Python for this very reason, but there are several ways to optimize Python code for both time and memory usage.
I would like to share several methods that help with real-life problems. I am using Windows 10 x64.
Save memory with Python
Let's take a realistic example. Suppose we have a store with a list of goods, and we need to work with these goods. The best option is to keep all the goods in a database, but suppose something went wrong and we decided to load all of them into memory in order to process them. A reasonable question then arises: will we have enough memory to work with that many goods?
Let's first create a class that represents our store. It will have only two fields, name and listGoods, which hold the name of the store and the list of products, respectively.
class ShopClass:
    """A store: its name plus an in-memory list of its goods."""

    def __init__(self, name=""):
        self.name = name
        # A new list is created for every instance, so stores never share goods.
        self.listGoods = []
Now we want to fill the store with goods (that is, populate the listGoods field). To do this, we will create a class that holds the information about a single product (I use a dataclass for such examples).
# dataclasses is part of the standard library since Python 3.7.
# On Python 3.6 install the backport first:
#   pip install dataclasses
from dataclasses import dataclass


@dataclass
class DataGoods:
    """Information about a single product in the store."""

    name: str
    price: int
    # The snippets in this article always pass "RUB" here.
    unit: str
Now let's fill our store with goods: 200 iterations, adding 3 products on each one:
shop = ShopClass("MyShop")
# 200 iterations x 3 products = 600 DataGoods objects in the store.
for _ in range(200):
    shop.listGoods.extend([
        DataGoods("", 20000, "RUB"),
        DataGoods("", 45000, "RUB"),
        DataGoods("", 2000, "RUB"),
    ])
Now let's measure how much memory the store takes up (using the pympler package):
# Print the size of the store object as reported by pympler's asizeof.
from pympler import asizeof
print(" :", asizeof.asizeof(shop))
>>> : 106648
, 106. , , , 600 , , , . , , , , , , . ( , ).
. Python , , . , python __dict__ , . , .
# A regular instance keeps its attributes in a per-instance __dict__.
shop = ShopClass("MyShop")
print(shop.__dict__)
>>> {'name': 'MyShop', 'listGoods': []}
# Assigning an attribute that __init__ never set adds a new key to __dict__.
shop.city = ""
print(shop.__dict__)
>>> {'name': 'MyShop', 'listGoods': [], 'city': ''}
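This flexibility is convenient, but it costs memory. For such cases Python provides __slots__, which replaces __dict__. Without __dict__ the set of attributes is fixed in advance and new ones cannot be added dynamically, which reduces memory usage. Let's rewrite our classes: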
class ShopClass:
    """A store whose instances have exactly two attributes and no __dict__."""

    # Declaring __slots__ removes the per-instance __dict__; only the
    # attributes listed here can ever be assigned.
    __slots__ = ("name", "listGoods")

    def __init__(self, name=""):
        self.name = name
        self.listGoods = []
@dataclass
class DataGoods:
    """Information about a single product, stored in slots instead of a __dict__."""

    # The slot names must match the annotated fields below.
    __slots__ = ("name", "price", "unit")
    name: str
    price: int
    unit: str
.
# Measure the store again, now that both classes declare __slots__.
from pympler import asizeof
print(" :", asizeof.asizeof(shop))
>>> : 43904
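As you can see, the memory occupied by the store has decreased by about 2.4 times (from 106648 to 43904 bytes; the exact figure depends on the Python version). There is a price for this, however: new attributes can no longer be added to an object dynamically: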
shop = ShopClass("MyShop")
# "city" is not listed in __slots__, so this assignment raises AttributeError.
shop.city = ""
>>> AttributeError: 'ShopClass' object has no attribute 'city'
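In addition, since Python no longer has to maintain a __dict__ for every object, objects are created faster. Let's use timeit to compare the speed with __slots__ and without it (i.e. with __dict__):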
import timeit

# Source text timed by timeit: define both classes and fill the store.
# timeit compiles this string as Python source, so the lines inside it need
# real indentation, and the string must import everything it uses (the timed
# code does not see the names of this script).
# Un-comment the two __slots__ lines to time the __slots__ variant.
code = """
from dataclasses import dataclass

class ShopClass:
    #__slots__ = ("name", "listGoods")
    def __init__(self, name=""):
        self.name = name
        self.listGoods = []

@dataclass
class DataGoods:
    #__slots__ = ("name", "price", "unit")
    name:str
    price:int
    unit:str

shop = ShopClass("MyShop")
for _ in range(200):
    shop.listGoods.extend([
        DataGoods("", 20000, "RUB"),
        DataGoods("", 45000, "RUB"),
        DataGoods("", 2000, "RUB")
    ])
"""
# Run the snippet 60000 times and print the total elapsed time in seconds.
print(timeit.timeit(code, number=60000))
>>> 33.4812513
__slots__ (#__slots__ = ("name", "price", "unit") -> __slots__ = ("name", "price", "unit") # __slots__ = ("name", "listGoods") -> __slots__ = ("name", "listGoods")):
# Same measurement, after enabling the __slots__ lines inside `code`
print(timeit.timeit(code, number=60000))
>>> 28.535005599999998
As a result, the version with __slots__ is about 15% faster (28.5 s versus 33.5 s in this run).
, , , .
python , (, ), C/C++ .
, .
Cython
Cython , Python, . , Python , ( 20.000.000 ):
import time
class ShopClass:
    """Store used by the benchmark: a name and a list of goods, kept in slots."""

    # No per-instance __dict__: only these two attributes exist.
    __slots__ = ("name", "listGoods")

    def __init__(self, name=""):
        self.name = name
        self.listGoods = []
@dataclass
class DataGoods:
    """One product of the benchmark store, kept in slots."""

    # The slot names must match the annotated fields below.
    __slots__ = ("name", "price", "unit")
    name: str
    price: int
    unit: str
shop = ShopClass("MyShop")
t = time.time()
# 20,000,000 iterations x 3 products = 60,000,000 objects appended to the list.
for _ in range(200*100000):
    shop.listGoods.extend([
        DataGoods("", 20000, "RUB"),
        DataGoods("", 45000, "RUB"),
        DataGoods("", 2000, "RUB"),
    ])
# Elapsed wall-clock time of the fill loop, in seconds.
print(" PYTHON:", time.time()-t)
>>> PYTHON: 44.49887752532959
telephoneSum, televizorSum, tosterSum = 0, 0, 0
t = time.time()
# Add up the prices separately for each product name.
# NOTE(review): in this copy all three name literals are empty strings, so the
# first branch matches every item with an empty name and the two elif branches
# never run.  The original product names appear to have been lost; restore
# them here and where the goods are created.
for goods in shop.listGoods:
    if goods.name == "":
        telephoneSum += goods.price
    elif goods.name == "":
        televizorSum += goods.price
    elif goods.name == "":
        tosterSum += goods.price
# Elapsed wall-clock time of the summation loop, in seconds.
print(" PYTHON:", time.time() - t)
>>> PYTHON: 13.135360717773438
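As you can see, this takes quite a long time. Let's try to speed it up with Cython. First, install the cython_npm package: pip install cython-npm. Then create a folder named cython_code and, inside it, a file named cython_data.pyx (Cython programs use the .pyx extension).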
cython:
cdef class CythonShopClass:
    """Store with C-typed attributes: a name and a list of goods."""
    # Attributes declared with plain `cdef` are reachable from Cython code only.
    cdef str name
    cdef list listGoods

    def __init__(self, str name):
        self.name = name
        self.listGoods = []
cython , ( , , ). cdef < > < > . cython. my_def() cdef, def, python . .pyx (# cython: language_level=3).
# cython: language_level=3
#
cdef class CythonDataGoods:
    """One product with C-typed attributes: name, price and unit."""
    cdef str name
    cdef int price
    cdef str unit

    def __init__(self, str name, int price, str unit):
        self.name = name
        self.price = price
        self.unit = unit
cdef int c_testFunc() except -1:
    """Fill a store with 60,000,000 goods, sum their prices per name, print both timings.

    Returns 0 on success.  -1 is declared as the error value so that a Python
    exception raised inside the function reaches the caller.
    """
    cdef CythonShopClass shop
    cdef CythonDataGoods goods
    cdef int i
    # time.time() returns a float, so the timestamp must not be a C int.
    cdef double t
    # 64-bit accumulators: 20,000,000 items at 45000 each already exceed the
    # range of a 32-bit C int, which would wrap around silently.
    cdef long long telephoneSum, televizorSum, tosterSum
    # Imported here so the function does not rely on a module-level import.
    import time

    telephoneSum, televizorSum, tosterSum = 0, 0, 0
    shop = CythonShopClass("MyShop")
    t = time.time()
    for i in range(200*100000):
        shop.listGoods.extend([
            CythonDataGoods("", 20000, "RUB"),
            CythonDataGoods("", 45000, "RUB"),
            CythonDataGoods("", 2000, "RUB"),
        ])
    print(" CYTHON:", time.time()-t)
    t = time.time()
    # NOTE(review): all three name literals are empty strings in this copy, so
    # only the first branch ever runs; the original names appear to be lost.
    for goods in shop.listGoods:
        if goods.name == "":
            telephoneSum += goods.price
        elif goods.name == "":
            televizorSum += goods.price
        elif goods.name == "":
            tosterSum += goods.price
    print(" CYTHON:", time.time() - t)
    return 0
def my_def():
    """Python-callable wrapper: run c_testFunc() and return its result."""
    return c_testFunc()
main.py cython . :
from cython_npm.cythoncompile import export
from cython_npm.cythoncompile import install
import time
cython python
# NOTE(review): export() presumably builds the .pyx file into an importable
# module before the import below runs; confirm against the cython_npm docs.
export('cython_code/cython_data.pyx')
import cython_code.cython_data as cython_data
cython
# Run the Cython benchmark only when this file is executed as a script.
if __name__ == "__main__":
    a = cython_data.my_def()
. , . cython, , :
>>> CYTHON: 4.082242012023926
:
>>> CYTHON: 1.0513946056365967
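As you can see, filling the store now takes about 4 seconds instead of 44, i.e. it is about 11 times faster. Summing the prices takes about 1 second instead of 13, i.e. it is about 13 times faster.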
, cython - , , , . , , cython 100 .
Python
, - , . , , . , , . :
shop = ShopClass("MyShop")
t = time.time()
# Map an index 0..2 to the constructor arguments of one of the three products.
# NOTE(review): the dict literal is rebuilt on every call; it is left as is
# because the timing quoted for this snippet was measured with this exact code.
getGoods = lambda index: {0: ("", 20000, "RUB"),
1: ("", 45000, "RUB"),
2:("", 2000, "RUB")}.get(index)
# Build all 20,000,000 goods in one list comprehension, cycling through the products.
shop.listGoods = [DataGoods(*getGoods(i%3)) for i in range(200*100000)]
print(" PYTHON:", time.time()-t)
>>> PYTHON: 19.719463109970093
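That is more than 2 times faster than the original loop (about 19.7 s instead of 44.5 s), and it is still pure Python. List comprehensions are a simple Python tool that is well worth using.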
PyPy
, cython , ( ), . , . PyPy, python, JIT . PyPy , , . python PyPy .
PyPy . , cmd , pypy3.exe, . cmd :
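As a result, the computation that took about 19 seconds in regular Python took about 4.5 seconds under PyPy, i.e. it ran roughly 4 times faster.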
. , , python , .
. Numba, NumPy, Nim multiprocessing. , . , python .
Before choosing a tool for optimizing your code, first optimize the code itself in pure Python: get rid of deeply nested loops as far as possible, free memory explicitly, and drop objects you no longer need while the code is running. Do not expect that rewriting your code in another language will solve all your problems; learn to find the bottlenecks in your code and remove them algorithmically or with the features of the language itself.