Python & time and memory optimization

Introduction

Often the execution speed of python is poor. Some people refuse to use python for this very reason, but there are several ways to optimize python code for both time and memory usage. 





I would like to share several methods that help in real-life problems. I am using win10 x64.





Save memory with Python

Let's take a realistic example. Suppose we have a store with a list of goods, and we need to work with these goods. Ideally all the goods would stay in a database, but suppose something went wrong and we decided to load all of them into memory in order to process them. A reasonable question then arises: will we have enough memory to work with so many goods?





Let's first create a class responsible for our store. It will have only 2 fields: name and listGoods, which are responsible for the name of the store and the list of products, respectively.





class ShopClass:
    """A store: a display name plus the list of goods it holds."""

    def __init__(self, name=""):
        # Each store gets its own fresh list, so stores never share goods.
        self.name, self.listGoods = name, list()
      
      



Now we want to fill the store with goods (namely, fill in the listGoods field). To do this, we will create a class responsible for information about one product (I use dataclass for such examples).





# dataclass is part of the standard library since Python 3.7.
# On Python 3.6 install the backport first:
# pip install dataclasses
# The import must be active, otherwise @dataclass below raises NameError.
from dataclasses import dataclass


@dataclass
class DataGoods:
    """Information about one product in the store."""

    # Product name.
    name: str
    # Product price as an integer amount.
    price: int
    # Unit the price is expressed in (the examples use "RUB").
    unit: str
      
      



Now let's fill the store with goods. We add 3 goods to the list 200 times (600 goods in total):





# Build the demo store: 200 batches of the same three goods (600 objects).
shop = ShopClass("MyShop")
for _ in range(200):
    # A new list of new DataGoods instances is appended on every pass.
    shop.listGoods += [
        DataGoods("", 20000, "RUB"),
        DataGoods("", 45000, "RUB"),
        DataGoods("", 2000, "RUB"),
    ]
      
      



Next, let's measure how much memory the store occupies (using the pympler library):





from pympler import asizeof
# Print the size that pympler's asizeof reports for the whole shop object.
print(" :", asizeof.asizeof(shop))
>>>  : 106648
      
      



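As you can see, the store occupies about 106 KB of memory even though it contains only 600 goods; with a much larger number of goods the footprint grows proportionally.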





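Let's look at why. Python objects are dynamic: new attributes can be added to an instance at any time. To make this possible, python stores the attributes of each instance in a per-instance dictionary, __dict__, and that dictionary costs memory. For example: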





shop = ShopClass("MyShop")
# The attributes set in __init__ are stored in the instance's __dict__.
print(shop.__dict__)  
>>> {'name': 'MyShop', 'listGoods': []}

# Assigning an attribute that __init__ never set adds a new __dict__ entry.
shop.city = ""
print(shop.__dict__) 
>>> {'name': 'MyShop', 'listGoods': [], 'city': ''}
      
      



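In our case we do not need this flexibility, because the set of attributes is known in advance. For such cases python provides __slots__, which replaces __dict__. When __slots__ is declared, no per-instance __dict__ is created, so less memory is used. Let's rewrite the classes: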





class ShopClass:
    """Store with a fixed attribute set (no per-instance __dict__)."""

    # Only these two attributes may ever be set on an instance.
    __slots__ = ("name", "listGoods")

    def __init__(self, name=""):
        self.name, self.listGoods = name, list()
@dataclass
class DataGoods:
    """One product; slotted, so instances carry no __dict__."""

    __slots__ = ("name", "price", "unit")

    name: str
    price: int
    unit: str
      
      



Let's measure the memory again.





from pympler import asizeof
# Print the size that pympler's asizeof reports for the whole shop object.
print(" :", asizeof.asizeof(shop))
>>>  : 43904
      
      



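As you can see, the memory used has dropped by a factor of about 2.4 (106648 vs. 43904 bytes; the exact figure depends on the Python version). The trade-off is that attributes can no longer be added dynamically; trying to do so raises an error: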





shop = ShopClass("MyShop")
# "city" is not listed in the class's __slots__, so the assignment fails.
shop.city = ""
>>> AttributeError: 'ShopClass' object has no attribute 'city'
      
      



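Besides saving memory, dropping __dict__ also lets python work with the attributes faster. Let's measure this with timeit, first without __slots__ (that is, with __dict__):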





import timeit

# Benchmark body executed by timeit. NOTE: the class definitions are part of
# the timed code, so every run also pays for creating both classes, not only
# for filling the list.
code = """
class ShopClass:
    #__slots__ = ("name", "listGoods")
    def __init__(self, name=""):
        self.name = name
        self.listGoods = []
@dataclass
class DataGoods:
    #__slots__ = ("name", "price", "unit")
    name:str
    price:int
    unit:str
shop = ShopClass("MyShop")
for _ in range(200):
    shop.listGoods.extend([
        DataGoods("", 20000, "RUB"),
        DataGoods("", 45000, "RUB"),
        DataGoods("", 2000, "RUB")
])
"""
# timeit executes `code` in its own namespace, so `dataclass` must be imported
# there through `setup`; without it the benchmark fails with NameError.
print(timeit.timeit(code, setup="from dataclasses import dataclass", number=60000))
>>> 33.4812513
      
      



Now the same measurement with __slots__ enabled (uncomment the lines: #__slots__ = ("name", "price", "unit") -> __slots__ = ("name", "price", "unit") and #__slots__ = ("name", "listGoods") -> __slots__ = ("name", "listGoods")):





# Same benchmark, run after uncommenting the __slots__ lines inside `code`.
# `setup` imports dataclass into the namespace where timeit executes `code`.
print(timeit.timeit(code, setup="from dataclasses import dataclass", number=60000))
>>> 28.535005599999998
      
      



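So with __slots__ the code runs about 15% faster (33.5 s vs. 28.5 s; the exact figure depends on the machine and the Python version).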





, , , .





python , (, ), C/C++ .





, .





Cython

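Cython is a superset of Python that is compiled to C, which makes it possible to speed up Python code considerably. First, let's see how long pure Python takes to fill the store (20,000,000 iterations) and then to sum up the prices of the goods: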





import time
from dataclasses import dataclass


class ShopClass:
    """Store with a fixed attribute set: a name and its list of goods."""

    __slots__ = ("name", "listGoods")

    def __init__(self, name=""):
        self.name = name
        self.listGoods = []


@dataclass
class DataGoods:
    """One product; slotted so that each instance carries no __dict__."""

    __slots__ = ("name", "price", "unit")
    name: str
    price: int
    unit: str


shop = ShopClass("MyShop")

# Benchmark 1: fill the store (20,000,000 iterations, 3 goods each).
# The three goods need distinct names, otherwise the per-name sums below
# cannot tell them apart.
t = time.time()
for _ in range(200*100000):
    shop.listGoods.extend([
        DataGoods("Telephone", 20000, "RUB"),
        DataGoods("Television", 45000, "RUB"),
        DataGoods("Toaster", 2000, "RUB"),
    ])
print("   PYTHON:", time.time()-t)
# >>>    PYTHON: 44.49887752532959

# Benchmark 2: walk the whole list and total the prices per product name.
telephoneSum, televizorSum, tosterSum = 0, 0, 0
t = time.time()
for goods in shop.listGoods:
    if goods.name == "Telephone":
        telephoneSum += goods.price
    elif goods.name == "Television":
        televizorSum += goods.price
    elif goods.name == "Toaster":
        tosterSum += goods.price
print("    PYTHON:", time.time() - t)
# >>>     PYTHON: 13.135360717773438
      
      



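As you can see, this takes quite a while. Now let's do the same with cython. First install the cython_npm package: pip install cython-npm. Then create a folder cython_code containing the file cython_data.pyx (cython source files use the .pyx extension).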





Let's rewrite the store class in cython:





cdef class CythonShopClass:
   # Store as a Cython extension type: both attributes have declared types.
   cdef str name
   cdef list listGoods

   def __init__(self, str name):
       # Keep the given name and start with an empty goods list.
       self.name = name
       self.listGoods = []
      
      



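In cython every variable should be given a type (this is not mandatory, but without it the speed-up will be small). Variables are declared as cdef <type> <name>. The same applies to functions in cython. We wrap the cdef function in my_def(), declared with def, so that it can be called from python. Also add the directive # cython: language_level=3 at the top of the .pyx file.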





# cython: language_level=3
# One product: name, price and unit, each with a declared type.
cdef class CythonDataGoods:
   cdef str name
   cdef int price
   cdef str unit
   def __init__(self, str name, int price, str unit):
       # Store the three constructor arguments in the typed attributes.
       self.name = name
       self.price = price
       self.unit = unit
cdef int c_testFunc():
    # Benchmark: fill a CythonShopClass with goods (20,000,000 iterations,
    # 3 goods each), then total the prices per product name, printing the
    # elapsed time of each phase. Always returns 0.
    cdef CythonShopClass shop
    cdef CythonDataGoods goods
    cdef int i
    # time.time() returns a float; holding it in a C int would truncate it
    # and distort the measured durations.
    cdef double t
    # 20,000,000 * 45000 does not fit in a 32-bit C int, so use 64-bit sums.
    cdef long long telephoneSum, televizorSum, tosterSum
    # Local import: this function is the only user of `time` in the module.
    import time

    telephoneSum = televizorSum = tosterSum = 0
    shop = CythonShopClass("MyShop")
    t = time.time()
    for i in range(200*100000):
       # Distinct names, so that each branch of the summation below is reachable.
       shop.listGoods.extend([
           CythonDataGoods("Telephone", 20000, "RUB"),
           CythonDataGoods("Television", 45000, "RUB"),
           CythonDataGoods("Toaster", 2000, "RUB")
       ])
    print("   CYTHON:", time.time()-t)
    t = time.time()
    for goods in shop.listGoods:
        if goods.name == "Telephone":
            telephoneSum += goods.price
        elif goods.name == "Television":
            televizorSum += goods.price
        elif goods.name == "Toaster":
            tosterSum += goods.price
    print("    CYTHON:", time.time() - t)
    return 0
def my_def():
    """Python-callable entry point: run c_testFunc() and hand back its result."""
    return c_testFunc()
      
      



In main.py we compile and call the cython code. First, the imports:





from cython_npm.cythoncompile import export
from cython_npm.cythoncompile import install
import time
      
      



Compile the cython code and import it into python:





# Hand the .pyx file to cython_npm's export() (presumably this builds the
# extension module - confirm against the cython_npm docs), then import it.
export('cython_code/cython_data.pyx')
import cython_code.cython_data as cython_data
      
      



Run the cython function:





if __name__ == "__main__":
   # Run the cython benchmark; my_def() passes back c_testFunc()'s result.
   a = cython_data.my_def()
      
      



Now run main.py. With cython, filling the store takes:





>>>    CYTHON: 4.082242012023926
      
      



And summing up the prices takes:





>>>     CYTHON: 1.0513946056365967
      
      



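So filling the store went from 44 seconds to 4 seconds, about 11 times faster, and summing the prices went from 13 seconds to 1 second, about 13 times faster.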





, cython - , , , . , , cython 100 .





Python

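Sometimes a program can be sped up without any extra tools, simply by rewriting the code. For example, the store can be filled with a list comprehension instead of a loop: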





shop = ShopClass("MyShop")
t = time.time()
# Lookup table built once, outside the 20,000,000-iteration comprehension.
_GOODS_BY_INDEX = {
    0: ("", 20000, "RUB"),
    1: ("", 45000, "RUB"),
    2: ("", 2000, "RUB"),
}


def getGoods(index):
    """Return the (name, price, unit) tuple for *index*, or None if unknown."""
    return _GOODS_BY_INDEX.get(index)


# i % 3 cycles through the three table entries in order.
shop.listGoods = [DataGoods(*getGoods(i % 3)) for i in range(200*100000)]
print("   PYTHON:", time.time()-t)
>>>     PYTHON: 19.719463109970093
      
      



This is roughly 2 times faster than the loop version (about 19.7 s instead of 44.5 s), and it is still pure python.





PyPy

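Cython gives a good speed-up, but it requires rewriting the code (adding types), which is not always convenient. There is another option: PyPy, an alternative implementation of python with a JIT compiler. Ordinary python code can be run with PyPy as it is.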





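Installing PyPy is simple: download and unpack it, open cmd in the folder that contains pypy3.exe, and run the script with it from cmd: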





As a result, the 19 seconds it took in python became 4.5 seconds, about 4 times faster.





. , , python , .





. Numba, NumPy, Nim multiprocessing. , . , python .





Before choosing a tool for optimizing your code, first optimize it in pure python: get rid of deeply nested loops as far as possible, free memory explicitly, and remove objects that are no longer needed while the code runs. Do not expect that rewriting your code in another language will solve all your problems; learn to find the bottlenecks in your code and optimize them algorithmically or with features of the language itself.








All Articles