Python のループ処理の最適化

元ネタ: このページは削除されました

これが「やっぱPythonですって」と言いたい人の一助になれば完璧。

さくらのブログ

これは素晴らしい結果です(^ ^;;
しかしながら「ランダム数値リスト作成」で僅かに Ruby に遅れを取っています。
以下がそのソースコードになります。

    list = []
    start = time.time()
        for i in range( cnt ):
           list.append( random.randint(0, 2147483647) )
    print( time.time() - start )

Python プログラムソース
単純にリストに値を追加していく処理の比較と言うのであれば正しい処理です。しかし、さらに Python にはリスト内包表記という素晴らしい仕組みがあります。

Learning Python 3rd Edition, Generator Expression Iterators Meet List Comprehensions によると、実行するループ処理の特性にも依りますが、for ループよりもリスト内包表記の方が最適化されているから速いとあります。

もし可能であるならば、上述のソースコードを以下のように修正して比較してみてほしいです。

    start = time.time()
    list = [ random.randint(0, 2147483647) for i in range(cnt) ]
    print( time.time() - start )

以下が、私の環境(MacOS X 10.5.7, 2GHz Core2Duo, 2GB 667MHz DDR2)におけるプロファイル結果になります。python3 ではループ処理そのものも高速化されているのが分かります。

python 2.6.2
$ python profile_list_operation_26.py 100000
it took 5.347 seconds to run for_statement 10 times 
it took 5.278 seconds to run list_comprehension 10 times 
it took 5.643 seconds to run map_function 10 times 
it took 5.319 seconds to run generator_expression 10 times 

python 3.1
$ python3.1 profile_list_operation_31.py 100000
it took 4.314 seconds to run for_statement 10 times 
it took 4.077 seconds to run list_comprehension 10 times 
it took 4.456 seconds to run map_function 10 times 
it took 4.151 seconds to run generator_expression 10 times  

python2.x のソースコード

#!/bin/env python

import sys
import time
import random

rand_max = 2147483647

def main():
    get_profile(10, int(sys.argv[1]), for_statement, 
        list_comprehension, map_function, generator_expression)

def get_profile(pro_times, max, *funcs):
    totals = {}
    for func in funcs:
        totals[func] = 0.0
        starttime = time.clock()
        for x in range(pro_times):
            apply(func, [max])
        stoptime = time.clock()
        elapsed = stoptime - starttime
        totals[func] = totals[func] + elapsed
    for func in funcs:
        print "it took %.3f seconds to run %s %d times " % (
                totals[func], func.__name__, pro_times)

def for_statement(max):
    l = []
    for i in range(max):
        l.append(random.randint(0, rand_max))

def list_comprehension(max):
    l = [ random.randint(0, rand_max) for i in range(max) ]

def map_function(max):
    l = map((lambda x: random.randint(0, rand_max)), range(max))

def generator_expression(max):
    l = list(random.randint(0, rand_max) for x in range(max))

if __name__ == '__main__':
    main()

python3 のソースコード

#!/bin/env python

import sys
import time
import random

rand_max = 2147483647

def main():
    get_profile(10, int(sys.argv[1]), for_statement, 
        list_comprehension, map_function, generator_expression)

def get_profile(pro_times, max, *funcs):
    totals = {}
    for func in funcs:
        totals[func] = 0.0
        starttime = time.clock()
        for x in range(pro_times):
            func(*[max])
        stoptime = time.clock()
        elapsed = stoptime - starttime
        totals[func] = totals[func] + elapsed
    for func in funcs:
        print("it took %.3f seconds to run %s %d times " % (
                totals[func], func.__name__, pro_times))

def for_statement(max):
    l = []
    for i in range(max):
        l.append(random.randint(0, rand_max))

def list_comprehension(max):
    l = [ random.randint(0, rand_max) for i in range(max) ]

def map_function(max):
    l = list(map((lambda x: random.randint(0, rand_max)), list(range(max))))

def generator_expression(max):
    l = list(random.randint(0, rand_max) for x in range(max))

if __name__ == '__main__':
    main()

2to3 の diff。

$ 2to3 -w -n profile_list_operation_31.py 
RefactoringTool: Skipping implicit fixer: buffer
RefactoringTool: Skipping implicit fixer: idioms
RefactoringTool: Skipping implicit fixer: set_literal
RefactoringTool: Skipping implicit fixer: ws_comma
--- profile_list_operation_26.py (original)
+++ profile_list_operation_31.py (refactored)
@@ -16,13 +16,13 @@
         totals[func] = 0.0
         starttime = time.clock()
         for x in range(pro_times):
-            apply(func, [max])
+            func(*[max])
         stoptime = time.clock()
         elapsed = stoptime - starttime
         totals[func] = totals[func] + elapsed
     for func in funcs:
-        print "it took %.3f seconds to run %s %d times " % (
-                totals[func], func.__name__, pro_times)
+        print("it took %.3f seconds to run %s %d times " % (
+                totals[func], func.__name__, pro_times))
 
 def for_statement(max):
     l = []
@@ -33,7 +33,7 @@
     l = [ random.randint(0, rand_max) for i in range(max) ]
 
 def map_function(max):
-    l = map((lambda x: random.randint(0, rand_max)), range(max))
+    l = list(map((lambda x: random.randint(0, rand_max)), list(range(max))))
 
 def generator_expression(max):
     l = list(random.randint(0, rand_max) for x in range(max))
RefactoringTool: Files that were modified:
RefactoringTool: profile_list_operation_31.py