再帰的にディレクトリを探索する

呼称: 再帰的なディレクトリ探索 + ファイルマッチング
目的: あるディレクトリ配下にある特定のファイルを抽出する
特徴: 正規表現を用いてファイルマッチングを行う
用例: 設定ファイルやモジュールの検索
備考: os.walk() は python 2.3 以上でのみ有効

やりたい事は以下のようなイメージ。

$ find /usr/lib/python2.4/xml -type f -name "*.py"

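search_path2file() の中で、filelist からパターンマッチしないファイルを削除するために、copy.copy() で予めリストをコピーしている。for ループで走査中の filelist から直接 remove() すると、後続の要素の位置がずれて一部の要素がスキップされ、意図した通りに動作しない。python における変数への代入はリファレンスのコピーであり、代入しただけでは同じリストを指すので、別のオブジェクトとしてコピーしたい場合に copy モジュールを使用する。なお、リスト内包表記でマッチした要素だけの新しいリストを作れば、コピーも remove() も不要になる。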

#!/bin/env python

import sys
import os

def search_path2file(path, pattern=None):
    """Recursively collect the paths of all files below ``path``.

    Args:
        path: Directory to traverse with os.walk().
        pattern: Optional regular expression. When given and non-empty,
            only paths that the expression matches are kept. The match is
            anchored at the start of the full joined path (re.match), not
            at the bare file name.

    Returns:
        A list of file paths built with os.path.join(root, name), in the
        order os.walk() yields them.
    """
    filelist = []
    for root, dirs, files in os.walk(path):
        filelist.extend([os.path.join(root, name) for name in files])

    if not pattern:
        return filelist

    # Narrow the search by building a new list of the matching paths.
    # This replaces the copy-then-remove() approach, where every
    # list.remove() call is a linear scan (quadratic overall).
    import re
    matcher = re.compile(pattern)
    return [found for found in filelist if matcher.match(found)]

if __name__ == '__main__':
    # os.walk() needs Python 2.3+. Compare the numeric version tuple:
    # comparing version *strings* is lexicographic and mis-orders
    # multi-digit components (e.g. '10.0' < '2.3').
    if sys.version_info[:2] < (2, 3):
        print('This script is available above python 2.3')
        sys.exit(0)

    # Single-argument print(...) emits the same output under the Python 2
    # print statement and the Python 3 print function.
    target_dir = '/usr/lib/python2.4/xml'
    print(''.join(['* ', target_dir, ' has as follows files']))
    for f in search_path2file(target_dir):
        print(f)

    print('-' * 40)

    # Raw string so the regex backslashes are never read as string escapes.
    repat = r'.*\.py\Z'
    print(''.join(['* ', target_dir, ' has as follows files mached ', repat]))
    for f in search_path2file(target_dir, repat):
        print(f)

実行結果。

* /usr/lib/python2.4/xml has as follows files
/usr/lib/python2.4/xml/__init__.pyo
/usr/lib/python2.4/xml/__init__.pyc
/usr/lib/python2.4/xml/__init__.py
/usr/lib/python2.4/xml/dom/minidom.pyo
/usr/lib/python2.4/xml/dom/xmlbuilder.pyo
/usr/lib/python2.4/xml/dom/pulldom.pyc
/usr/lib/python2.4/xml/dom/expatbuilder.pyo
/usr/lib/python2.4/xml/dom/minidom.py
/usr/lib/python2.4/xml/dom/NodeFilter.py
/usr/lib/python2.4/xml/dom/__init__.pyo
/usr/lib/python2.4/xml/dom/NodeFilter.pyo
/usr/lib/python2.4/xml/dom/domreg.pyc
/usr/lib/python2.4/xml/dom/NodeFilter.pyc
/usr/lib/python2.4/xml/dom/__init__.pyc
/usr/lib/python2.4/xml/dom/minicompat.pyc
/usr/lib/python2.4/xml/dom/xmlbuilder.py
/usr/lib/python2.4/xml/dom/pulldom.pyo
/usr/lib/python2.4/xml/dom/expatbuilder.pyc
/usr/lib/python2.4/xml/dom/xmlbuilder.pyc
/usr/lib/python2.4/xml/dom/minicompat.py
/usr/lib/python2.4/xml/dom/expatbuilder.py
/usr/lib/python2.4/xml/dom/domreg.pyo
/usr/lib/python2.4/xml/dom/__init__.py
/usr/lib/python2.4/xml/dom/minicompat.pyo
/usr/lib/python2.4/xml/dom/domreg.py
/usr/lib/python2.4/xml/dom/pulldom.py
/usr/lib/python2.4/xml/dom/minidom.pyc
/usr/lib/python2.4/xml/sax/_exceptions.py
/usr/lib/python2.4/xml/sax/expatreader.pyo
/usr/lib/python2.4/xml/sax/_exceptions.pyo
/usr/lib/python2.4/xml/sax/__init__.pyo
/usr/lib/python2.4/xml/sax/expatreader.py
/usr/lib/python2.4/xml/sax/handler.pyo
/usr/lib/python2.4/xml/sax/__init__.pyc
/usr/lib/python2.4/xml/sax/_exceptions.pyc
/usr/lib/python2.4/xml/sax/saxutils.py
/usr/lib/python2.4/xml/sax/handler.pyc
/usr/lib/python2.4/xml/sax/xmlreader.pyc
/usr/lib/python2.4/xml/sax/saxutils.pyo
/usr/lib/python2.4/xml/sax/__init__.py
/usr/lib/python2.4/xml/sax/saxutils.pyc
/usr/lib/python2.4/xml/sax/expatreader.pyc
/usr/lib/python2.4/xml/sax/xmlreader.pyo
/usr/lib/python2.4/xml/sax/handler.py
/usr/lib/python2.4/xml/sax/xmlreader.py
/usr/lib/python2.4/xml/parsers/__init__.pyo
/usr/lib/python2.4/xml/parsers/__init__.pyc
/usr/lib/python2.4/xml/parsers/__init__.py
/usr/lib/python2.4/xml/parsers/expat.py
/usr/lib/python2.4/xml/parsers/expat.pyo
/usr/lib/python2.4/xml/parsers/expat.pyc
----------------------------------------
* /usr/lib/python2.4/xml has as follows files mached .*\.py\Z
/usr/lib/python2.4/xml/__init__.py
/usr/lib/python2.4/xml/dom/minidom.py
/usr/lib/python2.4/xml/dom/NodeFilter.py
/usr/lib/python2.4/xml/dom/xmlbuilder.py
/usr/lib/python2.4/xml/dom/minicompat.py
/usr/lib/python2.4/xml/dom/expatbuilder.py
/usr/lib/python2.4/xml/dom/__init__.py
/usr/lib/python2.4/xml/dom/domreg.py
/usr/lib/python2.4/xml/dom/pulldom.py
/usr/lib/python2.4/xml/sax/_exceptions.py
/usr/lib/python2.4/xml/sax/expatreader.py
/usr/lib/python2.4/xml/sax/saxutils.py
/usr/lib/python2.4/xml/sax/__init__.py
/usr/lib/python2.4/xml/sax/handler.py
/usr/lib/python2.4/xml/sax/xmlreader.py
/usr/lib/python2.4/xml/parsers/__init__.py
/usr/lib/python2.4/xml/parsers/expat.py

リファレンス:
3.18 copy -- 浅いコピーおよび深いコピー操作
6.1.4 ファイルとディレクトリ