-
Notifications
You must be signed in to change notification settings - Fork 2
/
find.py
executable file
·191 lines (171 loc) · 7.37 KB
/
find.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Functions for finding files and dirs
"""
import logging
# Module-level logger shared by the functions in this file; registered under the fixed name "find".
logger = logging.getLogger("find")
# Emitted once at import time so that module loading is visible in debug-level logs.
logger.debug("loading find module")
import os
import sys
import itertools
import fnmatch
from collections import defaultdict
def find(search_dir, inclusion_patterns = ('*',), exclusion_patterns = (), search_type = 'all', num_limit = None, level_limit = None, match_mode = "any"):
    """
    Function to search for files and directories

    Thin list-returning wrapper around `find_gen`; all matching logic lives there.

    Parameters
    ----------
    search_dir: str
        path to the directory in which to search for files and subdirectories
    inclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for inclusion in match output
    exclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for exclusion from match output
    search_type:
        'all', 'file', or 'dir'; type of items to find
    num_limit: int
        the number of matches to return; use `None` for no limit. Values of zero or less give an empty list
    level_limit: int
        the number of directory levels to recurse; 0 is parent dir only
    match_mode:
        'any' or 'all'; matches any of the provided inclusion_patterns, or all of them

    Returns
    -------
    list
        a list of matching file or directory paths
    """
    matches = find_gen(search_dir = search_dir, inclusion_patterns = inclusion_patterns, exclusion_patterns = exclusion_patterns, search_type = search_type, level_limit = level_limit, match_mode = match_mode)
    if num_limit is not None:
        # islice stops pulling from the generator once the limit is reached, so the
        # rest of the directory tree is never walked; clamp at 0 because islice
        # rejects negative stops, and a negative limit has always meant "no matches"
        matches = itertools.islice(matches, max(int(num_limit), 0))
    return list(matches)
def find_gen(search_dir, inclusion_patterns = ('*',), exclusion_patterns = (), search_type = 'all', level_limit = None, match_mode = "any"):
    """
    Generator function to return file matches. Used internally by `find`

    Walks `search_dir` top-down with `os.walk`, filters the names found in each
    directory through `super_filter`, and yields the surviving names joined onto
    the directory they were found in.

    Parameters
    ----------
    search_dir: str
        path to the directory in which to search for files and subdirectories
    inclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for inclusion in match output
    exclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for exclusion from match output
    search_type:
        'all', 'file', or 'dir'; type of items to find
    level_limit: int
        the number of directory levels to recurse; 0 is parent dir only
    match_mode:
        'any' or 'all'; matches any of the provided inclusion_patterns, or all of them

    Yields
    ------
    str
        path of each matching file or directory

    Raises
    ------
    SystemExit
        if `search_type` is not 'all', 'file', or 'dir'. Because this is a
        generator, the check runs when iteration starts, not when the
        function is called.
    """
    # validate once up front rather than on every directory visited
    if search_type not in ('all', 'dir', 'file'):
        logger.error("Search type '{0}' not valid, exiting script".format(search_type))
        # non-zero status: this is an error exit
        sys.exit(1)

    # drop trailing separators, but keep a single one when the path consists only
    # of separators (the filesystem root); stripping that to '' would make
    # os.walk yield nothing
    search_dir = search_dir.rstrip(os.path.sep) or search_dir[:1]
    max_depth = None if level_limit is None else int(level_limit)

    for root, dirs, files in os.walk(search_dir):
        # choose which items to search
        if search_type == 'all':
            items = dirs + files
        elif search_type == 'dir':
            items = dirs
        else:
            items = files

        # yield the results
        for item in super_filter(names = items, inclusion_patterns = inclusion_patterns, exclusion_patterns = exclusion_patterns, match_mode = match_mode):
            yield os.path.join(root, item)

        # check for a level limit
        if max_depth is not None:
            # depth of `root` below `search_dir`; measured on the relative path
            # because counting separators in the absolute path cannot tell the
            # filesystem root apart from its direct children
            rel_root = os.path.relpath(root, search_dir)
            depth = 0 if rel_root == os.curdir else rel_root.count(os.path.sep) + 1
            if depth >= max_depth:
                # emptying `dirs` in place stops os.walk from descending further
                del dirs[:]
def super_filter(names, inclusion_patterns = ('*',), exclusion_patterns = (), match_mode = "any"):
    """
    Enhanced version of `fnmatch.filter()` that accepts multiple inclusion and exclusion patterns.

    Filter the input names by choosing only those that are matched by
    `inclusion_patterns` (any or all of them, per `match_mode`) _and_ not by
    any pattern in `exclusion_patterns`.

    Parameters
    ----------
    names: iterable
        the names to filter; must be hashable
    inclusion_patterns: list or tuple
        patterns a name has to match to be kept
    exclusion_patterns: list or tuple
        patterns that remove a name from the output; a match on any single one is enough
    match_mode:
        'any' or 'all'; applies to `inclusion_patterns` only

    Yields
    ------
    each kept name once, in the order it first appears in `names`

    Adapted from:
    https://codereview.stackexchange.com/questions/74713/filtering-with-multiple-inclusion-and-exclusion-patterns
    """
    # materialize so `names` can be scanned twice even if it is a one-shot iterator
    names = list(names)
    # exclusion is always "any": one matching exclusion pattern is enough to drop a name
    excluded = set(multi_filter(names, patterns = exclusion_patterns, match_mode = "any"))
    # `seen` keeps the output free of duplicates while preserving input order
    seen = set()
    for item in multi_filter(names, patterns = inclusion_patterns, match_mode = match_mode):
        if item in excluded or item in seen:
            continue
        seen.add(item)
        yield item
def multi_filter(names, patterns, match_mode = "any"):
    """
    Generator function which yields the names whose basename matches the given patterns.

    Parameters
    ----------
    names: iterable
        names (or paths) to test; only the basename of each is matched
    patterns: str, list or tuple
        a single `fnmatch` pattern, or a collection of them; an empty collection matches nothing
    match_mode:
        'any' or 'all'; whether one matching pattern is enough or every pattern has to match.
        Ignored when `patterns` is a single string. Any other value matches nothing.
    """
    if isinstance(patterns, str):
        # in case a single string was passed as a pattern
        def is_match(base):
            return fnmatch.fnmatch(base, patterns)
    elif patterns and match_mode == 'any':
        def is_match(base):
            return any(fnmatch.fnmatch(base, candidate) for candidate in patterns)
    elif patterns and match_mode == 'all':
        def is_match(base):
            return all(fnmatch.fnmatch(base, candidate) for candidate in patterns)
    else:
        # no patterns, or an unrecognised match_mode: nothing is yielded
        def is_match(base):
            return False

    for entry in names:
        if is_match(os.path.basename(entry)):
            yield entry
# deprecated
def find_files(search_dir, search_filename):
    """
    deprecated function; walks `search_dir` recursively and returns a list with the
    path of every file whose name is exactly `search_filename`
    """
    logger.debug('Now searching for file "{0}" in directory {1}'.format(search_filename, search_dir))
    hits = [
        os.path.join(parent, filename)
        for parent, _subdirs, filenames in os.walk(search_dir)
        for filename in filenames
        if filename == search_filename
    ]
    logger.debug('Found {0} matches'.format(len(hits)))
    return hits
def walklevel(some_dir, level=1):
    """
    deprecated generator that walks a directory tree down to a given depth and
    yields the path of every subdirectory and file it finds

    For each directory visited, its subdirectories are yielded first and its
    files second. `some_dir` has to be an existing directory; this is checked
    with an `assert`.

    Examples
    ----------
    Example usage::

        file_list = []
        for item in pf.walklevel(some_dir):
            if (item.endswith('my_file.txt') and os.path.isfile(item) ):
                file_list.append(item)

    """
    top = some_dir.rstrip(os.path.sep)
    assert os.path.isdir(top)
    base_seps = top.count(os.path.sep)
    for parent, subdirs, filenames in os.walk(top):
        for entry in subdirs + filenames:
            yield os.path.join(parent, entry)
        # once `parent` sits `level` separators below the start dir, empty the
        # list os.walk recurses from so it does not go any deeper
        if base_seps + level <= parent.count(os.path.sep):
            subdirs[:] = []