|
@@ -0,0 +1,247 @@
|
|
1
|
+#!/usr/bin/env python
|
|
2
|
+# -*- coding: utf-8 -*-
|
|
3
|
+#
|
|
4
|
+"""
|
|
5
|
+fstools (Filesystem Tools)
|
|
6
|
+==========================
|
|
7
|
+
|
|
8
|
+**Author:**
|
|
9
|
+
|
|
10
|
+* Dirk Alders <sudo-dirk@mount-mockery.de>
|
|
11
|
+
|
|
12
|
+**Description:**
|
|
13
|
+
|
|
14
|
+ This module provides functions for handling files and paths.
|
|
15
|
+
|
|
16
|
+**Submodules:**
|
|
17
|
+
|
|
18
|
+* :mod:`mmod.module.sub1`
|
|
19
|
+* :class:`mmod.module.sub2`
|
|
20
|
+* :func:`mmod.module.sub2`
|
|
21
|
+
|
|
22
|
+**Unittest:**
|
|
23
|
+
|
|
24
|
+ See also the :download:`unittest <../../fstools/_testresults_/unittest.pdf>` documentation.
|
|
25
|
+"""
|
|
26
|
# NOTE(review): presumably lists the modules this module depends on; it is
# empty here - confirm which tooling consumes it.
__DEPENDENCIES__ = []
|
|
27
|
+
|
|
28
|
+import glob
|
|
29
|
+import hashlib
|
|
30
|
+import logging
|
|
31
|
+from functools import partial
|
|
32
|
+import hmac
|
|
33
|
+import os
|
|
34
|
+import sys
|
|
35
|
+import time
|
|
36
|
+
|
|
37
|
+
|
|
38
|
# Name of the logger that is used for all log output of this module.
logger_name = 'FSTOOLS'
logger = logging.getLogger(logger_name)


# The text looks like LaTeX markup ("{\tt ...}" and "\_"). Both backslashes
# are escaped explicitly, because a bare '\_' is an invalid escape sequence
# that newer Python versions warn about. The resulting string is unchanged.
__DESCRIPTION__ = """The Module {\\tt %s} is designed to help on all issues with files and folders.
For more Information read the documentation.""" % __name__.replace('_', '\\_')
"""The Module Description"""
__INTERPRETER__ = (2, 3)
"""The Tested Interpreter-Versions"""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
def uid(pathname, max_staleness=3600):
    """
    Return a fingerprint built from the metadata of a file or directory tree.

    :param str pathname: File or directory the fingerprint is generated for.
    :param int max_staleness: Interval in seconds used by the fallback for
                              entries whose ``st_ino`` is reported as 0.
                              A false value (e.g. None) disables the fallback.
    :returns: Hex digest of an HMAC-SHA1 (empty key) over the collected
              stat values.
    :rtype: str

    .. note:: Only metadata is evaluated (mtime, inode number and size), the
              file content is never read. A modification that leaves all of
              these values untouched is therefore not detected.

    .. note:: For a directory, all directories and files below it are
              collected with :func:`dirlist` and :func:`filelist`, sorted
              and stat-ed one by one.

    .. note:: An entry that can not be stat-ed contributes ``None`` to the
              fingerprint; a digest is returned in that case as well.

    Fallback for a missing inode number: if ``st_ino`` is 0 and the entry is
    older than ``max_staleness`` seconds, the start of the current
    ``max_staleness`` interval is added to the fingerprint instead of the
    truncated mtime. The result then changes once per interval, so that an
    undetected modification is not hidden forever.
    """
    if os.path.isdir(pathname):
        targets = sorted(dirlist(pathname) + filelist(pathname))
    else:
        targets = [pathname]

    signature = []
    for target in targets:
        try:
            info = os.stat(target)
        except (IOError, OSError):
            # A constant marker is sufficient here: if stat() fails
            # permanently, the entry is most likely not readable anyway.
            signature.append(None)
            continue
        coarse_mtime = int(info.st_mtime)
        if max_staleness and not info.st_ino:
            # No inode number available (st_ino is 0) - apply the
            # staleness fallback described in the docstring.
            current = int(time.time())
            if current >= info.st_mtime + max_staleness:
                # Identical value for the whole max_staleness interval.
                coarse_mtime = int(current / max_staleness) * max_staleness
        signature.append((
            info.st_mtime,  # granularity depends on the file system
            info.st_ino,    # changes if the file is replaced by a rename
            info.st_size,   # changes on many updates, not sufficient alone
            coarse_mtime,   # fallback value, see above
        ))

    # hmac needs str on Python 2 and bytes on Python 3.
    key = ''
    message = repr(signature)
    if sys.version_info >= (3, 0):
        key = b''
        message = bytes(message, 'latin-1')
    return hmac.new(key, message, hashlib.sha1).hexdigest()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
def uid_filelist(path='.', expression='*', rekursive=True):
    """
    Return a fingerprint of the content of all files matching an expression.

    The files are collected with :func:`filelist` and processed sorted by
    name. For each file the MD5 hex digest of its content is calculated and
    fed into an overall MD5 hash.

    :param str path: folder which is the basepath for searching files.
    :param str expression: expression to fit including shell-style wildcards.
    :param bool rekursive: search all subfolders if True.
    :returns: MD5 hex digest over the MD5 hex digests of all files.
    :rtype: str

    .. note:: The file names themselves are not hashed; they only determine
              the order in which the file contents are processed.

    .. note:: MD5 is used here to detect changes only. It is not suitable
              for any security related purpose.
    """
    # Files are read in chunks, so that large files are never loaded into
    # memory completely. The chunk size has no influence on the digest.
    chunk_size = 64 * 1024
    overall = hashlib.md5()
    for filename in sorted(filelist(path, expression, rekursive)):
        single = hashlib.md5()
        with open(filename, 'rb') as fh:
            for chunk in iter(partial(fh.read, chunk_size), b''):
                single.update(chunk)
        # The hex digest is pure ASCII; encoding it works on Python 2 and 3.
        overall.update(single.hexdigest().encode('ascii'))
    return overall.hexdigest()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
def filelist(path='.', expression='*', rekursive=True):
    """
    Return a list of the files below a given path.

    :param str path: folder which is the basepath for searching files.
    :param str expression: expression to fit including shell-style wildcards.
    :param bool rekursive: search all subfolders if True.
    :returns: list of filenames including the path
    :rtype: list

    .. note:: The returned filenames could be relative paths depending on
              the argument path.

    .. note:: Subfolders which are symbolic links are not searched.

    .. note:: If path does not exist or is not a folder, a warning is logged
              and an empty list is returned.

    >>> for filename in filelist(path='.', expression='*.py*', rekursive=True):
    ...     print(filename)
    ./__init__.py
    ./__init__.pyc
    """
    if not os.path.exists(path):
        logger.warning('FILELIST: path (%s) does not exist - empty filelist will be returned', path)
        return list()
    if not os.path.isdir(path):
        # os.listdir() would raise for a regular file, so stop here.
        logger.warning('FILELIST: path (%s) is not a directory - empty filelist will be returned', path)
        return list()
    logger.debug('FILELIST: path (%s) exists - looking for files to append', path)
    li = [name for name in glob.glob(os.path.join(path, expression)) if os.path.isfile(name)]
    if rekursive:
        for entry in os.listdir(path):
            directory = os.path.join(path, entry)
            # Symbolic links are skipped to avoid walking link loops.
            if os.path.isdir(directory) and not os.path.islink(directory):
                li.extend(filelist(directory, expression))
    return li
|
|
178
|
+
|
|
179
|
+
|
|
180
|
def dirlist(path='.', rekursive=True):
    """
    Return a list of the directories below a given path.

    :param str path: folder which is the basepath for searching directories.
    :param bool rekursive: search all subfolders if True.
    :returns: list of directory names including the path
    :rtype: list

    .. note:: The returned directory names could be relative paths depending
              on the argument path.

    .. note:: A subfolder which is a symbolic link is part of the result,
              but it is not searched. This matches :func:`filelist` and
              avoids walking link loops.

    .. note:: If path does not exist or is not a folder, a warning is logged
              and an empty list is returned.

    >>> for dirname in dirlist(path='..', rekursive=True):
    ...     print(dirname)
    ../caching
    ../fstools
    """
    li = list()
    if not os.path.exists(path):
        logger.warning('DIRLIST: path (%s) does not exist - empty filelist will be returned', path)
        return li
    if not os.path.isdir(path):
        # os.listdir() would raise for a regular file, so stop here.
        logger.warning('DIRLIST: path (%s) is not a directory - empty filelist will be returned', path)
        return li
    logger.debug('DIRLIST: path (%s) exists - looking for directories to append', path)
    for dirname in os.listdir(path):
        fulldir = os.path.join(path, dirname)
        if not os.path.isdir(fulldir):
            continue
        li.append(fulldir)
        if rekursive and not os.path.islink(fulldir):
            li.extend(dirlist(fulldir))
    return li
|
|
208
|
+
|
|
209
|
+
|
|
210
|
def is_writeable(path):
    """
    Report whether ``os.access`` grants write permission for a path.

    :param str path: File or directory to be checked.
    :returns: True if the path is writable, otherwise False. This is the
              result of ``os.access(path, os.W_OK)``.
    :rtype: bool
    """
    # The check is identical for files and directories.
    return os.access(path, os.W_OK)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
def mkdir(path):
    """
    Create a directory including all missing parent directories.

    :param str path: Directory to be created (made absolute before usage).
    :returns: True if path is a directory after the call, otherwise False
              (e.g. if path already exists as a regular file).
    :rtype: bool
    :raises OSError: If the directory can not be created.
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError:
            # Another process may have created the directory between the
            # check and the creation; that is not an error.
            if not os.path.isdir(path):
                raise
    return os.path.isdir(path)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
def open_locked_non_blocking(*args, **kwargs):
    """
    Open a file and acquire an exclusive lock on it without waiting.

    All arguments are passed unchanged to the builtin :func:`open`.

    :returns: The opened file object with an exclusive ``fcntl.lockf`` lock.
    :raises IOError, OSError: If the file can not be opened or the lock can
                              not be acquired (e.g. the file is locked).

    .. note:: The file is closed again, if the lock can not be acquired.

    .. warning:: This function depends on the module ``fcntl``.
    """
    # NOTE(review): fcntl is imported locally - presumably to keep the module
    # importable on platforms without fcntl; confirm before moving it.
    import fcntl
    locked_file_descriptor = open(*args, **kwargs)
    try:
        fcntl.lockf(locked_file_descriptor, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BaseException:
        # Do not leak the open file if locking fails.
        locked_file_descriptor.close()
        raise
    return locked_file_descriptor
|
|
239
|
+
|
|
240
|
+
|
|
241
|
def open_locked_blocking(*args, **kwargs):
    """
    Open a file and wait until an exclusive lock on it is acquired.

    All arguments are passed unchanged to the builtin :func:`open`.

    :returns: The opened file object with an exclusive ``fcntl.lockf`` lock.
    :raises IOError, OSError: If the file can not be opened or locking fails.

    .. note:: The file is closed again, if locking fails or is interrupted.

    .. warning:: The call blocks until the lock is available. This may end
                 in a deadlock, if the lock is never released.

    .. warning:: This function depends on the module ``fcntl``.
    """
    # NOTE(review): fcntl is imported locally - presumably to keep the module
    # importable on platforms without fcntl; confirm before moving it.
    import fcntl
    locked_file_descriptor = open(*args, **kwargs)
    try:
        fcntl.lockf(locked_file_descriptor, fcntl.LOCK_EX)
    except BaseException:
        # Do not leak the open file if locking fails or is interrupted.
        locked_file_descriptor.close()
        raise
    return locked_file_descriptor
|