Browse Source

Initial fstools implementation

master
Dirk Alders 4 years ago
parent
commit
ada1f74d4c
1 changed files with 247 additions and 0 deletions
  1. 247
    0
      __init__.py

+ 247
- 0
__init__.py View File

@@ -0,0 +1,247 @@
1
+#!/usr/bin/env python
2
+# -*- coding: utf-8 -*-
3
+#
4
+"""
5
+fstools (Filesystem Tools)
6
+==========================
7
+
8
+**Author:**
9
+
10
+* Dirk Alders <sudo-dirk@mount-mockery.de>
11
+
12
+**Description:**
13
+
14
+    This module supports functions and classes to handle files and paths
15
+
16
+**Submodules:**
17
+
18
+* :mod:`mmod.module.sub1`
19
+* :class:`mmod.module.sub2`
20
+* :func:`mmod.module.sub2`
21
+
22
+**Unittest:**
23
+
24
+    See also the :download:`unittest <../../fstools/_testresults_/unittest.pdf>` documentation.
25
+"""
26
+__DEPENDENCIES__ = []
27
+
28
+import glob
29
+import hashlib
30
+import logging
31
+from functools import partial
32
+import hmac
33
+import os
34
+import sys
35
+import time
36
+
37
+
38
logger_name = 'FSTOOLS'
logger = logging.getLogger(logger_name)


# NOTE: '\\_' (an escaped backslash) is required here - the former '\_' was an
# invalid escape sequence, which raises a DeprecationWarning/SyntaxWarning on
# modern Python 3 while producing the identical string value.
__DESCRIPTION__ = """The Module {\\tt %s} is designed to help on all issues with files and folders.
For more Information read the documentation.""" % __name__.replace('_', '\\_')
"""The Module Description"""
__INTERPRETER__ = (2, 3)
"""The Tested Interpreter-Versions"""
47
+
48
+
49
def uid(pathname, max_staleness=3600):
    """
    Return a unique id (sha1 hmac hex string) for a given file or path.

    :param str pathname: File or path name for generation of the uid.
    :param int max_staleness: If a file or path is older than that, we may consider
                              it stale and return a different uid - this is a
                              dirty trick to work around changes never being
                              detected. Default is 3600 seconds, use None to
                              disable this trickery. See below for more details.
    :returns: A value that changes if the file changed.
    :rtype: str

    .. note:: Depending on the operating system capabilities and the way the
              file update is done, this function might return the same value
              even if the file has changed. It should be better than just
              using the file's mtime though.
              max_staleness tries to avoid the worst for these cases.

    .. note:: If this function is used for a path, it will stat all paths and
              files recursively.

    Using just the file's mtime to determine if the file has changed is
    not reliable - if file updates happen faster than the file system's
    mtime granularity, then the modification is not detectable because
    the mtime is still the same.

    This function tries to improve by using not only the mtime, but also
    other metadata values like file size and inode to improve reliability.

    For the calculation of this value, we of course only want to use data
    that we can get rather fast, thus we use file metadata, not file data
    (file content).

    >>> uid(__file__)                                   # doctest: +SKIP
    '16a65cc78e1344e596ef1c9536dab2193a402934'
    """
    if os.path.isdir(pathname):
        # for a folder, fold the metadata of everything below it into the id
        pathlist = dirlist(pathname) + filelist(pathname)
        pathlist.sort()
    else:
        pathlist = [pathname]
    # renamed from "uid" - the original local shadowed the function name;
    # repr() of the collected tuples is independent of the variable name,
    # so the resulting digest is unchanged
    stat_info = []
    for element in pathlist:
        try:
            st = os.stat(element)
        except (IOError, OSError):
            stat_info.append(None)  # for permanent errors on stat() this does not change, but
            #                         having a changing value would be pointless because if we
            #                         can't even stat the file, it is unlikely we can read it.
        else:
            fake_mtime = int(st.st_mtime)
            if not st.st_ino and max_staleness:
                # st_ino being 0 likely means that we run on a platform not
                # supporting it (e.g. win32) - thus we likely need this dirty
                # trick
                now = int(time.time())
                if now >= st.st_mtime + max_staleness:
                    # keep same fake_mtime for each max_staleness interval
                    fake_mtime = int(now / max_staleness) * max_staleness
            stat_info.append((
                st.st_mtime,    # might have a rather rough granularity, e.g. 2s
                                # on FAT, 1s on ext3 and might not change on fast
                                # updates
                st.st_ino,      # inode number (will change if the update is done
                                # by e.g. renaming a temp file to the real file).
                                # not supported on win32 (0 ever)
                st.st_size,     # likely to change on many updates, but not
                                # sufficient alone
                fake_mtime)     # trick to work around file system / platform
                                # limitations causing permanent trouble
            )
    if sys.version_info < (3, 0):
        return hmac.new('', repr(stat_info), hashlib.sha1).hexdigest()
    else:
        return hmac.new(b'', bytes(repr(stat_info), 'latin-1'), hashlib.sha1).hexdigest()
127
+
128
+
129
def uid_filelist(path='.', expression='*', rekursive=True):
    """
    Return a unique id (md5 hex string) for the *content* of all files below a path.

    In contrast to :func:`uid`, this digest is based on the file data itself,
    not on file metadata: it is the md5 over the concatenated md5 hexdigests
    of every matching file, processed in sorted filename order.

    :param str path: folder which is the basepath for searching files.
    :param str expression: expression to fit including shell-style wildcards.
    :param bool rekursive: search all subfolders if True.
    :returns: hexadecimal digest identifying the combined file contents.
    :rtype: str
    """
    # renamed from "SHAhash" - it always was an md5, not a sha digest
    summary = hashlib.md5()
    for filename in sorted(filelist(path, expression, rekursive)):
        # hash every file chunk-wise, so large files do not have to fit into
        # memory (the former python 2 branch read each file completely)
        single = hashlib.md5()
        with open(filename, mode='rb') as fh:
            for chunk in iter(partial(fh.read, 128), b''):
                single.update(chunk)
        digest = single.hexdigest()
        if sys.version_info >= (3, 0):
            digest = digest.encode()
        summary.update(digest)
    return summary.hexdigest()
146
+
147
+
148
def filelist(path='.', expression='*', rekursive=True):
    """
    Return a list of files below a given path.

    :param str path: folder which is the basepath for searching files.
    :param str expression: expression to fit including shell-style wildcards.
    :param bool rekursive: search all subfolders if True.
    :returns: list of filenames including the path
    :rtype: list

    .. note:: The returned filenames could be relative paths depending on argument path.

    >>> for filename in filelist(path='.', expression='*.py*', rekursive=True):
    ...     print(filename)                             # doctest: +SKIP
    ./__init__.py
    ./__init__.pyc
    """
    result = []
    if not os.path.exists(path):
        logger.warning('FILELIST: path (%s) does not exist - empty filelist will be returned', path)
        return result
    logger.debug('FILELIST: path (%s) exists - looking for files to append', path)
    # plain files matching the expression directly below path
    result.extend(entry for entry in glob.glob(os.path.join(path, expression)) if os.path.isfile(entry))
    # descend into real (non-symlinked) subfolders if requested
    for name in os.listdir(path):
        subdir = os.path.join(path, name)
        if rekursive and os.path.isdir(subdir) and not os.path.islink(subdir):
            result.extend(filelist(subdir, expression))
    return result
178
+
179
+
180
def dirlist(path='.', rekursive=True):
    """
    Return a list of directories below a given path.

    :param str path: folder which is the basepath for searching directories.
    :param bool rekursive: search all subfolders if True.
    :returns: list of directory names including the path
    :rtype: list

    .. note:: The returned names could be relative paths depending on argument path.

    >>> for dirname in dirlist(path='..', rekursive=True):
    ...     print(dirname)                              # doctest: +SKIP
    ../caching
    ../fstools
    """
    li = list()
    if os.path.exists(path):
        logger.debug('DIRLIST: path (%s) exists - looking for directories to append', path)
        for dirname in os.listdir(path):
            fulldir = os.path.join(path, dirname)
            if os.path.isdir(fulldir):
                li.append(fulldir)
                # do not descend into symlinked folders (consistent with
                # filelist) - otherwise a symlink cycle would cause
                # endless recursion
                if rekursive and not os.path.islink(fulldir):
                    li.extend(dirlist(fulldir))
    else:
        logger.warning('DIRLIST: path (%s) does not exist - empty filelist will be returned', path)
    return li
208
+
209
+
210
def is_writeable(path):
    """
    Return True if the given file or folder is writeable for the current user.

    :param str path: file or folder to check.
    :returns: True if path is writeable, False otherwise (including a
              non-existing path).
    :rtype: bool
    """
    # os.access already returns the boolean we need - no if/else required
    return os.access(path, os.W_OK)
219
+
220
+
221
def mkdir(path):
    """
    Create the given folder including all missing parent folders.

    :param str path: folder to create.
    :returns: True if the folder exists (or was created) afterwards, False
              otherwise (e.g. path or one of its parents exists as a file).
    :rtype: bool
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        try:
            # os.makedirs creates all missing parents in one call and
            # replaces the former hand-rolled recursion
            os.makedirs(path)
        except OSError:
            # either a concurrent process created the folder in the
            # meantime or creation failed (e.g. a parent is a file) -
            # the return value below reports the actual outcome
            pass
    return os.path.isdir(path)
230
+
231
+
232
def open_locked_non_blocking(*args, **kwargs):
    """
    Open a file (arguments as for :func:`open`) and acquire an exclusive,
    non-blocking lock on it.

    :returns: the locked file object.
    :raises IOError: (or OSError) if the file is already locked elsewhere.

    .. note:: Locking is done via :mod:`fcntl` and is therefore only
              available on posix platforms.
    """
    import fcntl
    locked_file_descriptor = open(*args, **kwargs)
    try:
        fcntl.lockf(locked_file_descriptor, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except Exception:
        # don't leak the open file object if the lock can not be acquired
        locked_file_descriptor.close()
        raise
    return locked_file_descriptor
239
+
240
+
241
def open_locked_blocking(*args, **kwargs):
    """
    Open a file (arguments as for :func:`open`) and acquire an exclusive lock,
    blocking until the lock becomes available.

    :returns: the locked file object.

    .. warning:: Blocks forever if the lock is never released (possible
                 deadlock); use :func:`open_locked_non_blocking` to fail fast.

    .. note:: Locking is done via :mod:`fcntl` and is therefore only
              available on posix platforms.
    """
    import fcntl
    locked_file_descriptor = open(*args, **kwargs)
    try:
        fcntl.lockf(locked_file_descriptor, fcntl.LOCK_EX)
    except Exception:
        # don't leak the open file object if locking fails
        locked_file_descriptor.close()
        raise
    return locked_file_descriptor

Loading…
Cancel
Save