]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | # Copyright 2003 Dave Abrahams |
2 | # Copyright 2001, 2002 Vladimir Prus | |
3 | # Copyright 2012 Jurko Gospodnetic | |
4 | # Distributed under the Boost Software License, Version 1.0. | |
1e59de90 TL |
5 | # (See accompanying file LICENSE.txt or copy at |
6 | # https://www.bfgroup.xyz/b2/LICENSE.txt) | |
7c673cae FG |
7 | |
8 | ############################################################################### | |
9 | # | |
10 | # Based in part on an old Subversion tree.py source file (tools for comparing | |
11 | # directory trees). See http://subversion.tigris.org for more information. | |
12 | # | |
13 | # Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved. | |
14 | # | |
15 | # This software is licensed as described in the file COPYING, which you should | |
16 | # have received as part of this distribution. The terms are also available at | |
17 | # http://subversion.tigris.org/license-1.html. If newer versions of this | |
18 | # license are posted there, you may use a newer version instead, at your | |
19 | # option. | |
20 | # | |
21 | ############################################################################### | |
22 | ||
92f5a8d4 TL |
23 | from __future__ import print_function |
24 | ||
7c673cae FG |
25 | import os |
26 | import os.path | |
27 | import stat | |
28 | import sys | |
29 | ||
30 | ||
31 | class TreeNode: | |
32 | """ | |
33 | Fundamental data type used to build file system tree structures. | |
34 | ||
35 | If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN | |
36 | is a list of the nodes representing that directory's children. | |
37 | ||
38 | NAME is simply the name of the file or directory. CONTENTS is a string | |
39 | holding the file's contents (if a file). | |
40 | ||
41 | """ | |
42 | ||
43 | def __init__(self, name, children=None, contents=None): | |
44 | assert children is None or contents is None | |
45 | self.name = name | |
46 | self.mtime = 0 | |
47 | self.children = children | |
48 | self.contents = contents | |
49 | self.path = name | |
50 | ||
51 | def add_child(self, newchild): | |
52 | assert not self.is_file() | |
53 | for a in self.children: | |
54 | if a.name == newchild.name: | |
55 | if newchild.is_file(): | |
56 | a.contents = newchild.contents | |
57 | a.path = os.path.join(self.path, newchild.name) | |
58 | else: | |
59 | for i in newchild.children: | |
60 | a.add_child(i) | |
61 | break | |
62 | else: | |
63 | self.children.append(newchild) | |
64 | newchild.path = os.path.join(self.path, newchild.name) | |
65 | ||
66 | def get_child(self, name): | |
67 | """ | |
68 | If the given TreeNode directory NODE contains a child named NAME, | |
69 | return the child; else, return None. | |
70 | ||
71 | """ | |
72 | for n in self.children: | |
73 | if n.name == name: | |
74 | return n | |
75 | ||
76 | def is_file(self): | |
77 | return self.children is None | |
78 | ||
79 | def pprint(self): | |
80 | print(" * Node name: %s" % self.name) | |
81 | print(" Path: %s" % self.path) | |
82 | print(" Contents: %s" % self.contents) | |
83 | if self.is_file(): | |
84 | print(" Children: is a file.") | |
85 | else: | |
86 | print(" Children: %d" % len(self.children)) | |
87 | ||
88 | ||
89 | class TreeDifference: | |
90 | def __init__(self): | |
91 | self.added_files = [] | |
92 | self.removed_files = [] | |
93 | self.modified_files = [] | |
94 | self.touched_files = [] | |
95 | ||
96 | def append(self, other): | |
97 | self.added_files.extend(other.added_files) | |
98 | self.removed_files.extend(other.removed_files) | |
99 | self.modified_files.extend(other.modified_files) | |
100 | self.touched_files.extend(other.touched_files) | |
101 | ||
102 | def ignore_directories(self): | |
103 | """Removes directories from our lists of found differences.""" | |
104 | not_dir = lambda x : x[-1] != "/" | |
92f5a8d4 TL |
105 | self.added_files = list(filter(not_dir, self.added_files)) |
106 | self.removed_files = list(filter(not_dir, self.removed_files)) | |
107 | self.modified_files = list(filter(not_dir, self.modified_files)) | |
108 | self.touched_files = list(filter(not_dir, self.touched_files)) | |
7c673cae FG |
109 | |
110 | def pprint(self, file=sys.stdout): | |
111 | file.write("Added files : %s\n" % self.added_files) | |
112 | file.write("Removed files : %s\n" % self.removed_files) | |
113 | file.write("Modified files: %s\n" % self.modified_files) | |
114 | file.write("Touched files : %s\n" % self.touched_files) | |
115 | ||
116 | def empty(self): | |
117 | return not (self.added_files or self.removed_files or | |
118 | self.modified_files or self.touched_files) | |
119 | ||
120 | ||
121 | def build_tree(path): | |
122 | """ | |
123 | Takes PATH as the folder path, walks the file system below that path, and | |
124 | creates a tree structure based on any files and folders found there. | |
125 | Returns the prepared tree structure plus the maximum file modification | |
126 | timestamp under the given folder. | |
127 | ||
128 | """ | |
129 | return _handle_dir(os.path.normpath(path)) | |
130 | ||
131 | ||
132 | def tree_difference(a, b): | |
133 | """Compare TreeNodes A and B, and create a TreeDifference instance.""" | |
134 | return _do_tree_difference(a, b, "", True) | |
135 | ||
136 | ||
137 | def _do_tree_difference(a, b, parent_path, root=False): | |
138 | """Internal recursive worker function for tree_difference().""" | |
139 | ||
140 | # We do not want to list root node names. | |
141 | if root: | |
142 | assert not parent_path | |
143 | assert not a.is_file() | |
144 | assert not b.is_file() | |
145 | full_path = "" | |
146 | else: | |
147 | assert a.name == b.name | |
148 | full_path = parent_path + a.name | |
149 | result = TreeDifference() | |
150 | ||
151 | # A and B are both files. | |
152 | if a.is_file() and b.is_file(): | |
153 | if a.contents != b.contents: | |
154 | result.modified_files.append(full_path) | |
155 | elif a.mtime != b.mtime: | |
156 | result.touched_files.append(full_path) | |
157 | return result | |
158 | ||
159 | # Directory converted to file. | |
160 | if not a.is_file() and b.is_file(): | |
161 | result.removed_files.extend(_traverse_tree(a, parent_path)) | |
162 | result.added_files.append(full_path) | |
163 | ||
164 | # File converted to directory. | |
165 | elif a.is_file() and not b.is_file(): | |
166 | result.removed_files.append(full_path) | |
167 | result.added_files.extend(_traverse_tree(b, parent_path)) | |
168 | ||
169 | # A and B are both directories. | |
170 | else: | |
171 | if full_path: | |
172 | full_path += "/" | |
173 | accounted_for = [] # Children present in both trees. | |
174 | for a_child in a.children: | |
175 | b_child = b.get_child(a_child.name) | |
176 | if b_child: | |
177 | accounted_for.append(b_child) | |
178 | result.append(_do_tree_difference(a_child, b_child, full_path)) | |
179 | else: | |
180 | result.removed_files.append(full_path + a_child.name) | |
181 | for b_child in b.children: | |
182 | if b_child not in accounted_for: | |
183 | result.added_files.extend(_traverse_tree(b_child, full_path)) | |
184 | ||
185 | return result | |
186 | ||
187 | ||
188 | def _traverse_tree(t, parent_path): | |
189 | """Returns a list of all names in a tree.""" | |
190 | assert not parent_path or parent_path[-1] == "/" | |
191 | full_node_name = parent_path + t.name | |
192 | if t.is_file(): | |
193 | result = [full_node_name] | |
194 | else: | |
195 | name_prefix = full_node_name + "/" | |
196 | result = [name_prefix] | |
197 | for i in t.children: | |
198 | result.extend(_traverse_tree(i, name_prefix)) | |
199 | return result | |
200 | ||
201 | ||
202 | def _get_text(path): | |
203 | """Return a string with the textual contents of a file at PATH.""" | |
92f5a8d4 | 204 | fp = open(path, 'rb') |
7c673cae FG |
205 | try: |
206 | return fp.read() | |
207 | finally: | |
208 | fp.close() | |
209 | ||
210 | ||
211 | def _handle_dir(path): | |
212 | """ | |
213 | Main recursive worker function for build_tree(). Returns a newly created | |
214 | tree node representing the given normalized folder path as well as the | |
215 | maximum file/folder modification time detected under the same path. | |
216 | ||
217 | """ | |
218 | files = [] | |
219 | dirs = [] | |
220 | node = TreeNode(os.path.basename(path), children=[]) | |
221 | max_mtime = node.mtime = os.stat(path).st_mtime | |
222 | ||
223 | # List files & folders. | |
224 | for f in os.listdir(path): | |
225 | f = os.path.join(path, f) | |
226 | if os.path.isdir(f): | |
227 | dirs.append(f) | |
228 | elif os.path.isfile(f): | |
229 | files.append(f) | |
230 | ||
231 | # Add a child node for each file. | |
232 | for f in files: | |
233 | fcontents = _get_text(f) | |
234 | new_file_node = TreeNode(os.path.basename(f), contents=fcontents) | |
235 | new_file_node.mtime = os.stat(f).st_mtime | |
236 | max_mtime = max(max_mtime, new_file_node.mtime) | |
237 | node.add_child(new_file_node) | |
238 | ||
239 | # For each subdir, create a node, walk its tree, add it as a child. | |
240 | for d in dirs: | |
241 | new_dir_node, new_max_mtime = _handle_dir(d) | |
242 | max_mtime = max(max_mtime, new_max_mtime) | |
243 | node.add_child(new_dir_node) | |
244 | ||
245 | return node, max_mtime |