]>
Commit | Line | Data |
---|---|---|
4d619071 ML |
1 | #!/usr/bin/python3 |
2 | import os | |
3 | import time | |
4 | import urllib.request | |
5 | import sqlite3 | |
6 | import feedparser | |
7 | import argparse | |
8 | import logging | |
9 | import email | |
ecaeef5d ML |
10 | import email.utils |
11 | import smtplib | |
4d619071 ML |
12 | import subprocess |
13 | import sys | |
ecaeef5d | 14 | from datetime import date |
4d619071 | 15 | from enum import Enum |
4d619071 ML |
16 | |
# Default Atom feed to poll for new list messages (overridable with --url).
URL = "https://lore.kernel.org/linux-firmware/new.atom"
18 | ||
19 | ||
class ContentType(Enum):
    """Categories a mailing-list message can be classified into."""

    # A follow-up in an existing thread.
    REPLY = 1
    # A message carrying a patch.
    PATCH = 2
    # A request to pull from a Git repository.
    PULL_REQUEST = 3
    # Anything that matched none of the above.
    SPAM = 4
25 | ||
26 | ||
# Marker strings searched for in a decoded message body, mapped to the
# classification they imply.  Markers are tried in insertion order and the
# first one found wins, so the pull-request marker is listed first.
content_types = {
    "are available in the Git repository at": ContentType.PULL_REQUEST,
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
}
32 | ||
33 | ||
def classify_content(content):
    """Classify a raw mailing-list message.

    The subject line is checked first ("Re:" means a reply, "PATCH" means a
    patch).  Otherwise the text/plain body is decoded and searched for the
    marker strings in ``content_types``.  Anything that matches nothing, or
    whose body cannot be decoded, is treated as spam.

    Args:
        content: the raw message text (headers and body).

    Returns:
        A ``ContentType`` member.
    """
    # load content into the email library
    msg = email.message_from_string(content)

    # check the subject; a message without a Subject header yields None, so
    # normalise to a string to keep the substring tests from raising TypeError
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    body = None
    if msg.is_multipart():
        # the last text/plain part found while walking the tree is used
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                body = part.get_payload(decode=True)
    else:
        body = msg.get_payload(decode=True)

    decoded = None
    if body:
        # try the encodings in order and keep the first that decodes cleanly
        for encoding in ("utf-8", "windows-1252"):
            try:
                decoded = body.decode(encoding)
                break
            except UnicodeDecodeError:
                continue

    if decoded:
        for marker, kind in content_types.items():
            if marker in decoded:
                return kind
    else:
        logging.warning("Failed to decode email: %s, treating as SPAM", body)

    return ContentType.SPAM
70 | ||
71 | ||
def fetch_url(url, timeout=60):
    """Download *url* and return its content decoded as UTF-8.

    Args:
        url: the URL to fetch.
        timeout: seconds to wait on blocking network operations before giving
            up, so that a stalled connection cannot hang the caller forever.

    Returns:
        The response body as a string.

    Raises:
        urllib.error.URLError: if the request fails or times out.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8")
75 | ||
76 | ||
def quiet_cmd(cmd):
    """Run *cmd*, sending its combined stdout/stderr to the debug log.

    Args:
        cmd: the command as an argument list.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.  The
            captured output is logged before the exception propagates, since
            otherwise it would only exist on the exception object.
    """
    logging.debug("Running %s", cmd)
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        logging.debug(err.output)
        raise
    logging.debug(output)
81 | ||
82 | ||
def reply_email(content, branch):
    """Reply to the original message to say its request was forwarded.

    The reply goes to everyone in the original From, To and Cc headers.
    Nothing is sent unless SMTP_USER, SMTP_PASS, SMTP_SERVER and SMTP_PORT are
    all set in the environment.

    Args:
        content: the raw text of the message being answered.
        branch: the name of the branch created for the request; quoted in the
            reply body.
    """
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASS")
    server = os.environ.get("SMTP_SERVER")
    port = os.environ.get("SMTP_PORT")
    if not (user and password and server and port):
        logging.debug("Missing SMTP configuration, not sending email")
        return

    reply = email.message.EmailMessage()
    orig = email.message_from_string(content)

    try:
        recipients = [
            email.utils.formataddr(pair)
            for pair in email.utils.getaddresses(
                orig.get_all("from", [])
                + orig.get_all("to", [])
                + orig.get_all("cc", [])
            )
            # drop entries that did not parse to an actual address
            if pair[1]
        ]
        if not recipients:
            logging.warning("No recipients found, not sending email")
            return
        reply["To"] = ", ".join(recipients)
    except ValueError:
        logging.warning("Failed to parse email addresses, not sending email")
        return

    reply["From"] = "linux-firmware@kernel.org"
    try:
        reply["Subject"] = "Re: {}".format(orig["Subject"])
    except ValueError:
        logging.warning("Failed to parse subject, not sending email")
        return

    # Threading headers are optional: copy only those present in the original
    # and skip any whose value the message policy rejects (for example a
    # folded, multi-line value) rather than abandoning the whole reply.
    message_id = orig["Message-Id"]
    threading_headers = (
        ("In-Reply-To", message_id),
        ("References", message_id),
        ("Thread-Topic", orig["Thread-Topic"]),
        ("Thread-Index", orig["Thread-Index"]),
    )
    for name, value in threading_headers:
        if not value:
            continue
        try:
            reply[name] = value
        except ValueError:
            logging.debug("Skipping unparsable %s header", name)

    content = (
        "Your request has been forwarded by the Linux Firmware Kernel robot.\n"
        "Please follow up at https://gitlab.com/kernel-firmware/linux-firmware/-/merge_requests to ensure it gets merged\n"
        "Your request is '{}'".format(branch)
    )
    reply.set_content(content)

    # The context manager closes the connection even if a step fails.
    with smtplib.SMTP(server, port) as mailserver:
        mailserver.ehlo()
        mailserver.starttls()
        mailserver.ehlo()
        mailserver.login(user, password)
        # send_message() derives the envelope from the headers, so every
        # address in To gets its own RCPT.  Passing the comma-joined To string
        # to sendmail() would be treated as a single recipient.
        mailserver.send_message(reply)
143 | ||
144 | ||
4d619071 ML |
def create_pr(remote, branch):
    """Push *branch* to *remote* with push options requesting a merge request.

    The push options ask for a merge request to be created against ``main``,
    titled after the branch, with the source branch removed afterwards.
    """
    push_options = (
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        f"merge_request.title={branch}",
    )
    cmd = ["git", "push", "-u", remote, branch]
    # every push option is passed as its own "-o <option>" pair
    for option in push_options:
        cmd.extend(("-o", option))
    quiet_cmd(cmd)
162 | ||
163 | ||
def refresh_branch():
    """Check out ``main`` and pull the latest changes into it."""
    for git_args in (("checkout", "main"), ("pull",)):
        quiet_cmd(["git", *git_args])
167 | ||
168 | ||
def delete_branch(branch):
    """Return to ``main`` and force-delete the local *branch*."""
    steps = (
        ["git", "checkout", "main"],
        ["git", "branch", "-D", branch],
    )
    for step in steps:
        quiet_cmd(step)
172 | ||
173 | ||
def process_pr(mbox, num, remote):
    """Turn a pull-request email into a local branch and forward it.

    The message is fed to ``b4 pr`` on stdin, which is asked to create a
    branch.  If that branch exists afterwards it is pushed as a merge request
    (when *remote* is non-empty) and the local branch is deleted again.

    Args:
        mbox: the raw message text.
        num: index of the message in the current run; used in the branch name.
        remote: git remote to push to, or an empty value to skip pushing.

    Returns:
        The branch name on success, or None if no branch was created.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))

    # manual fixup for PRs from drm firmware repo: use the HTTPS URL instead
    # of the SSH one (replace() is a no-op when the SSH URL is absent)
    mbox = mbox.replace(
        "git@gitlab.freedesktop.org:drm/firmware.git",
        "https://gitlab.freedesktop.org/drm/firmware.git",
    )

    cmd = ["b4", "--debug", "pr", "-b", branch, "-"]
    logging.debug("Running %s", cmd)
    proc = subprocess.run(
        cmd,
        input=mbox.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Tool output is only logged, so undecodable bytes are replaced rather
    # than allowed to abort the run with UnicodeDecodeError.
    for stream in (proc.stdout, proc.stderr):
        for line in stream.splitlines():
            logging.debug(line.decode("utf-8", errors="replace"))

    # determine if it worked (we can't tell unfortunately by return code)
    cmd = ["git", "branch", "--list", branch]
    logging.debug("Running %s", cmd)
    listing = subprocess.check_output(cmd)
    if not listing:
        return None

    for line in listing.splitlines():
        logging.debug(line.decode("utf-8", errors="replace"))
    logging.info("Forwarding PR for %s", branch)
    try:
        if remote:
            create_pr(remote, branch)
    finally:
        # always remove the temporary branch, even when the push fails
        delete_branch(branch)
    return branch
4d619071 ML |
209 | |
210 | ||
def process_patch(mbox, num, remote):
    """Apply a patch email on a fresh branch and open a merge request for it.

    A new branch is created and the message is fed to ``git am`` on stdin.
    On success the branch is pushed as a merge request (when *remote* is
    non-empty); on failure the ``git am`` session is aborted.  The local
    branch is deleted in either case.

    Args:
        mbox: the raw message text.
        num: index of the message in the current run; used in the branch name.
        remote: git remote to push to, or an empty value to skip pushing.

    Returns:
        The branch name if the patch applied, otherwise None.
    """
    # create a new branch for the patch
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    # apply the patch
    cmd = ["git", "am"]
    logging.debug("Running %s", cmd)
    proc = subprocess.run(
        cmd,
        input=mbox.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Tool output is only logged, so undecodable bytes are replaced rather
    # than allowed to abort the run with UnicodeDecodeError.
    for stream in (proc.stdout, proc.stderr):
        for line in stream.splitlines():
            logging.debug(line.decode("utf-8", errors="replace"))

    applied = proc.returncode == 0
    try:
        if applied:
            logging.info("Opening PR for %s", branch)
            if remote:
                create_pr(remote, branch)
        else:
            quiet_cmd(["git", "am", "--abort"])
    finally:
        # always remove the temporary branch, even when the push fails
        delete_branch(branch)

    return branch if applied else None
4d619071 ML |
239 | |
240 | ||
def update_database(conn, url):
    """Record any new feed entries in the ``firmware`` table.

    The table is created if needed.  Each entry link not already stored is
    inserted with ``processed`` and ``spam`` both set to 0.

    Args:
        conn: an open sqlite3 connection; it is committed but left open.
        url: a local file path or a remote URL of the Atom feed.
    """
    c = conn.cursor()

    c.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    if os.path.exists(url):
        # local file: read it as UTF-8, matching how remote feeds are decoded,
        # rather than relying on the locale's default encoding
        with open(url, "r", encoding="utf-8") as f:
            atom = f.read()
    else:
        # remote file
        logging.info("Fetching %s", url)
        atom = fetch_url(url)

    # Parse the atom and extract the URLs
    feed = feedparser.parse(atom)

    # Insert the URLs into the database (oldest first)
    for entry in reversed(feed["entries"]):
        link = entry.get("link")
        if not link:
            # an entry without a link cannot be fetched later; skip it
            continue
        c.execute("SELECT url FROM firmware WHERE url = ?", (link,))
        if c.fetchone():
            continue
        c.execute("INSERT INTO firmware VALUES (?, ?, ?)", (link, 0, 0))

    # Commit the changes; the caller owns and closes the connection
    conn.commit()
270 | ||
271 | ||
def process_database(conn, remote):
    """Fetch, classify and act on every stored entry not yet processed.

    Patches and pull requests are forwarded through ``process_patch`` and
    ``process_pr``; spam is flagged in the database; replies are ignored.
    Each entry is marked processed and committed, and a reply email is sent
    for every entry that produced a branch.

    Args:
        conn: an open sqlite3 connection.
        remote: git remote to push to, or an empty value to skip pushing.
    """
    cursor = conn.cursor()

    # get all unprocessed urls that aren't spam
    cursor.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    rows = cursor.fetchall()
    if not rows:
        logging.info("No new entries")
        return

    refresh_branch()

    total = len(rows)
    # loop over all unprocessed urls
    for index, (link, *_rest) in enumerate(rows):
        branch = None
        progress = f"Processing ({round(index / total * 100)}%)"
        print(progress, end="\r", flush=True)

        raw_url = f"{link}raw"
        logging.debug(f"Processing {raw_url}")
        mbox = fetch_url(raw_url)
        kind = classify_content(mbox)

        if kind == ContentType.PATCH:
            logging.debug(f"Processing patch ({link})")
            branch = process_patch(mbox, index, remote)
        elif kind == ContentType.PULL_REQUEST:
            logging.debug(f"Processing PR ({link})")
            branch = process_pr(mbox, index, remote)
        elif kind == ContentType.SPAM:
            logging.debug(f"Marking spam ({link})")
            cursor.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif kind == ContentType.REPLY:
            logging.debug(f"Ignoring reply ({link})")

        cursor.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))
        # blank out the progress line before anything else is printed
        print(" " * len(progress), end="\r", flush=True)

        # commit changes
        conn.commit()

        # send any emails
        if branch:
            reply_email(mbox, branch)

    logging.info(f"Finished processing {total} new entries")
326 | ||
327 | ||
# Script entry point: parse options, set up file and console logging, then
# poll the feed once, or repeatedly when --refresh-cycle is given.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
    parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
    parser.add_argument(
        "--database",
        default=os.path.join("contrib", "linux_firmware.db"),
        help="sqlite database to store entries in",
    )
    parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging to console"
    )
    parser.add_argument("--remote", default="origin", help="Remote to push to")
    # type=int rejects non-numeric values at parse time and makes an explicit
    # "--refresh-cycle 0" falsy; as a string, "0" is truthy and would make the
    # loop below spin without ever sleeping.
    parser.add_argument(
        "--refresh-cycle",
        type=int,
        default=0,
        help="How frequently to run (in minutes)",
    )
    args = parser.parse_args()

    if not os.path.exists("WHENCE"):
        logging.critical(
            "Please run this script from the root of the linux-firmware repository"
        )
        sys.exit(1)

    log = os.path.join(
        "contrib",
        "{prefix}-{date}.{suffix}".format(
            prefix="linux_firmware", suffix="txt", date=date.today()
        ),
    )
    logging.basicConfig(
        format="%(asctime)s %(levelname)s:\t%(message)s",
        filename=log,
        filemode="w",
        level=logging.DEBUG,
    )

    # set a format which is simpler for console use
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if args.debug else logging.INFO)
    formatter = logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
    console.setFormatter(formatter)
    logging.getLogger("").addHandler(console)

    # an empty remote tells the processing functions not to push anything
    remote = "" if args.dry else args.remote

    while True:
        conn = sqlite3.connect(args.database)
        try:
            # update the database
            update_database(conn, args.url)

            # process the database
            process_database(conn, remote)
        finally:
            # close the connection even when a cycle fails
            conn.close()

        if args.refresh_cycle <= 0:
            break

        logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
        time.sleep(args.refresh_cycle * 60)