#!/usr/bin/python3
# Origin: mirror_linux-firmware.git / contrib / process_linux_firmware.py
# Text recovered from the git.proxmox.com blame view of that file; the most
# recent change shown there is "Make email replies more resilient".
import argparse
import email
import email.message
import email.utils
import logging
import os
import smtplib
import sqlite3
import subprocess
import sys
import time
import urllib.request
from datetime import date
from enum import Enum

import feedparser
4d619071
ML
16
# Default Atom feed to poll; the --url command line option overrides it.
URL = "https://lore.kernel.org/linux-firmware/new.atom"
18
19
class ContentType(Enum):
    """Categories a mailing-list message can be sorted into."""

    REPLY = 1
    PATCH = 2
    PULL_REQUEST = 3
    SPAM = 4
25
26
# Body substrings that identify a message type.  Insertion order matters:
# classify_content() returns the type of the first key found in the body, so
# the pull-request phrase is tested before the generic patch markers.
content_types = {
    "are available in the Git repository at": ContentType.PULL_REQUEST,
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
}
32
33
def classify_content(content):
    """Classify a raw email message.

    The subject is checked first: a subject containing "Re:" is a REPLY and
    one containing "PATCH" is a PATCH.  Otherwise the text/plain body is
    decoded (UTF-8, then windows-1252) and searched for the markers listed in
    ``content_types``.  Anything that matches nothing, or whose body cannot be
    decoded, is classified as SPAM.

    Args:
        content: the complete message (headers and body) as a string.

    Returns:
        A ``ContentType`` member.
    """
    # load content into the email library
    msg = email.message_from_string(content)

    # check the subject; a message without a Subject header yields None and
    # some malformed subjects come back as Header objects, so normalise to str
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    body = None
    if msg.is_multipart():
        # when several text/plain parts exist the last one wins
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                body = part.get_payload(decode=True)
    else:
        body = msg.get_payload(decode=True)

    decoded = None
    if body:
        for encoding in ("utf-8", "windows-1252"):
            try:
                decoded = body.decode(encoding)
            except UnicodeDecodeError:
                continue
            break

    if decoded:
        for marker, content_type in content_types.items():
            if marker in decoded:
                return content_type
    else:
        logging.warning("Failed to decode email: %s, treating as SPAM", body)

    return ContentType.SPAM
70
71
def fetch_url(url):
    """Return the content found at *url*, decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: if the URL cannot be opened.
        UnicodeDecodeError: if the payload is not valid UTF-8.
    """
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    return payload.decode("utf-8")
75
76
def quiet_cmd(cmd):
    """Run *cmd*, sending its combined stdout/stderr to the log.

    Args:
        cmd: the command as an argument list.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    logging.debug("Running {}".format(cmd))
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        # the captured output only lives on the exception; record it before
        # the error propagates so the failure can be diagnosed from the log
        logging.error(
            "Command %s failed with exit code %d: %s", cmd, err.returncode, err.output
        )
        raise
    logging.debug(output)
81
82
def _unfold(value):
    """Collapse a folded (multi-line) header value onto a single line."""
    return " ".join(str(value).split())


def _build_reply(content, branch):
    """Build the reply to the raw message *content*, or return None if that is not possible."""
    orig = email.message_from_string(content)
    reply = email.message.EmailMessage()

    try:
        # reply to everybody who saw the original message
        recipients = [
            pair
            for pair in email.utils.getaddresses(
                orig.get_all("from", [])
                + orig.get_all("to", [])
                + orig.get_all("cc", [])
            )
            if pair[1]
        ]
        if not recipients:
            logging.warning("Failed to parse email addresses, not sending email")
            return None
        reply["To"] = ", ".join(email.utils.formataddr(pair) for pair in recipients)
    except ValueError:
        logging.warning("Failed to parse email addresses, not sending email")
        return None

    reply["From"] = "linux-firmware@kernel.org"
    try:
        # long subjects arrive folded over several lines; EmailMessage
        # rejects header values containing line breaks, so unfold first
        reply["Subject"] = "Re: {}".format(_unfold(orig["Subject"] or ""))
    except ValueError:
        logging.warning("Failed to parse subject, not sending email")
        return None

    # copy the threading headers, skipping the ones the original lacks
    copied_headers = (
        ("In-Reply-To", "Message-Id"),
        ("References", "Message-Id"),
        ("Thread-Topic", "Thread-Topic"),
        ("Thread-Index", "Thread-Index"),
    )
    for target, source in copied_headers:
        value = orig[source]
        if not value:
            continue
        try:
            reply[target] = _unfold(value)
        except ValueError:
            logging.warning("Failed to copy header %s, leaving it out", source)

    reply.set_content(
        "Your request has been forwarded by the Linux Firmware Kernel robot.\n"
        "Please follow up at https://gitlab.com/kernel-firmware/linux-firmware/-/merge_requests to ensure it gets merged\n"
        "Your request is '{}'".format(branch)
    )
    return reply


def reply_email(content, branch):
    """Send a notification reply for a processed message.

    The SMTP settings are read from the SMTP_USER, SMTP_PASS, SMTP_SERVER and
    SMTP_PORT environment variables; if any of them is missing or empty no
    mail is sent.  The reply goes to every From/To/Cc address of the original
    message.

    Args:
        content: the original message (headers and body) as a string.
        branch: name of the branch created for the request; quoted in the
            reply body.

    Returns:
        None.
    """
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASS")
    server = os.environ.get("SMTP_SERVER")
    port = os.environ.get("SMTP_PORT")
    if not user or not password or not server or not port:
        logging.debug("Missing SMTP configuration, not sending email")
        return

    reply = _build_reply(content, branch)
    if reply is None:
        return

    # the context manager closes the connection even when a step fails
    with smtplib.SMTP(server, port) as mailserver:
        mailserver.ehlo()
        mailserver.starttls()
        mailserver.ehlo()
        mailserver.login(user, password)
        # send_message() derives the envelope recipients from the headers;
        # sendmail() with the comma-joined To string treated the whole string
        # as a single address, so only the first recipient was addressed
        mailserver.send_message(reply)
143
144
4d619071
ML
def create_pr(remote, branch):
    """Push *branch* to *remote*, passing push options that request a merge request.

    The push options ask for a merge request targeting ``main``, titled with
    the branch name, with the source branch removed afterwards.

    Raises:
        subprocess.CalledProcessError: if the push fails.
    """
    push_options = [
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        "merge_request.title={}".format(branch),
    ]
    cmd = ["git", "push", "-u", remote, branch]
    for option in push_options:
        cmd += ["-o", option]
    quiet_cmd(cmd)
162
163
def refresh_branch():
    """Check out ``main`` and pull the latest changes."""
    quiet_cmd(["git", "checkout", "main"])
    quiet_cmd(["git", "pull"])
167
168
def delete_branch(branch):
    """Switch back to ``main`` and force-delete the local *branch*."""
    # a branch cannot be deleted while it is checked out
    quiet_cmd(["git", "checkout", "main"])
    quiet_cmd(["git", "branch", "-D", branch])
172
173
def process_pr(mbox, num, remote):
    """Turn a pull-request email into a local branch and forward it.

    The message is piped to ``b4 pr``, which is asked to create a uniquely
    named ``robot/pr-*`` branch.  If that branch exists afterwards it is
    pushed as a merge request (unless *remote* is empty) and then deleted
    locally.

    Args:
        mbox: the raw message as a string.
        num: index of the message in the current run; part of the branch name.
        remote: git remote to push to; an empty value skips the push.

    Returns:
        The branch name if b4 created the branch, otherwise None.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))

    # manual fixup for PRs from drm firmware repo
    mbox = mbox.replace(
        "git@gitlab.freedesktop.org:drm/firmware.git",
        "https://gitlab.freedesktop.org/drm/firmware.git",
    )

    cmd = ["b4", "--debug", "pr", "-b", branch, "-"]
    logging.debug("Running {}".format(cmd))
    proc = subprocess.run(
        cmd,
        input=mbox.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for stream in (proc.stdout, proc.stderr):
        for line in stream.splitlines():
            # tool output is only logged, so never fail on odd bytes
            logging.debug(line.decode("utf-8", errors="replace"))

    # determine if it worked (we can't tell unfortunately by return code)
    cmd = ["git", "branch", "--list", branch]
    logging.debug("Running {}".format(cmd))
    result = subprocess.check_output(cmd)
    if not result:
        return None

    for line in result.splitlines():
        logging.debug(line.decode("utf-8", errors="replace"))
    logging.info("Forwarding PR for {}".format(branch))
    try:
        if remote:
            create_pr(remote, branch)
    finally:
        # remove the temporary branch even when the push fails
        delete_branch(branch)
    return branch
4d619071
ML
209
210
def process_patch(mbox, num, remote):
    """Apply a patch email on a fresh branch and forward it.

    A uniquely named ``robot/patch-*`` branch is created and the message is
    piped to ``git am``.  On success the branch is pushed as a merge request
    (unless *remote* is empty); on failure the ``git am`` session is aborted.
    The local branch is deleted in both cases.

    Args:
        mbox: the raw message as a string.
        num: index of the message in the current run; part of the branch name.
        remote: git remote to push to; an empty value skips the push.

    Returns:
        The branch name if the patch applied, otherwise None.
    """
    # create a new branch for the patch
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    # apply the patch
    cmd = ["git", "am"]
    logging.debug("Running {}".format(cmd))
    proc = subprocess.run(
        cmd,
        input=mbox.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for stream in (proc.stdout, proc.stderr):
        for line in stream.splitlines():
            # tool output is only logged, so never fail on odd bytes
            logging.debug(line.decode("utf-8", errors="replace"))

    applied = proc.returncode == 0
    try:
        if applied:
            logging.info("Opening PR for {}".format(branch))
            if remote:
                create_pr(remote, branch)
        else:
            quiet_cmd(["git", "am", "--abort"])
    finally:
        # always return to main and drop the temporary branch, even when the
        # push or the abort fails
        delete_branch(branch)

    return branch if applied else None
4d619071
ML
239
240
def update_database(conn, url):
    """Record the entries of an Atom feed in the ``firmware`` table.

    The table is created if needed.  Links that are not stored yet are
    inserted, oldest first, flagged as neither processed nor spam.

    Args:
        conn: an open sqlite3 connection; it is committed but left open.
        url: path of a local Atom file, or a URL to fetch the feed from.
    """
    c = conn.cursor()

    c.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    # local file
    if os.path.exists(url):
        # read as UTF-8 like the remote path does, not the locale encoding
        with open(url, "r", encoding="utf-8") as f:
            atom = f.read()
    # remote file
    else:
        logging.info("Fetching {}".format(url))
        atom = fetch_url(url)

    # Parse the atom and extract the URLs
    feed = feedparser.parse(atom)

    # Insert the URLs into the database (oldest first)
    for entry in reversed(feed["entries"]):
        c.execute("SELECT url FROM firmware WHERE url = ?", (entry.link,))
        if c.fetchone():
            continue
        c.execute("INSERT INTO firmware VALUES (?, ?, ?)", (entry.link, 0, 0))

    # Commit the changes; closing the connection is left to the caller
    conn.commit()
270
271
def process_database(conn, remote):
    """Classify and handle every stored message that is not processed yet.

    Each unprocessed, non-spam URL is fetched and classified.  Patches and
    pull requests are turned into branches, spam is flagged, replies are
    ignored.  Every message is then marked as processed and the change is
    committed before the next message is handled.

    Args:
        conn: an open sqlite3 connection containing the ``firmware`` table.
        remote: git remote to push to; an empty value (dry run) disables both
            the push and the notification email.
    """
    c = conn.cursor()

    # get all unprocessed urls that aren't spam
    c.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    rows = c.fetchall()

    if not rows:
        logging.info("No new entries")
        return

    refresh_branch()

    # loop over all unprocessed urls
    total = len(rows)
    for num, row in enumerate(rows):
        link = row[0]
        branch = None
        msg = "Processing ({}%)".format(round(num / total * 100))
        print(msg, end="\r", flush=True)

        url = "{}raw".format(link)
        logging.debug("Processing {}".format(url))
        mbox = fetch_url(url)
        classification = classify_content(mbox)

        if classification == ContentType.PATCH:
            logging.debug("Processing patch ({})".format(link))
            branch = process_patch(mbox, num, remote)
        elif classification == ContentType.PULL_REQUEST:
            logging.debug("Processing PR ({})".format(link))
            branch = process_pr(mbox, num, remote)
        elif classification == ContentType.SPAM:
            logging.debug("Marking spam ({})".format(link))
            c.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif classification == ContentType.REPLY:
            logging.debug("Ignoring reply ({})".format(link))

        c.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))
        # blank out the progress line
        print(" " * len(msg), end="\r", flush=True)

        # commit per message so finished work survives a later failure
        conn.commit()

        # send any emails; in a dry run nothing was pushed, so announcing a
        # forwarded request would be wrong
        if branch and remote:
            reply_email(mbox, branch)

    logging.info("Finished processing {} new entries".format(total))
326
327
def main():
    """Parse the command line, set up logging and run the processing loop.

    The script must be started from the root of the linux-firmware repository
    (detected by the presence of the ``WHENCE`` file).  With a non-zero
    ``--refresh-cycle`` the feed is polled repeatedly; otherwise it is
    processed once.
    """
    parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
    parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
    parser.add_argument(
        "--database",
        default=os.path.join("contrib", "linux_firmware.db"),
        help="sqlite database to store entries in",
    )
    parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging to console"
    )
    parser.add_argument("--remote", default="origin", help="Remote to push to")
    parser.add_argument(
        "--refresh-cycle",
        default=0,
        type=int,
        help="How frequently to run (in minutes)",
    )
    args = parser.parse_args()

    if not os.path.exists("WHENCE"):
        logging.critical(
            "Please run this script from the root of the linux-firmware repository"
        )
        sys.exit(1)

    log = os.path.join(
        "contrib",
        "{prefix}-{date}.{suffix}".format(
            prefix="linux_firmware", suffix="txt", date=date.today()
        ),
    )
    # the log file always records everything, whatever the console shows
    logging.basicConfig(
        format="%(asctime)s %(levelname)s:\t%(message)s",
        filename=log,
        filemode="w",
        level=logging.DEBUG,
    )

    # set a format which is simpler for console use
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if args.debug else logging.INFO)
    console.setFormatter(
        logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
    )
    logging.getLogger("").addHandler(console)

    # an empty remote tells the processing functions not to push
    remote = "" if args.dry else args.remote

    while True:
        conn = sqlite3.connect(args.database)
        try:
            # update the database
            update_database(conn, args.url)

            # process the database
            process_database(conn, remote)
        finally:
            # release the database even when processing fails
            conn.close()

        if not args.refresh_cycle:
            break
        logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
        time.sleep(args.refresh_cycle * 60)


if __name__ == "__main__":
    main()