]> git.proxmox.com Git - mirror_linux-firmware.git/blob - contrib/process_linux_firmware.py
668e35c0eb0600fcd4cde19c560a4b32a8dd8542
[mirror_linux-firmware.git] / contrib / process_linux_firmware.py
1 #!/usr/bin/python3
2 import os
3 import time
4 import urllib.request
5 import sqlite3
6 import feedparser
7 import argparse
8 import logging
9 import email
10 import email.utils
11 import smtplib
12 import subprocess
13 import sys
14 from datetime import date
15 from enum import Enum
16
# Default Atom feed polled for new linux-firmware mailing list messages;
# can be overridden on the command line with --url.
URL = "https://lore.kernel.org/linux-firmware/new.atom"
18
19
class ContentType(Enum):
    """Categories that a mailing list message is sorted into."""

    # Follow-up message in an existing thread; ignored by the processor.
    REPLY = 1
    # Message carrying a patch to be applied with ``git am``.
    PATCH = 2
    # Message asking for a branch to be pulled.
    PULL_REQUEST = 3
    # Anything that matched none of the above.
    SPAM = 4
25
26
# Marker strings searched for in a message body, mapped to the classification
# they imply. The markers are tried in the order listed here, so the pull
# request marker wins over the patch markers when several are present.
content_types = {
    "are available in the Git repository at": ContentType.PULL_REQUEST,
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
}
32
33
def classify_content(content):
    """Classify a raw email message.

    The subject is checked first: a subject containing "Re:" is a REPLY and
    one containing "PATCH" is a PATCH.  Otherwise the first ``text/plain``
    part of the message is searched for the markers in ``content_types``.

    Args:
        content: the complete message (headers and body) as a string.

    Returns:
        A ``ContentType`` member; ``ContentType.SPAM`` when nothing matched
        or the body could not be decoded as UTF-8.
    """
    # load content into the email library
    msg = email.message_from_string(content)

    # A message without a Subject header yields None here; fall back to an
    # empty string so the substring checks below cannot raise TypeError.
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue

        # Only the first text/plain part is inspected; every path below
        # leaves the loop.
        try:
            body = part.get_payload(decode=True).decode("utf-8")
        except UnicodeDecodeError as e:
            logging.warning("Failed to decode email: %s, treating as SPAM" % e)
            break

        for marker, kind in content_types.items():
            if marker in body:
                return kind
        break

    return ContentType.SPAM
57
58
def fetch_url(url):
    """Retrieve *url* and return the response body decoded as UTF-8 text."""
    with urllib.request.urlopen(url) as resp:
        raw = resp.read()
    return raw.decode("utf-8")
62
63
def quiet_cmd(cmd):
    """Run *cmd* and send the command line and its output to the debug log.

    stderr is merged into stdout.  A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    logging.debug("Running %s", cmd)
    captured = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    logging.debug(captured)
68
69
def _unfold_header(value):
    """Collapse folded/multi-line header text into a single line."""
    return " ".join(str(value).split())


def reply_email(content, branch):
    """Reply to the sender and recipients of a message that was forwarded.

    SMTP settings are read from the SMTP_USER, SMTP_PASS, SMTP_SERVER and
    SMTP_PORT environment variables.  If any of them is missing or empty,
    nothing is sent.

    Args:
        content: the original message (headers and body) as a string.
        branch: name of the branch that was created for the request; it is
            quoted in the reply body.

    Raises:
        smtplib.SMTPException (or OSError) if the mail cannot be delivered.
    """
    # .get() so that an unset variable takes the "not configured" path
    # instead of leaving the local name unbound.
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASS")
    server = os.environ.get("SMTP_SERVER")
    port = os.environ.get("SMTP_PORT")
    if not (user and password and server and port):
        logging.debug("Missing SMTP configuration, not sending email")
        return

    orig = email.message_from_string(content)
    recipients = email.utils.getaddresses(
        orig.get_all("from", []) + orig.get_all("to", []) + orig.get_all("cc", [])
    )

    reply = email.message.EmailMessage()
    reply["To"] = ", ".join(email.utils.formataddr(r) for r in recipients)
    reply["From"] = "linux-firmware@kernel.org"
    # Long subjects arrive folded over several lines; EmailMessage rejects
    # header values containing line breaks, so unfold before reuse.
    reply["Subject"] = "Re: {}".format(_unfold_header(orig["Subject"] or ""))

    # Copy threading headers only when the original actually carries them,
    # rather than emitting empty headers.
    message_id = orig["Message-Id"]
    if message_id:
        reply["In-Reply-To"] = _unfold_header(message_id)
        reply["References"] = _unfold_header(message_id)
    for header in ("Thread-Topic", "Thread-Index"):
        if orig[header]:
            reply[header] = _unfold_header(orig[header])

    content = (
        "Your request has been forwarded by the Linux Firmware Kernel robot.\n"
        "Please follow up at https://gitlab.com/kernel-firmware/linux-firmware/-/merge_requests to ensure it gets merged\n"
        "Your request is '{}'".format(branch)
    )
    reply.set_content(content)

    # The context manager closes the connection even if a step fails.
    with smtplib.SMTP(server, port) as mailserver:
        mailserver.ehlo()
        mailserver.starttls()
        mailserver.ehlo()
        mailserver.login(user, password)
        # send_message() derives the envelope recipients from the address
        # headers, so every address is delivered to individually.
        mailserver.send_message(reply)
114
115
def create_pr(remote, branch):
    """Push *branch* to *remote*, passing merge-request push options.

    The push options ask for a merge request targeting ``main``, titled
    after the branch, with the source branch removed afterwards.
    """
    push_options = (
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        "merge_request.title={}".format(branch),
    )

    cmd = ["git", "push", "-u", remote, branch]
    for option in push_options:
        cmd += ["-o", option]
    quiet_cmd(cmd)
133
134
def refresh_branch():
    """Check out ``main`` and pull."""
    for git_args in (["checkout", "main"], ["pull"]):
        quiet_cmd(["git"] + git_args)
138
139
def delete_branch(branch):
    """Check out ``main`` and then force-delete the local *branch*."""
    for git_args in (["checkout", "main"], ["branch", "-D", branch]):
        quiet_cmd(["git"] + git_args)
143
144
def process_pr(mbox, num, remote):
    """Turn a pull-request email into a local branch and forward it.

    The message is fed to ``b4 pr`` on stdin.  If the expected branch exists
    afterwards it is pushed via ``create_pr`` (when *remote* is non-empty)
    and then deleted locally.

    Args:
        mbox: the raw message text.
        num: index of the message in the current run; part of the branch name.
        remote: git remote to push to, or an empty value to skip the push.

    Returns:
        The branch name on success, ``None`` if no branch was created.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))

    # manual fixup for PRs from drm firmware repo: rewrite the SSH URL to the
    # HTTPS one (replace() is a no-op when the SSH URL is absent)
    mbox = mbox.replace(
        "git@gitlab.freedesktop.org:drm/firmware.git",
        "https://gitlab.freedesktop.org/drm/firmware.git",
    )

    cmd = ["b4", "--debug", "pr", "-b", branch, "-"]
    logging.debug("Running {}".format(cmd))
    p = subprocess.run(
        cmd,
        input=mbox.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Tool output is only logged, so undecodable bytes are replaced rather
    # than allowed to abort the run.
    for line in p.stdout.splitlines() + p.stderr.splitlines():
        logging.debug(line.decode("utf-8", errors="replace"))

    # determine if it worked (we can't tell unfortunately by return code)
    cmd = ["git", "branch", "--list", branch]
    logging.debug("Running {}".format(cmd))
    result = subprocess.check_output(cmd)
    if not result:
        return None

    for line in result.splitlines():
        logging.debug(line.decode("utf-8", errors="replace"))
    logging.info("Forwarding PR for {}".format(branch))
    try:
        if remote:
            create_pr(remote, branch)
    finally:
        # Remove the temporary branch even when the push fails.
        delete_branch(branch)
    return branch
180
181
def process_patch(mbox, num, remote):
    """Apply a patch email on a fresh branch and forward it.

    A new branch is created and checked out, the message is fed to
    ``git am`` on stdin and, if it applied, the branch is pushed via
    ``create_pr`` (when *remote* is non-empty).  The branch is always
    deleted locally afterwards.

    Args:
        mbox: the raw message text.
        num: index of the message in the current run; part of the branch name.
        remote: git remote to push to, or an empty value to skip the push.

    Returns:
        The branch name if the patch applied, ``None`` otherwise.
    """
    # create a new branch for the patch
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    try:
        # apply the patch
        cmd = ["git", "am"]
        logging.debug("Running {}".format(cmd))
        p = subprocess.run(
            cmd,
            input=mbox.encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Tool output is only logged, so undecodable bytes are replaced
        # rather than allowed to abort the run.
        for line in p.stdout.splitlines() + p.stderr.splitlines():
            logging.debug(line.decode("utf-8", errors="replace"))

        applied = p.returncode == 0
        if not applied:
            quiet_cmd(["git", "am", "--abort"])
        else:
            logging.info("Opening PR for {}".format(branch))
            if remote:
                create_pr(remote, branch)
    finally:
        # Return to main and drop the temporary branch even when a step
        # above raised, so the next message starts from a clean state.
        delete_branch(branch)

    return branch if applied else None
210
211
def update_database(conn, url):
    """Record every feed entry not yet known in the ``firmware`` table.

    Args:
        conn: an open sqlite3 connection; the table is created if missing.
            The connection is committed but left open.
        url: path of a local Atom file if it exists on disk, otherwise a URL
            that is downloaded with ``fetch_url``.
    """
    c = conn.cursor()

    c.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    if os.path.exists(url):
        # local file; read as UTF-8 to match how the remote feed is decoded
        with open(url, "r", encoding="utf-8") as f:
            atom = f.read()
    else:
        # remote file
        logging.info("Fetching {}".format(url))
        atom = fetch_url(url)

    # Parse the atom and extract the URLs
    feed = feedparser.parse(atom)

    # Insert the URLs in reverse feed order (presumably oldest first,
    # assuming the feed lists newest entries first)
    for entry in reversed(feed["entries"]):
        c.execute("SELECT url FROM firmware WHERE url = ?", (entry.link,))
        if c.fetchone():
            continue
        c.execute("INSERT INTO firmware VALUES (?, ?, ?)", (entry.link, 0, 0))

    # Commit the changes; closing the connection is left to the caller
    conn.commit()
241
242
def process_database(conn, remote):
    """Handle every stored message that is neither processed nor spam.

    Each message is downloaded, classified and dispatched: patches and pull
    requests are forwarded, spam is flagged, replies are ignored.  Every
    message is then marked processed and the change committed; if a branch
    was produced, a reply email is sent.

    Args:
        conn: an open sqlite3 connection holding the ``firmware`` table.
        remote: git remote to push to, or an empty value to skip pushing.
    """
    c = conn.cursor()

    # get all unprocessed urls that aren't spam
    c.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    rows = c.fetchall()
    if not rows:
        logging.info("No new entries")
        return

    refresh_branch()

    total = len(rows)
    for num, row in enumerate(rows):
        link = row[0]
        branch = None

        # progress indicator, overwritten in place on the console
        msg = "Processing ({}%)".format(round(num / total * 100))
        print(msg, end="\r", flush=True)

        url = "{}raw".format(link)
        logging.debug("Processing {}".format(url))
        mbox = fetch_url(url)
        classification = classify_content(mbox)

        if classification == ContentType.PATCH:
            logging.debug("Processing patch ({})".format(link))
            branch = process_patch(mbox, num, remote)
        elif classification == ContentType.PULL_REQUEST:
            logging.debug("Processing PR ({})".format(link))
            branch = process_pr(mbox, num, remote)
        elif classification == ContentType.SPAM:
            logging.debug("Marking spam ({})".format(link))
            c.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif classification == ContentType.REPLY:
            logging.debug("Ignoring reply ({})".format(link))

        c.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))

        # blank out the progress indicator
        print(" " * len(msg), end="\r", flush=True)

        # commit after every message
        conn.commit()

        # send any emails
        if branch:
            reply_email(mbox, branch)

    logging.info("Finished processing {} new entries".format(total))
297
298
# Script entry point: parse options, set up file + console logging, then
# poll the feed once, or repeatedly when --refresh-cycle is non-zero.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
    parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
    parser.add_argument(
        "--database",
        default=os.path.join("contrib", "linux_firmware.db"),
        help="sqlite database to store entries in",
    )
    parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging to console"
    )
    parser.add_argument("--remote", default="origin", help="Remote to push to")
    # type=int so that "--refresh-cycle 0" is falsy (a bare string "0" would
    # be truthy and make the loop below spin forever).
    parser.add_argument(
        "--refresh-cycle",
        type=int,
        default=0,
        help="How frequently to run (in minutes)",
    )
    args = parser.parse_args()

    # WHENCE is used as the marker for the repository root
    if not os.path.exists("WHENCE"):
        logging.critical(
            "Please run this script from the root of the linux-firmware repository"
        )
        sys.exit(1)

    # full debug log goes to a per-day file under contrib/
    log = os.path.join(
        "contrib",
        "{prefix}-{date}.{suffix}".format(
            prefix="linux_firmware", suffix="txt", date=date.today()
        ),
    )
    logging.basicConfig(
        format="%(asctime)s %(levelname)s:\t%(message)s",
        filename=log,
        filemode="w",
        level=logging.DEBUG,
    )

    # set a format which is simpler for console use
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if args.debug else logging.INFO)
    formatter = logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
    console.setFormatter(formatter)
    logging.getLogger("").addHandler(console)

    # an empty remote tells the processing functions not to push
    remote = "" if args.dry else args.remote

    while True:
        conn = sqlite3.connect(args.database)
        try:
            # update the database
            update_database(conn, args.url)

            # process the database
            process_database(conn, remote)
        finally:
            # release the database even if processing raised
            conn.close()

        if not args.refresh_cycle:
            break
        logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
        time.sleep(args.refresh_cycle * 60)