git.proxmox.com Git - mirror_linux-firmware.git/blame - contrib/process_linux_firmware.py
Use `git am` instead of `b4 shazam`
[mirror_linux-firmware.git] / contrib / process_linux_firmware.py
CommitLineData
4d619071
ML
1#!/usr/bin/python3
2import os
3import time
4import urllib.request
5import sqlite3
6import feedparser
7import argparse
8import logging
9import email
ecaeef5d
ML
10import email.utils
11import smtplib
4d619071
ML
12import subprocess
13import sys
ecaeef5d 14from datetime import date
4d619071 15from enum import Enum
4d619071
ML
16
# Default Atom feed to poll; can be overridden with --url.
URL = "https://lore.kernel.org/linux-firmware/new.atom"
18
19
class ContentType(Enum):
    """Categories a mailing-list message can be sorted into."""

    # Follow-up message in an existing thread.
    REPLY = 1
    # Message carrying a patch.
    PATCH = 2
    # Message asking for a Git repository to be pulled.
    PULL_REQUEST = 3
    # Fallback category for anything that matches none of the above.
    SPAM = 4
25
26
# Substrings looked for in a message body, mapped to the category they imply.
# The mapping is scanned in insertion order and the first hit wins.
content_types = {
    # Markers found in patches.
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
    # Marker found in pull requests.
    "are available in the Git repository at": ContentType.PULL_REQUEST,
}
32
33
def classify_content(content):
    """Classify a raw mailing-list message.

    The subject line is checked first.  If it is inconclusive, the first
    ``text/plain`` part of the message is scanned for the markers listed in
    ``content_types``.

    Args:
        content: The complete message (headers and body) as a string.

    Returns:
        A ``ContentType`` member; ``ContentType.SPAM`` when nothing matches.
    """
    msg = email.message_from_string(content)

    # A message without a Subject header yields None; normalise to a string
    # so the substring checks below cannot raise TypeError.
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        # The markers are plain ASCII, so bytes that are not valid UTF-8 can
        # be replaced instead of aborting the whole classification.
        body = payload.decode("utf-8", errors="replace") if payload else ""
        for marker, kind in content_types.items():
            if marker in body:
                return kind
        # Only the first text/plain part is inspected.
        break
    return ContentType.SPAM
53
54
def fetch_url(url):
    """Open ``url`` and return the response body decoded as UTF-8 text."""
    with urllib.request.urlopen(url) as response:
        raw = response.read()
    return raw.decode("utf-8")
58
59
def quiet_cmd(cmd):
    """Run ``cmd`` and write its combined stdout/stderr to the debug log.

    Args:
        cmd: The command and its arguments, as a list.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    logging.debug("Running {}".format(cmd))
    completed = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True
    )
    logging.debug(completed.stdout)
64
65
ecaeef5d
ML
def reply_email(content, branch):
    """Reply to a processed message to say that it has been forwarded.

    SMTP settings are read from the ``SMTP_USER``, ``SMTP_PASS``,
    ``SMTP_SERVER`` and ``SMTP_PORT`` environment variables.  If any of them
    is unset or empty, a debug message is logged and nothing is sent.

    The reply is addressed to everyone in the original From, To and Cc
    headers.

    Args:
        content: The original message (headers and body) as a string.
        branch: Name of the branch created for the request; quoted in the
            reply body.
    """
    # .get() leaves a missing variable as None rather than an unbound local,
    # so the check below takes the "not configured" path instead of crashing.
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASS")
    server = os.environ.get("SMTP_SERVER")
    port = os.environ.get("SMTP_PORT")
    if not (user and password and server and port):
        logging.debug("Missing SMTP configuration, not sending email")
        return

    def unfold(value):
        # Header values parsed from the original keep their line folding,
        # but EmailMessage rejects values containing CR or LF.
        return str(value).replace("\r", "").replace("\n", "")

    orig = email.message_from_string(content)
    reply = email.message.EmailMessage()

    recipients = email.utils.getaddresses(
        orig.get_all("from", []) + orig.get_all("to", []) + orig.get_all("cc", [])
    )
    reply["To"] = ", ".join(email.utils.formataddr(pair) for pair in recipients)
    reply["From"] = "linux-firmware@kernel.org"
    reply["Subject"] = "Re: {}".format(unfold(orig["Subject"]))

    # Threading headers are copied only when the original carries them.
    message_id = orig["Message-Id"]
    if message_id:
        reply["In-Reply-To"] = unfold(message_id)
        reply["References"] = unfold(message_id)
    for name in ("Thread-Topic", "Thread-Index"):
        value = orig[name]
        if value:
            reply[name] = unfold(value)

    reply.set_content(
        "Your request has been forwarded by the Linux Firmware Kernel robot.\n"
        "Please follow up at https://gitlab.com/kernel-firmware/linux-firmware/-/merge_requests to ensure it gets merged\n"
        "Your request is '{}'".format(branch)
    )

    # The context manager closes the connection even if a step fails.
    with smtplib.SMTP(server, port) as mailserver:
        mailserver.ehlo()
        mailserver.starttls()
        mailserver.ehlo()
        mailserver.login(user, password)
        # send_message() takes the envelope recipients from the message
        # headers, so every address in To receives the reply.  Passing the
        # joined To string to sendmail() treats it as a single recipient.
        mailserver.send_message(reply)
110
111
4d619071
ML
def create_pr(remote, branch):
    """Push ``branch`` to ``remote`` and open a merge request via push options.

    The merge request targets ``main``, is titled after the branch, and is
    set to remove the source branch.

    Args:
        remote: Name of the Git remote to push to.
        branch: Local branch to push.
    """
    push_options = (
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        "merge_request.title={}".format(branch),
    )
    cmd = ["git", "push", "-u", remote, branch]
    for option in push_options:
        cmd += ["-o", option]
    quiet_cmd(cmd)
129
130
def refresh_branch():
    """Check out ``main`` and pull the latest changes into it."""
    for git_args in (["checkout", "main"], ["pull"]):
        quiet_cmd(["git"] + git_args)
134
135
def delete_branch(branch):
    """Switch back to ``main`` and force-delete the local ``branch``."""
    switch_to_main = ["git", "checkout", "main"]
    force_delete = ["git", "branch", "-D", branch]
    quiet_cmd(switch_to_main)
    quiet_cmd(force_delete)
139
140
def process_pr(url, num, remote):
    """Fetch a pull request with ``b4 pr`` and forward it as a merge request.

    Args:
        url: URL of the mailing-list message containing the pull request.
        num: Number used, together with the current time, to build the
            branch name.
        remote: Git remote to push to; when empty, nothing is pushed.

    Returns:
        The name of the temporary branch when the pull request was fetched,
        otherwise ``None``.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))
    try:
        quiet_cmd(["b4", "pr", "-b", branch, url])
    except subprocess.CalledProcessError:
        logging.warning("Failed to apply PR")
        return None

    # The return code does not say whether it worked, so check whether the
    # branch now exists.
    list_cmd = ["git", "branch", "--list", branch]
    logging.debug("Running {}".format(list_cmd))
    if not subprocess.check_output(list_cmd):
        return None

    logging.info("Forwarding PR for {}".format(branch))
    if remote:
        create_pr(remote, branch)
    delete_branch(branch)
    return branch
4d619071
ML
162
163
def process_patch(mbox, num, remote):
    """Apply a patch with ``git am`` on a fresh branch and open a merge request.

    The temporary branch is deleted again before returning, whether or not
    the patch applied.

    Args:
        mbox: The raw message containing the patch, as a string.
        num: Number used, together with the current time, to build the
            branch name.
        remote: Git remote to push to; when empty, nothing is pushed.

    Returns:
        The name of the temporary branch when the patch applied cleanly,
        otherwise ``None``.

    Raises:
        subprocess.CalledProcessError: If creating, pushing or deleting the
            branch fails.
    """
    # create a new branch for the patch
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    # apply the patch, feeding the message on stdin
    cmd = ["git", "am"]
    logging.debug("Running {}".format(cmd))
    result = subprocess.run(
        cmd,
        input=mbox.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for stream in (result.stdout, result.stderr):
        for line in stream.splitlines():
            # git may echo bytes that are not valid UTF-8; logging must not
            # abort the run because of them.
            logging.debug(line.decode("utf-8", errors="replace"))

    applied = result.returncode == 0
    if applied:
        logging.info("Opening PR for {}".format(branch))
        if remote:
            create_pr(remote, branch)
    else:
        try:
            quiet_cmd(["git", "am", "--abort"])
        except subprocess.CalledProcessError:
            # If the abort itself fails, still fall through to the branch
            # cleanup below instead of raising out of the function.
            logging.warning("Failed to abort 'git am' for {}".format(branch))

    delete_branch(branch)
    return branch if applied else None
4d619071
ML
192
193
def update_database(conn, url):
    """Record every feed entry that is not yet in the ``firmware`` table.

    The table is created on first use.  New entries are inserted oldest
    first, with ``processed`` and ``spam`` both set to 0.  The connection is
    committed but left open for the caller.

    Args:
        conn: An open ``sqlite3`` connection.
        url: Path to a local Atom file, or a URL to fetch the feed from.
    """
    c = conn.cursor()

    c.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    if os.path.exists(url):
        # local file: decode as UTF-8, matching what fetch_url() does for
        # remote feeds, instead of depending on the locale's encoding
        with open(url, "r", encoding="utf-8") as f:
            atom = f.read()
    else:
        # remote file
        logging.info("Fetching {}".format(url))
        atom = fetch_url(url)

    # Parse the atom and extract the URLs
    feed = feedparser.parse(atom)

    # Insert the URLs into the database (oldest first)
    for entry in reversed(feed["entries"]):
        c.execute("SELECT url FROM firmware WHERE url = ?", (entry.link,))
        if c.fetchone():
            continue
        c.execute("INSERT INTO firmware VALUES (?, ?, ?)", (entry.link, 0, 0))

    # Commit the changes; closing the connection is left to the caller
    conn.commit()
223
224
def process_database(conn, remote):
    """Process every entry that is neither processed nor marked as spam.

    Each message is downloaded and classified.  Patches and pull requests
    are handed to ``process_patch`` / ``process_pr``, spam is flagged in the
    database, and replies are ignored.  Every entry is then marked as
    processed and committed, so an interrupted run resumes where it stopped.

    Args:
        conn: An open ``sqlite3`` connection containing the ``firmware``
            table.
        remote: Git remote to push to; an empty value means a dry run in
            which nothing is pushed and no notification email is sent.
    """
    c = conn.cursor()

    # get all unprocessed urls that aren't spam
    c.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    rows = c.fetchall()

    if not rows:
        logging.info("No new entries")
        return

    refresh_branch()

    total = len(rows)
    for num, (link,) in enumerate(rows):
        branch = None
        msg = "Processing ({}%)".format(round(num / total * 100))
        print(msg, end="\r", flush=True)

        url = "{}raw".format(link)
        logging.debug("Processing {}".format(url))
        mbox = fetch_url(url)
        classification = classify_content(mbox)

        if classification == ContentType.PATCH:
            logging.debug("Processing patch ({})".format(link))
            branch = process_patch(mbox, num, remote)
        elif classification == ContentType.PULL_REQUEST:
            logging.debug("Processing PR ({})".format(link))
            branch = process_pr(link, num, remote)
        elif classification == ContentType.SPAM:
            logging.debug("Marking spam ({})".format(link))
            c.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif classification == ContentType.REPLY:
            logging.debug("Ignoring reply ({})".format(link))

        c.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))
        # blank out the progress text
        print(" " * len(msg), end="\r", flush=True)

        # commit after every entry so progress survives a later failure
        conn.commit()

        # Only tell the submitter the request was forwarded when it was
        # actually pushed; in a dry run no merge request exists.
        if branch and remote:
            reply_email(mbox, branch)

    logging.info("Finished processing {} new entries".format(total))
280
281
# Script entry point: parse options, set up logging, then update and process
# the database once, or repeatedly when --refresh-cycle is given.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
    parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
    parser.add_argument(
        "--database",
        default=os.path.join("contrib", "linux_firmware.db"),
        help="sqlite database to store entries in",
    )
    parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging to console"
    )
    parser.add_argument("--remote", default="origin", help="Remote to push to")
    # type=int rejects a non-numeric value at startup rather than after the
    # first full processing cycle.
    parser.add_argument(
        "--refresh-cycle",
        type=int,
        default=0,
        help="How frequently to run (in minutes)",
    )
    args = parser.parse_args()

    if not os.path.exists("WHENCE"):
        logging.critical(
            "Please run this script from the root of the linux-firmware repository"
        )
        sys.exit(1)

    # Everything is logged at DEBUG level to a per-day file under contrib/.
    log = os.path.join(
        "contrib",
        "{prefix}-{date}.{suffix}".format(
            prefix="linux_firmware", suffix="txt", date=date.today()
        ),
    )
    logging.basicConfig(
        format="%(asctime)s %(levelname)s:\t%(message)s",
        filename=log,
        filemode="w",
        level=logging.DEBUG,
    )

    # set a format which is simpler for console use
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if args.debug else logging.INFO)
    console.setFormatter(
        logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
    )
    logging.getLogger("").addHandler(console)

    # An empty remote tells the processing functions not to push anything.
    remote = "" if args.dry else args.remote

    while True:
        conn = sqlite3.connect(args.database)
        try:
            # update the database
            update_database(conn, args.url)
            # process the database
            process_database(conn, remote)
        finally:
            # close the connection even when processing raises
            conn.close()

        if not args.refresh_cycle:
            break
        logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
        time.sleep(args.refresh_cycle * 60)
348 break