]> git.proxmox.com Git - mirror_linux-firmware.git/blob - contrib/process_linux_firmware.py
Merge branch 'mlimonci/use-am' into 'main'
[mirror_linux-firmware.git] / contrib / process_linux_firmware.py
1 #!/usr/bin/python3
2 import os
3 import time
4 import urllib.request
5 import sqlite3
6 import feedparser
7 import argparse
8 import logging
9 import email
10 import email.utils
11 import smtplib
12 import subprocess
13 import sys
14 from datetime import date
15 from enum import Enum
16
# Atom feed for the linux-firmware list on lore.kernel.org; this is the
# default value of the --url command-line option.
URL = "https://lore.kernel.org/linux-firmware/new.atom"
18
19
class ContentType(Enum):
    """Categories a mailing-list message can be sorted into."""

    # Follow-up message in an existing thread; not acted upon.
    REPLY = 1
    # Message carrying a patch to be applied with "git am".
    PATCH = 2
    # Message asking for a Git branch to be pulled.
    PULL_REQUEST = 3
    # Anything that matched none of the above.
    SPAM = 4
25
26
# Marker substrings looked for in a message body, mapped to the
# classification they imply.  Entries are tried in insertion order and the
# first marker found decides the result.
content_types = {
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
    "are available in the Git repository at": ContentType.PULL_REQUEST,
}
32
33
def classify_content(content):
    """Classify a raw mailing-list message.

    The subject is checked first: "Re:" anywhere in it means REPLY and
    "PATCH" means PATCH.  Otherwise the first text/plain part is decoded as
    UTF-8 and searched for the markers in ``content_types``.

    Args:
        content: The complete message (headers and body) as a string.

    Returns:
        A ContentType member.  SPAM is returned when nothing matches, when
        the first text/plain part cannot be decoded as UTF-8, or when the
        message has no text/plain part at all.
    """
    msg = email.message_from_string(content)

    # A message without a Subject header yields None here; coerce to a
    # string so the substring checks below cannot raise TypeError.
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        # Only the first text/plain part is ever inspected: every path
        # below either returns or leaves the loop.
        try:
            body = part.get_payload(decode=True).decode("utf-8")
        except UnicodeDecodeError as e:
            logging.warning("Failed to decode email: %s, treating as SPAM", e)
            break
        for marker, content_type in content_types.items():
            if marker in body:
                return content_type
        break
    return ContentType.SPAM
57
58
def fetch_url(url):
    """Download *url* and return the response body decoded as UTF-8 text."""
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    return payload.decode("utf-8")
62
63
def quiet_cmd(cmd):
    """Run *cmd* and record it, plus everything it printed, in the debug log.

    stderr is merged into stdout so both end up in the same log entry.

    Args:
        cmd: Argument list for the command to execute.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    announcement = "Running {}".format(cmd)
    logging.debug(announcement)
    captured = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    logging.debug(captured)
68
69
def _unfold_header(value):
    """Return header *value* as a single-line string ("" for a missing header)."""
    return " ".join(str(value or "").split())


def reply_email(content, branch):
    """Reply to a processed message, telling the sender where to follow up.

    The reply goes to every From/To/Cc address of the original message and
    is threaded under it.  SMTP settings come from the SMTP_USER, SMTP_PASS,
    SMTP_SERVER and SMTP_PORT environment variables; if any of them is
    missing or empty nothing is sent.

    Args:
        content: The original message (headers and body) as a string.
        branch: Name of the branch created for the request; quoted in the
            reply body.

    Returns:
        None.
    """
    # Read the whole configuration up front so that a missing variable
    # leads to the "not sending" path rather than an unbound local.
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASS")
    server = os.environ.get("SMTP_SERVER")
    port = os.environ.get("SMTP_PORT")
    if not all((user, password, server, port)):
        logging.debug("Missing SMTP configuration, not sending email")
        return

    orig = email.message_from_string(content)
    reply = email.message.EmailMessage()

    recipients = email.utils.getaddresses(
        orig.get_all("from", []) + orig.get_all("to", []) + orig.get_all("cc", [])
    )
    reply["To"] = ", ".join(email.utils.formataddr(r) for r in recipients)
    reply["From"] = "linux-firmware@kernel.org"

    # Headers copied from the original may be folded over several lines;
    # EmailMessage refuses values containing line breaks, so unfold first.
    reply["Subject"] = "Re: {}".format(_unfold_header(orig["Subject"]))

    message_id = _unfold_header(orig["Message-Id"])
    if message_id:
        reply["In-Reply-To"] = message_id
        reply["References"] = message_id

    # These threading headers are optional; copy them only when present.
    for header in ("Thread-Topic", "Thread-Index"):
        value = _unfold_header(orig[header])
        if value:
            reply[header] = value

    reply.set_content(
        "Your request has been forwarded by the Linux Firmware Kernel robot.\n"
        "Please follow up at https://gitlab.com/kernel-firmware/linux-firmware/-/merge_requests to ensure it gets merged\n"
        "Your request is '{}'".format(branch)
    )

    # The context manager ends the session even if login or sending fails.
    with smtplib.SMTP(server, port) as mailserver:
        mailserver.ehlo()
        mailserver.starttls()
        mailserver.ehlo()
        mailserver.login(user, password)
        # send_message() takes the envelope recipients from the headers.
        # Handing the comma-joined "To" string to sendmail() would be
        # treated as one single address.
        mailserver.send_message(reply)
114
115
def create_pr(remote, branch):
    """Push *branch* to *remote*, asking the server to open a merge request.

    The merge request targets main, is titled after the branch and removes
    the source branch once merged; all of this is requested through git
    push options.

    Args:
        remote: Name of the git remote to push to.
        branch: Local branch to push.
    """
    push_options = (
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        "merge_request.title={}".format(branch),
    )
    cmd = ["git", "push", "-u", remote, branch]
    for option in push_options:
        cmd += ["-o", option]
    quiet_cmd(cmd)
133
134
def refresh_branch():
    """Switch the working tree to main and pull the latest changes."""
    for git_args in (["checkout", "main"], ["pull"]):
        quiet_cmd(["git"] + git_args)
138
139
def delete_branch(branch):
    """Switch back to main and force-delete the local *branch*."""
    for git_args in (["checkout", "main"], ["branch", "-D", branch]):
        quiet_cmd(["git"] + git_args)
143
144
def process_pr(url, num, remote):
    """Fetch a pull request with b4 and forward it as a merge request.

    Args:
        url: Address of the message containing the pull request.
        num: Sequence number of the entry; becomes part of the branch name.
        remote: Git remote to push to; a false value skips the push.

    Returns:
        The name of the branch that was created, or None if b4 failed or
        did not produce the branch.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))
    try:
        quiet_cmd(["b4", "pr", "-b", branch, url])
    except subprocess.CalledProcessError:
        logging.warning("Failed to apply PR")
        return None

    # The exit status alone does not say whether it worked, so check
    # whether the branch actually exists.
    list_cmd = ["git", "branch", "--list", branch]
    logging.debug("Running {}".format(list_cmd))
    if not subprocess.check_output(list_cmd):
        return None

    logging.info("Forwarding PR for {}".format(branch))
    if remote:
        create_pr(remote, branch)
    delete_branch(branch)
    return branch
166
167
def process_patch(mbox, num, remote):
    """Apply a mailed patch on a fresh branch and open a merge request.

    Args:
        mbox: The raw message containing the patch, as a string.
        num: Sequence number of the entry; becomes part of the branch name.
        remote: Git remote to push to; a false value skips the push.

    Returns:
        The name of the branch the patch was applied on, or None when
        "git am" rejected it.  The local branch is deleted either way.
    """
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    # Feed the message to "git am" on stdin and keep its output for the log.
    am_cmd = ["git", "am"]
    logging.debug("Running {}".format(am_cmd))
    proc = subprocess.Popen(
        am_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout, stderr = proc.communicate(mbox.encode("utf-8"))
    for stream in (stdout, stderr):
        for line in stream.splitlines():
            logging.debug(line.decode("utf-8"))

    applied = proc.returncode == 0
    if applied:
        logging.info("Opening PR for {}".format(branch))
        if remote:
            create_pr(remote, branch)
    else:
        # Leave no half-applied patch behind.
        quiet_cmd(["git", "am", "--abort"])

    delete_branch(branch)
    return branch if applied else None
196
197
def update_database(conn, url):
    """Record every feed entry that is not yet in the firmware table.

    Args:
        conn: Open sqlite3 connection; the table is created if needed and
            the changes are committed before returning.  The connection is
            left open.
        url: Either a path to a local Atom file or a URL to download.
    """
    cursor = conn.cursor()
    cursor.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    # Anything that is not an existing local path is treated as remote.
    if not os.path.exists(url):
        logging.info("Fetching {}".format(url))
        atom = fetch_url(url)
    else:
        with open(url, "r") as f:
            atom = f.read()

    entries = feedparser.parse(atom)["entries"]

    # Walk the feed back to front so entries are inserted oldest first.
    for entry in reversed(entries):
        link = entry.link
        cursor.execute("SELECT url FROM firmware WHERE url = ?", (link,))
        if cursor.fetchone() is None:
            cursor.execute("INSERT INTO firmware VALUES (?, ?, ?)", (link, 0, 0))

    conn.commit()
227
228
def process_database(conn, remote):
    """Handle every database entry that is neither processed nor spam.

    Each entry is downloaded, classified and dispatched: patches and pull
    requests are forwarded, spam is flagged, replies are ignored.  The
    entry is then marked processed and committed, and a reply e-mail is
    sent when a branch was created for it.

    Args:
        conn: Open sqlite3 connection holding the firmware table.
        remote: Git remote to push to; a false value skips pushing.
    """
    cursor = conn.cursor()
    cursor.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    rows = cursor.fetchall()

    if not rows:
        logging.info("No new entries")
        return

    refresh_branch()

    total = len(rows)
    for num, row in enumerate(rows):
        link = row[0]
        branch = None

        # Progress indicator, overwritten in place on the console.
        progress = "Processing ({}%)".format(round(num / total * 100))
        print(progress, end="\r", flush=True)

        url = "{}raw".format(link)
        logging.debug("Processing {}".format(url))
        mbox = fetch_url(url)
        classification = classify_content(mbox)

        if classification == ContentType.PATCH:
            logging.debug("Processing patch ({})".format(link))
            branch = process_patch(mbox, num, remote)
        elif classification == ContentType.PULL_REQUEST:
            logging.debug("Processing PR ({})".format(link))
            branch = process_pr(link, num, remote)
        elif classification == ContentType.SPAM:
            logging.debug("Marking spam ({})".format(link))
            cursor.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif classification == ContentType.REPLY:
            logging.debug("Ignoring reply ({})".format(link))

        cursor.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))

        # Blank out the progress indicator again.
        print(" " * len(progress), end="\r", flush=True)

        # Commit per entry so finished work is not redone after a crash.
        conn.commit()

        if branch:
            reply_email(mbox, branch)

    logging.info("Finished processing {} new entries".format(total))
284
285
if __name__ == "__main__":
    # Command-line entry point: poll the feed, record new entries and
    # process them, optionally repeating every --refresh-cycle minutes.
    parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
    parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
    parser.add_argument(
        "--database",
        default=os.path.join("contrib", "linux_firmware.db"),
        help="sqlite database to store entries in",
    )
    parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging to console"
    )
    parser.add_argument("--remote", default="origin", help="Remote to push to")
    # Parse as an integer up front: a non-numeric value is rejected at
    # startup, and an explicit "0" means "run once" instead of being a
    # truthy string that loops forever with no sleep.
    parser.add_argument(
        "--refresh-cycle",
        type=int,
        default=0,
        help="How frequently to run (in minutes)",
    )
    args = parser.parse_args()

    # WHENCE sits at the top of the linux-firmware tree; its absence means
    # the script was started from the wrong directory.
    if not os.path.exists("WHENCE"):
        logging.critical(
            "Please run this script from the root of the linux-firmware repository"
        )
        sys.exit(1)

    # Full debug log goes to a per-day file, overwritten on each start.
    log = os.path.join(
        "contrib",
        "{prefix}-{date}.{suffix}".format(
            prefix="linux_firmware", suffix="txt", date=date.today()
        ),
    )
    logging.basicConfig(
        format="%(asctime)s %(levelname)s:\t%(message)s",
        filename=log,
        filemode="w",
        level=logging.DEBUG,
    )

    # set a format which is simpler for console use
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if args.debug else logging.INFO)
    console.setFormatter(
        logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
    )
    logging.getLogger("").addHandler(console)

    # An empty remote tells the processing functions not to push anything.
    remote = "" if args.dry else args.remote

    while True:
        conn = sqlite3.connect(args.database)
        try:
            update_database(conn, args.url)
            process_database(conn, remote)
        finally:
            # Release the database even when a pass fails part-way.
            conn.close()

        if not args.refresh_cycle:
            break
        logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
        time.sleep(args.refresh_cycle * 60)