git.proxmox.com Git - mirror_linux-firmware.git/blob - contrib/process_linux_firmware.py
Use `git am` instead of `b4 shazam`
[mirror_linux-firmware.git] / contrib / process_linux_firmware.py
1 #!/usr/bin/python3
2 import os
3 import time
4 import urllib.request
5 import sqlite3
6 import feedparser
7 import argparse
8 import logging
9 import email
10 import email.utils
11 import smtplib
12 import subprocess
13 import sys
14 from datetime import date
15 from enum import Enum
16
# Atom feed polled when no --url option is given on the command line.
URL = "https://lore.kernel.org/linux-firmware/new.atom"
18
19
class ContentType(Enum):
    """Categories that classify_content() sorts a mailing-list message into."""

    # Subject contains "Re:"
    REPLY = 1
    # Subject contains "PATCH", or the body carries a patch marker
    PATCH = 2
    # Body carries the pull-request marker text
    PULL_REQUEST = 3
    # Fallback when nothing else matched
    SPAM = 4
25
26
# Substrings searched for in a message body, mapped to the classification they
# imply.  Entries are tried in insertion order and the first hit wins.
content_types = {
    # markers found in patches
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
    # marker text for a pull request
    "are available in the Git repository at": ContentType.PULL_REQUEST,
}
32
33
def classify_content(content):
    """Classify a raw mailing-list message.

    The subject is checked first: "Re:" means a reply and "PATCH" means a
    patch.  Otherwise the first text/plain part is searched for the markers
    in ``content_types``.  Anything that matches nothing is treated as spam.

    Args:
        content: the complete raw message (headers and body) as a string.

    Returns:
        A ContentType member.
    """
    # load content into the email library
    msg = email.message_from_string(content)

    # A message without a Subject header yields None; fall back to an empty
    # string so the substring checks below cannot raise.
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        if payload is not None:
            # The markers are plain ASCII, so undecodable bytes are replaced
            # rather than aborting the whole classification.
            body = payload.decode("utf-8", errors="replace")
            for marker, kind in content_types.items():
                if marker in body:
                    return kind
        # only the first text/plain part is inspected
        break
    return ContentType.SPAM
53
54
def fetch_url(url):
    """Retrieve *url* and return the response body decoded as UTF-8 text."""
    response = urllib.request.urlopen(url)
    try:
        raw = response.read()
    finally:
        # release the connection whether or not the read succeeded
        response.close()
    return raw.decode("utf-8")
58
59
def quiet_cmd(cmd):
    """Run *cmd* and send both the command line and its output to the debug log.

    stderr is merged into stdout.  A non-zero exit status raises
    subprocess.CalledProcessError (from check_output).
    """
    logging.debug("Running {}".format(cmd))
    captured = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    logging.debug(captured)
64
65
def _unfold_header(value):
    """Return *value* as a single-line string (line breaks removed)."""
    return "".join(str(value).splitlines())


def reply_email(content, branch):
    """Send a reply to the original message announcing the forwarded request.

    SMTP settings are read from the SMTP_USER, SMTP_PASS, SMTP_SERVER and
    SMTP_PORT environment variables.  If any of them is missing or empty,
    nothing is sent.

    Args:
        content: the complete raw original message as a string.
        branch: name of the branch that was created for the request; it is
            quoted in the reply body.

    Returns:
        None.
    """
    # .get() keeps a missing variable from leaving a name unbound
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASS")
    server = os.environ.get("SMTP_SERVER")
    port = os.environ.get("SMTP_PORT")
    if not (user and password and server and port):
        logging.debug("Missing SMTP configuration, not sending email")
        return

    reply = email.message.EmailMessage()

    orig = email.message_from_string(content)
    # reply to everybody involved in the original message
    reply["To"] = ", ".join(
        email.utils.formataddr(t)
        for t in email.utils.getaddresses(
            orig.get_all("from", []) + orig.get_all("to", []) + orig.get_all("cc", [])
        )
    )

    reply["From"] = "linux-firmware@kernel.org"
    # a folded (multi-line) subject is not accepted as a header value
    reply["Subject"] = _unfold_header("Re: {}".format(orig["Subject"]))

    # Threading headers are copied only when the original carries them;
    # None is not a valid header value.
    message_id = orig["Message-Id"]
    if message_id is not None:
        reply["In-Reply-To"] = _unfold_header(message_id)
        reply["References"] = _unfold_header(message_id)
    for header in ("Thread-Topic", "Thread-Index"):
        value = orig[header]
        if value is not None:
            reply[header] = _unfold_header(value)

    content = (
        "Your request has been forwarded by the Linux Firmware Kernel robot.\n"
        "Please follow up at https://gitlab.com/kernel-firmware/linux-firmware/-/merge_requests to ensure it gets merged\n"
        "Your request is '{}'".format(branch)
    )
    reply.set_content(content)

    # the context manager closes the session even if a step fails
    with smtplib.SMTP(server, port) as mailserver:
        mailserver.ehlo()
        mailserver.starttls()
        mailserver.ehlo()
        mailserver.login(user, password)
        # send_message() takes the recipients from the headers, so every
        # address in To is used rather than the joined string as one address
        mailserver.send_message(reply)
110
111
def create_pr(remote, branch):
    """Push *branch* to *remote* with push options requesting a merge request.

    The options ask for a merge request targeting main, titled after the
    branch, with the source branch removed afterwards.
    """
    push_options = (
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        "merge_request.title={}".format(branch),
    )
    cmd = ["git", "push", "-u", remote, branch]
    for option in push_options:
        cmd.extend(["-o", option])
    quiet_cmd(cmd)
129
130
def refresh_branch():
    """Check out main and pull."""
    for git_args in (["checkout", "main"], ["pull"]):
        quiet_cmd(["git"] + git_args)
134
135
def delete_branch(branch):
    """Switch back to main, then force-delete the local *branch*."""
    for git_args in (["checkout", "main"], ["branch", "-D", branch]):
        quiet_cmd(["git"] + git_args)
139
140
def process_pr(url, num, remote):
    """Fetch a pull request into a temporary branch and forward it.

    Args:
        url: URL of the message containing the pull request.
        num: index of the message in the current run; part of the branch name.
        remote: remote to push to, or a false value to skip pushing.

    Returns:
        The temporary branch name if the pull request was fetched, else None.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))
    try:
        quiet_cmd(["b4", "pr", "-b", branch, url])
    except subprocess.CalledProcessError:
        logging.warning("Failed to apply PR")
        return None

    # determine if it worked (we can't tell unfortunately by return code)
    cmd = ["git", "branch", "--list", branch]
    logging.debug("Running {}".format(cmd))
    if not subprocess.check_output(cmd):
        return None

    logging.info("Forwarding PR for {}".format(branch))
    try:
        if remote:
            create_pr(remote, branch)
    finally:
        # drop the temporary branch even when the push fails
        delete_branch(branch)
    return branch
162
163
def process_patch(mbox, num, remote):
    """Apply a mailed patch on a temporary branch and forward it.

    Args:
        mbox: the complete raw message containing the patch, as a string.
        num: index of the message in the current run; part of the branch name.
        remote: remote to push to, or a false value to skip pushing.

    Returns:
        The temporary branch name if the patch applied, else None.
    """
    # create a new branch for the patch
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    try:
        # apply the patch
        cmd = ["git", "am"]
        logging.debug("Running {}".format(cmd))
        p = subprocess.Popen(
            cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        stdout, stderr = p.communicate(mbox.encode("utf-8"))
        for stream in (stdout, stderr):
            for line in stream.splitlines():
                # git output may not be valid UTF-8; never fail just to log it
                logging.debug(line.decode("utf-8", errors="replace"))

        if p.returncode != 0:
            try:
                quiet_cmd(["git", "am", "--abort"])
            except subprocess.CalledProcessError:
                # git am may have failed before starting a session, leaving
                # nothing to abort; that must not stop the whole run
                logging.warning("Failed to abort git am for {}".format(branch))
            return None

        logging.info("Opening PR for {}".format(branch))
        if remote:
            create_pr(remote, branch)
        return branch
    finally:
        # always return to main and drop the temporary branch
        delete_branch(branch)
192
193
def update_database(conn, url):
    """Record every feed entry that is not yet in the database.

    Args:
        conn: open sqlite3 connection; the firmware table is created if
            needed.  The connection is committed but left open.
        url: path of a local Atom file, or a URL to download the feed from.
    """
    c = conn.cursor()

    c.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    # local file
    if os.path.exists(url):
        # read as UTF-8, matching how the remote feed is decoded, instead of
        # depending on the locale's default encoding
        with open(url, "r", encoding="utf-8") as f:
            atom = f.read()
    # remote file
    else:
        logging.info("Fetching {}".format(url))
        atom = fetch_url(url)

    # Parse the atom and extract the URLs
    feed = feedparser.parse(atom)

    # Insert the URLs into the database (oldest first)
    feed["entries"].reverse()
    for entry in feed["entries"]:
        c.execute("SELECT url FROM firmware WHERE url = ?", (entry.link,))
        if c.fetchone():
            continue
        c.execute("INSERT INTO firmware VALUES (?, ?, ?)", (entry.link, 0, 0))

    # Commit the changes; closing the connection is left to the caller
    conn.commit()
223
224
def process_database(conn, remote):
    """Classify and act on every entry not yet processed or marked as spam.

    Each message is downloaded and classified.  Patches and pull requests are
    forwarded, spam is flagged, and replies are ignored.  Every entry is then
    marked processed and committed.  A reply email is sent for each entry
    that produced a branch.

    Args:
        conn: open sqlite3 connection holding the firmware table.
        remote: remote to push to, or a false value to skip pushing.
    """
    cursor = conn.cursor()

    # get all unprocessed urls that aren't spam
    cursor.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    pending = cursor.fetchall()

    if not pending:
        logging.info("No new entries")
        return

    refresh_branch()

    total = len(pending)
    for index, (link,) in enumerate(pending):
        # transient progress indicator, overwritten via the carriage return
        progress = "Processing ({}%)".format(round(index / total * 100))
        print(progress, end="\r", flush=True)

        raw_url = "{}raw".format(link)
        logging.debug("Processing {}".format(raw_url))
        mbox = fetch_url(raw_url)
        kind = classify_content(mbox)

        branch = None
        if kind == ContentType.PATCH:
            logging.debug("Processing patch ({})".format(link))
            branch = process_patch(mbox, index, remote)
        elif kind == ContentType.PULL_REQUEST:
            logging.debug("Processing PR ({})".format(link))
            branch = process_pr(link, index, remote)
        elif kind == ContentType.SPAM:
            logging.debug("Marking spam ({})".format(link))
            cursor.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif kind == ContentType.REPLY:
            logging.debug("Ignoring reply ({})".format(link))

        cursor.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))
        # blank out the progress indicator
        print(" " * len(progress), end="\r", flush=True)

        # commit per entry, before any email goes out
        conn.commit()

        # send any emails
        if branch:
            reply_email(mbox, branch)

    logging.info("Finished processing {} new entries".format(total))
280
281
282 if __name__ == "__main__":
283 parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
284 parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
285 parser.add_argument(
286 "--database",
287 default=os.path.join("contrib", "linux_firmware.db"),
288 help="sqlite database to store entries in",
289 )
290 parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
291 parser.add_argument(
292 "--debug", action="store_true", help="Enable debug logging to console"
293 )
294 parser.add_argument("--remote", default="origin", help="Remote to push to")
295 parser.add_argument(
296 "--refresh-cycle", default=0, help="How frequently to run (in minutes)"
297 )
298 args = parser.parse_args()
299
300 if not os.path.exists("WHENCE"):
301 logging.critical(
302 "Please run this script from the root of the linux-firmware repository"
303 )
304 sys.exit(1)
305
306 log = os.path.join(
307 "contrib",
308 "{prefix}-{date}.{suffix}".format(
309 prefix="linux_firmware", suffix="txt", date=date.today()
310 ),
311 )
312 logging.basicConfig(
313 format="%(asctime)s %(levelname)s:\t%(message)s",
314 filename=log,
315 filemode="w",
316 level=logging.DEBUG,
317 )
318
319 # set a format which is simpler for console use
320 console = logging.StreamHandler()
321 if args.debug:
322 console.setLevel(logging.DEBUG)
323 else:
324 console.setLevel(logging.INFO)
325 formatter = logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
326 console.setFormatter(formatter)
327 logging.getLogger("").addHandler(console)
328
329 while True:
330 conn = sqlite3.connect(args.database)
331 # update the database
332 update_database(conn, args.url)
333
334 if args.dry:
335 remote = ""
336 else:
337 remote = args.remote
338
339 # process the database
340 process_database(conn, remote)
341
342 conn.close()
343
344 if args.refresh_cycle:
345 logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
346 time.sleep(int(args.refresh_cycle) * 60)
347 else:
348 break