]> git.proxmox.com Git - mirror_linux-firmware.git/blob - contrib/process_linux_firmware.py
4ef0aa760692f6fe6c4051b6d210cbf54215ef8d
[mirror_linux-firmware.git] / contrib / process_linux_firmware.py
1 #!/usr/bin/python3
2 import os
3 import time
4 import urllib.request
5 import sqlite3
6 import feedparser
7 import argparse
8 import logging
9 import email
10 import email.utils
11 import smtplib
12 import subprocess
13 import sys
14 from datetime import date
15 from enum import Enum
16
# Default Atom feed to poll; can be overridden with --url.
URL = "https://lore.kernel.org/linux-firmware/new.atom"
18
19
class ContentType(Enum):
    """Categories that a mailing-list message is sorted into."""

    # subject contains "Re:"
    REPLY = 1
    # subject contains "PATCH", or the body carries a patch marker
    PATCH = 2
    # body carries the pull-request marker
    PULL_REQUEST = 3
    # nothing recognisable was found
    SPAM = 4
25
26
# Marker strings searched for in a message body, mapped to the
# classification each one implies. Entries are tried in this order and the
# first marker found in the body decides the result.
content_types = {
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
    "are available in the Git repository at": ContentType.PULL_REQUEST,
}
32
33
def classify_content(content):
    """Decide what kind of message a raw email is.

    The subject is checked first: "Re:" anywhere in it marks a reply and
    "PATCH" marks a patch. Otherwise the first text/plain part of the
    message is searched for the markers in ``content_types``. A message
    that matches nothing is classified as spam.

    Args:
        content: The raw message (headers and body) as a string.

    Returns:
        The matching ContentType member.
    """
    # load content into the email library
    msg = email.message_from_string(content)

    # check the subject; a message without a Subject header yields None,
    # which would make the "in" tests below raise TypeError
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True) or b""
        # The markers are plain ASCII, so undecodable bytes can safely be
        # replaced instead of aborting on a non-UTF-8 message.
        body = payload.decode("utf-8", errors="replace")
        for marker, kind in content_types.items():
            if marker in body:
                return kind
        # only the first text/plain part is inspected
        break
    return ContentType.SPAM
53
54
def fetch_url(url):
    """Download *url* and return the response body decoded as UTF-8 text."""
    response = urllib.request.urlopen(url)
    with response:
        raw = response.read()
    return raw.decode("utf-8")
58
59
def quiet_cmd(cmd):
    """Run *cmd*, sending its combined stdout/stderr to the debug log.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    logging.debug("Running {}".format(cmd))
    finished = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True
    )
    logging.debug(finished.stdout)
64
65
def reply_email(content, branch):
    """Tell everyone on the original message that the request was forwarded.

    SMTP settings are read from the SMTP_USER, SMTP_PASS, SMTP_SERVER and
    SMTP_PORT environment variables. If any of them is unset or empty, or
    the original message has no usable addresses, nothing is sent.

    Args:
        content: The raw original message (headers and body) as a string.
        branch: Name of the branch created for the request; quoted in the
            reply body.
    """

    def unfold(value):
        # Headers parsed from the raw message keep the line breaks of
        # folded lines, and EmailMessage refuses header values containing
        # them, so collapse all whitespace runs to single spaces.
        return " ".join(str(value or "").split())

    # .get() so that a missing variable reaches the check below instead of
    # leaving the name unbound
    user = os.environ.get("SMTP_USER")
    password = os.environ.get("SMTP_PASS")
    server = os.environ.get("SMTP_SERVER")
    port = os.environ.get("SMTP_PORT")
    if not user or not password or not server or not port:
        logging.debug("Missing SMTP configuration, not sending email")
        return

    orig = email.message_from_string(content)
    targets = email.utils.getaddresses(
        orig.get_all("to", []) + orig.get_all("cc", []) + orig.get_all("from", [])
    )
    # drop entries without an address part (unparseable input)
    targets = [target for target in targets if target[1]]
    if not targets:
        logging.debug("No recipients found, not sending email")
        return

    reply = email.message.EmailMessage()
    # "To" can only be set once on an EmailMessage, so build it in one go
    reply["To"] = unfold(", ".join(email.utils.formataddr(t) for t in targets))
    reply["From"] = "linux-firmware@kernel.org"
    reply["Subject"] = "Re: {}".format(unfold(orig["Subject"]))

    # Copy the threading headers, skipping any the original does not have
    # (assigning None to a header raises TypeError).
    copied_headers = (
        ("In-Reply-To", "Message-Id"),
        ("References", "Message-Id"),
        ("Thread-Topic", "Thread-Topic"),
        ("Thread-Index", "Thread-Index"),
    )
    for reply_name, orig_name in copied_headers:
        value = unfold(orig[orig_name])
        if value:
            reply[reply_name] = value

    content = (
        "Your request has been forwarded by the Linux Firmware Kernel robot.\n"
        "Please follow up at https://gitlab.com/kernel-firmware/linux-firmware/-/merge_requests to ensure it gets merged\n"
        "Your request is '{}'".format(branch)
    )
    reply.set_content(content)

    # sendmail() treats a plain string as ONE address, so hand it the list
    recipients = [address for _name, address in targets]

    # the context manager closes the connection even if a step fails
    with smtplib.SMTP(server, port) as mailserver:
        mailserver.ehlo()
        mailserver.starttls()
        mailserver.ehlo()
        mailserver.login(user, password)
        mailserver.sendmail(reply["From"], recipients, reply.as_string())
109
110
def create_pr(remote, branch):
    """Push *branch* to *remote*, passing merge-request push options.

    The options passed with ``-o`` request creation of a merge request
    against ``main``, titled with the branch name, with removal of the
    source branch.
    """
    push_options = (
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        "merge_request.title={}".format(branch),
    )
    cmd = ["git", "push", "-u", remote, branch]
    for option in push_options:
        cmd += ["-o", option]
    quiet_cmd(cmd)
128
129
def refresh_branch():
    """Check out main and pull."""
    for git_args in (("checkout", "main"), ("pull",)):
        quiet_cmd(["git", *git_args])
133
134
def delete_branch(branch):
    """Switch back to main, then force-delete the local *branch*."""
    for git_args in (("checkout", "main"), ("branch", "-D", branch)):
        quiet_cmd(["git", *git_args])
138
139
def process_pr(url, num, remote):
    """Fetch a pull request with b4 into a scratch branch and forward it.

    Args:
        url: URL of the message containing the pull request.
        num: Counter used, together with the current time, in the branch name.
        remote: Remote to push to; a falsy value skips the push.

    Returns:
        The branch name if the branch was created, otherwise None.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))
    try:
        quiet_cmd(["b4", "pr", "-b", branch, url])
    except subprocess.CalledProcessError:
        logging.warning("Failed to apply PR")
        return None

    # The return code does not tell us whether it worked, so check whether
    # the branch actually exists.
    list_cmd = ["git", "branch", "--list", branch]
    logging.debug("Running {}".format(list_cmd))
    if not subprocess.check_output(list_cmd):
        return None

    logging.info("Forwarding PR for {}".format(branch))
    if remote:
        create_pr(remote, branch)
    delete_branch(branch)
    return branch
161
162
def process_patch(mbox, num, remote):
    """Apply a patch with b4 on a scratch branch and open a PR for it.

    Args:
        mbox: The raw message containing the patch, as a string.
        num: Counter used, together with the current time, in the branch name.
        remote: Remote to push to; a falsy value skips the push.

    Returns:
        The branch name if the patch applied, otherwise None. The local
        branch is deleted in both cases.
    """
    # create a new branch for the patch
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    # apply the patch, feeding the message to b4 on stdin
    cmd = ["b4", "shazam", "-m", "-"]
    logging.debug("Running {}".format(cmd))
    p = subprocess.Popen(
        cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout, stderr = p.communicate(mbox.encode("utf-8"))
    for stream in (stdout, stderr):
        for line in stream.splitlines():
            # tool output is only logged, so never fail on odd bytes
            logging.debug(line.decode("utf-8", errors="replace"))

    applied = p.returncode == 0
    if applied:
        logging.info("Opening PR for {}".format(branch))
        if remote:
            create_pr(remote, branch)
    else:
        # b4 may have failed without leaving a "git am" session behind, in
        # which case the abort itself exits non-zero. That must not stop
        # the branch cleanup below or take down the whole run.
        try:
            quiet_cmd(["git", "am", "--abort"])
        except subprocess.CalledProcessError:
            logging.debug("git am --abort failed, continuing with cleanup")

    delete_branch(branch)
    return branch if applied else None
191
192
def update_database(conn, url):
    """Record feed entries that are not yet in the ``firmware`` table.

    Args:
        conn: Open sqlite3 connection; the table is created if missing and
            the changes are committed before returning.
        url: Path of a local Atom file if it exists on disk, otherwise a
            URL to download the feed from.
    """
    cursor = conn.cursor()
    cursor.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    if os.path.exists(url):
        # local file
        with open(url, "r") as f:
            atom = f.read()
    else:
        # remote file
        logging.info("Fetching {}".format(url))
        atom = fetch_url(url)

    # Parse the atom and walk its entries in reverse feed order so that
    # they are inserted oldest first
    feed = feedparser.parse(atom)
    for entry in reversed(feed["entries"]):
        link = entry.link
        cursor.execute("SELECT url FROM firmware WHERE url = ?", (link,))
        if not cursor.fetchone():
            cursor.execute("INSERT INTO firmware VALUES (?, ?, ?)", (link, 0, 0))

    # Commit the changes (the caller owns and closes the connection)
    conn.commit()
222
223
def process_database(conn, remote):
    """Classify and act on every stored message not yet processed.

    Each unprocessed, non-spam URL is downloaded and classified. Patches
    and pull requests are handed to their handlers, spam is flagged, and
    replies are ignored. Every row is then marked processed and committed,
    and a reply email is sent when a branch was produced.

    Args:
        conn: Open sqlite3 connection holding the ``firmware`` table.
        remote: Remote passed on to the patch / pull-request handlers.
    """
    cursor = conn.cursor()

    # get all unprocessed urls that aren't spam
    cursor.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    rows = cursor.fetchall()
    if not rows:
        logging.info("No new entries")
        return

    refresh_branch()

    total = len(rows)
    for num, row in enumerate(rows):
        link = row[0]
        branch = None

        # progress indicator, overwritten in place via the carriage return
        msg = "Processing ({}%)".format(round(num / total * 100))
        print(msg, end="\r", flush=True)

        url = "{}raw".format(link)
        logging.debug("Processing {}".format(url))
        mbox = fetch_url(url)
        classification = classify_content(mbox)

        if classification == ContentType.PATCH:
            logging.debug("Processing patch ({})".format(link))
            branch = process_patch(mbox, num, remote)
        elif classification == ContentType.PULL_REQUEST:
            logging.debug("Processing PR ({})".format(link))
            branch = process_pr(link, num, remote)
        elif classification == ContentType.SPAM:
            logging.debug("Marking spam ({})".format(link))
            cursor.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif classification == ContentType.REPLY:
            logging.debug("Ignoring reply ({})".format(link))

        cursor.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))
        # blank out the progress indicator
        print(" " * len(msg), end="\r", flush=True)

        # commit after every message
        conn.commit()

        # send any emails
        if branch:
            reply_email(mbox, branch)

    logging.info("Finished processing {} new entries".format(total))
279
280
if __name__ == "__main__":
    # Command-line entry point: poll the feed, process new messages, and
    # optionally repeat every --refresh-cycle minutes.
    parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
    parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
    parser.add_argument(
        "--database",
        default=os.path.join("contrib", "linux_firmware.db"),
        help="sqlite database to store entries in",
    )
    parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging to console"
    )
    parser.add_argument("--remote", default="origin", help="Remote to push to")
    # type=int so that "--refresh-cycle 0" is falsy (a string "0" would be
    # truthy and loop forever without sleeping) and bad values are
    # rejected before any work is done
    parser.add_argument(
        "--refresh-cycle",
        type=int,
        default=0,
        help="How frequently to run (in minutes)",
    )
    args = parser.parse_args()
    if args.refresh_cycle < 0:
        parser.error("--refresh-cycle must not be negative")

    if not os.path.exists("WHENCE"):
        logging.critical(
            "Please run this script from the root of the linux-firmware repository"
        )
        sys.exit(1)

    # full debug log goes to a dated file under contrib/
    log = os.path.join(
        "contrib",
        "{prefix}-{date}.{suffix}".format(
            prefix="linux_firmware", suffix="txt", date=date.today()
        ),
    )
    logging.basicConfig(
        format="%(asctime)s %(levelname)s:\t%(message)s",
        filename=log,
        filemode="w",
        level=logging.DEBUG,
    )

    # set a format which is simpler for console use
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if args.debug else logging.INFO)
    console.setFormatter(
        logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
    )
    logging.getLogger("").addHandler(console)

    # an empty remote tells the handlers not to push anything
    remote = "" if args.dry else args.remote

    while True:
        conn = sqlite3.connect(args.database)
        try:
            # update the database
            update_database(conn, args.url)
            # process the database
            process_database(conn, remote)
        finally:
            # close the connection even if processing raised
            conn.close()

        if not args.refresh_cycle:
            break
        logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
        time.sleep(args.refresh_cycle * 60)