git.proxmox.com Git - mirror_linux-firmware.git/blob - contrib/process_linux_firmware.py
Add a script for a robot to open up pull requests
[mirror_linux-firmware.git] / contrib / process_linux_firmware.py
1 #!/usr/bin/python3
2 import os
3 import time
4 import urllib.request
5 import sqlite3
6 import feedparser
7 import argparse
8 import logging
9 import email
10 import subprocess
11 import sys
12 from datetime import datetime, timedelta, date
13 from enum import Enum
14 import b4
15
# Default Atom feed to poll; used as the default of the --url option.
URL = "https://lore.kernel.org/linux-firmware/new.atom"
17
18
class ContentType(Enum):
    """Categories a mailing-list message can be classified into."""

    # subject contains "Re:"
    REPLY = 1
    # subject contains "PATCH", or the body carries a patch marker
    PATCH = 2
    # body carries the pull-request marker
    PULL_REQUEST = 3
    # fallback when nothing else matched
    SPAM = 4
24
25
# Marker strings searched for in a message body, mapped to the classification
# they imply.  classify_content() tests them in this (insertion) order and
# returns on the first hit.
content_types = {
    # markers of a patch
    "diff --git": ContentType.PATCH,
    "Signed-off-by:": ContentType.PATCH,
    # marker of a pull request
    "are available in the Git repository at": ContentType.PULL_REQUEST,
}
31
32
def classify_content(content):
    """Classify a raw mailing-list message.

    Args:
        content: The complete message (headers and body) as a string.

    Returns:
        A ContentType member.  REPLY or PATCH when the subject contains
        "Re:" or "PATCH" respectively; otherwise the value of the first key
        of ``content_types`` found in the first text/plain part; SPAM when
        nothing matches.
    """
    # load content into the email library
    msg = email.message_from_string(content)

    # A message without a Subject header yields None; treat that as an empty
    # subject instead of failing on the membership tests below.
    subject = str(msg["Subject"] or "")
    if "Re:" in subject:
        return ContentType.REPLY
    if "PATCH" in subject:
        return ContentType.PATCH

    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue

        payload = part.get_payload(decode=True) or b""
        # Honour the declared charset and never let an undecodable byte abort
        # classification: the markers we look for are plain ASCII.
        charset = part.get_content_charset() or "utf-8"
        try:
            body = payload.decode(charset, errors="replace")
        except LookupError:
            # unknown charset name in the header
            body = payload.decode("utf-8", errors="replace")

        for key, kind in content_types.items():
            if key in body:
                return kind
        # only the first text/plain part is inspected
        break
    return ContentType.SPAM
52
53
def fetch_url(url):
    """Download *url* and return its content as text.

    The payload is decoded as UTF-8.  Bytes that are not valid UTF-8 are
    replaced with U+FFFD instead of raising, so a message in another
    encoding cannot abort the caller.

    Args:
        url: Any URL accepted by ``urllib.request.urlopen``.

    Returns:
        The response body as a ``str``.
    """
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8", errors="replace")
57
58
def quiet_cmd(cmd):
    """Run *cmd* and send its output to the debug log.

    stderr is merged into stdout.  A non-zero exit status raises
    ``subprocess.CalledProcessError``.  Nothing is returned.
    """
    logging.debug("Running {}".format(cmd))
    finished = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True
    )
    logging.debug(finished.stdout)
63
64
def create_pr(remote, branch):
    """Push *branch* to *remote* with push options requesting a merge request.

    The options ask for a merge request against ``main``, titled after the
    branch, with the source branch removed afterwards.
    NOTE(review): the ``merge_request.*`` option names look GitLab-style;
    confirm against the hosting server.
    """
    push_options = (
        "merge_request.create",
        "merge_request.remove_source_branch",
        "merge_request.target=main",
        "merge_request.title={}".format(branch),
    )
    push = ["git", "push", "-u", remote, branch]
    for option in push_options:
        push.extend(("-o", option))
    quiet_cmd(push)
82
83
def refresh_branch():
    """Check out ``main`` and pull."""
    for git_args in (["checkout", "main"], ["pull"]):
        quiet_cmd(["git"] + git_args)
87
88
def delete_branch(branch):
    """Check out ``main`` and then force-delete the local *branch*."""
    checkout_main = ["git", "checkout", "main"]
    drop_branch = ["git", "branch", "-D", branch]
    quiet_cmd(checkout_main)
    quiet_cmd(drop_branch)
92
93
def process_pr(url, num, remote):
    """Fetch the pull request announced at *url* and forward it.

    Args:
        url: Message URL handed to ``b4 pr``.
        num: Sequence number used in the temporary branch name.
        remote: Git remote to push to; a falsy value skips the push.

    A failing ``b4 pr`` is logged as a warning and the function returns.
    Errors from the push propagate, but the temporary branch is removed
    first.
    """
    branch = "robot/pr-{}-{}".format(num, int(time.time()))
    cmd = ["b4", "pr", "-b", branch, url]
    try:
        quiet_cmd(cmd)
    except subprocess.CalledProcessError:
        logging.warning("Failed to apply PR")
        return

    # determine if it worked (we can't tell unfortunately by return code)
    cmd = ["git", "branch", "--list", branch]
    logging.debug("Running {}".format(cmd))
    result = subprocess.check_output(cmd)
    if not result:
        # the branch was never created, so there is nothing to forward
        return

    logging.info("Forwarding PR for {}".format(branch))
    try:
        if remote:
            create_pr(remote, branch)
    finally:
        # drop the temporary branch even when the push fails
        delete_branch(branch)
113
114
def process_patch(mbox, num, remote):
    """Apply the patch in *mbox* on a temporary branch and open a PR for it.

    Args:
        mbox: Raw message text, piped to ``b4 shazam`` on stdin.
        num: Sequence number used in the temporary branch name.
        remote: Git remote to push to; a falsy value skips the push.

    The temporary branch is always deleted before returning, including when
    an error propagates.
    """
    # create a new branch for the patch
    branch = "robot/patch-{}-{}".format(num, int(time.time()))
    quiet_cmd(["git", "checkout", "-b", branch])

    try:
        # apply the patch
        cmd = ["b4", "shazam", "-m", "-"]
        logging.debug("Running {}".format(cmd))
        p = subprocess.Popen(
            cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        stdout, stderr = p.communicate(mbox.encode("utf-8"))
        for stream in (stdout, stderr):
            for line in stream.splitlines():
                # tool output is only logged; never fail on odd bytes
                logging.debug(line.decode("utf-8", errors="replace"))

        if p.returncode != 0:
            try:
                quiet_cmd(["git", "am", "--abort"])
            except subprocess.CalledProcessError:
                # b4 may fail before any "git am" session starts, in which
                # case there is nothing to abort and git exits non-zero
                logging.debug("No 'git am' session to abort")
        else:
            logging.info("Opening PR for {}".format(branch))
            if remote:
                create_pr(remote, branch)
    finally:
        delete_branch(branch)
140
141
def update_database(conn, url):
    """Add feed entries that are not yet recorded to the ``firmware`` table.

    Args:
        conn: Open sqlite3 connection.  It is committed but left open.
        url: Path of a local Atom file if it exists on disk, otherwise a URL
            to download the feed from.
    """
    cursor = conn.cursor()
    cursor.execute(
        """CREATE TABLE IF NOT EXISTS firmware (url text, processed integer default 0, spam integer default 0)"""
    )

    if not os.path.exists(url):
        # remote file
        logging.info("Fetching {}".format(url))
        atom = fetch_url(url)
    else:
        # local file
        with open(url, "r") as handle:
            atom = handle.read()

    # Parse the atom and extract the URLs
    feed = feedparser.parse(atom)

    # Walk the feed back to front so rows are inserted oldest first
    # (presumably the feed lists the newest entry first).
    for entry in reversed(feed["entries"]):
        cursor.execute("SELECT url FROM firmware WHERE url = ?", (entry.link,))
        already_known = cursor.fetchone()
        if not already_known:
            cursor.execute(
                "INSERT INTO firmware VALUES (?, ?, ?)", (entry.link, 0, 0)
            )

    # Commit the changes; closing the connection is left to the caller
    conn.commit()
171
172
def process_database(conn, remote):
    """Classify and act on every unprocessed, non-spam entry in the database.

    Args:
        conn: Open sqlite3 connection holding the ``firmware`` table.
        remote: Git remote passed to the patch / pull-request handlers; a
            falsy value makes them skip the push.

    Each entry is committed as soon as it has been handled, so an error on a
    later entry does not cause earlier ones to be processed again on the
    next run.
    """
    c = conn.cursor()

    # get all unprocessed urls that aren't spam
    c.execute("SELECT url FROM firmware WHERE processed = 0 AND spam = 0")
    rows = c.fetchall()

    if not rows:
        logging.info("No new entries")
        return

    refresh_branch()

    total = len(rows)

    # loop over all unprocessed urls
    for num, row in enumerate(rows):
        link = row[0]

        msg = "Processing ({}%)".format(round(num / total * 100))
        print(msg, end="\r", flush=True)

        url = "{}raw".format(link)
        logging.debug("Processing {}".format(url))
        mbox = fetch_url(url)
        classification = classify_content(mbox)

        if classification == ContentType.PATCH:
            logging.debug("Processing patch ({})".format(link))
            process_patch(mbox, num, remote)
        elif classification == ContentType.PULL_REQUEST:
            logging.debug("Processing PR ({})".format(link))
            process_pr(link, num, remote)
        elif classification == ContentType.SPAM:
            logging.debug("Marking spam ({})".format(link))
            c.execute("UPDATE firmware SET spam = 1 WHERE url = ?", (link,))
        elif classification == ContentType.REPLY:
            logging.debug("Ignoring reply ({})".format(link))

        c.execute("UPDATE firmware SET processed = 1 WHERE url = ?", (link,))
        # Persist immediately: losing this flag would make the next run open
        # a duplicate merge request for the same message.
        conn.commit()

        # blank out the progress line
        print(" " * len(msg), end="\r", flush=True)

    logging.info("Finished processing {} new entries".format(total))
222
223
# Script entry point: parse options, set up file and console logging, then
# poll the feed once, or repeatedly when --refresh-cycle is non-zero.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process linux-firmware mailing list")
    parser.add_argument("--url", default=URL, help="URL to get ATOM feed from")
    parser.add_argument(
        "--database",
        default=os.path.join("contrib", "linux_firmware.db"),
        help="sqlite database to store entries in",
    )
    parser.add_argument("--dry", action="store_true", help="Don't open pull requests")
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging to console"
    )
    parser.add_argument("--remote", default="origin", help="Remote to push to")
    # type=int so that "--refresh-cycle 0" is a real (falsy) zero rather than
    # the truthy string "0", which would loop forever without sleeping
    parser.add_argument(
        "--refresh-cycle",
        type=int,
        default=0,
        help="How frequently to run (in minutes)",
    )
    args = parser.parse_args()

    if not os.path.exists("WHENCE"):
        logging.critical(
            "Please run this script from the root of the linux-firmware repository"
        )
        sys.exit(1)

    log = os.path.join(
        "contrib",
        "{prefix}-{date}.{suffix}".format(
            prefix="linux_firmware", suffix="txt", date=date.today()
        ),
    )
    logging.basicConfig(
        format="%(asctime)s %(levelname)s:\t%(message)s",
        filename=log,
        filemode="w",
        level=logging.DEBUG,
    )

    # set a format which is simpler for console use
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if args.debug else logging.INFO)
    formatter = logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
    console.setFormatter(formatter)
    logging.getLogger("").addHandler(console)

    # an empty remote tells the handlers not to push (dry run)
    remote = "" if args.dry else args.remote

    while True:
        conn = sqlite3.connect(args.database)
        try:
            # update the database
            update_database(conn, args.url)

            # process the database
            process_database(conn, remote)
        finally:
            # release the database even when processing raises
            conn.close()

        if not args.refresh_cycle:
            break

        logging.info("Sleeping for {} minutes".format(args.refresh_cycle))
        time.sleep(args.refresh_cycle * 60)