]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | # Licensed to the Apache Software Foundation (ASF) under one |
2 | # or more contributor license agreements. See the NOTICE file | |
3 | # distributed with this work for additional information | |
4 | # regarding copyright ownership. The ASF licenses this file | |
5 | # to you under the Apache License, Version 2.0 (the | |
6 | # "License"); you may not use this file except in compliance | |
7 | # with the License. You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, | |
12 | # software distributed under the License is distributed on an | |
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | # KIND, either express or implied. See the License for the | |
15 | # specific language governing permissions and limitations | |
16 | # under the License. | |
17 | ||
18 | from collections import defaultdict | |
19 | import functools | |
20 | import os | |
21 | import re | |
22 | import pathlib | |
23 | import shelve | |
24 | import warnings | |
25 | ||
26 | from git import Repo | |
27 | from jira import JIRA | |
28 | from semver import VersionInfo as SemVer | |
29 | ||
30 | from .utils.source import ArrowSources | |
31 | from .utils.report import JinjaReport | |
32 | ||
33 | ||
def cached_property(fn):
    """
    Turn a zero-argument method into a read-only property which is computed
    at most once per instance.

    The computed value is stored on the instance itself, so each instance
    keeps its own value and the cache does not outlive the instance.
    Exceptions raised by `fn` are not cached, the next access calls `fn`
    again. Instances without a `__dict__` are still supported, but nothing
    can be stored on them so `fn` is called on every access.
    """
    key = fn.__qualname__

    @functools.wraps(fn)
    def getter(self):
        try:
            store = self.__dict__
        except AttributeError:
            # nowhere to keep the value, fall back to a plain property
            return fn(self)

        values = store.setdefault('_cached_property_values', {})
        try:
            return values[key]
        except KeyError:
            values[key] = result = fn(self)
            return result

    return property(getter)
36 | ||
37 | ||
class Version(SemVer):
    """
    Semantic version carrying two extra fields: whether the version has been
    released and its release date.
    """

    __slots__ = ('released', 'release_date')

    def __init__(self, released=False, release_date=None, **kwargs):
        # the remaining keyword arguments are the semantic version parts
        super().__init__(**kwargs)
        self.released = released
        self.release_date = release_date

    @classmethod
    def parse(cls, version, **kwargs):
        """
        Parse a version string, forwarding `kwargs` to the constructor.
        """
        parts = SemVer.parse(version).to_dict()
        return cls(**parts, **kwargs)

    @classmethod
    def from_jira(cls, jira_version):
        """
        Build a Version from a jira version object.
        """
        name = jira_version.name
        released = jira_version.released
        # the release date attribute is optional on the jira object
        release_date = getattr(jira_version, 'releaseDate', None)
        return cls.parse(name, released=released, release_date=release_date)
58 | ||
59 | ||
class Issue:
    """
    Plain container for the key, type and summary of a jira issue.
    """

    def __init__(self, key, type, summary):
        self.key, self.type, self.summary = key, type, summary

    @classmethod
    def from_jira(cls, jira_issue):
        """
        Extract the relevant fields from a jira issue object.
        """
        key = jira_issue.key
        fields = jira_issue.fields
        return cls(key=key, type=fields.issuetype.name,
                   summary=fields.summary)

    @property
    def project(self):
        # the part of the key before the first dash, e.g. `ARROW`
        return self.key.split('-', 1)[0]

    @property
    def number(self):
        # the second dash separated part of the key as an integer
        pieces = self.key.split('-')
        return int(pieces[1])
82 | ||
83 | ||
class Jira(JIRA):
    """
    JIRA client returning the lightweight `Version` and `Issue` objects
    defined in this module.
    """

    def __init__(self, user=None, password=None,
                 url='https://issues.apache.org/jira'):
        # fall back to the environment for missing credentials
        if not user:
            user = os.environ.get('APACHE_JIRA_USER')
        if not password:
            password = os.environ.get('APACHE_JIRA_PASSWORD')
        super().__init__(url, basic_auth=(user, password))

    def project_version(self, version_string, project='ARROW'):
        """
        Look up a single version of `project` by its string form.

        Raises KeyError if the project has no such version.
        """
        # query the versions from jira to have them populated with
        # additional metadata
        by_name = {}
        for candidate in self.project_versions(project):
            by_name[str(candidate)] = candidate
        return by_name[version_string]

    def project_versions(self, project):
        """
        Return the parsable versions of `project`, newest first.
        """
        parsed = []
        for raw in super().project_versions(project):
            try:
                parsed.append(Version.from_jira(raw))
            except ValueError:
                # ignore invalid semantic versions like JS-0.4.0
                pass
        parsed.sort(reverse=True)
        return parsed

    def issue(self, key):
        """
        Fetch a single issue by key and convert it to an `Issue`.
        """
        raw = super().issue(key)
        return Issue.from_jira(raw)

    def project_issues(self, version, project='ARROW'):
        """
        Return the issues of `project` having `version` as fix version.
        """
        query = "project={} AND fixVersion={}".format(project, version)
        found = super().search_issues(query, maxResults=False)
        return [Issue.from_jira(raw) for raw in found]
114 | ||
115 | ||
class CachedJira:
    """
    Proxy around a jira client which persists the results of its method
    calls in a `shelve` database located at `cache_path`.

    Attribute access is forwarded to the wrapped client; callable attributes
    are wrapped so that a call with the same name and arguments is answered
    from the cache instead of calling the client again. Cached entries are
    never invalidated by this class.
    """

    def __init__(self, cache_path, jira=None):
        self.jira = jira or Jira()
        self.cache_path = cache_path

    def __getattr__(self, name):
        # __getattr__ is only consulted when the normal lookup fails, so
        # getting here for `jira` means the instance was created without
        # running __init__ (e.g. by copy or pickle); delegating would call
        # __getattr__ again and recurse until RecursionError
        if name == 'jira':
            raise AttributeError(name)
        attr = getattr(self.jira, name)
        return self._cached(name, attr) if callable(attr) else attr

    def _cached(self, name, method):
        """
        Wrap `method` so that its results are stored in the shelve cache.
        """
        def wrapper(*args, **kwargs):
            # the textual form of the call identifies the cache entry
            key = str((name, args, kwargs))
            with shelve.open(self.cache_path) as cache:
                try:
                    result = cache[key]
                except KeyError:
                    cache[key] = result = method(*args, **kwargs)
            return result
        return wrapper
136 | ||
137 | ||
138 | _TITLE_REGEX = re.compile( | |
139 | r"(?P<issue>(?P<project>(ARROW|PARQUET))\-\d+)?\s*:?\s*" | |
140 | r"(?P<components>\[.*\])?\s*(?P<summary>.*)" | |
141 | ) | |
142 | _COMPONENT_REGEX = re.compile(r"\[([^\[\]]+)\]") | |
143 | ||
144 | ||
class CommitTitle:
    """
    Structured form of a commit headline.

    Holds the project and issue key (both may be None), the list of
    component tags and the remaining summary text. Instances compare equal
    and hash equal when all four fields are equal.
    """

    def __init__(self, summary, project=None, issue=None, components=None):
        self.project = project
        self.issue = issue
        self.components = components or []
        self.summary = summary

    def __str__(self):
        # rebuild the headline as `ISSUE: [Comp1][Comp2] summary`
        parts = []
        if self.issue:
            parts.append("{}: ".format(self.issue))
        if self.components:
            parts.extend("[{}]".format(c) for c in self.components)
            parts.append(" ")
        parts.append(self.summary)
        return "".join(parts)

    def __eq__(self, other):
        # let python handle comparisons with unrelated types instead of
        # failing with AttributeError on the missing fields
        if not isinstance(other, CommitTitle):
            return NotImplemented
        return (
            self.summary == other.summary and
            self.project == other.project and
            self.issue == other.issue and
            self.components == other.components
        )

    def __hash__(self):
        return hash(
            (self.summary, self.project, self.issue, tuple(self.components))
        )

    @classmethod
    def parse(cls, headline):
        """
        Parse a commit headline into its issue, components and summary.

        If the headline cannot be matched a warning is emitted and the whole
        headline is used as the summary.
        """
        matches = _TITLE_REGEX.match(headline)
        if matches is None:
            warnings.warn(
                "Unable to parse commit message `{}`".format(headline)
            )
            return cls(headline)

        values = matches.groupdict()
        # the components group is None when the headline has no tags
        components = _COMPONENT_REGEX.findall(values.get('components') or '')

        return cls(
            values['summary'],
            project=values.get('project'),
            issue=values.get('issue'),
            components=components
        )
196 | ||
197 | ||
class Commit:
    """
    Wrapper around a commit object which also exposes the fields of its
    parsed headline.

    Attributes not defined on this class are looked up on the parsed
    `CommitTitle` first and on the wrapped commit object second, so e.g.
    `summary` refers to the parsed summary rather than the raw headline.
    """

    def __init__(self, wrapped):
        self._title = CommitTitle.parse(wrapped.summary)
        self._wrapped = wrapped

    def __getattr__(self, attr):
        # __getattr__ is only consulted when the normal lookup fails, so
        # getting here for the delegation targets means the instance was
        # created without running __init__ (e.g. by copy or pickle);
        # delegating would recurse until RecursionError
        if attr in ('_title', '_wrapped'):
            raise AttributeError(attr)
        if hasattr(self._title, attr):
            return getattr(self._title, attr)
        else:
            return getattr(self._wrapped, attr)

    def __repr__(self):
        template = '<Commit sha={!r} issue={!r} components={!r} summary={!r}>'
        return template.format(self.hexsha, self.issue, self.components,
                               self.summary)

    @property
    def url(self):
        return 'https://github.com/apache/arrow/commit/{}'.format(self.hexsha)

    @property
    def title(self):
        return self._title
222 | ||
223 | ||
class ReleaseCuration(JinjaReport):
    # template used for each output format
    templates = {'console': 'release_curation.txt.j2'}
    # names of the values passed when constructing the report
    fields = ['release', 'within', 'outside', 'nojira', 'parquet', 'nopatch']
236 | ||
237 | ||
class JiraChangelog(JinjaReport):
    # template used for each output format
    templates = {
        'markdown': 'release_changelog.md.j2',
        'html': 'release_changelog.html.j2',
    }
    # names of the values passed when constructing the report
    fields = ['release', 'categories']
247 | ||
248 | ||
class Release:
    """
    A release identified by a jira version and backed by a git repository.

    Instances are created with `Release.from_jira()`, which selects the
    concrete subclass (major, minor or patch release) based on the version
    number. Subclasses provide the `branch` and `siblings` properties.
    """

    def __init__(self):
        raise TypeError("Do not initialize Release class directly, use "
                        "Release.from_jira(version) instead.")

    def __repr__(self):
        if self.version.released:
            status = "released_at={!r}".format(self.version.release_date)
        else:
            status = "pending"
        return "<{} {!r} {}>".format(self.__class__.__name__,
                                     str(self.version), status)

    @staticmethod
    def from_jira(version, jira=None, repo=None):
        """
        Create the release object for a version.

        Parameters
        ----------
        version : str or Version
            A version string is looked up in the ARROW jira project.
        jira : None, str, Jira or CachedJira
            Jira client to use. A string is treated as the server url, None
            creates a client with the default settings.
        repo : None, str, pathlib.Path or Repo
            Git repository to use. A string or path is opened as a
            repository, None locates the arrow sources automatically.

        Raises
        ------
        TypeError
            If any of the arguments has an unsupported type.
        """
        if jira is None:
            jira = Jira()
        elif isinstance(jira, str):
            # the first positional argument of Jira is the user name, so the
            # server url must be passed by keyword
            jira = Jira(url=jira)
        elif not isinstance(jira, (Jira, CachedJira)):
            raise TypeError("`jira` argument must be a server url or a valid "
                            "Jira instance")

        if repo is None:
            arrow = ArrowSources.find()
            repo = Repo(arrow.path)
        elif isinstance(repo, (str, pathlib.Path)):
            repo = Repo(repo)
        elif not isinstance(repo, Repo):
            raise TypeError("`repo` argument must be a path or a valid Repo "
                            "instance")

        if isinstance(version, str):
            version = jira.project_version(version, project='ARROW')
        elif not isinstance(version, Version):
            raise TypeError(version)

        # decide the type of the release based on the version number
        if version.patch == 0:
            if version.minor == 0:
                klass = MajorRelease
            elif version.major == 0:
                # handle minor releases before 1.0 as major releases
                klass = MajorRelease
            else:
                klass = MinorRelease
        else:
            klass = PatchRelease

        # __init__ raises to prevent direct instantiation, so bypass it
        obj = klass.__new__(klass)
        obj.version = version
        obj.jira = jira
        obj.repo = repo

        return obj

    @property
    def is_released(self):
        return self.version.released

    @property
    def tag(self):
        return "apache-arrow-{}".format(str(self.version))

    @property
    def branch(self):
        raise NotImplementedError()

    @property
    def siblings(self):
        """
        Releases to consider when calculating previous and next releases.
        """
        raise NotImplementedError()

    @cached_property
    def previous(self):
        """
        The release preceding this one among the siblings, or None.
        """
        # siblings are ordered from newest to oldest, so the previous
        # release is the one following the current version
        position = self.siblings.index(self.version)
        try:
            previous = self.siblings[position + 1]
        except IndexError:
            # first release doesn't have a previous one
            return None
        else:
            return Release.from_jira(previous, jira=self.jira, repo=self.repo)

    @cached_property
    def next(self):
        """
        The release following this one among the siblings.

        Raises ValueError if there is no newer sibling version.
        """
        # siblings are ordered from newest to oldest, so the upcoming
        # release is the one preceding the current version
        position = self.siblings.index(self.version)
        if position <= 0:
            raise ValueError("There is no upcoming release set in JIRA after "
                             "version {}".format(self.version))
        upcoming = self.siblings[position - 1]
        return Release.from_jira(upcoming, jira=self.jira, repo=self.repo)

    @cached_property
    def issues(self):
        """
        Jira issues of the ARROW project for this version, keyed by issue
        key.
        """
        issues = self.jira.project_issues(self.version, project='ARROW')
        return {i.key: i for i in issues}

    @cached_property
    def commits(self):
        """
        All commits applied between two versions.

        The lower bound is the tag of the previous release; the upper bound
        is the tag of this release if it has been released, otherwise the
        release branch. Returns an empty list (with a warning) if the
        release branch doesn't exist.
        """
        if self.previous is None:
            # first release, there is no lower bound
            lower = None
        else:
            lower = self.repo.tags[self.previous.tag]

        if self.version.released:
            upper = self.repo.tags[self.tag]
        else:
            try:
                upper = self.repo.branches[self.branch]
            except IndexError:
                warnings.warn("Release branch `{}` doesn't exist."
                              .format(self.branch))
                return []

        if lower is None:
            # a range with an empty left side would be interpreted by git
            # as `HEAD..upper`, so list everything reachable from `upper`
            commit_range = "{}".format(upper)
        else:
            commit_range = "{}..{}".format(lower, upper)
        return list(map(Commit, self.repo.iter_commits(commit_range)))

    def curate(self):
        """
        Sort the commits of the release into groups based on their jira
        issue and return them as a `ReleaseCuration` report.

        The groups are: `within` (issue belongs to this release), `outside`
        (issue belongs elsewhere), `parquet` (PARQUET issue), `nojira` (no
        issue key in the headline) and `nopatch` (release issues without a
        commit).
        """
        release_issues = self.issues

        within, outside, nojira, parquet = [], [], [], []
        for c in self.commits:
            if c.issue is None:
                nojira.append(c)
            elif c.issue in release_issues:
                within.append((release_issues[c.issue], c))
            elif c.project == 'PARQUET':
                # parquet issues are not part of the release issues, query
                # them from jira one by one
                parquet.append((self.jira.issue(c.issue), c))
            else:
                outside.append((self.jira.issue(c.issue), c))

        # remaining jira tickets
        within_keys = {i.key for i, c in within}
        nopatch = [issue for key, issue in release_issues.items()
                   if key not in within_keys]

        return ReleaseCuration(release=self, within=within, outside=outside,
                               nojira=nojira, parquet=parquet, nopatch=nopatch)

    def changelog(self):
        """
        Build the changelog of the release as a `JiraChangelog` report.

        Raises KeyError if an issue has a type without a known category.
        """
        # get organized report for the release
        curation = self.curate()

        # jira tickets having patches in the release
        release_issues = [issue for issue, _ in curation.within]
        # jira tickets without patches
        release_issues.extend(curation.nopatch)
        # parquet patches in the release
        release_issues.extend(issue for issue, _ in curation.parquet)

        # organize issues into categories
        issue_types = {
            'Bug': 'Bug Fixes',
            'Improvement': 'New Features and Improvements',
            'New Feature': 'New Features and Improvements',
            'Sub-task': 'New Features and Improvements',
            'Task': 'New Features and Improvements',
            'Test': 'Bug Fixes',
            'Wish': 'New Features and Improvements',
        }
        categories = defaultdict(list)
        for issue in release_issues:
            categories[issue_types[issue.type]].append(issue)

        # sort issues by the issue key in ascending order
        for issues in categories.values():
            issues.sort(key=lambda issue: (issue.project, issue.number))

        return JiraChangelog(release=self, categories=categories)
438 | ||
439 | ||
class MaintenanceMixin:
    """
    Helpers for maintenance releases: selecting the commits of the main
    branch which belong to the release and cherry-picking them.
    """

    def commits_to_pick(self, exclude_already_applied=True):
        """
        Return the main branch commits to cherry-pick, oldest first.

        A commit qualifies if its issue belongs to the jira release and,
        when `exclude_already_applied` is set, no commit with the same title
        is already part of the release.
        """
        # the maintenance branch is rooted at the previous major release;
        # minor releases preceding 1.0.0 are treated as major releases
        version = self.version
        if version.major == 0:
            root_tag = "apache-arrow-0.{}.0".format(version.minor)
        else:
            root_tag = "apache-arrow-{}.0.0".format(version.major)
        commit_range = "{}..master".format(root_tag)

        candidates = map(Commit, self.repo.iter_commits(commit_range))

        # the sha changes after a cherry-pick, so patches already applied to
        # the maintenance branch are identified by their commit title
        already_applied = set()
        if exclude_already_applied:
            for applied in self.commits:
                already_applied.add(applied.title)

        # keep the commits which are included in the jira release
        selected = []
        for candidate in candidates:
            if candidate.issue not in self.issues:
                continue
            if candidate.title in already_applied:
                continue
            selected.append(candidate)

        # restoring the original order of the commits helps to minimize the
        # merge conflicts during cherry-picks
        return reversed(selected)

    def cherry_pick_commits(self, recreate_branch=True):
        """
        Check out the maintenance branch and cherry-pick the release commits.

        With `recreate_branch` the branch is deleted if it exists and
        created again from the tag of the previous release, otherwise the
        existing branch is checked out.
        """
        if not recreate_branch:
            # just checkout the already existing maintenance branch
            self.repo.git.checkout(self.branch)
        else:
            if self.branch in self.repo.branches:
                self.repo.git.branch('-D', self.branch)
            self.repo.git.checkout(self.previous.tag, b=self.branch)

        # cherry pick the commits based on the jira tickets
        for picked in self.commits_to_pick():
            self.repo.git.cherry_pick(picked.hexsha)
492 | ||
493 | ||
class MajorRelease(Release):

    @property
    def branch(self):
        return "master"

    @cached_property
    def siblings(self):
        """
        Only the major releases of the ARROW project.
        """
        def is_major(v):
            # handle minor releases before 1.0 as major releases
            return v.patch == 0 and (v.major == 0 or v.minor == 0)

        return list(filter(is_major, self.jira.project_versions('ARROW')))
508 | ||
509 | ||
class MinorRelease(Release, MaintenanceMixin):

    @property
    def branch(self):
        major = self.version.major
        return "maint-{}.x.x".format(major)

    @cached_property
    def siblings(self):
        """
        The major and minor releases of the ARROW project.
        """
        non_patch = []
        for v in self.jira.project_versions('ARROW'):
            if v.patch == 0:
                non_patch.append(v)
        return non_patch
522 | ||
523 | ||
class PatchRelease(Release, MaintenanceMixin):

    @property
    def branch(self):
        version = self.version
        return "maint-{}.{}.x".format(version.major, version.minor)

    @cached_property
    def siblings(self):
        """
        Every release of the ARROW project, without any filtering.
        """
        every_version = self.jira.project_versions('ARROW')
        return every_version