// Scraped from the git.proxmox.com web view of rustc.git, blob
// src/tools/linkchecker/main.rs. Content is partial: blob line numbers are
// fused into the text and several original lines are missing.
1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Script to check the validity of `href` links in our HTML documentation.
13 //! In the past we've been quite error-prone in writing broken links, as most
14 //! of them are added manually rather than automatically. As files move or
15 //! APIs change over time, old links become stale or broken. The purpose of this
16 //! script is to check all relative links in our documentation to make sure they
17 //! actually point to a valid place.
19 //! Currently this doesn't actually do any HTML parsing or anything fancy like
20 //! that, it just has a simple "regex" to search for `href` and `id` tags.
21 //! These values are then translated to file URLs if possible and then the
22 //! destination is asserted to exist.
24 //! A few whitelisted exceptions are allowed as there are known bugs in rustdoc,
25 //! but this should catch the majority of "broken link" cases.
31 use std
::io
::prelude
::*;
32 use std
::path
::{Path, PathBuf}
;
33 use std
::collections
::{HashMap, HashSet}
;
34 use std
::collections
::hash_map
::Entry
;
36 use url
::{Url, UrlParser}
;
// NOTE(review): fragment of the `t!` macro — the `macro_rules! t {` header,
// the `Ok` arm, and the closing lines are missing from this scraped view.
// Visible part: on `Err(e)` it panics, printing the stringified expression
// together with the error value. Presumably the missing arm is
// `Ok(e) => e` — TODO confirm against the full file.
41 ($e
:expr
) => (match $e
{
43 Err(e
) => panic
!("{} failed with {:?}", stringify
!($e
), e
),
// NOTE(review): body fragment of `main` — the `fn main()` header and some
// intermediate lines are missing from this scraped view.
// Take the documentation directory as the first CLI argument; panics if absent.
48 let docs
= env
::args().nth(1).unwrap();
// Resolve it against the current working directory to get an absolute path.
49 let docs
= env
::current_dir().unwrap().join(docs
);
// Build a file:// URL for the docs root; used as the base for relative links.
50 let mut url
= Url
::from_file_path(&docs
).unwrap();
51 let mut errors
= false;
// Recursively walk the docs tree, checking every file and flagging `errors`.
52 walk(&mut HashMap
::new(), &docs
, &docs
, &mut url
, &mut errors
);
// presumably guarded by `if errors` on the missing line 53 — TODO confirm
54 panic
!("found some broken links");
// NOTE(review): variant fragment of the `LoadError` enum — the enum header
// and any remaining variants (e.g. `IsRedirect`, used elsewhere in the file)
// are missing from this scraped view.
// Underlying I/O failure while opening or reading a documentation file.
60 IOError(std
::io
::Error
),
// A redirect page pointed at a file that could not be opened; carries the
// redirect target path and the I/O error encountered.
61 BrokenRedirect(PathBuf
, std
::io
::Error
),
75 type Cache
= HashMap
<PathBuf
, FileEntry
>;
// NOTE(review): fragment of `FileEntry::parse_ids` — the remaining parameters
// (visibly `file`, `contents`, `errors` from usage) and the function's closing
// lines are missing from this scraped view.
78 fn parse_ids(&mut self,
// Lazily scan only once per file: ids are collected on first use.
83 if self.ids
.is_empty() {
// Pull every ` id` attribute value out of the raw HTML source.
84 with_attrs_in_source(contents
, " id", |fragment
, i
| {
// Anchors may be written with a leading `#`; store them without it.
85 let frag
= fragment
.trim_left_matches("#").to_owned();
// `HashSet::insert` returns false when the id already existed — report dupes.
86 if !self.ids
.insert(frag
) {
88 println
!("{}:{}: id is not unique: `{}`",
89 file
.display(), i
, fragment
);
// NOTE(review): fragment of `walk` — the remaining parameters (visibly
// `root`, `dir`, `url`, `errors` from usage), the directory/file branch
// structure, and closing braces are missing from this scraped view.
96 fn walk(cache
: &mut Cache
,
// Iterate the directory; any I/O error panics via the `t!` macro.
102 for entry
in t
!(dir
.read_dir()).map(|e
| t
!(e
)) {
103 let path
= entry
.path();
104 let kind
= t
!(entry
.file_type());
// Keep `url` in sync with the filesystem path while descending.
105 url
.path_mut().unwrap().push(entry
.file_name().into_string().unwrap());
// presumably guarded by `if kind.is_dir()` on a missing line — TODO confirm
107 walk(cache
, root
, &path
, url
, errors
);
// File case: check the file; on success we get its cache key back.
109 let pretty_path
= check(cache
, root
, &path
, url
, errors
);
110 if let Some(pretty_path
) = pretty_path
{
111 let entry
= cache
.get_mut(&pretty_path
).unwrap();
112 // we don't need the source anymore,
113 // so drop it to reduce memory usage
114 entry
.source
= String
::new();
// Pop the URL component pushed above before returning to the caller's level.
117 url
.path_mut().unwrap().pop();
// NOTE(review): fragment of `check` — some parameters (visibly `root`, `file`,
// `base` from usage), the early `return None`/`return` bodies of the whitelist
// arms, and the function's closing lines are missing from this scraped view.
// It loads one HTML file, records its anchor ids, and validates every `href`
// the file contains.
121 fn check(cache
: &mut Cache
,
125 errors
: &mut bool
) -> Option
<PathBuf
>
127 // ignore js files as they are not prone to errors as the rest of the
128 // documentation is and they otherwise bring up false positives.
129 if file
.extension().and_then(|s
| s
.to_str()) == Some("js") {
133 // Unfortunately we're not 100% full of valid links today so we need a few
134 // whitelists to get this past `make check` today.
136 if file
.ends_with("std/string/struct.String.html") {
140 if file
.ends_with("collections/string/struct.String.html") {
144 if file
.ends_with("btree_set/struct.BTreeSet.html") ||
145 file
.ends_with("collections/struct.BTreeSet.html") ||
146 file
.ends_with("collections/btree_map/struct.BTreeMap.html") ||
147 file
.ends_with("collections/hash_map/struct.HashMap.html") {
151 if file
.ends_with("std/sys/ext/index.html") {
// Shadow `file` with its &str form for the substring-based whitelists below.
155 if let Some(file
) = file
.to_str() {
157 if file
.contains("ParseFloatError") {
160 // weird reexports, but this module is on its way out, so chalk it up to
161 // "rustdoc weirdness" and move on from there
162 if file
.contains("scoped_tls") {
// All relative hrefs in this file resolve against `base` (the file's URL).
167 let mut parser
= UrlParser
::new();
168 parser
.base_url(base
);
// SkipRedirect: a top-level file that is itself a redirect page is skipped.
170 let res
= load_file(cache
, root
, PathBuf
::from(file
), SkipRedirect
);
171 let (pretty_file
, contents
) = match res
{
173 Err(_
) => return None
,
// Collect this file's own anchor ids before following its links.
176 cache
.get_mut(&pretty_file
).unwrap()
177 .parse_ids(&pretty_file
, &contents
, errors
);
180 // Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
181 with_attrs_in_source(&contents
, " href", |url
, i
| {
182 // Once we've plucked out the URL, parse it using our base url and
183 // then try to extract a file path. If either of these fail then we
185 let (parsed_url
, path
) = match url_to_file_path(&parser
, url
) {
186 Some((url
, path
)) => (url
, PathBuf
::from(path
)),
190 // Alright, if we've found a file name then this file had better
191 // exist! If it doesn't then we register and print an error.
196 let res
= load_file(cache
, root
, path
.clone(), FromRedirect(false));
197 let (pretty_path
, contents
) = match res
{
199 Err(LoadError
::IOError(err
)) => panic
!(format
!("{}", err
)),
200 Err(LoadError
::BrokenRedirect(target
, _
)) => {
201 print
!("{}:{}: broken redirect to {}",
// `i` is a 0-based line index; report 1-based line numbers.
202 pretty_file
.display(), i
+ 1, target
.display());
205 Err(LoadError
::IsRedirect
) => unreachable
!(),
208 if let Some(ref fragment
) = parsed_url
.fragment
{
209 // Fragments like `#1-6` are most likely line numbers to be
210 // interpreted by javascript, so we're ignoring these
211 if fragment
.splitn(2, '
-'
)
212 .all(|f
| f
.chars().all(|c
| c
.is_numeric())) {
// Make sure the target file actually declares the referenced anchor id.
216 let entry
= &mut cache
.get_mut(&pretty_path
).unwrap();
217 entry
.parse_ids(&pretty_path
, &contents
, errors
);
219 if !entry
.ids
.contains(fragment
) {
221 print
!("{}:{}: broken link fragment ",
222 pretty_file
.display(), i
+ 1);
223 println
!("`#{}` pointing to `{}`",
224 fragment
, pretty_path
.display());
// Error path: the link target did not exist; print it relative to the root.
229 print
!("{}:{}: broken link - ", pretty_file
.display(), i
+ 1);
230 let pretty_path
= path
.strip_prefix(root
).unwrap_or(&path
);
231 println
!("{}", pretty_path
.display());
// NOTE(review): fragment of `load_file` — the `file`/`root` parameters, parts
// of the cache-hit arm, and several closing braces are missing from this
// scraped view. It reads a file through the cache and transparently follows
// rustdoc-generated redirect pages.
237 fn load_file(cache
: &mut Cache
,
240 redirect
: Redirect
) -> Result
<(PathBuf
, String
), LoadError
> {
241 let mut contents
= String
::new();
// The "pretty" path is relative to the docs root and serves as the cache key.
242 let pretty_file
= PathBuf
::from(file
.strip_prefix(root
).unwrap_or(&file
));
244 let maybe_redirect
= match cache
.entry(pretty_file
.clone()) {
// Cache hit: reuse the previously read source.
245 Entry
::Occupied(entry
) => {
246 contents
= entry
.get().source
.clone();
// Cache miss: read the file from disk and remember it.
249 Entry
::Vacant(entry
) => {
250 let mut fp
= try
!(File
::open(file
.clone()).map_err(|err
| {
// When we got here by following a redirect, a missing file means the
// redirect itself is broken rather than a plain I/O error.
251 if let FromRedirect(true) = redirect
{
252 LoadError
::BrokenRedirect(file
.clone(), err
)
254 LoadError
::IOError(err
)
257 try
!(fp
.read_to_string(&mut contents
)
258 .map_err(|err
| LoadError
::IOError(err
)));
// Detect whether the freshly read page is a rustdoc redirect stub.
260 let maybe
= maybe_redirect(&contents
);
// The caller asked not to follow redirects at all (top-level files).
262 if let SkipRedirect
= redirect
{
263 return Err(LoadError
::IsRedirect
);
266 entry
.insert(FileEntry
{
267 source
: contents
.clone(),
// Resolve the redirect target (if any) relative to this file's own URL.
274 let base
= Url
::from_file_path(&file
).unwrap();
275 let mut parser
= UrlParser
::new();
276 parser
.base_url(&base
);
278 match maybe_redirect
.and_then(|url
| url_to_file_path(&parser
, &url
)) {
279 Some((_
, redirect_file
)) => {
// Follow the redirect recursively, flagging it so that a dead target is
// reported as a broken redirect rather than a plain I/O error.
280 let path
= PathBuf
::from(redirect_file
);
281 load_file(cache
, root
, path
, FromRedirect(true))
283 None
=> Ok((pretty_file
, contents
))
/// Detects whether `source` is a rustdoc-generated redirect page and, if so,
/// returns the redirect target URL.
///
/// Rustdoc redirect stubs carry a `<p>Redirecting to <a href="…">` marker on
/// their seventh line; the target is the quoted href value. Returns `None`
/// when the page is too short or the marker is absent.
fn maybe_redirect(source: &str) -> Option<String> {
    const REDIRECT: &'static str = "<p>Redirecting to <a href=";

    // Line index 6 == the seventh line of the page.
    let redirect_line = match source.lines().nth(6) {
        Some(line) => line,
        None => return None,
    };

    match redirect_line.find(REDIRECT) {
        Some(i) => {
            // Skip past the marker plus the opening quote, then take
            // everything up to the closing quote as the target URL.
            let tail = &redirect_line[i + REDIRECT.len() + 1..];
            let end = tail.find('"').unwrap();
            Some(tail[..end].to_owned())
        }
        None => None,
    }
}
303 fn url_to_file_path(parser: &UrlParser, url: &str) -> Option<(Url, PathBuf)> {
304 parser.parse(url).ok().and_then(|parsed_url| {
305 parsed_url.to_file_path().ok().map(|f| (parsed_url, f))
// NOTE(review): fragment of `with_attrs_in_source` — the remaining parameters
// (an `attr: &str` and a callback `f: F`, judging from usage and the
// `F: FnMut(&str, usize)` bound) and the function's tail (including the
// callback invocation and loop advancement) are truncated past the end of
// this scraped view. It is a poor-man's HTML attribute scanner: for every
// `attr=` occurrence it extracts the quoted value without real HTML parsing.
309 fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str,
// `i` is a 0-based line index (callers add 1 when reporting).
313 for (i, mut line) in contents.lines().enumerate() {
// A line may contain several attributes; keep scanning the remainder.
314 while let Some(j) = line.find(attr) {
315 let rest = &line[j + attr.len() ..];
317 let pos_equals = match rest.find("=") {
// Only whitespace may sit between the attribute name and the `=`.
321 if rest[..pos_equals].trim_left_matches(" ") != "" {
325 let rest = &rest[pos_equals + 1..];
// The value may be delimited by either double or single quotes.
327 let pos_quote = match rest.find(&['"'
, '
\''
][..]) {
// Remember which quote character opened the value so we can find its close.
331 let quote_delim
= rest
.as_bytes()[pos_quote
] as char;
// Only whitespace may sit between the `=` and the opening quote.
333 if rest
[..pos_quote
].trim_left_matches(" ") != "" {
336 let rest
= &rest
[pos_quote
+ 1..];
// The attribute value runs up to the matching closing quote.
337 let url
= match rest
.find(quote_delim
) {
338 Some(i
) => &rest
[..i
],