1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Script to check the validity of `href` links in our HTML documentation.
13 //! In the past we've been quite error prone to writing in broken links as most
14 //! of them are manually rather than automatically added. As files move over
15 //! time or APIs change, old links become stale or broken. The purpose of this
16 //! script is to check all relative links in our documentation to make sure they
17 //! actually point to a valid place.
19 //! Currently this doesn't actually do any HTML parsing or anything fancy like
20 //! that, it just has a simple "regex" to search for `href` and `id` attributes.
21 //! These values are then translated to file URLs if possible and then the
22 //! destination is asserted to exist.
24 //! A few whitelisted exceptions are allowed as there are known bugs in rustdoc,
25 //! but this should catch the majority of "broken link" cases.
29 use std
::io
::prelude
::*;
30 use std
::path
::{Path, PathBuf, Component}
;
31 use std
::collections
::{HashMap, HashSet}
;
32 use std
::collections
::hash_map
::Entry
;
/// Unwraps a `Result`, panicking with the failing expression's source text
/// and the error's `Debug` representation when it is an `Err`.
///
/// Used for I/O calls whose failure should abort the whole link check.
macro_rules! t {
    ($e:expr) => (match $e {
        Ok(e) => e,
        Err(e) => panic!("{} failed with {:?}", stringify!($e), e),
    })
}
44 let docs
= env
::args().nth(1).unwrap();
45 let docs
= env
::current_dir().unwrap().join(docs
);
46 let mut errors
= false;
47 walk(&mut HashMap
::new(), &docs
, &docs
, &mut errors
);
49 panic
!("found some broken links");
/// Reasons `load_file` can fail to produce a file's contents.
#[derive(Debug)]
enum LoadError {
    /// The file could not be opened or read.
    IOError(std::io::Error),
    /// A redirect pointed at the given path, which could not be opened.
    BrokenRedirect(PathBuf, std::io::Error),
    /// The file is a redirect page and the caller asked to skip redirects.
    IsRedirect,
}
/// Cached state for one documentation file.
struct FileEntry {
    /// Raw file contents; `walk` replaces this with an empty string once the
    /// file has been checked, to reduce memory usage.
    source: String,
    /// Values of the ` id` attributes found in the file (filled lazily by
    /// `parse_ids`).
    ids: HashSet<String>,
    /// Values of the ` name` attributes found in the file (filled lazily by
    /// `parse_names`).
    names: HashSet<String>,
}
71 type Cache
= HashMap
<PathBuf
, FileEntry
>;
74 fn parse_ids(&mut self, file
: &Path
, contents
: &str, errors
: &mut bool
) {
75 if self.ids
.is_empty() {
76 with_attrs_in_source(contents
, " id", |fragment
, i
| {
77 let frag
= fragment
.trim_left_matches("#").to_owned();
78 if !self.ids
.insert(frag
) {
80 println
!("{}:{}: id is not unique: `{}`", file
.display(), i
, fragment
);
86 fn parse_names(&mut self, contents
: &str) {
87 if self.names
.is_empty() {
88 with_attrs_in_source(contents
, " name", |fragment
, _
| {
89 let frag
= fragment
.trim_left_matches("#").to_owned();
90 self.names
.insert(frag
);
96 fn walk(cache
: &mut Cache
, root
: &Path
, dir
: &Path
, errors
: &mut bool
) {
97 for entry
in t
!(dir
.read_dir()).map(|e
| t
!(e
)) {
98 let path
= entry
.path();
99 let kind
= t
!(entry
.file_type());
101 walk(cache
, root
, &path
, errors
);
103 let pretty_path
= check(cache
, root
, &path
, errors
);
104 if let Some(pretty_path
) = pretty_path
{
105 let entry
= cache
.get_mut(&pretty_path
).unwrap();
106 // we don't need the source anymore,
107 // so drop to reduce memory-usage
108 entry
.source
= String
::new();
// Checks the ` href` attributes of one file and reports broken links,
// broken redirects, directory links and broken `#fragment` targets on stdout.
//
// NOTE(review): this listing is incomplete. The embedded line numbers skip
// values (e.g. 115-118, 122-124, 174-175), so the rest of the signature,
// several early returns, the places where `*errors` is set and all closing
// braces are not visible here. The comments below describe only what the
// visible lines establish.
//
// Visible behaviour: returns `None` when the file itself cannot be loaded;
// the value returned on the normal path is not visible (presumably
// `Some(pretty_file)`, since `walk` uses the result as a cache key - confirm).
114 fn check(cache
: &mut Cache
,
119 // ignore js files as they are not prone to errors as the rest of the
120 // documentation is and they otherwise bring up false positives.
121 if file
.extension().and_then(|s
| s
.to_str()) == Some("js") {
125 // Unfortunately we're not 100% full of valid links today so we need a few
126 // whitelists to get this past `make check` today.
128 if file
.ends_with("std/string/struct.String.html") {
132 if file
.ends_with("collections/string/struct.String.html") {
136 if file
.ends_with("btree_set/struct.BTreeSet.html") ||
137 file
.ends_with("collections/struct.BTreeSet.html") ||
138 file
.ends_with("collections/btree_map/struct.BTreeMap.html") ||
139 file
.ends_with("collections/hash_map/struct.HashMap.html") {
// Load the file being checked through the shared cache, passing
// `SkipRedirect` as the redirect mode.
143 let res
= load_file(cache
, root
, PathBuf
::from(file
), SkipRedirect
);
144 let (pretty_file
, contents
) = match res
{
// Any load error for the file itself ends the check for this file.
146 Err(_
) => return None
,
// Record the ids and names defined by this file in its cache entry.
149 cache
.get_mut(&pretty_file
)
151 .parse_ids(&pretty_file
, &contents
, errors
);
152 cache
.get_mut(&pretty_file
)
154 .parse_names(&contents
);
157 // Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
158 with_attrs_in_source(&contents
, " href", |url
, i
| {
159 // Ignore external URLs
160 if url
.starts_with("http:") || url
.starts_with("https:") ||
161 url
.starts_with("javascript:") || url
.starts_with("ftp:") ||
162 url
.starts_with("irc:") || url
.starts_with("data:") {
// Split off the `#fragment` first, then drop any `?query` part.
165 let mut parts
= url
.splitn(2, "#");
166 let url
= parts
.next().unwrap();
167 let fragment
= parts
.next();
168 let mut parts
= url
.splitn(2, "?");
169 let url
= parts
.next().unwrap();
171 // Once we've plucked out the URL, parse it using our base url and
172 // then try to extract a file path.
// The target path starts from a copy of the current file's path and is then
// adjusted component by component.
173 let mut path
= file
.to_path_buf();
176 for part
in Path
::new(url
).components() {
// NOTE(review): a link whose path has a prefix or root component (e.g. an
// absolute `/foo` href) hits the bare `panic!()` below, which carries no
// message identifying the offending file or link.
178 Component
::Prefix(_
) |
179 Component
::RootDir
=> panic
!(),
180 Component
::CurDir
=> {}
181 Component
::ParentDir
=> { path.pop(); }
182 Component
::Normal(s
) => { path.push(s); }
187 if let Some(extension
) = path
.extension() {
188 // don't check these files
189 if extension
== "png" {
194 // Alright, if we've found a file name then this file had better
195 // exist! If it doesn't then we register and print an error.
198 // Links to directories show as directory listings when viewing
199 // the docs offline so it's best to avoid them.
201 let pretty_path
= path
.strip_prefix(root
).unwrap_or(&path
);
202 println
!("{}:{}: directory link - {}",
203 pretty_file
.display(),
205 pretty_path
.display());
// Load the link target; `FromRedirect(false)` is passed as the redirect mode.
208 let res
= load_file(cache
, root
, path
.clone(), FromRedirect(false));
209 let (pretty_path
, contents
) = match res
{
// NOTE(review): `panic!(format!(..))` passes a non-literal message to
// `panic!`; `panic!("error loading {}: {}", ..)` would be the direct form.
211 Err(LoadError
::IOError(err
)) => {
212 panic
!(format
!("error loading {}: {}", path
.display(), err
));
214 Err(LoadError
::BrokenRedirect(target
, _
)) => {
216 println
!("{}:{}: broken redirect to {}",
217 pretty_file
.display(),
// `IsRedirect` is treated as impossible for this call.
222 Err(LoadError
::IsRedirect
) => unreachable
!(),
225 if let Some(ref fragment
) = fragment
{
226 // Fragments like `#1-6` are most likely line numbers to be
227 // interpreted by javascript, so we're ignoring these
228 if fragment
.splitn(2, '
-'
)
229 .all(|f
| f
.chars().all(|c
| c
.is_numeric())) {
// Make sure the target's ids and names are parsed, then require the
// fragment to match one of them.
233 let entry
= &mut cache
.get_mut(&pretty_path
).unwrap();
234 entry
.parse_ids(&pretty_path
, &contents
, errors
);
235 entry
.parse_names(&contents
);
237 if !(entry
.ids
.contains(*fragment
) || entry
.names
.contains(*fragment
)) {
239 print
!("{}:{}: broken link fragment ",
240 pretty_file
.display(),
242 println
!("`#{}` pointing to `{}`", fragment
, pretty_path
.display());
// Broken-link report: `i` is the index handed over by `with_attrs_in_source`,
// printed here as `i + 1`.
247 print
!("{}:{}: broken link - ", pretty_file
.display(), i
+ 1);
248 let pretty_path
= path
.strip_prefix(root
).unwrap_or(&path
);
249 println
!("{}", pretty_path
.display());
// Loads a file's contents through the shared cache, following rustdoc
// redirect pages, and returns `(pretty path, contents)` or a `LoadError`.
//
// NOTE(review): this listing is incomplete. The embedded line numbers skip
// values (256-258, 266-267, 272, 274-275, 279, 282-283, 286, 288-293), so the
// `root`, `file` and `redirect` parameters, some branch conditions and all
// closing braces are not visible here. The comments below describe only what
// the visible lines establish.
255 fn load_file(cache
: &mut Cache
,
259 -> Result
<(PathBuf
, String
), LoadError
> {
260 let mut contents
= String
::new();
// Cache key: `file` with the `root` prefix stripped when possible, otherwise
// `file` unchanged.
261 let pretty_file
= PathBuf
::from(file
.strip_prefix(root
).unwrap_or(&file
));
263 let maybe_redirect
= match cache
.entry(pretty_file
.clone()) {
// Cache hit: reuse a copy of the stored source.
// NOTE(review): `walk` replaces `source` with an empty string after a file
// has been checked, so a hit may yield empty contents - confirm that callers
// rely only on the already-parsed ids/names in that case.
264 Entry
::Occupied(entry
) => {
265 contents
= entry
.get().source
.clone();
// Cache miss: read the file from disk.
268 Entry
::Vacant(entry
) => {
// An open failure becomes `BrokenRedirect` when this load was reached via
// `FromRedirect(true)`, and `IOError` otherwise.
269 let mut fp
= File
::open(file
.clone()).map_err(|err
| {
270 if let FromRedirect(true) = redirect
{
271 LoadError
::BrokenRedirect(file
.clone(), err
)
273 LoadError
::IOError(err
)
// Read failures are always reported as `IOError`.
276 fp
.read_to_string(&mut contents
).map_err(|err
| LoadError
::IOError(err
))?
;
// Ask `maybe_redirect` whether the contents are a redirect page.
278 let maybe
= maybe_redirect(&contents
);
// With `SkipRedirect` the function bails out with `IsRedirect`; the enclosing
// condition is on a line not visible here (presumably "is a redirect" -
// confirm).
280 if let SkipRedirect
= redirect
{
281 return Err(LoadError
::IsRedirect
);
// Store the freshly read file in the cache.
284 entry
.insert(FileEntry
{
285 source
: contents
.clone(),
287 names
: HashSet
::new(),
// If a redirect URL was found, join it onto `file` and load that target
// recursively with `FromRedirect(true)`; otherwise return what was loaded.
294 match maybe_redirect
.map(|url
| file
.join(url
)) {
295 Some(redirect_file
) => {
296 let path
= PathBuf
::from(redirect_file
);
297 load_file(cache
, root
, path
, FromRedirect(true))
299 None
=> Ok((pretty_file
, contents
)),
/// Extracts the target URL from a redirect page.
///
/// Only the seventh line of `source` (index 6) is inspected. When it contains
/// `<p>Redirecting to <a href=`, the character right after that marker is
/// skipped as the opening quote and everything up to the next `"` is
/// returned.
///
/// Returns `None` when `source` has fewer than seven lines, when the marker
/// is absent, or when the attribute value is truncated (nothing after the
/// marker, or no closing `"`). Truncated input is reported as "not a
/// redirect" rather than causing a panic.
fn maybe_redirect(source: &str) -> Option<String> {
    const REDIRECT: &'static str = "<p>Redirecting to <a href=";

    let redirect_line = match source.lines().nth(6) {
        Some(line) => line,
        None => return None,
    };

    redirect_line.find(REDIRECT).and_then(|i| {
        // `i + REDIRECT.len()` is at most the line length and lies on a char
        // boundary because the ASCII marker matched there.
        let mut after_marker = redirect_line[i + REDIRECT.len()..].chars();
        // Skip the opening quote character, whatever it is.
        if after_marker.next().is_none() {
            return None;
        }
        let rest = after_marker.as_str();
        rest.find('"').map(|pos_quote| rest[..pos_quote].to_owned())
    })
}
319 fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f: F) {
320 for (i, mut line) in contents.lines().enumerate() {
321 while let Some(j) = line.find(attr) {
322 let rest = &line[j + attr.len()..];
324 let pos_equals = match rest.find("=") {
328 if rest[..pos_equals].trim_left_matches(" ") != "" {
332 let rest = &rest[pos_equals + 1..];
334 let pos_quote = match rest.find(&['"'
, '
\''
][..]) {
338 let quote_delim
= rest
.as_bytes()[pos_quote
] as char;
340 if rest
[..pos_quote
].trim_left_matches(" ") != "" {
343 let rest
= &rest
[pos_quote
+ 1..];
344 let url
= match rest
.find(quote_delim
) {
345 Some(i
) => &rest
[..i
],