Skip to content

Commit

Permalink
Add skip_missing flag, add Input enum
Browse files Browse the repository at this point in the history
  • Loading branch information
pawroman committed Nov 25, 2020
1 parent 68b574d commit 3ac68b8
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 6 deletions.
15 changes: 15 additions & 0 deletions src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,21 @@ pub(crate) enum FileType {
Plaintext,
}

impl<P: AsRef<Path>> From<P> for FileType {
    /// Detect if the given path points to a Markdown, HTML, or plaintext file.
    ///
    /// Only the path's extension is inspected: `md` selects Markdown, `html`
    /// and `htm` select HTML. Any other extension, a missing extension, or an
    /// extension that is not valid UTF-8 yields plaintext.
    fn from(p: P) -> FileType {
        // `to_str` returns `None` for a non-UTF-8 extension; such a path falls
        // through to the plaintext arm instead of panicking.
        match p.as_ref().extension().and_then(|ext| ext.to_str()) {
            Some("md") => FileType::Markdown,
            Some("html") | Some("htm") => FileType::HTML,
            _ => FileType::Plaintext,
        }
    }
}

// Use LinkFinder here to offload the actual link searching
fn find_links(input: &str) -> Vec<linkify::Link> {
let finder = LinkFinder::new();
Expand Down
5 changes: 4 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ fn main() -> Result<()> {
}
None => tokio::runtime::Runtime::new()?,
};
let errorcode = runtime.block_on(run(cfg, opts.inputs))?;
let errorcode = runtime.block_on(run(
cfg,
opts.inputs.iter().map(|i| i.to_string()).collect(),
))?;
std::process::exit(errorcode);
}

Expand Down
13 changes: 10 additions & 3 deletions src/options.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use crate::types::Input;
use anyhow::{Error, Result};
use serde::Deserialize;
use std::{fs, io::ErrorKind};
use structopt::{clap::crate_version, StructOpt};
use url::Url;

pub(crate) const USER_AGENT: &str = concat!("lychee/", crate_version!());
const METHOD: &str = "get";
Expand Down Expand Up @@ -33,9 +35,9 @@ macro_rules! fold_in {
#[derive(Debug, StructOpt)]
#[structopt(name = "lychee", about = "A glorious link checker")]
pub(crate) struct LycheeOptions {
/// Input files
#[structopt(default_value = "README.md")]
pub inputs: Vec<String>,
/// TODO: Inputs
#[structopt(default_value = "README.md", parse(from_str = Input::from))]
pub inputs: Vec<Input>,

/// Configuration file to use
#[structopt(short, long = "config", default_value = "./lychee.toml")]
Expand All @@ -52,6 +54,11 @@ pub struct Config {
#[serde(default)]
pub verbose: bool,

/// TODO: Skip missing input files
#[structopt(long)]
#[serde(default)]
pub skip_missing: bool,

/// Show progress
#[structopt(short, long)]
#[serde(default)]
Expand Down
118 changes: 116 additions & 2 deletions src/types.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
use crate::extract::FileType;
use crate::options::Config;
use anyhow::anyhow;
use anyhow::{anyhow, Result};
use glob::glob;
use regex::RegexSet;
use std::net::IpAddr;
use std::path::{Path, PathBuf};
use std::{collections::HashSet, convert::TryFrom, fmt::Display};
use tokio::fs::read_to_string;
use tokio::io::{stdin, AsyncReadExt};
use url::Url;

/// Input value that stands for standard input rather than a URL or a file path.
const STDIN: &str = "-";

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Uri {
Website(Url),
Expand Down Expand Up @@ -119,7 +126,7 @@ impl From<reqwest::Error> for Status {
}

/// Exclude configuration for the link checker.
/// You can ignore links based on
/// You can ignore links based on regex patterns or pre-defined IP ranges.
#[derive(Clone, Debug)]
pub struct Excludes {
pub regex: Option<RegexSet>,
Expand Down Expand Up @@ -158,6 +165,113 @@ impl Default for Excludes {
}
}

/// A source of text that can be resolved into one or more `InputContent` values.
#[derive(Debug)]
#[non_exhaustive]
pub(crate) enum Input {
/// A remote document identified by its URL; its body is downloaded over HTTP.
RemoteUrl(Url),
/// A filesystem glob pattern; every path it matches is read as a file.
FsGlob(String),
/// A single file on the local filesystem.
FsPath(PathBuf),
/// Standard input, selected by the `STDIN` marker (`-`).
Stdin,
}

/// Render the input the way it would be written by the user: the URL, the
/// glob pattern, the path, or `-` for standard input.
///
/// `Display` is implemented instead of `ToString` so the type works with
/// `{}` formatting; `to_string()` stays available through the standard
/// blanket implementation.
impl std::fmt::Display for Input {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::RemoteUrl(url) => write!(f, "{}", url),
            Self::FsGlob(pattern) => f.write_str(pattern),
            // `display()` substitutes U+FFFD for bytes that are not valid
            // UTF-8, so such a path no longer collapses to an empty string.
            Self::FsPath(path) => write!(f, "{}", path.display()),
            Self::Stdin => f.write_str(STDIN),
        }
    }
}

/// Text obtained from a single `Input`, together with where it came from and
/// how it should be interpreted.
#[derive(Debug)]
pub(crate) struct InputContent {
/// The concrete input (URL, file path, or stdin) the text was read from.
input: Input,
/// File type detected for the input.
file_type: FileType,
/// The text that was read.
content: String,
}

impl From<&str> for Input {
    /// Classify a raw input string.
    ///
    /// The `STDIN` marker selects standard input. Otherwise, anything that
    /// parses as a URL becomes a remote input, and everything else is kept
    /// verbatim as a filesystem glob pattern.
    fn from(value: &str) -> Self {
        if value == STDIN {
            return Self::Stdin;
        }

        Url::parse(value)
            .map(Self::RemoteUrl)
            .unwrap_or_else(|_| Self::FsGlob(value.to_owned()))
    }
}

impl Input {
async fn get_contents(self) -> Result<Vec<InputContent>> {
use Input::*;

let contents = match self {
RemoteUrl(url) => vec![Self::url_contents(url).await?],
FsGlob(path_glob) => Self::glob_contents(path_glob).await?,
FsPath(path) => vec![Self::path_content(&path).await?],
Stdin => vec![Self::stdin_content().await?],
};

Ok(contents)
}

async fn url_contents(url: Url) -> Result<InputContent> {
let res = reqwest::get(url.clone()).await?;
let content = res.text().await?;
let input_content = InputContent {
file_type: FileType::from(&url.as_str()),
input: Input::RemoteUrl(url),
content,
};

Ok(input_content)
}

async fn glob_contents(path_glob: String) -> Result<Vec<InputContent>> {
let mut contents = vec![];

for entry in glob(&path_glob)? {
match entry {
Ok(path) => {
let content = Self::path_content(&path).await?;
contents.push(content);
}
Err(e) => println!("{:?}", e),
}
}

Ok(contents)
}

async fn path_content<P: Into<PathBuf> + AsRef<Path>>(path: P) -> Result<InputContent> {
let input_content = InputContent {
file_type: FileType::from(path.as_ref()),
content: read_to_string(&path).await?,
input: Input::FsPath(path.into()),
};

Ok(input_content)
}

async fn stdin_content() -> Result<InputContent> {
let mut content = String::new();
let mut stdin = stdin();
stdin.read_to_string(&mut content).await?;

let input_content = InputContent {
input: Input::Stdin,
content,
file_type: FileType::Plaintext,
};

Ok(input_content)
}
}

#[cfg(test)]
mod test {
use super::*;
Expand Down

0 comments on commit 3ac68b8

Please sign in to comment.