From 2252d42893d794b591436a5857618459779f40be Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 24 Jun 2024 14:35:36 -0700 Subject: [PATCH] add apnic population data --- README.md | 10 ++++- src/asnames/mod.rs | 20 +++++++++ src/asnames/population.rs | 89 +++++++++++++++++++++++++++++++++++++++ src/lib.rs | 10 ++++- 4 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 src/asnames/population.rs diff --git a/README.md b/README.md index 10a64f5..29e9d8d 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ println!( Data source: - RIPE NCC asnames: - CAIDA as-to-organization mapping: +- APNIC AS population data: #### Data structure @@ -93,7 +94,7 @@ pub struct AsName { pub asn: u32, pub name: String, pub country: String, - pub as2org: Option<As2orgInfo>, + pub population: Option<AsnPopulationData>, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct As2orgInfo { @@ -102,6 +103,13 @@ pub struct As2orgInfo { pub org_id: String, pub org_name: String, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AsnPopulationData { + pub user_count: i64, + pub percent_country: f64, + pub percent_global: f64, + pub sample_count: i64, +} ``` #### Usage example diff --git a/src/asnames/mod.rs b/src/asnames/mod.rs index dbf6c9c..2477541 100644 --- a/src/asnames/mod.rs +++ b/src/asnames/mod.rs @@ -15,6 +15,7 @@ //! pub name: String, //! pub country: String, //! pub as2org: Option<As2orgInfo>, +//! pub population: Option<AsnPopulationData>, //! } //! #[derive(Debug, Clone, Serialize, Deserialize)] //! pub struct As2orgInfo { @@ -23,6 +24,13 @@ //! pub org_id: String, //! pub org_name: String, //! } +//! #[derive(Debug, Clone, Serialize, Deserialize)] +//! pub struct AsnPopulationData { +//! pub user_count: i64, +//! pub percent_country: f64, +//! pub percent_global: f64, +//! pub sample_count: i64, +//! } //! ``` //! //! # Example @@ -37,9 +45,13 @@ //! assert_eq!(asnames.get(&400644).unwrap().country, "US"); //! 
``` +mod population; + +use crate::asnames::population::AsnPopulationData; use anyhow::Result; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use tracing::info; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AsName { @@ -47,6 +59,7 @@ pub struct AsName { pub name: String, pub country: String, pub as2org: Option<As2orgInfo>, + pub population: Option<AsnPopulationData>, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -60,8 +73,13 @@ pub struct As2orgInfo { const DATA_URL: &str = "https://ftp.ripe.net/ripe/asnames/asn.txt"; pub fn get_asnames() -> Result<HashMap<u32, AsName>> { + info!("loading asnames from RIPE NCC..."); let text = oneio::read_to_string(DATA_URL)?; + info!("loading as2org data from CAIDA..."); let as2org = as2org_rs::As2org::new(None)?; + info!("loading ASN population data from APNIC..."); + let population = population::AsnPopulation::new()?; + let asnames = text .lines() .filter_map(|line| { @@ -80,11 +98,13 @@ pub fn get_asnames() -> Result<HashMap<u32, AsName>> { org_id: info.org_id.clone(), org_name: info.org_name.clone(), }); + let population = population.get(asn); Some(AsName { asn, name: name_str.to_string(), country: country_str.to_string(), as2org, + population, }) }) .collect::<Vec<AsName>>(); diff --git a/src/asnames/population.rs b/src/asnames/population.rs new file mode 100644 index 0000000..c58861c --- /dev/null +++ b/src/asnames/population.rs @@ -0,0 +1,89 @@ +use anyhow::Result; +use chrono::NaiveDate; +use serde::{de, Deserialize, Deserializer, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ApnicAsnPopulationEntry { + pub rank: u32, + #[serde(rename = "AS")] + pub asn: u32, + #[serde(rename = "Description")] + pub description: String, + #[serde(rename = "CC")] + pub country_code: String, + #[serde(rename = "Users")] + pub user_count: i64, + #[serde(rename = "Percent of CC Pop")] + pub percent_country: f64, + #[serde(rename = "Percent of Internet")] + pub percent_global: f64, + #[serde(rename = "Samples")] + pub 
sample_count: i64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ApnicAsnPopulation { + pub copyright: String, + pub description: String, + #[serde(rename = "Date", deserialize_with = "deserialize_date")] + pub date: NaiveDate, + #[serde(rename = "Window")] + pub window: String, + #[serde(rename = "Data")] + pub data: Vec<ApnicAsnPopulationEntry>, +} + +fn deserialize_date<'de, D>(d: D) -> Result<NaiveDate, D::Error> +where + D: Deserializer<'de>, +{ + let string = String::deserialize(d)?; + NaiveDate::parse_from_str(string.as_str(), "%d/%m/%Y").map_err(de::Error::custom) +} + +pub struct AsnPopulation { + population_map: HashMap<u32, ApnicAsnPopulationEntry>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AsnPopulationData { + pub user_count: i64, + pub percent_country: f64, + pub percent_global: f64, + pub sample_count: i64, +} + +impl AsnPopulation { + pub fn new() -> Result<Self> { + let population: ApnicAsnPopulation = + oneio::read_json_struct("https://stats.labs.apnic.net/cgi-bin/aspop?f=j")?; + let mut population_map = HashMap::new(); + for entry in population.data { + population_map.insert(entry.asn, entry); + } + Ok(AsnPopulation { population_map }) + } + + pub fn get(&self, asn: u32) -> Option<AsnPopulationData> { + self.population_map + .get(&asn) + .map(|entry| AsnPopulationData { + user_count: entry.user_count, + percent_country: entry.percent_country, + percent_global: entry.percent_global, + sample_count: entry.sample_count, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_load_asn_population() { + let population = AsnPopulation::new().unwrap(); + dbg!(population.get(15169).unwrap()); + } +} diff --git a/src/lib.rs b/src/lib.rs index 723ee98..5976426 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,6 +74,7 @@ //! Data source: //! - RIPE NCC asnames: //! - CAIDA as-to-organization mapping: +//! - APNIC AS population data: //! //! ### Data structure //! @@ -84,7 +85,7 @@ //! pub asn: u32, //! pub name: String, //! pub country: String, -//! pub as2org: Option<As2orgInfo>, +//! 
pub population: Option<AsnPopulationData>, //! } //! #[derive(Debug, Clone, Serialize, Deserialize)] //! pub struct As2orgInfo { @@ -93,6 +94,13 @@ //! pub org_id: String, //! pub org_name: String, //! } +//! #[derive(Debug, Clone, Serialize, Deserialize)] +//! pub struct AsnPopulationData { +//! pub user_count: i64, +//! pub percent_country: f64, +//! pub percent_global: f64, +//! pub sample_count: i64, +//! } //! ``` //! //! ### Usage example