Skip to content

Commit

Permalink
Merge pull request #539 from datanel/new_filter_ntfs
Browse files Browse the repository at this point in the history
[feature] FilterNtfs: filter by line_code
  • Loading branch information
ArnaudOggy authored Feb 17, 2020
2 parents 18f16f6 + 6b04989 commit 29156d3
Show file tree
Hide file tree
Showing 34 changed files with 325 additions and 86 deletions.
2 changes: 2 additions & 0 deletions src/minidom_utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ mod try_attribute;
pub use try_attribute::TryAttribute;
mod try_only_child;
pub use try_only_child::TryOnlyChild;
#[cfg(feature = "proj")]
mod writer;
#[cfg(feature = "proj")]
pub use self::writer::ElementWriter;
212 changes: 159 additions & 53 deletions src/ntfs/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,75 +17,181 @@
//! [NTFS](https://github.com/CanalTP/ntfs-specification/blob/master/ntfs_fr.md).

use crate::{objects::VehicleJourney, Model, Result};
use failure::bail;
use failure::{bail, format_err};
use lazy_static::lazy_static;
use std::collections::{HashMap, HashSet};
use transit_model_collection::{CollectionWithId, Idx};
use transit_model_collection::{CollectionWithId, Id, Idx};
use transit_model_relations::IdxSet;

/// What a filter does with the objects it selects.
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
pub enum Action {
/// Keep only the selected objects.
Extract,
/// Drop the selected objects and keep everything else.
Remove,
}

/// Extract or remove networks
pub fn filter(model: Model, action: Action, network_ids: Vec<String>) -> Result<Model> {
fn updated_stop_time_attributes<T>(
vehicle_journeys: &CollectionWithId<VehicleJourney>,
attributes_map: &HashMap<(Idx<VehicleJourney>, u32), T>,
old_vj_idx_to_vj_id: &HashMap<Idx<VehicleJourney>, String>,
) -> HashMap<(Idx<VehicleJourney>, u32), T>
where
T: Clone,
{
let mut updated_attributes_map = HashMap::new();
for (&(old_vj_idx, sequence), attribute) in attributes_map {
if let Some(new_vj_idx) = old_vj_idx_to_vj_id
.get(&old_vj_idx)
.and_then(|vj_id| vehicle_journeys.get_idx(vj_id))
{
updated_attributes_map.insert((new_vj_idx, sequence), attribute.clone());
}
/// Type of object whose properties a filter can be expressed on.
#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]
pub enum ObjectType {
/// Filter on a property of a network (`network_id` is the one registered in this file).
Network,
/// Filter on a property of a line (`line_code` is the one registered in this file).
Line,
}

/// For a single object type: property name -> set of values to match.
type PropertyValues = HashMap<String, HashSet<String>>;

/// An action (extract or remove) together with the property values that
/// select the objects the action applies to.
#[derive(Debug)]
pub struct Filter {
// Whether the selected objects are extracted or removed.
action: Action,
// Values to match, grouped by object type and then by property name.
filters: HashMap<ObjectType, PropertyValues>,
}

impl Filter {
/// Creates a filter applying `action`, with no property value registered yet.
pub fn new(action: Action) -> Self {
Filter {
action,
filters: HashMap::new(),
}
updated_attributes_map
}

let mut networks = model.networks.clone();
let n_id_to_old_idx = networks.get_id_to_idx().clone();
let calendars = model.calendars.clone();
let vjs = model.vehicle_journeys.clone();
let old_vj_idx_to_vj_id: HashMap<Idx<VehicleJourney>, String> = model
.vehicle_journeys
.get_id_to_idx()
.iter()
.map(|(id, &idx)| (idx, id.clone()))
.collect();
pub fn add<T: Into<String>, U: Into<String>>(
&mut self,
object_type: ObjectType,
property: T,
value: U,
) {
let props = self.filters.entry(object_type).or_insert_with(HashMap::new);
props
.entry(property.into())
.or_insert_with(HashSet::new)
.insert(value.into());
}
}

/// Signature shared by every property filter: given the model and the value to
/// match, return the set of vehicle journeys corresponding to the matching
/// objects, or an error.
type FnFilter = Box<dyn Fn(&Model, &str) -> Result<IdxSet<VehicleJourney>> + Send + Sync>;
lazy_static! {
    /// Supported filters, indexed by object type and then by property name.
    static ref PROPERTY_FILTERS: HashMap<ObjectType, HashMap<&'static str, FnFilter>> = {
        // `network_id`: an unknown identifier is reported as an error.
        let by_network_id: FnFilter = Box::new(
            |model: &Model, network_id: &str| -> Result<IdxSet<VehicleJourney>> {
                match model.networks.get_idx(network_id) {
                    Some(network_idx) => Ok(model.get_corresponding_from_idx(network_idx)),
                    None => Err(format_err!("Network '{}' not found.", network_id)),
                }
            },
        );

        // `line_code`: every line carrying the requested code contributes its
        // vehicle journeys; a code matching no line yields an empty set.
        let by_line_code: FnFilter = Box::new(
            |model: &Model, line_code: &str| -> Result<IdxSet<VehicleJourney>> {
                let vehicle_journeys: IdxSet<VehicleJourney> = model
                    .lines
                    .values()
                    .filter(|line| line.code.as_ref().map(|code| code.as_str()) == Some(line_code))
                    // Unwrap is safe because the line comes from `model.lines` itself.
                    .map(|line| model.lines.get_idx(&line.id).unwrap())
                    .flat_map(|line_idx| model.get_corresponding_from_idx(line_idx))
                    .collect();
                Ok(vehicle_journeys)
            },
        );

        let mut network_filters: HashMap<&'static str, FnFilter> = HashMap::new();
        network_filters.insert("network_id", by_network_id);

        let mut line_filters: HashMap<&'static str, FnFilter> = HashMap::new();
        line_filters.insert("line_code", by_line_code);

        let mut filters_by_type: HashMap<ObjectType, HashMap<&'static str, FnFilter>> =
            HashMap::new();
        filters_by_type.insert(ObjectType::Network, network_filters);
        filters_by_type.insert(ObjectType::Line, line_filters);
        filters_by_type
    };
}

/// Runs the filter registered in `PROPERTY_FILTERS` for (`object_type`,
/// `property`) against `value`.
///
/// Returns the vehicle journeys selected by that filter. Fails when the object
/// type or the property has no registered filter, or when the filter itself
/// returns an error.
fn filter_by_property(
    model: &Model,
    object_type: ObjectType,
    property: &str,
    value: &str,
) -> Result<IdxSet<VehicleJourney>> {
    let filters_for_type = match PROPERTY_FILTERS.get(&object_type) {
        Some(filters) => filters,
        None => {
            return Err(format_err!(
                "Object of type '{:?}' are not yet supported",
                object_type
            ))
        }
    };
    match filters_for_type.get(property) {
        Some(select_vehicle_journeys) => select_vehicle_journeys(model, value),
        None => Err(format_err!("Property '{}' not yet supported.", property)),
    }
}

let network_ids: HashSet<String> = network_ids
fn filter_from_idxset<T: Id<T>>(
collection: &mut CollectionWithId<T>,
idx_set: IdxSet<T>,
action: Action,
) {
let ids: Vec<String> = idx_set
.into_iter()
.map(|id| match networks.get(&id) {
Some(_) => Ok(id),
None => bail!("network {} not found.", id),
})
.collect::<Result<HashSet<String>>>()?;
.map(|idx| collection[idx].id().to_string())
.collect();
let id_refs: Vec<&str> = ids.iter().map(String::as_str).collect();
collection.retain(|object| match action {
Action::Extract => id_refs.contains(&object.id()),
Action::Remove => !id_refs.contains(&object.id()),
});
}

match action {
Action::Extract => networks.retain(|n| network_ids.contains(&n.id)),
Action::Remove => networks.retain(|n| !network_ids.contains(&n.id)),
/// Re-keys a per-stop-time attribute map so that it refers to the indexes of
/// `vehicle_journeys`.
///
/// `attributes_map` is keyed by (old vehicle journey index, sequence).
/// `old_vj_idx_to_vj_id` translates each old index to a vehicle journey
/// identifier, which is then looked up in `vehicle_journeys` to obtain the new
/// index. Entries whose old index is unknown, or whose identifier is no longer
/// present in `vehicle_journeys`, are dropped. Attribute values are cloned.
fn updated_stop_time_attributes<T>(
    vehicle_journeys: &CollectionWithId<VehicleJourney>,
    attributes_map: &HashMap<(Idx<VehicleJourney>, u32), T>,
    old_vj_idx_to_vj_id: &HashMap<Idx<VehicleJourney>, String>,
) -> HashMap<(Idx<VehicleJourney>, u32), T>
where
    T: Clone,
{
    attributes_map
        .iter()
        .filter_map(|(&(old_vj_idx, sequence), attribute)| {
            let vj_id = old_vj_idx_to_vj_id.get(&old_vj_idx)?;
            let new_vj_idx = vehicle_journeys.get_idx(vj_id)?;
            Some(((new_vj_idx, sequence), attribute.clone()))
        })
        .collect()
}

let network_idx = networks.values().map(|n| n_id_to_old_idx[&n.id]).collect();
let calendars_used = model.get_corresponding(&network_idx);
let vjs_used = model.get_corresponding(&network_idx);
/// Extract or remove part of the dataset from property filters on an object (Network, Line, etc.)
pub fn filter(model: Model, filter: &Filter) -> Result<Model> {
let selected_vjs = filter
.filters
.iter()
.flat_map(|(object_type, property_values)| {
property_values
.iter()
.map(move |(property, values)| (object_type, property, values))
})
.flat_map(|(object_type, property, values)| {
values
.iter()
.map(move |value| (object_type, property, value))
})
.map(|(object_type, property, value)| {
filter_by_property(&model, *object_type, property.as_str(), value.as_str())
})
.try_fold::<_, _, Result<IdxSet<VehicleJourney>>>(
IdxSet::new(),
|mut vehicle_journeys_indexes, idx_set| {
vehicle_journeys_indexes.extend(idx_set?);
Ok(vehicle_journeys_indexes)
},
)?;

let mut collections = model.into_collections();

collections
.calendars
.retain(|c| calendars_used.contains(&calendars.get_idx(&c.id).unwrap()));

collections
let old_vj_idx_to_vj_id: HashMap<Idx<VehicleJourney>, String> = collections
.vehicle_journeys
.retain(|c| vjs_used.contains(&vjs.get_idx(&c.id).unwrap()));
.get_id_to_idx()
.iter()
.map(|(id, &idx)| (idx, id.clone()))
.collect();

filter_from_idxset(
&mut collections.vehicle_journeys,
selected_vjs,
filter.action,
);

collections.stop_time_ids = updated_stop_time_attributes(
&collections.vehicle_journeys,
Expand All @@ -103,8 +209,8 @@ pub fn filter(model: Model, action: Action, network_ids: Vec<String>) -> Result<
&old_vj_idx_to_vj_id,
);

if collections.calendars.is_empty() {
bail!("the data does not contain services anymore.")
if collections.vehicle_journeys.is_empty() {
bail!("the data does not contain vehicle journeys anymore.")
}

Model::new(collections)
Expand Down
82 changes: 56 additions & 26 deletions tests/filter_ntfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@ fn test_extract_network() {
test_in_tmp_dir(|path| {
let input_dir = "tests/fixtures/filter_ntfs/input";

let model = filter::filter(
transit_model::ntfs::read(input_dir).unwrap(),
filter::Action::Extract,
vec!["network1".into()],
)
.unwrap();
let mut filter = filter::Filter::new(filter::Action::Extract);
filter.add(filter::ObjectType::Network, "network_id", "network1");

let model = filter::filter(transit_model::ntfs::read(input_dir).unwrap(), &filter).unwrap();
transit_model::ntfs::write(&model, path, get_test_datetime()).unwrap();
compare_output_dir_with_expected(
&path,
Expand All @@ -41,37 +39,69 @@ fn test_remove_network() {
test_in_tmp_dir(|path| {
let input_dir = "tests/fixtures/filter_ntfs/input";

let model = filter::filter(
transit_model::ntfs::read(input_dir).unwrap(),
filter::Action::Remove,
vec!["network1".into()],
)
.unwrap();
let mut filter = filter::Filter::new(filter::Action::Remove);
filter.add(filter::ObjectType::Network, "network_id", "network1");

let model = filter::filter(transit_model::ntfs::read(input_dir).unwrap(), &filter).unwrap();
transit_model::ntfs::write(&model, path, get_test_datetime()).unwrap();
compare_output_dir_with_expected(&path, None, "./tests/fixtures/filter_ntfs/output_remove");
});
}

// Filtering on a `network_id` value of "unknown" is expected to make
// `filter::filter` return an error, which the final `unwrap` turns into the
// panic checked by `should_panic`.
#[test]
#[should_panic(expected = "network unknown not found.")]
#[should_panic(expected = "Network \\'unknown\\' not found.")]
fn test_extract_with_unknown_network() {
let input_dir = "tests/fixtures/filter_ntfs/input";
filter::filter(
transit_model::ntfs::read(input_dir).unwrap(),
filter::Action::Extract,
vec!["unknown".into()],
)
.unwrap();
let mut filter = filter::Filter::new(filter::Action::Extract);
filter.add(filter::ObjectType::Network, "network_id", "unknown");

filter::filter(transit_model::ntfs::read(input_dir).unwrap(), &filter).unwrap();
}

// Removing network1, network2 and network3 is expected to leave nothing to
// keep, so `filter::filter` must return an error; the final `unwrap` turns it
// into the panic checked by `should_panic`.
#[test]
#[should_panic(expected = "the data does not contain services anymore.")]
#[should_panic(expected = "the data does not contain vehicle journeys anymore.")]
fn test_remove_all_networks() {
let input_dir = "tests/fixtures/filter_ntfs/input";
filter::filter(
transit_model::ntfs::read(input_dir).unwrap(),
filter::Action::Remove,
vec!["network1".into(), "network2".into(), "network3".into()],
)
.unwrap();
let mut filter = filter::Filter::new(filter::Action::Remove);
filter.add(filter::ObjectType::Network, "network_id", "network1");
filter.add(filter::ObjectType::Network, "network_id", "network2");
filter.add(filter::ObjectType::Network, "network_id", "network3");
filter::filter(transit_model::ntfs::read(input_dir).unwrap(), &filter).unwrap();
}

// Removes the objects selected by `line_code` "route3", writes the resulting
// model and compares the output with the `output_remove_line` fixture.
#[test]
fn test_remove_line_by_line_code() {
    test_in_tmp_dir(|path| {
        let mut remove_route3 = filter::Filter::new(filter::Action::Remove);
        remove_route3.add(filter::ObjectType::Line, "line_code", "route3");

        let input_dir = "tests/fixtures/filter_ntfs/input";
        let input_model = transit_model::ntfs::read(input_dir).unwrap();
        let model = filter::filter(input_model, &remove_route3).unwrap();

        transit_model::ntfs::write(&model, path, get_test_datetime()).unwrap();
        let expected_dir = "./tests/fixtures/filter_ntfs/output_remove_line";
        compare_output_dir_with_expected(&path, None, expected_dir);
    });
}

// Extracts the objects selected by `line_code` "route1" or "route3", writes the
// resulting model and compares the output with the
// `output_extract_multiple_lines` fixture.
#[test]
fn test_extract_multiple_line_by_line_code() {
    test_in_tmp_dir(|path| {
        let mut extract_routes = filter::Filter::new(filter::Action::Extract);
        extract_routes.add(filter::ObjectType::Line, "line_code", "route1");
        extract_routes.add(filter::ObjectType::Line, "line_code", "route3");

        let input_dir = "tests/fixtures/filter_ntfs/input";
        let input_model = transit_model::ntfs::read(input_dir).unwrap();
        let model = filter::filter(input_model, &extract_routes).unwrap();

        transit_model::ntfs::write(&model, path, get_test_datetime()).unwrap();
        let expected_dir = "./tests/fixtures/filter_ntfs/output_extract_multiple_lines";
        compare_output_dir_with_expected(&path, None, expected_dir);
    });
}
8 changes: 4 additions & 4 deletions tests/fixtures/filter_ntfs/input/lines.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
line_id,line_name,network_id,commercial_mode_id
line1,line1,network1,Bus
line2,line2,network2,Bus
line3,line3,network3,Bus
line_id,line_name,network_id,commercial_mode_id,line_code
line1,line1,network1,Bus,route1
line2,line2,network2,Bus,route2
line3,line3,network3,Bus,route3
2 changes: 1 addition & 1 deletion tests/fixtures/filter_ntfs/output_extract/lines.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
line_id,line_code,line_name,forward_line_name,backward_line_name,line_color,line_text_color,line_sort_order,network_id,commercial_mode_id,geometry_id,line_opening_time,line_closing_time
line1,,line1,,,,,,network1,Bus,,,
line1,route1,line1,,,,,,network1,Bus,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
service1,0,1,1,0,0,0,0,20190101,20190102
service2,0,0,0,1,1,0,0,20190103,20190104
service3,0,0,0,0,0,1,1,20190105,20190106
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
commercial_mode_id,commercial_mode_name
Bus,Bus
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
company_id,company_name,company_address,company_url,company_mail,company_phone
network1,network1,,,,
network3,network3,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
contributor_id,contributor_name,contributor_license,contributor_website
contributor1,Default contributor,Unknown license,
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
dataset_id,contributor_id,dataset_start_date,dataset_end_date,dataset_type,dataset_extrapolation,dataset_desc,dataset_system
dataset1,contributor1,20190101,20190106,,0,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
feed_info_param,feed_info_value
feed_creation_date,20190403
feed_creation_time,17:19:00
feed_end_date,20190106
feed_start_date,20190101
ntfs_version,0.11.1
Loading

0 comments on commit 29156d3

Please sign in to comment.