From cd787a38dc921b59ec161bb24844689ee8237e98 Mon Sep 17 00:00:00 2001 From: Curtis Gagliardi Date: Mon, 24 Oct 2016 21:02:40 -0700 Subject: [PATCH] Fix Python counting by handling the case where the start and end markers are the same characters. We were assuming that if you saw the start-of-comment marker, you could safely assume you were in a comment. However, with Python, we treat ''' as both the start and end marker for comments, so we need some additional checks. --- src/lib.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 19b0e2f..3ae1776 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -314,8 +314,6 @@ pub fn lang_from_ext(filepath: &str) -> Lang { "in" => Autoconf, "clj" => Clojure, - // "bash" | "sh" => Bash, - // "cfm" => ColdFusion, "f" | "for" | "ftn" | "f77" | "pfo" => FortranLegacy, "f03" | "f08" | "f90" | "f95" => FortranModern, "makefile" | "mk" => Makefile, @@ -412,7 +410,7 @@ pub fn counter_config_for_lang<'a>(lang: &Lang) -> LineConfig<'a> { // which one is right? = or =pod? // Perl => SM("#""=", "=cut"), Perl => SM("#", "=pod", "=cut"), - Python => SM("#", "'\''", "'\''"), + Python => SM("#", "'''", "'''"), Ruby => SM("#", "=begin", "=end"), Sql => SM("--", "/*", "*/"), @@ -445,8 +443,11 @@ pub fn counter_config_for_lang<'a>(lang: &Lang) -> LineConfig<'a> { // TODO(cgag): not 100% sure that yacc belongs here. 
C | CCppHeader | Rust | Yacc | ActionScript | ColdFusionScript | Css | Cpp | CSharp | Dart | DeviceTree | Go | Jai | Java | JavaScript | Jsx | Kotlin | Less | LinkerScript | - ObjectiveC | ObjectiveCpp | Qcl | Sass | Scala | Swift | TypeScript | UnrealScript | - Unrecognized => c_style, + ObjectiveC | ObjectiveCpp | Qcl | Sass | Scala | Swift | TypeScript | UnrealScript => { + c_style + } + + Unrecognized => unreachable!(), }; match ctuple { @@ -611,13 +612,15 @@ pub fn count_multi(filepath: &str, multi_start: &str, multi_end: &str) -> Count } } - if pos + start_len <= trimmed.len() && - &trimmed[pos..pos + start_len] == multiline_start { + if !in_comment && pos + start_len <= trimmed.len() && + &trimmed[pos..(pos + start_len)] == multi_start { pos += start_len; in_comment = true; - } else if pos + end_len <= trimmed.len() && &trimmed[pos..pos + end_len] == multiline_end { + } else if in_comment && pos + end_len <= trimmed.len() && + &trimmed[pos..(pos + end_len)] == multi_end { pos += end_len; in_comment = false; + // TODO(cgag): should we bother handling whitespace here? } else if !in_comment { found_code = true; pos += 1; @@ -666,8 +669,6 @@ pub fn count_everything<'a>(filepath: &str, total_count } - - pub fn count_single_multi(filepath: &str, single_start: &str, multi_start: &str, @@ -729,12 +730,15 @@ pub fn count_single_multi(filepath: &str, } } - if pos + start_len <= trimmed_len && &trimmed[pos..(pos + start_len)] == multi_start { + if !in_comment && pos + start_len <= trimmed_len && + &trimmed[pos..(pos + start_len)] == multi_start { pos += start_len; in_comment = true; - } else if pos + end_len <= trimmed_len && &trimmed[pos..(pos + end_len)] == multi_end { + } else if in_comment && pos + end_len <= trimmed_len && + &trimmed[pos..(pos + end_len)] == multi_end { pos += end_len; in_comment = false; + // TODO(cgag): should we bother handling whitespace here? } else if !in_comment { found_code = true; pos += 1;