Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove per capita stop words #4456

Merged
merged 11 commits into from
Jul 12, 2024
Merged
2 changes: 1 addition & 1 deletion deploy/nl/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ indexes:
base_uae_mem:
store_type: MEMORY
source_path: ../../tools/nl/embeddings/input/base
embeddings_path: gs://datcom-nl-models/base_uae_mem_2024_07_11_08_35_57/embeddings.csv
embeddings_path: gs://datcom-nl-models/base_uae_mem_2024_07_12_09_03_25/embeddings.csv
model: uae-large-v1-model
healthcheck_query: "Life expectancy"
base_mistral_mem:
Expand Down
2 changes: 1 addition & 1 deletion run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ function run_lint_fix {
pip3 install isort -q
fi
yapf -r -i -p --style='{based_on_style: google, indent_width: 2}' server/ nl_server/ shared/ tools/ -e=*pb2.py -e=**/.env/**
isort server/ nl_server/ shared/ tools/ --skip-glob *pb2.py --skip-glob **/.env/** --profile google
isort server/ nl_server/ shared/ tools/ --skip-glob=*pb2.py --skip-glob=**/.env/** --profile=google
deactivate
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,16 @@
"geoId/12"
],
"statVarKey": [
"Count_CriminalIncidents_IsHateCrime_multiple_place_bar_block"
"Count_CriminalActivities_MurderAndNonNegligentManslaughter_multiple_place_bar_block"
],
"title": "Hate Crime Incidents (${date})",
"title": "Murder and Non Negligent Manslaughter Cases (${date})",
"type": "BAR"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Incidents"
"title": "Murder and Non Negligent Manslaughter Cases"
},
{
"columns": [
Expand All @@ -104,17 +104,16 @@
"geoId/12"
],
"statVarKey": [
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime_multiple_place_bar_block",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime_multiple_place_bar_block"
"Count_CriminalIncidents_IsHateCrime_multiple_place_bar_block"
],
"title": "Hate Crime Cases by Type (${date})",
"title": "Hate Crime Incidents (${date})",
"type": "BAR"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Cases by Type"
"title": "Hate Crime Incidents"
},
{
"columns": [
Expand All @@ -131,16 +130,17 @@
"geoId/12"
],
"statVarKey": [
"Count_CriminalActivities_MurderAndNonNegligentManslaughter_multiple_place_bar_block"
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime_multiple_place_bar_block",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime_multiple_place_bar_block"
],
"title": "Murder and Non Negligent Manslaughter Cases (${date})",
"title": "Hate Crime Cases by Type (${date})",
"type": "BAR"
}
]
}
],
"denom": "Count_Person",
"title": "Murder and Non Negligent Manslaughter Cases"
"title": "Hate Crime Cases by Type"
},
{
"columns": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,76 +71,76 @@
"tiles": [
{
"statVarKey": [
"Count_CriminalIncidents_IsHateCrime"
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
],
"title": "Hate Crime Incidents in California",
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "LINE"
}
]
},
{
"tiles": [
{
"description": "Hate Crime Incidents in California",
"description": "Murder and Non Negligent Manslaughter Cases in California",
"statVarKey": [
"Count_CriminalIncidents_IsHateCrime"
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
],
"title": "Hate Crime Incidents in California",
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "HIGHLIGHT"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Incidents"
"title": "Murder and Non Negligent Manslaughter Cases"
},
{
"columns": [
{
"tiles": [
{
"statVarKey": [
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Intimidation_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime"
"Count_CriminalIncidents_IsHateCrime"
],
"title": "Hate Crime Cases by Type in California",
"title": "Hate Crime Incidents in California",
"type": "LINE"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Cases by Type"
},
{
"columns": [
},
{
"tiles": [
{
"description": "Hate Crime Incidents in California",
"statVarKey": [
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
"Count_CriminalIncidents_IsHateCrime"
],
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "LINE"
"title": "Hate Crime Incidents in California",
"type": "HIGHLIGHT"
}
]
},
}
],
"denom": "Count_Person",
"title": "Hate Crime Incidents"
},
{
"columns": [
{
"tiles": [
{
"description": "Murder and Non Negligent Manslaughter Cases in California",
"statVarKey": [
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Intimidation_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime"
],
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "HIGHLIGHT"
"title": "Hate Crime Cases by Type in California",
"type": "LINE"
}
]
}
],
"denom": "Count_Person",
"title": "Murder and Non Negligent Manslaughter Cases"
"title": "Hate Crime Cases by Type"
},
{
"columns": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"Parts": [
{
"CosineScore": [
0.9257857799530029
0.9257858991622925
],
"QueryPart": "male population",
"SV": [
Expand All @@ -63,8 +63,8 @@
},
{
"CosineScore": [
0.9296980500221252,
0.8847433924674988
0.9296978712081909,
0.884743332862854
],
"QueryPart": "female population",
"SV": [
Expand All @@ -80,7 +80,7 @@
"Parts": [
{
"CosineScore": [
0.9117770791053772
0.911777138710022
],
"QueryPart": "male population female",
"SV": [
Expand All @@ -89,8 +89,8 @@
},
{
"CosineScore": [
0.8982378840446472,
0.8723467588424683
0.8982377648353577,
0.8723466396331787
],
"QueryPart": "population",
"SV": [
Expand All @@ -106,9 +106,9 @@
"Parts": [
{
"CosineScore": [
0.8034241199493408,
0.8017654418945312,
0.775155782699585,
0.8034241795539856,
0.8017653822898865,
0.7751558423042297,
0.7698014974594116,
0.7586972713470459
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@
"Parts": [
{
"CosineScore": [
0.7768431901931763,
0.7495046257972717,
0.7342654466629028,
0.7325114011764526
0.7768430709838867,
0.749504566192627,
0.7342653274536133,
0.7325113415718079
],
"QueryPart": "factors like obesity blood pressure asthma",
"SV": [
Expand All @@ -128,7 +128,7 @@
},
{
"CosineScore": [
0.9056920409202576
0.9056921005249023
],
"QueryPart": "impacted climate change",
"SV": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
"contained_in_place_type": "City",
"had_default_type": false,
"type": 4
},
{
"type": 14
}
],
"client": "test_detect",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
Copy link
Contributor Author

@chejennifer chejennifer Jul 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we lose these startWithDenom fields because queries like "unemployment rate", "mortality rate", etc. no longer get classified as PerCapita. That is a result of the change to the PerCapita stop words, where the plain "rate" stop word became a regex that matches "rate" only when it is not part of "unemployment rate", "mortality rate", etc.

I wonder if we need some special treatment here, where we still classify these queries as PerCapita but don't remove those specific stop words.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This then feels like an improvement, right?

The original query, "poverty vs. unemployment rate", isn't a per-capita query, so doesn't it make sense to not start with per-capita enabled?

"title": "Population Below Poverty Line"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Count of Mortality Event: 1 Years or Less in Counties of Massachusetts"
},
{
Expand All @@ -83,7 +82,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Causes of Infant Mortality"
},
{
Expand Down Expand Up @@ -116,7 +114,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Mortality Events (1 Years or Less): Symptoms, Signs and Abnormal Clinical and Laboratory Findings, Not Elsewhere Classified) in Counties of Massachusetts"
},
{
Expand Down Expand Up @@ -149,7 +146,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Mortality Events (1 Years or Less): Certain Conditions Originating in the Perinatal Period) in Counties of Massachusetts"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Literate vs. Population Below poverty line"
},
{
Expand Down Expand Up @@ -56,7 +55,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Literate in Administrative Area 1 Places of India"
},
{
Expand Down Expand Up @@ -89,7 +87,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population Below Poverty Line in Administrative Area 1 Places of India"
},
{
Expand All @@ -111,7 +108,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Illiterate vs. Population Below poverty line (Per Capita)"
},
{
Expand Down Expand Up @@ -195,7 +191,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Literate vs. Population Below poverty line (Per Capita)"
},
{
Expand All @@ -217,7 +212,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Illiterate vs. Population Below poverty line"
}
],
Expand Down
Loading
Loading