Skip to content

Commit

Permalink
remove per capita stop words (#4456)
Browse files Browse the repository at this point in the history
same change as #4415 but
rebased off a clean master

svindex diff:
https://storage.mtls.cloud.google.com/datcom-embedding-diffs/chejennifer_base_uae_mem_2024_07_09_21_55_14.html

base does not remove per capita stop words
test removes per capita stop words
  • Loading branch information
chejennifer authored Jul 12, 2024
1 parent 10a081d commit 95e7096
Show file tree
Hide file tree
Showing 22 changed files with 124 additions and 134 deletions.
2 changes: 1 addition & 1 deletion deploy/nl/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ indexes:
base_uae_mem:
store_type: MEMORY
source_path: ../../tools/nl/embeddings/input/base
embeddings_path: gs://datcom-nl-models/base_uae_mem_2024_07_11_08_35_57/embeddings.csv
embeddings_path: gs://datcom-nl-models/base_uae_mem_2024_07_12_09_03_25/embeddings.csv
model: uae-large-v1-model
healthcheck_query: "Life expectancy"
base_mistral_mem:
Expand Down
2 changes: 1 addition & 1 deletion run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ function run_lint_fix {
pip3 install isort -q
fi
yapf -r -i -p --style='{based_on_style: google, indent_width: 2}' server/ nl_server/ shared/ tools/ -e=*pb2.py -e=**/.env/**
isort server/ nl_server/ shared/ tools/ --skip-glob *pb2.py --skip-glob **/.env/** --profile google
isort server/ nl_server/ shared/ tools/ --skip-glob=*pb2.py --skip-glob=**/.env/** --profile=google
deactivate
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,16 @@
"geoId/12"
],
"statVarKey": [
"Count_CriminalIncidents_IsHateCrime_multiple_place_bar_block"
"Count_CriminalActivities_MurderAndNonNegligentManslaughter_multiple_place_bar_block"
],
"title": "Hate Crime Incidents (${date})",
"title": "Murder and Non Negligent Manslaughter Cases (${date})",
"type": "BAR"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Incidents"
"title": "Murder and Non Negligent Manslaughter Cases"
},
{
"columns": [
Expand All @@ -104,17 +104,16 @@
"geoId/12"
],
"statVarKey": [
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime_multiple_place_bar_block",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime_multiple_place_bar_block"
"Count_CriminalIncidents_IsHateCrime_multiple_place_bar_block"
],
"title": "Hate Crime Cases by Type (${date})",
"title": "Hate Crime Incidents (${date})",
"type": "BAR"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Cases by Type"
"title": "Hate Crime Incidents"
},
{
"columns": [
Expand All @@ -131,16 +130,17 @@
"geoId/12"
],
"statVarKey": [
"Count_CriminalActivities_MurderAndNonNegligentManslaughter_multiple_place_bar_block"
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime_multiple_place_bar_block",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime_multiple_place_bar_block"
],
"title": "Murder and Non Negligent Manslaughter Cases (${date})",
"title": "Hate Crime Cases by Type (${date})",
"type": "BAR"
}
]
}
],
"denom": "Count_Person",
"title": "Murder and Non Negligent Manslaughter Cases"
"title": "Hate Crime Cases by Type"
},
{
"columns": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,76 +71,76 @@
"tiles": [
{
"statVarKey": [
"Count_CriminalIncidents_IsHateCrime"
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
],
"title": "Hate Crime Incidents in California",
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "LINE"
}
]
},
{
"tiles": [
{
"description": "Hate Crime Incidents in California",
"description": "Murder and Non Negligent Manslaughter Cases in California",
"statVarKey": [
"Count_CriminalIncidents_IsHateCrime"
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
],
"title": "Hate Crime Incidents in California",
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "HIGHLIGHT"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Incidents"
"title": "Murder and Non Negligent Manslaughter Cases"
},
{
"columns": [
{
"tiles": [
{
"statVarKey": [
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Intimidation_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime"
"Count_CriminalIncidents_IsHateCrime"
],
"title": "Hate Crime Cases by Type in California",
"title": "Hate Crime Incidents in California",
"type": "LINE"
}
]
}
],
"denom": "Count_Person",
"title": "Hate Crime Cases by Type"
},
{
"columns": [
},
{
"tiles": [
{
"description": "Hate Crime Incidents in California",
"statVarKey": [
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
"Count_CriminalIncidents_IsHateCrime"
],
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "LINE"
"title": "Hate Crime Incidents in California",
"type": "HIGHLIGHT"
}
]
},
}
],
"denom": "Count_Person",
"title": "Hate Crime Incidents"
},
{
"columns": [
{
"tiles": [
{
"description": "Murder and Non Negligent Manslaughter Cases in California",
"statVarKey": [
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Intimidation_IsHateCrime",
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime"
],
"title": "Murder and Non Negligent Manslaughter Cases in California",
"type": "HIGHLIGHT"
"title": "Hate Crime Cases by Type in California",
"type": "LINE"
}
]
}
],
"denom": "Count_Person",
"title": "Murder and Non Negligent Manslaughter Cases"
"title": "Hate Crime Cases by Type"
},
{
"columns": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"Parts": [
{
"CosineScore": [
0.9257857799530029
0.9257858991622925
],
"QueryPart": "male population",
"SV": [
Expand All @@ -63,8 +63,8 @@
},
{
"CosineScore": [
0.9296980500221252,
0.8847433924674988
0.9296978712081909,
0.884743332862854
],
"QueryPart": "female population",
"SV": [
Expand All @@ -80,7 +80,7 @@
"Parts": [
{
"CosineScore": [
0.9117770791053772
0.911777138710022
],
"QueryPart": "male population female",
"SV": [
Expand All @@ -89,8 +89,8 @@
},
{
"CosineScore": [
0.8982378840446472,
0.8723467588424683
0.8982377648353577,
0.8723466396331787
],
"QueryPart": "population",
"SV": [
Expand All @@ -106,9 +106,9 @@
"Parts": [
{
"CosineScore": [
0.8034241199493408,
0.8017654418945312,
0.775155782699585,
0.8034241795539856,
0.8017653822898865,
0.7751558423042297,
0.7698014974594116,
0.7586972713470459
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@
"Parts": [
{
"CosineScore": [
0.7768431901931763,
0.7495046257972717,
0.7342654466629028,
0.7325114011764526
0.7768430709838867,
0.749504566192627,
0.7342653274536133,
0.7325113415718079
],
"QueryPart": "factors like obesity blood pressure asthma",
"SV": [
Expand All @@ -128,7 +128,7 @@
},
{
"CosineScore": [
0.9056920409202576
0.9056921005249023
],
"QueryPart": "impacted climate change",
"SV": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
"contained_in_place_type": "City",
"had_default_type": false,
"type": 4
},
{
"type": 14
}
],
"client": "test_detect",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population Below Poverty Line"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Count of Mortality Event: 1 Years or Less in Counties of Massachusetts"
},
{
Expand All @@ -83,7 +82,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Causes of Infant Mortality"
},
{
Expand Down Expand Up @@ -116,7 +114,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Mortality Events (1 Years or Less): Symptoms, Signs and Abnormal Clinical and Laboratory Findings, Not Elsewhere Classified) in Counties of Massachusetts"
},
{
Expand Down Expand Up @@ -149,7 +146,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Mortality Events (1 Years or Less): Certain Conditions Originating in the Perinatal Period) in Counties of Massachusetts"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Literate vs. Population Below poverty line"
},
{
Expand Down Expand Up @@ -56,7 +55,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Literate in Administrative Area 1 Places of India"
},
{
Expand Down Expand Up @@ -89,7 +87,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population Below Poverty Line in Administrative Area 1 Places of India"
},
{
Expand All @@ -111,7 +108,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Illiterate vs. Population Below poverty line (Per Capita)"
},
{
Expand Down Expand Up @@ -195,7 +191,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Literate vs. Population Below poverty line (Per Capita)"
},
{
Expand All @@ -217,7 +212,6 @@
}
],
"denom": "Count_Person",
"startWithDenom": true,
"title": "Population: Illiterate vs. Population Below poverty line"
}
],
Expand Down
Loading

0 comments on commit 95e7096

Please sign in to comment.