-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f936daa
commit 8df53d8
Showing
3 changed files
with
105 additions
and
198 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
name = "Encodings" | ||
uuid = "8275c4fe-57c3-4fbf-b39c-271e6148849a" | ||
authors = ["pedromxavier <[email protected]>"] | ||
version = "0.1.0" | ||
version = "0.1.1" | ||
|
||
[compat] | ||
julia = "1.6" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,199 +1,106 @@ | ||
# ISO_LATIN_1 tables from: | ||
# https://www.ime.usp.br/~pf/algoritmos/apend/iso-latin-1.html | ||
# Encoding table: maps each character of the upper half of ISO-8859-1 | ||
# (bytes 0xA0 through 0xFF) to its single-byte code. | ||
# Invisible characters (non-breaking space, soft hyphen) are written as | ||
# `\u` escapes so they cannot be lost or confused with a plain space. | ||
const ENCODE_ISO_LATIN_1 = Dict{Char, UInt8}( | ||
'\u00a0' => 0xA0, # non-breaking space | ||
'¡' => 0xA1, # inverted exclamation mark | ||
'¢' => 0xA2, # cent sign | ||
'£' => 0xA3, # pound sterling sign | ||
'¤' => 0xA4, # general currency sign | ||
'¥' => 0xA5, # yen sign | ||
'¦' => 0xA6, # broken vertical bar | ||
'§' => 0xA7, # section sign | ||
'¨' => 0xA8, # spacing dieresis or umlaut | ||
'©' => 0xA9, # copyright sign | ||
'ª' => 0xAA, # feminine ordinal indicator | ||
'«' => 0xAB, # left double angle quote (guillemet) | ||
'¬' => 0xAC, # logical not sign | ||
'\u00ad' => 0xAD, # soft hyphen | ||
'®' => 0xAE, # registered trademark sign | ||
'¯' => 0xAF, # spacing macron | ||
'°' => 0xB0, # degree sign | ||
'±' => 0xB1, # plus-or-minus sign | ||
'²' => 0xB2, # superscript 2 | ||
'³' => 0xB3, # superscript 3 | ||
'´' => 0xB4, # spacing acute accent | ||
'µ' => 0xB5, # micro sign, mu | ||
'¶' => 0xB6, # paragraph sign, pilcrow | ||
'·' => 0xB7, # middle dot | ||
'¸' => 0xB8, # spacing cedilla | ||
'¹' => 0xB9, # superscript 1 | ||
'º' => 0xBA, # masculine ordinal indicator | ||
'»' => 0xBB, # right double angle quote (guillemet) | ||
'¼' => 0xBC, # fraction 1/4 | ||
'½' => 0xBD, # fraction 1/2 | ||
'¾' => 0xBE, # fraction 3/4 | ||
'¿' => 0xBF, # inverted question mark | ||
'À' => 0xC0, # capital A grave | ||
'Á' => 0xC1, # capital A acute | ||
'Â' => 0xC2, # capital A circumflex | ||
'Ã' => 0xC3, # capital A tilde | ||
'Ä' => 0xC4, # capital A dieresis or umlaut | ||
'Å' => 0xC5, # capital A ring | ||
'Æ' => 0xC6, # capital AE ligature | ||
'Ç' => 0xC7, # capital C cedilla | ||
'È' => 0xC8, # capital E grave | ||
'É' => 0xC9, # capital E acute | ||
'Ê' => 0xCA, # capital E circumflex | ||
'Ë' => 0xCB, # capital E dieresis or umlaut | ||
'Ì' => 0xCC, # capital I grave | ||
'Í' => 0xCD, # capital I acute | ||
'Î' => 0xCE, # capital I circumflex | ||
'Ï' => 0xCF, # capital I dieresis or umlaut | ||
'Ð' => 0xD0, # capital ETH (Icelandic) | ||
'Ñ' => 0xD1, # capital N tilde | ||
'Ò' => 0xD2, # capital O grave | ||
'Ó' => 0xD3, # capital O acute | ||
'Ô' => 0xD4, # capital O circumflex | ||
'Õ' => 0xD5, # capital O tilde | ||
'Ö' => 0xD6, # capital O dieresis or umlaut | ||
'×' => 0xD7, # multiplication sign | ||
'Ø' => 0xD8, # capital O slash | ||
'Ù' => 0xD9, # capital U grave | ||
'Ú' => 0xDA, # capital U acute | ||
'Û' => 0xDB, # capital U circumflex | ||
'Ü' => 0xDC, # capital U dieresis or umlaut | ||
'Ý' => 0xDD, # capital Y acute | ||
'Þ' => 0xDE, # capital THORN (Icelandic) | ||
'ß' => 0xDF, # small sharp s, sz ligature (German) | ||
'à' => 0xE0, # small a grave | ||
'á' => 0xE1, # small a acute | ||
'â' => 0xE2, # small a circumflex | ||
'ã' => 0xE3, # small a tilde | ||
'ä' => 0xE4, # small a dieresis or umlaut | ||
'å' => 0xE5, # small a ring | ||
'æ' => 0xE6, # small ae ligature | ||
'ç' => 0xE7, # small c cedilla | ||
'è' => 0xE8, # small e grave | ||
'é' => 0xE9, # small e acute | ||
'ê' => 0xEA, # small e circumflex | ||
'ë' => 0xEB, # small e dieresis or umlaut | ||
'ì' => 0xEC, # small i grave | ||
'í' => 0xED, # small i acute | ||
'î' => 0xEE, # small i circumflex | ||
'ï' => 0xEF, # small i dieresis or umlaut | ||
'ð' => 0xF0, # small eth (Icelandic) | ||
'ñ' => 0xF1, # small n tilde | ||
'ò' => 0xF2, # small o grave | ||
'ó' => 0xF3, # small o acute | ||
'ô' => 0xF4, # small o circumflex | ||
'õ' => 0xF5, # small o tilde | ||
'ö' => 0xF6, # small o dieresis or umlaut | ||
'÷' => 0xF7, # division sign | ||
'ø' => 0xF8, # small o slash | ||
'ù' => 0xF9, # small u grave | ||
'ú' => 0xFA, # small u acute | ||
'û' => 0xFB, # small u circumflex | ||
'ü' => 0xFC, # small u dieresis or umlaut | ||
'ý' => 0xFD, # small y acute | ||
'þ' => 0xFE, # small thorn (Icelandic) | ||
'ÿ' => 0xFF, # small y dieresis or umlaut | ||
) | ||
# https://cs.stanford.edu/people/miles/iso8859.html | ||
|
||
# Decoding table: maps each byte of the upper half of ISO-8859-1 | ||
# (0xA0 through 0xFF) to the corresponding character. | ||
# Invisible characters (non-breaking space, soft hyphen) are written as | ||
# `\u` escapes so they cannot be lost or confused with a plain space. | ||
const DECODE_ISO_LATIN_1 = Dict{UInt8, Char}( | ||
0xA0 => '\u00a0', # non-breaking space | ||
0xA1 => '¡', # inverted exclamation mark | ||
0xA2 => '¢', # cent sign | ||
0xA3 => '£', # pound sterling sign | ||
0xA4 => '¤', # general currency sign | ||
0xA5 => '¥', # yen sign | ||
0xA6 => '¦', # broken vertical bar | ||
0xA7 => '§', # section sign | ||
0xA8 => '¨', # spacing dieresis or umlaut | ||
0xA9 => '©', # copyright sign | ||
0xAA => 'ª', # feminine ordinal indicator | ||
0xAB => '«', # left double angle quote (guillemet) | ||
0xAC => '¬', # logical not sign | ||
0xAD => '\u00ad', # soft hyphen | ||
0xAE => '®', # registered trademark sign | ||
0xAF => '¯', # spacing macron | ||
0xB0 => '°', # degree sign | ||
0xB1 => '±', # plus-or-minus sign | ||
0xB2 => '²', # superscript 2 | ||
0xB3 => '³', # superscript 3 | ||
0xB4 => '´', # spacing acute accent | ||
0xB5 => 'µ', # micro sign, mu | ||
0xB6 => '¶', # paragraph sign, pilcrow | ||
0xB7 => '·', # middle dot | ||
0xB8 => '¸', # spacing cedilla | ||
0xB9 => '¹', # superscript 1 | ||
0xBA => 'º', # masculine ordinal indicator | ||
0xBB => '»', # right double angle quote (guillemet) | ||
0xBC => '¼', # fraction 1/4 | ||
0xBD => '½', # fraction 1/2 | ||
0xBE => '¾', # fraction 3/4 | ||
0xBF => '¿', # inverted question mark | ||
0xC0 => 'À', # capital A grave | ||
0xC1 => 'Á', # capital A acute | ||
0xC2 => 'Â', # capital A circumflex | ||
0xC3 => 'Ã', # capital A tilde | ||
0xC4 => 'Ä', # capital A dieresis or umlaut | ||
0xC5 => 'Å', # capital A ring | ||
0xC6 => 'Æ', # capital AE ligature | ||
0xC7 => 'Ç', # capital C cedilla | ||
0xC8 => 'È', # capital E grave | ||
0xC9 => 'É', # capital E acute | ||
0xCA => 'Ê', # capital E circumflex | ||
0xCB => 'Ë', # capital E dieresis or umlaut | ||
0xCC => 'Ì', # capital I grave | ||
0xCD => 'Í', # capital I acute | ||
0xCE => 'Î', # capital I circumflex | ||
0xCF => 'Ï', # capital I dieresis or umlaut | ||
0xD0 => 'Ð', # capital ETH (Icelandic) | ||
0xD1 => 'Ñ', # capital N tilde | ||
0xD2 => 'Ò', # capital O grave | ||
0xD3 => 'Ó', # capital O acute | ||
0xD4 => 'Ô', # capital O circumflex | ||
0xD5 => 'Õ', # capital O tilde | ||
0xD6 => 'Ö', # capital O dieresis or umlaut | ||
0xD7 => '×', # multiplication sign | ||
0xD8 => 'Ø', # capital O slash | ||
0xD9 => 'Ù', # capital U grave | ||
0xDA => 'Ú', # capital U acute | ||
0xDB => 'Û', # capital U circumflex | ||
0xDC => 'Ü', # capital U dieresis or umlaut | ||
0xDD => 'Ý', # capital Y acute | ||
0xDE => 'Þ', # capital THORN (Icelandic) | ||
0xDF => 'ß', # small sharp s, sz ligature (German) | ||
0xE0 => 'à', # small a grave | ||
0xE1 => 'á', # small a acute | ||
0xE2 => 'â', # small a circumflex | ||
0xE3 => 'ã', # small a tilde | ||
0xE4 => 'ä', # small a dieresis or umlaut | ||
0xE5 => 'å', # small a ring | ||
0xE6 => 'æ', # small ae ligature | ||
0xE7 => 'ç', # small c cedilla | ||
0xE8 => 'è', # small e grave | ||
0xE9 => 'é', # small e acute | ||
0xEA => 'ê', # small e circumflex | ||
0xEB => 'ë', # small e dieresis or umlaut | ||
0xEC => 'ì', # small i grave | ||
0xED => 'í', # small i acute | ||
0xEE => 'î', # small i circumflex | ||
0xEF => 'ï', # small i dieresis or umlaut | ||
0xF0 => 'ð', # small eth (Icelandic) | ||
0xF1 => 'ñ', # small n tilde | ||
0xF2 => 'ò', # small o grave | ||
0xF3 => 'ó', # small o acute | ||
0xF4 => 'ô', # small o circumflex | ||
0xF5 => 'õ', # small o tilde | ||
0xF6 => 'ö', # small o dieresis or umlaut | ||
0xF7 => '÷', # division sign | ||
0xF8 => 'ø', # small o slash | ||
0xF9 => 'ù', # small u grave | ||
0xFA => 'ú', # small u acute | ||
0xFB => 'û', # small u circumflex | ||
0xFC => 'ü', # small u dieresis or umlaut | ||
0xFD => 'ý', # small y acute | ||
0xFE => 'þ', # small thorn (Icelandic) | ||
0xFF => 'ÿ', # small y dieresis or umlaut | ||
) | ||
# Special Characters | ||
0xA0 => '\u00a0', # non-breaking space | ||
0xA1 => '¡', # inverted exclamation mark | ||
0xA2 => '¢', # cent sign | ||
0xA3 => '£', # pound sterling sign | ||
0xA4 => '¤', # general currency sign | ||
0xA5 => '¥', # yen sign | ||
0xA6 => '¦', # broken vertical bar | ||
0xA7 => '§', # section sign | ||
0xA8 => '¨', # spacing dieresis or umlaut | ||
0xA9 => '©', # copyright sign | ||
0xAA => 'ª', # feminine ordinal sign | ||
0xAB => '«', # left double angle quote or guillemet | ||
0xAC => '¬', # logical not sign | ||
0xAD => '', # soft hyphen | ||
0xAE => '®', # registered trademark sign | ||
0xAF => '¯', # spacing macron (long accent) | ||
0xB0 => '°', # degree sign | ||
0xB1 => '±', # plus-or-minus sign | ||
0xB2 => '²', # superscript 2 | ||
0xB3 => '³', # superscript 3 | ||
0xB4 => '´', # spacing acute accent | ||
0xB5 => 'µ', # micro sign, mu | ||
0xB6 => '¶', # paragraph sign, pilcrow sign | ||
0xB7 => '·', # middle dot, centered dot | ||
0xB8 => '¸', # spacing cedilla | ||
0xB9 => '¹', # superscript 1 | ||
0xBA => 'º', # masculine ordinal indicator | ||
0xBB => '»', # right double angle quote or guillemet | ||
0xBC => '¼', # fraction 1/4 | ||
0xBD => '½', # fraction 1/2 | ||
0xBE => '¾', # fraction 3/4 | ||
0xBF => '¿', # inverted question mark | ||
# Upper Case Latin-1 Letters | ||
0xC0 => 'À', # capital A grave | ||
0xC1 => 'Á', # capital A acute | ||
0xC2 => 'Â', # capital A circumflex | ||
0xC3 => 'Ã', # capital A tilde | ||
0xC4 => 'Ä', # capital A dieresis or umlaut | ||
0xC5 => 'Å', # capital A ring | ||
0xC6 => 'Æ', # capital AE ligature | ||
0xC7 => 'Ç', # capital C cedilla | ||
0xC8 => 'È', # capital E grave | ||
0xC9 => 'É', # capital E acute | ||
0xCA => 'Ê', # capital E circumflex | ||
0xCB => 'Ë', # capital E dieresis or umlaut | ||
0xCC => 'Ì', # capital I grave | ||
0xCD => 'Í', # capital I acute | ||
0xCE => 'Î', # capital I circumflex | ||
0xCF => 'Ï', # capital I dieresis or umlaut | ||
0xD0 => 'Ð', # capital ETH | ||
0xD1 => 'Ñ', # capital N tilde | ||
0xD2 => 'Ò', # capital O grave | ||
0xD3 => 'Ó', # capital O acute | ||
0xD4 => 'Ô', # capital O circumflex | ||
0xD5 => 'Õ', # capital O tilde | ||
0xD6 => 'Ö', # capital O dieresis or umlaut | ||
0xD7 => '×', # multiplication sign | ||
0xD8 => 'Ø', # capital O slash | ||
0xD9 => 'Ù', # capital U grave | ||
0xDA => 'Ú', # capital U acute | ||
0xDB => 'Û', # capital U circumflex | ||
0xDC => 'Ü', # capital U dieresis or umlaut | ||
0xDD => 'Ý', # capital Y acute | ||
0xDE => 'Þ', # capital THORN | ||
0xDF => 'ß', # small sharp s, sz ligature | ||
# Lower Case Latin-1 Letters | ||
0xE0 => 'à', # small a grave | ||
0xE1 => 'á', # small a acute | ||
0xE2 => 'â', # small a circumflex | ||
0xE3 => 'ã', # small a tilde | ||
0xE4 => 'ä', # small a dieresis or umlaut | ||
0xE5 => 'å', # small a ring | ||
0xE6 => 'æ', # small ae ligature | ||
0xE7 => 'ç', # small c cedilla | ||
0xE8 => 'è', # small e grave | ||
0xE9 => 'é', # small e acute | ||
0xEA => 'ê', # small e circumflex | ||
0xEB => 'ë', # small e dieresis or umlaut | ||
0xEC => 'ì', # small i grave | ||
0xED => 'í', # small i acute | ||
0xEE => 'î', # small i circumflex | ||
0xEF => 'ï', # small i dieresis or umlaut | ||
0xF0 => 'ð', # small eth | ||
0xF1 => 'ñ', # small n tilde | ||
0xF2 => 'ò', # small o grave | ||
0xF3 => 'ó', # small o acute | ||
0xF4 => 'ô', # small o circumflex | ||
0xF5 => 'õ', # small o tilde | ||
0xF6 => 'ö', # small o dieresis or umlaut | ||
0xF7 => '÷', # division sign | ||
0xF8 => 'ø', # small o slash | ||
0xF9 => 'ù', # small u grave | ||
0xFA => 'ú', # small u acute | ||
0xFB => 'û', # small u circumflex | ||
0xFC => 'ü', # small u dieresis or umlaut | ||
0xFD => 'ý', # small y acute | ||
0xFE => 'þ', # small thorn | ||
0xFF => 'ÿ', # small y dieresis or umlaut | ||
) | ||
|
||
# Encoding table built by inverting DECODE_ISO_LATIN_1: every (byte => character) entry becomes (character => byte). | ||
const ENCODE_ISO_LATIN_1 = Dict{Char, UInt8}(latin_char => byte for (byte, latin_char) in pairs(DECODE_ISO_LATIN_1)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8df53d8
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@JuliaRegistrator register
8df53d8
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registration pull request created: JuliaRegistries/General/80988
After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.
This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via: