#!/usr/bin/env python3
"""Compare ISO 3166-1 alpha-2 countries against a hardcoded country-name list.

For every pycountry entry with a two-letter code, one CSV row records:

* the hardcoded label planned for that code (``planned_migration_code``),
* the canonical ISO short name,
* every hardcoded label that ``get_country_iso_code`` resolves to that code,
* three Y/N flags derived from the values above.

Hardcoded labels for which no planned code can be determined are appended as
rows with an empty ``alpha2`` column.
"""
import csv
import functools
import re

import pycountry

# Hardcoded labels under review.  Duplicates ('Congo', 'Korea',
# 'Virgin Islands') are kept exactly as they appear in the original list.
COUNTRY_LIST = [
    'Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
    'Angola', 'Anguilla', 'Antarctica', 'Antigua and Barbuda', 'Argentina',
    'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas',
    'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize',
    'Benin', 'Bermuda', 'Bhutan', 'Bolivia',
    'Bonaire, Sint Eustatius and Saba', 'Bosnia and Herzegovina', 'Botswana',
    'Bouvet Island', 'Brazil', 'British Indian Ocean Territory',
    'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde',
    'Cambodia', 'Cameroon', 'Canada', 'Cayman Islands',
    'Central African Republic', 'Chad', 'Chile', 'China', 'Christmas Island',
    'Cocos', 'Colombia', 'Comoros', 'Congo', 'Congo', 'Cook Islands',
    'Costa Rica', 'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czechia',
    "Côte d'Ivoire", 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic',
    'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
    'Estonia', 'Eswatini', 'Ethiopia', 'Falkland Islands', 'Faroe Islands',
    'Fiji', 'Finland', 'France', 'French Guiana', 'French Polynesia',
    'French Southern Territories', 'Gabon', 'Gambia', 'Georgia', 'Germany',
    'Ghana', 'Gibraltar', 'Greece', 'Greenland', 'Grenada', 'Guadeloupe',
    'Guam', 'Guatemala', 'Guernsey', 'Guinea', 'Guinea-Bissau', 'Guyana',
    'Haiti', 'Heard Island and McDonald Islands', 'Holy See', 'Honduras',
    'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq',
    'Ireland', 'Isle of Man', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jersey',
    'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Korea', 'Korea', 'Kuwait',
    'Kyrgyzstan', "Lao People's Democratic Republic", 'Latvia', 'Lebanon',
    'Lesotho', 'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg',
    'Macao', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta',
    'Marshall Islands', 'Martinique', 'Mauritania', 'Mauritius', 'Mayotte',
    'Mexico', 'Micronesia', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro',
    'Montserrat', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nauru',
    'Nepal', 'Netherlands', 'New Caledonia', 'New Zealand', 'Nicaragua',
    'Niger', 'Nigeria', 'Niue', 'Norfolk Island', 'Northern Mariana Islands',
    'Norway', 'Oman', 'Pakistan', 'Palau', 'Palestine, State of', 'Panama',
    'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Pitcairn',
    'Poland', 'Portugal', 'Puerto Rico', 'Qatar',
    'Republic of North Macedonia', 'Romania', 'Russian Federation', 'Rwanda',
    'Réunion', 'Saint Barthélemy',
    'Saint Helena, Ascension and Tristan da Cunha', 'Saint Kitts and Nevis',
    'Saint Lucia', 'Saint Martin', 'Saint Pierre and Miquelon',
    'Saint Vincent and the Grenadines', 'Samoa', 'San Marino',
    'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia',
    'Seychelles', 'Sierra Leone', 'Singapore', 'Sint Maarten', 'Slovakia',
    'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa',
    'South Georgia and the South Sandwich Islands', 'South Sudan', 'Spain',
    'Sri Lanka', 'Sudan', 'Suriname', 'Svalbard and Jan Mayen', 'Sweden',
    'Switzerland', 'Syrian Arab Republic', 'Taiwan', 'Tajikistan',
    'Tanzania, United Republic of', 'Thailand', 'Timor-Leste', 'Togo',
    'Tokelau', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey',
    'Turkmenistan', 'Turks and Caicos Islands', 'Tuvalu', 'Uganda', 'Ukraine',
    'United Arab Emirates',
    'United Kingdom of Great Britain and Northern Ireland',
    'United States Minor Outlying Islands', 'United States of America',
    'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela', 'Viet Nam',
    'Virgin Islands', 'Virgin Islands', 'Wallis and Futuna', 'Western Sahara',
    'Yemen', 'Zambia', 'Zimbabwe', 'Åland Islands',
]

# Labels whose planned code is pinned instead of being resolved via pycountry.
_PLANNED_OVERRIDES = {
    "Korea": "KR",
    "Virgin Islands": "VI",
    "Sint Maarten": "SX",
    "Saint Martin": "MF",
}

# NOTE(review): the file name says "v5" while the diff that adds this script
# names it "..._v6.py" -- confirm whether the output name should be bumped.
_DEFAULT_OUT_PATH = "countries_iso_vs_hardcoded_planned_migration_v5_utf8_bom.csv"

_CSV_HEADER = [
    "alpha2",
    "planned_migration",
    "iso_short_name_canonical_en",
    "how_backend_logic_maps_hardcoded_names_to_country_codes",
    "hardcoded_list_contains_iso_canonical",
    "hard_coded_name_maps_to_code",
    "iso_canonical_maps_to_same_code_via_backend_logic",
]

_ALPHA2_RE = re.compile(r"[A-Z]{2}")


def is_alpha2(code: str) -> bool:
    """Return True when *code* is exactly two uppercase ASCII letters."""
    return bool(_ALPHA2_RE.fullmatch(code))


@functools.cache
def get_country_iso_code(country_name: str) -> str | None:
    """Return the 2-letter ISO representation for a country (backend logic).

    Lookup order: exact ``name`` match, exact ``official_name`` match, then
    the first ``search_fuzzy`` result.  Returns ``None`` when nothing matches
    or when any lookup raises.

    Results are memoized: ``main`` resolves every hardcoded label twice and
    every canonical ISO name once, so repeated lookups would otherwise be
    recomputed.
    """
    # NOTE(review): the broad ``except`` presumably mirrors the backend's
    # behaviour and is therefore left untouched -- confirm before narrowing.
    try:
        country = pycountry.countries.get(name=country_name)
        if country:
            return country.alpha_2
        country = pycountry.countries.get(official_name=country_name)
        if country:
            return country.alpha_2
        matches = pycountry.countries.search_fuzzy(country_name)
        if matches:
            return matches[0].alpha_2
    except Exception:
        return None
    return None


def planned_migration_code(label: str) -> str | None:
    """Return the alpha-2 code planned for *label*.

    A few labels are pinned to fixed codes; every other label is resolved
    with ``get_country_iso_code``.
    """
    pinned = _PLANNED_OVERRIDES.get(label)
    if pinned is not None:
        return pinned
    return get_country_iso_code(label)


def _backend_labels_by_code(labels: list[str]) -> dict[str, str]:
    """Map each code to the "; "-joined distinct labels the backend logic resolves to it."""
    grouped: dict[str, list[str]] = {}
    for label in labels:
        code = get_country_iso_code(label)
        if not code or not is_alpha2(code):
            continue
        bucket = grouped.setdefault(code, [])
        if label not in bucket:  # keep first-seen order, drop repeats
            bucket.append(label)
    return {code: "; ".join(bucket) for code, bucket in grouped.items()}


def _planned_labels(labels: list[str]) -> tuple[dict[str, str], list[str]]:
    """Return (first label per planned code, distinct labels without a valid planned code)."""
    by_code: dict[str, str] = {}
    unmatched: list[str] = []
    for label in labels:
        code = planned_migration_code(label)
        if not code or not is_alpha2(code):
            unmatched.append(label)
            continue
        by_code.setdefault(code, label)  # the first label wins for a code
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return by_code, list(dict.fromkeys(unmatched))


def _build_rows() -> list[tuple[str, ...]]:
    """Build the CSV data rows, sorted by code with code-less rows last."""
    backend_by_code = _backend_labels_by_code(COUNTRY_LIST)
    planned_by_code, unmatched = _planned_labels(COUNTRY_LIST)
    hardcoded = set(COUNTRY_LIST)

    rows: list[tuple[str, ...]] = []
    for country in pycountry.countries:
        code = getattr(country, "alpha_2", None)
        if not code or not is_alpha2(code):
            continue
        iso_name = country.name
        backend_labels = backend_by_code.get(code, "")
        rows.append((
            code,
            planned_by_code.get(code, ""),
            iso_name,
            backend_labels,
            "Y" if iso_name in hardcoded else "N",
            "Y" if backend_labels else "N",
            # Does the canonical ISO name map back to its own code via the
            # backend logic?
            "Y" if get_country_iso_code(iso_name) == code else "N",
        ))

    rows.extend(("", label, "", "", "", "", "") for label in unmatched)
    # Rows with a code sort alphabetically; rows with an empty code go last
    # (the sort is stable, so they keep their first-seen order).
    rows.sort(key=lambda row: (row[0] == "", row[0]))
    return rows


def main(out_path: str = _DEFAULT_OUT_PATH) -> None:
    """Write the comparison CSV to *out_path* and print the row count.

    The file is written as UTF-8 with a BOM (``utf-8-sig``).
    """
    rows = _build_rows()
    with open(out_path, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        writer.writerow(_CSV_HEADER)
        writer.writerows(rows)

    print(f"Wrote {len(rows)} rows to {out_path}")


if __name__ == "__main__":
    main()