#include "healthyAddress.h"

// Byte -> letter-index lookup table: encoding a character becomes a single
// array read rather than a range test (i.e. it avoids branching).
// With an ASCII execution character set, 'A'..'Z' (65..90) map to 0..25 and
// every other byte value maps to 26.
// Layout: 16 entries per row; the index of each row's first entry is noted.
const unsigned int ALPHABET_ENC[256] = {
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x00
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x10
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x20
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x30
  26,  0,  1,  2,  3,  4,  5,  6,   7,  8,  9, 10, 11, 12, 13, 14,  // 0x40: '@', 'A'..'O'
  15, 16, 17, 18, 19, 20, 21, 22,  23, 24, 25, 26, 26, 26, 26, 26,  // 0x50: 'P'..'Z', then non-letters
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x60
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x70
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x80
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0x90
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0xA0
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0xB0
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0xC0
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0xD0
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26,  // 0xE0
  26, 26, 26, 26, 26, 26, 26, 26,  26, 26, 26, 26, 26, 26, 26, 26   // 0xF0
};

// Encode one byte as a letter index.
//
// When the character set is ASCII ('A' == 65) and ALPHABET_SIZE is 27, the
// result is read straight from ALPHABET_ENC.  Otherwise it is computed as the
// offset of x from 'A', with any offset at or beyond ALPHABET_SIZE (including
// bytes below 'A', whose offset wraps around as unsigned) replaced by
// ALPHABET_SIZE itself.
static unsigned int enc_alphabet(unsigned char x) {
#if 'A' == 65 && ALPHABET_SIZE == 27
  return ALPHABET_ENC[(unsigned int)x];
#else
  const unsigned int offset = x - 'A';
  return (offset < ALPHABET_SIZE) ? offset : ALPHABET_SIZE;
#endif
}

// Test helper (takes and returns SEXP, so presumably called from R): looks up
// the first byte of the first string of x in ALPHABET_ENC.
//
// x       a character vector with at least one element; only x[0] is read.
// returns a length-one integer vector holding ALPHABET_ENC[first byte of x[0]].
//         For an empty string the first byte is the terminating '\0'.
// Raises an R error if x is not a character vector or has length zero.
SEXP C_test_ALPHABET_ENC(SEXP x) {
  // Without this guard, STRING_ELT(x, 0) on a zero-length vector reads past
  // the end of the vector's data.
  if (!isString(x) || xlength(x) < 1) {
    error("`x` must be a character vector with at least one element.");
  }
  const char * xp = CHAR(STRING_ELT(x, 0));
  // Convert via unsigned char so that bytes >= 0x80 index entries 128..255
  // instead of becoming negative where plain char is signed.
  unsigned int y = (unsigned char)xp[0];
  return ScalarInteger(ALPHABET_ENC[y]);
}

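// THE_XXXs: for street names of the form 'THE XXX', the XXX part, i.e. the
// name without its leading 'THE '.
// R (data.table) query, presumably the one used to tabulate street names by
// frequency: US[, .N, keyby = .(STREET_NAME)][order(-N)]
// There was one street (with a mere 9 addresses Australia-wide) that contained
// anything other than A-Z or ' ', viz. THE YACHTSMAN'S DRIVE.
// At this time (2023-12-31) it is unclear how to deal with such names.
// NOTE(review): some entries below also contain '-' (e.g. ROCK-COLLINGULLIE ROAD).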
const char * THE_XXXs[N_THE_XXXS] =
  {"ESPLANADE", "AVENUE", "CRESCENT", "BOULEVARDE", "BOULEVARD", "STRAND", "ENTRANCE ROAD", "PARADE", "HORSLEY DRIVE",
   "PARKWAY", "PROMENADE", "TERRACE", "CORSO", "GRAND PARADE", "GROVE", "NORTHERN ROAD", "LAKES WAY", "BUCKETTS WAY", "RIDGE",
   "GRANGE", "RIDGEWAY", "RIVER ROAD", "WOOL ROAD", "PENINSULA", "SCENIC ROAD", "TRONGATE", "ROUND DRIVE", "CREST", "CAUSEWAY",
   "FAIRWAY", "MALL", "CIRCLE", "CIRCUS", "ANCHORAGE", "CIRCUIT", "MEWS", "GLEN", "OUTLOOK", "PARK DRIVE",
   "GLADE", "GATEWAY", "DRIVE", "RISE", "QUARTERDECK", "BROADWAY", "LAKES BOULEVARD", "SPRINGS ROAD", "COMENARRA PARKWAY", "HEIGHTS",
   "CLOSE", "GLEN ROAD", "ESCORT WAY", "PIAZZA", "ELMS BOULEVARD", "LANE", "KINGSWAY", "PALLADIO", "RAMBLE", "EYRIE",
   "COVE ROAD", "PANORAMA", "CONCOURSE", "GOLDEN WAY", "PONDS BOULEVARD", "LAKE CIRCUIT", "RAMPART", "LOOP", "SUMMERLAND WAY", "CROSSING",
   "RIGHI", "LAKES DRIVE", "COVE", "DRESS CIRCLE", "SOUTHERN PARKWAY", "GREENWAY", "POINT DRIVE", "DRIFTWAY", "VISTAS DRIVE", "SERPENTINE",
   "RIDGE ROAD", "CROSSWAY", "BULWARK", "SANCTUARY", "POINT ROAD", "APPIAN WAY", "KRAAL DRIVE", "OUTBACK HIGHWAY", "SOLDIERS ROAD", "INLET DRIVE",
   "ROCK-COLLINGULLIE ROAD", "SNOWY RIVER WAY", "COMMON", "PINNACLE", "POINT", "GAP ROAD", "WALK", "PARAPET", "VALLEY AVENUE", "HIGHWAY",
   "BOGAN WAY", "BROADWATERS", "CHASE ROAD", "BASTION", "POCKET ROAD", "GROVE WAY", "RUINS WAY", "COCKLESHELL", "HALYARD", "COURT",
   "CHASE", "LINKS", "SOVEREIGN MILE", "BASIN ROAD", "KNOLL", "RETREAT", "ARCADE", "HAVEN", "WOODS CIRCUIT", "PALLADIAN DRIVE",
   "SANCTUARY DRIVE", "BOARDWALK", "COVE CRESCENT", "JIB", "CITADEL", "DOMAIN", "OLD STOCK RUN", "INLET", "GLADES", "POINT CIRCUIT",
   "BRIDLE TRACK", "HERMITAGE", "STRAIGHT", "LOOKOUT", "GALLEY", "SUMMIT ROAD", "MAINBRACE", "OVAL DRIVE", "CENTRE", "NOOK",
   "GABLES", "GARDENWAY", "GULLY ROAD", "CENTREWAY", "PLATEAU", "PROMONTORY", "BROADVIEW", "BATTLEMENT", "MARINA", "RANGE BOULEVARD",
   "VINES DRIVE", "ESTUARY", "OAKS", "PROMONTORY DRIVE", "ADDRESS", "OAKS ROAD", "PLAZA", "MAINSAIL", "HELM", "MANSE ROAD",
   "ANNIE WATT CIRCUIT", "CASCADES", "CONCORD", "EMBANKMENT", "HERMITAGE WAY", "VALE", "INLET ROAD", "PEAK ROAD", "TRIANGLE", "BITTERN BOULEVARD",
   "FORESHORE", "BOULDERS ROAD", "ELMS", "FAIRWAYS", "PASSAGE", "AGORA", "DRIVEWAY", "LANES", "LINK", "PARK CHASE",
   "PASS", "BACKWATER", "HEIGHTS BOULEVARD", "NARROWS ROAD", "BILLABONG", "CARRIAGEWAY", "BREAKWATER", "ENTRANCE", "HIGH ROAD", "JACK",
   "OAKS PARADE", "WATERMARK", "BARTIZAN", "COTTELL WAY", "REGENCY", "ACRES WAY", "LEE", "ASPECT", "HEIGHTS ROAD", "OLD OAKS ROAD",
   "PALISADE", "PROMINADE", "STRAIGHT ROAD", "TOR WALK", "BOOMERANG", "LANDINGS", "SADDLE", "SKYLINE", "PARK", "CATTLE TRACK",
   "GLENN", "GRANDSTAND", "VISTA", "SNAKE TRACK", "TROSSACHS", "BEND", "BRIARS", "COVE DRIVE", "GARDENS ROAD", "VAUCLUSE",
   "MAINDECK", "MOUNTAIN WAY", "VALLEY WAY", "WEIR ROAD", "BOWSPRIT", "CIRCLET", "COBB AND CO WAY", "RIDGE WAY", "VILLAGE AVENUE", "WAVES",
   "CUTTING", "COMPANIONWAY", "GLEN CRESCENT", "IRONS DRIVE", "MCGRANE WAY", "OVAL", "ROAD", "SPINNAKER", "HAWTHORNS", "WILLOWS",
   "ENCLAVE", "SPUR", "SUMMIT", "VILLAGE", "DEVIATION", "OUTLOOK ROAD", "QUADRANT", "RINGERS ROAD", "TILLER", "BOOM",
   "QUARTER DECK", "WALKWAY", "BULKHEAD", "ESCARPMENT", "GARDEN WALK", "GREEN", "GUMS", "OLD ROAD", "PATCH ROAD", "QUAY",
   "RETURN", "BLUFF ROAD", "BROADWATER", "CEDARS AVENUE", "CLEARWATER", "HILL", "PINERY", "SCARP", "BRANCH LANE", "ESCARPMENTS",
   "GRANGE ROAD", "MEADOWS", "SPRINGS CLOSE", "BLOCKS", "CHANNON ROAD", "LANDING", "ROCKS ROAD", "BARRACKS", "LODGE", "LOOKDOWN ROAD",
   "PULPIT", "ARCHES", "BEAM", "BIRCHES", "FOREST ROAD", "GARLANDS", "GREAT EASTERN WAY", "SEEKERS CRESCENT", "VINTAGE", "BARONS DRIVE",
   "BEACONS", "BRENTWOODS", "FOREDECK", "PINES GROVE", "BOUNTY", "COTTAGE WAY", "DELL", "LINKS ROAD", "PARKWAY PLACE", "QUADRANGLE",
   "SANCTUARY CLOSE", "SEVEN WAYS", "CLOISTERS", "CORNICHE", "CROFT", "HEATH", "SADDLE ROAD", "WELKIN TRAIL", "YARDARM", "BRIDGE",
   "CORONADO", "CROFTS", "GALLERY", "GIPPS WAY", "POSTERN", "ROCKS BOULEVARD", "SANDS", "ESTATE", "FALLS ROAD", "HORSESHOE",
   "LIDO", "ROCK MANGOPLAH ROAD", "UPPER SANCTUARY DRIVE", "GOOSE WALK", "RAMEO", "RIVERS EDGE", "WALLABY RUN", "WOODLAND", "WRIDGEWAY", "ARENA",
   "BOWERY", "HORIZON", "IRONBARKS", "OVERFLOW", "PATIO", "PINES", "PINES OUTLOOK", "QUAYS", "RIALTO", "VINEYARD",
   "GRAND", "STRAND WEST", "VALLEY ROAD", "WATER COURSE", "WOOL TRACK", "BARBETTE", "BINNACLE", "LANTERNS", "PROVENCE", "VERGE",
   "VILLAGE WAY", "BREEZEWATER", "MOOR", "PARKVIEW", "PEAK", "SHELF ROAD", "SHORES WAY", "BRIDGEWAY", "FRED HOLLOWS WAY", "GETAWAY",
   "LOCALE", "WATER LANE", "BALCONY", "LINKWAY", "MOUNTAIN WAY ROW", "REACH COURT", "STRAITS", "TRACK", "TUNNELL ROAD", "WELCOME ROAD",
   "WOOL LANE", "CEDUS", "DALES CRESCENT", "HATCH ROAD", "JETTY", "LAMBETH WALK", "VILLAGE PLACE", "WATERS", "CAMELS HUMP ROAD", "CULDESAC",
   "CUPS DRIVE", "FARM WAY", "LIMITS ROAD", "MORES", "OTWAY VISTA", "WATERWAYS BOULEVARD", "WHITEWATER", "ZENITH", "BURCHETT", "CROWSNEST",
   "DECKS", "ESPERENCE", "GRANARY", "KNOLL CRESCENT", "MEADOW", "OTWAY", "PONDS WAY", "RIVERSIDE", "WATCHTOWER", "YACHTSMANS DRIVE",
   "BELFRY", "BEND ROAD", "CAPSTAN", "GRAND CIRCUIT", "ROCK COLLINGULLIE ROAD", "SNOWY WAY", "VINES COURT", "WHARF", "WHEELHOUSE", "CEDARS CLOSE",
   "GROVE STREET", "GULLIES ROAD", "HIGHWATER", "MOUNT DRIVE", "PAINTERS ROAD", "PINES AVENUE", "PINES CLOSE", "WATERFRONT", "ATRIUM", "BARBICAN",
   "BLUFF COURT", "COOMBE", "CRITERION", "ISLAND COURT", "MEARS", "OLD NORTHERN ROAD", "POINTE", "RANGE", "SANDS WAY", "BROOK DRIVE",
   "BROW", "BULLOCK TRACK", "CEDARS DRIVE", "CORONET", "CREST COVE", "CUT LINE", "HAVEN DRIVE", "LAIRIDGE", "MERIDIAN", "MIDWAY",
   "RIVULET", "ROBBINS", "ROCK-NARRANDERA ROAD", "BURLINGS", "DOG FENCE ROAD", "FELL", "HIGH TOR", "PARKS", "PAVILION", "PONDS",
   "REEF ROAD", "RIVERWALK", "ROCK NARRANDERA ROAD", "SHUNTOFF", "CENTRE WAY", "CLUMP ROAD", "EXPRESSWAY", "FARM LANE", "GREY GUMS", "PIER",
   "PINES COURT", "PRIORY", "SHEEDY WAY", "STRAND EAST", "VINES", "BALLABOURNEEN", "BRIDLE PATH", "CRESTWAY", "DEN ROAD", "DIP ROAD",
   "FIELDS", "ISLAND ROAD", "ISLE", "LINKS DRIVE", "MARLOWS", "OUTPOST", "PANNICLE", "PORTICO", "TAFFRAIL", "WOODS BOULEVARD",
   "WOODS CLOSE", "ALBENS DRIVE", "BOLTONS", "COPSE", "DELL RETREAT", "RAPIDS", "SILHOUETTE", "STEAD", "STOCKYARD", "CREEK APPROACH",
   "CUT", "EAGLES NEST", "ELBOW", "FRESHWATER", "GUMS COURT", "HABITAT", "HILL COURT", "LAXTON", "MEAD", "MOORINGS",
   "MOUNTAIN ROAD", "QUARRY", "SHEILING", "SPIT", "BLOODWOODS ROAD", "CROSS ROAD", "CROSSOVER", "DESERT", "DUNES", "FLAT",
   "HOLLIES", "LINK ROAD", "SIDELING", "VINEYARDS ROAD", "BARRICADE", "BOLLARD", "CALLISTER", "CIRCLE ROAD", "COTSWOLDS", "DALE",
   "DOVECOTE", "EAGLES NEST ROAD", "GALLEON", "LOCH", "LOOK", "PASS ROAD", "REDOUBT", "RIALTO WEST", "ROPE WALK", "SAND TRAP",
   "SELECTION", "SEMI CIRCLE", "VILLAGE GREEN", "YACHTSMANS DRIVE", "BEATEN TRACK", "COIGN", "GANGWAY", "HOLLOW ROAD", "HOMESTEAD", "OAKS AVENUE",
   "OLD FORD", "TUNNEL ROAD", "WHIM ROAD", "BARRACKS CLOSE", "COAST ROAD", "CREEL", "DAIRY", "DECK", "DOWNS", "GRUB ROAD",
   "LAKELANDS", "LAKES", "LAWRENCE LANE", "LINE ROAD", "OLD COACH ROAD", "PINT POT", "PORT HOLE", "SILO", "TROFFS ROAD", "VINTAGE WAY",
   "WELL", "BLUFF", "BRAES ROAD", "BRIGANTINE", "BUNKER", "COURTYARD", "EAGLES PLACE", "FARM ROAD", "KEEP", "LAKELANDS ROAD",
   "LINKS COURT", "LOUGH COURT", "MAPLES", "MOUNT", "OLIVE GROVE", "PACK TRACK", "PINES ROAD", "RESERVE", "SHORT RUN", "SLIPWAY",
   "SPRINGS AVENUE", "TURKEY ROAD", "ABBEY PLACE", "ARK AVENUE", "BARB", "BASIN", "BOARD WALK", "BOULEVARDS", "BOWER", "BREAKERS ROAD",
   "BUCHAN BYPASS ROAD", "CEDARS", "CORAL GROVE", "CORKSCREW", "CRESCENT ROAD", "CROFT WAY", "CROSSING ROAD", "DESERT ROAD", "GREEN LANE", "GUNBARREL ROAD",
   "HATCH", "HEADS", "LAKES ESPLANADE", "LITTLE TERRACE", "MILL", "MILLSTREAM", "NUGGETT ROAD", "PIPPIN", "PLACE", "RING ROAD",
   "ROYAL PLACE", "TOP DECK STREET", "WHITTON", "WINDING WAY", "ANCHORAGE LANE", "BEACHWAY", "CHANCERY", "COACH ROAD", "CRATER ROAD", "CURLEW",
   "DENE", "GAP", "HEART ARCADE", "OLD CROSSING", "ORANGE ROAD", "PINES LANE", "RANGE ROAD", "SHEEP TRACK", "SIDING", "SPINNEY",
   "SPRINGS", "TRUMP CLOSE", "BOREEN", "CEDARS ROAD", "CRESCENT DRIVE", "DEEP SCHOOL ROAD", "DEPOT ROAD", "GEORGIAN ROAD", "GRASS COUNTRY ROAD", "ISLAND LANE",
   "PEAKS TERRACE", "PERCH", "PINCH LANE", "POOLWAY", "RANCH ROAD", "REST", "RUN WAY", "STABLES LANE", "TOR", "WATER WAY",
   "WATTLES ROAD", "WILLOWS ROAD", "BLOCKS ROAD", "BRIDGES ROAD", "COBBLESTONES", "DANUBE", "DRAWBRIDGE", "GLUT ROAD", "GOAT TRACK", "GRANGE LANE",
   "LAIRDS WAY", "LANEWAY", "MAJORS ROAD", "MASTERS ENCLAVE", "MINERS WAY", "PINES BOULEVARD", "PORTAL", "RAND", "RIDGE TRACK", "RIDGES ROAD",
   "YARDS ROAD", "AVENUE ROAD", "BREEZEWAY", "BROTHERS ROAD", "CELLARS", "CONDUIT", "DETOUR", "ELMS LANE", "FIVES ROAD", "FLAGS ROAD",
   "GORGE LOOP ROAD", "HALL ROAD", "HUMPS ROAD", "JUNGLE ROAD", "MILL ROAD", "MINE ROAD", "NORTHERN PARKWAY", "OAKS STATION ROAD", "OLD BRUNSWICK ROAD", "OUTPOST ROAD",
   "PADDOCK ROAD", "RANGES ROAD", "REST ROAD", "ROCK ROAD", "SANDY LANE", "SCHOOL MALL", "TWINS ROAD", "VALLEY LANE", "VIEW", "VILLAGE LANE",
   "W E MIDDLETON MEMORIAL DRIVE", "WANDERER", "WARRA WILLOWS ROAD",
   "PKWY"};

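// H_THE_XXX[i] is the HashStreetName of THE_XXXs[i], computed on the name
// including the leading 'THE'. Note that the hash for THE YACHTSMAN'S DRIVE
// is computed with the apostrophe included.
// The final entry (for "PKWY") repeats the value stored for "PARKWAY".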
const int H_THE_XXX[N_THE_XXXS] =
  {1085685817, -757960720, 224584167, -1249983575, 92272236, -1123822046, 575376416, -1400317025, 813127430,
   1033948411, 1602809527, 319347146, 1545756190, -715139359, 1541060117, 31476242, -1932341501, 248666970, 1526032961,
   -684704540, -1390932018, 392385886, -593839169, -1784338325, 501563413, 119122956, -779601710, 1545797679, 154410194,
   -1673740849, -1255286352, -518090642, -518090431, -1688594010, 82885871, -1255282928, -1254795620, 1649072853, 645321016,
   1541421335, -463568988, 1532898448, -1255261263, -1193521183, -338694071, 1523517980, 511093446, -1616615631, -1688140528,
   1545892458, 1212101636, 286378063, -1410458395, -806285541, -1255320038, 438827691, -1238462066, -1171223447, 1533859614,
   1412705115, -1310254605, -2005922685, 15867672, -1877021910, -839462356, 5101557, -1255309856, 279974373, 136259746,
   1526029921, 192249760, -1254664029, 185989597, 1834594711, 1840998056, -1870255524, 502683294, -56291078, -634068779,
   1412161529, 136266701, -1583133368, 1389811944, 853619752, -424332012, 1779728837, -1557075848, -1305126787, -1892313526,
   -2094335095, -702443371, -528709135, 1850461550, 1518861680, 1615155442, -1255362963, 1034165055, -533109989, -1226318947,
   1180966742, 288237312, 1509523768, -818110128, 2061366338, -1750010854, 2052610016, 1675037167, -1531835113, 1545759523,
   1546020960, 1524382159, 1355211913, -474409869, 1537035094, -118132451, -753552308, 1528820744, -2109748433, 1525056738,
   850068420, -2139173929, 782299480, -2120437379, 84890286, -938196772, -1479790766, 1530205670, -672703580, 2033018531,
   576028775, -1487449936, 223299482, -2027890763, -664501370, -492909857, 583075520, -1474857948, -512940297, -1255102919,
   -664577726, -1786596856, 1477130127, 464088504, 506015268, 1283642061, 1708112139, -220631516, -1208011548, 64749075,
   -494840393, -132699263, -1255075126, -1062410143, 1154351722, 1913063826, 1518887770, -373520576, -1255180592, -1929494960,
   411899604, 370259741, -266087100, 430633152, 786403071, -1255396862, -1227919106, -832338853, -118925390, -911587522,
   1029837249, 1761626672, -1255006517, 601126908, 1037159854, 1539224486, 521049343, 1524111625, -1255311844, 654396808,
   -1255463571, 930650498, 1112180608, 845774656, -350834062, 1381456411, 447861782, -1135152008, 169984106, -1254965569,
   200207145, -889109188, -1041589243, -1427618374, -140262019, -728917547, -2120448272, -758659828, -1317113592, 1497283253,
   -1243808709, 1598353595, -1098295998, 2053429252, -1837809780, -1717041656, -1134126489, 714612659, -1255463660, -854789384,
   1541417554, 927704334, 1521307365, 873606241, -1017398836, -1254682415, -571972827, -632453617, 727954860, -1569985902,
   -372979586, 32588536, -1329035528, 1580956333, 1763810826, 82878074, 611815352, 1344296910, 308228364, 1522714506,
   -337924246, -932863835, -1489341305, -751522759, 2097353762, -1255089656, -1255272348, 52667915, 560080862, 62968905,
   501406410, -1255205608, -1120004185, -1154985344, 2036130623, -745464083, -1041101420, 625923218, -1566433802, -1254697037,
   -769482751, 357761890, -708848988, -548853704, -107872586, 1541062119, -1254801392, -662048029, -339336182, -1255417280,
   -1174941226, 1339731007, 529336563, -392390542, 1482584016, -1255168291, -1410230587, 1527229103, 2147311342, -932302997,
   -600791492, -1035796454, -289261816, -559748602, -998780465, 1900226299, -330919230, -1029580987, 1524460185, -1198050767,
   -1386753032, -753562190, -1254682249, -1125786888, -153105891, 1498331932, 378111980, 1180156763, -1151890406, 249803955,
   -949168501, -363361773, 551545643, 587285940, -566246809, 1882811548, -1255028899, 1524887735, 473731872, 111903694,
   843497662, 853342411, -391390026, -1268781621, 1545794832, 1528695148, -1727855201, 1299608780, 510202732, -571972157,
   -1272449495, -528378045, -453708428, 317392046, 489303101, 1271219850, 1527423735, -915053714, -1076099600, 1068055246,
   -1255311534, 1434668206, -1145275322, -259466948, 1526314888, 1062416022, 874902713, -526107192, -628507239, 1538973061,
   -566324440, -1478769989, 1508830863, 774269856, 1519369887, 1519072029, -1852592844, 1520902675, -1180268559, 622511910,
   1541057666, -736997481, -908320305, 1923938676, 1879547400, -1048677009, -2012400452, -1735580959, -1538194966, 1521744223,
   -467630065, 1393687814, -1255276157, -239466503, -1255468637, -125390448, 301296291, 750158604, -431972278, -316371036,
   -1232381644, 620787983, -810324664, -2100174605, 1312235122, 223510238, 1568573542, 1515081843, 664458784, 968460414,
   -594061695, 1546277656, -1033669256, -1455168750, 1535655594, -1435587562, 1844283483, -1289817606, 1070653777, 813561654,
   2103413189, 266826955, 2077816802, 1525559002, -342356623, -862902075, 1650593262, -1488231336, 142789783, 357911242,
   1533732806, 1562456512, -1120410740, -1437021939, -1202742487, 1531661736, 470412213, -1422684163, -51305554, -1259244779,
   -549617894, -952462231, 403333734, 1963107985, 1017207780, -1564621841, -502539772, 1522303574, -211446389, -790056408,
   1238701908, -1431402839, 273483815, -243862883, 1271922466, 2127229809, 592397003, 737792296, -764528438, -1048843126,
   1245980376, -528498539, 2127478831, -993238146, 1525359348, -101989835, -1417172171, 1526313923, 1427204280, -638901685,
   -1254703180, -1472395133, -798454318, -298859982, -636961776, 1638486672, 796003876, -2002644059, -393756851, -1198040621,
   -281878855, -677577995, -1466248562, 132783814, -994777693, -1254817761, -1816954373, 1519372231, -397319178, 1518865018,
   799315376, -1422836105, -619266013, 1164043271, -1860834120, 496191427, -1220653047, 214962018, 985697817, -1255473102,
   592331650, -1385648525, -434184235, -737355839, 1521308443, -1375843340, 407783493, 224261216, -21847669, 1426743641,
   -710500627, -1981747687, -1255162321, -1234167581, -1209435067, 1648092874, -1310175634, 486038976, 622897087, -896997460,
   -1062805078, -1111700921, -1482186053, 1545758358, -160612036, -1170328959, -1043373896, 1527769787, 421753042, -1831583312,
   -2120428546, -1919824439, 1534453103, -145272797, -1223877937, -1537684545, 76940994, -1244547780, -1255282223, -1217878128,
   1075600847, -1349803200, 532818183, -1255205246, -2067746930, -1908979654, 201558700, -926996627, 1533141909, -1254827037,
   -1457137788, 2109145988, 142030187, -1827943411, 374081237, -1481901146, 1912053991, 686980214, 264134324, -1255033008,
   -469661191, -1271171983, -453708322, -1255309452, -1255309829, -145415979, -136970941, -837356828, 1338784005, 1292410163,
   -1248634884, -1857767076, 1833671323, 662816914, 1196060106, 1545780880, -460881028, 870812507, 19304308, -218860410,
   -661338748, -876255252, -933093121, 278808723, -1317030066, 1545797345, 1533573691, -1255028491, 1533352445, -24372026,
   -1927666645, 1524110732, -484253377, 1717539530, -1608278011, 1877995348, 898104139, -1255227675, -2078407426, 1982232085,
   -1255358866, 1544824391, -863031357, -16074853, -568508295, 805966925, 1077442174, 215323740, -1254935929, 768288723,
   -1233782192, 534706842, -1208215206, 1525557681, 1757680492, -2054831254, -503332827, -125756260, 769389127, 654980021,
   -1036842542, 1901907424, -1170746950, 464171720, -1254678449, 1545278475, -2004203529, -1249983553, 1544644945, -1231243905,
   -444497059, -512424066, 1246925926, 834437657, -1336410849, -272012641, -1180517830, 297427669, -804154783, -96344430,
   1528822858, 1528695655, 1440037193, -1602445850, -1255278152, 1334653220, 1136326682, -1409752734, 1518886823, 1339142678,
   -1651380274, -1605667265, -59251093, -692143577, -1783797952, -1258122146, -318807115, 1696954818, 246394967, -531179578,
   -1255028970, -2120432278, -292601722, -336830427, 1969751604, -503110117, 1853892923, 778834616, -1143612446, 1731444586,
   1724841118, 15056868, -566286739, -1976807066, -1177637173, 705782980, -1130830962, -1468529126, -1685563656, -1982109593,
   1199042086, 1519228752, 224432870, 484591439, 1489500690, -1255256916, 481142074, 848574852, -2120457291, -892214650,
   -1454668792, -1991079183, 997263710, -1179978456, -708370423, -931432635, 1813364195, -1880423986, -469443378, -601161534,
   1254959730, 1900189749, 588167468, 2054681285, 1560983101, -703668275, -1416927576, -1255253307, -646370802, -1214697046,
   -633454887, -2145962328, 1744508028, 1548820675, 260650234, -266190382, -927172127, -1772826611, 38343275, 1469434971,
   -236373632, -868136211, 1218071, 795841467, 2027952736, 827515659, -631335347, -1521808440, 851092813, 1526095250,
   1042408402, 1040106280, -115355660, -695434479, 88306107, 1189951492, 541069331, -907392975, -1255405199, 1747683750,
   -1669419002, -1322141600, 1761039046,
   1033948411};

// H__XXXs[i] is the hash of THE_XXXs[i] with the leading 'THE' excluded
// (and without the apostrophe).
const int H__XXXs[N_THE_XXXS] =
{1588178048, -1758728215, 1252464830, -1569467984, -1349064875, -938030917, 1333086233, -960240442, -1147152641,
 -1622790142, 1751548046, 1958048435, 213735591, 945281400, 209244460, 1467899435, -1237547142, -1808058045, 229273336,
 -1685470979, 1648524759, -1984052025, -704543738, 421512914, -665566228, 1292459285, -1704172821, 213964566, -2101571765,
 213585206, 2088810601, -1534262537, -1534262888, -403680225, 908931478, 2088815369, 2088750469, -2090585428, -1113901407,
 209293102, -272888739, 208333225, 2089355752, 333238680, 20326992, 1136228005, 1208265631, -1010932792, 786650985,
 213747795, -1987122243, 1996680470, -950098820, 413076930, 2088983683, 1922917106, -1174308713, -1033088016, 202632615,
 -1104034334, -1245101270, -1979964358, -1543457215, -1721043181, 1022066933, 267473932, 2088985913, 88456316, 1791439483,
 229276120, 943528345, 2088885370, -1081275516, -1529170546, -1387527439, -704815003, 761566215, 957301283, 936293900,
 -9464160, 1791436884, 1556565297, -1056045103, 499859825, -1949703731, 1880300284, 259781825, -2010538556, 827381107,
 -1316262384, 2082478316, -1537595672, -111128905, 231570121, -523482133, 2089309748, -1623141946, -1435491038, 632738020,
 -976181073, 347064953, 840382177, 1095142313, 2050026939, 1367692131, -1613713383, -290945130, 937831662, 213732634,
 213893721, 216714614, 1163680272, 517162154, 222469743, 91843044, -1754338411, 221746097, -1057315242, 1212554459,
 880037757, -81898770, 174093921, 193455108, 901773783, -1721495931, 166397291, 215634527, -1683262275, 877532506,
 -1588620354, -1294905975, 686449411, 152502732, -1700516385, -234338138, 291072505, 1479267133, -1548625426, 2088914272,
 -1700854373, 1246251825, -1978518890, 1234371073, -1407940003, -2112343148, 670676274, 1155070525, -1617287043, 803477290,
 303174798, 1548394104, 2089021651, 1570029496, 1533671315, -2043050965, 231400291, -1161940007, 2089123145, -345954615,
 -1194282547, -1847809660, 806692413, 2064360857, 776916870, 2089204123, 1195792167, -2106549662, 1378845419, -370253753,
 -1617596936, 772987273, 2088539090, -1541622875, -1621401641, 206607263, 743060902, 216984752, 2088975493, -1108898479,
 2089414580, 564676283, 871993337, 934498585, 123305547, 2007366754, 2081747535, 1728845345, 1803775955, 2089060070,
 129099088, 2040924677, 1736445724, -1476091005, 113427972, 671276460, 193453193, -1748956331, -592965039, 129840204,
 -1178575390, 1764916866, 1053344379, 871978333, 1755517237, -1880985071, -918366082, 120380490, 2089414541, -732106335,
 209288811, -511569961, 224520284, -230199976, 962181237, 2088768584, -1636790852, -2052525354, -1318409163, 950923531,
 -1162476889, -2014089951, 1992978401, -927250924, 1092411283, 908943747, 539066497, -1431937097, -645842155, 227721011,
 1030467091, -700345604, -898006594, -1001591424, 1344812059, 2089040401, 2089353917, 1553714098, 1882864103, -799147984,
 1208536883, 2089439233, -941566018, -300443783, -2037179898, 1928689140, -604558195, 1237561419, -1106792337, 2088778986,
 -1930403880, -1093805797, -158283523, 1970760737, 96732239, 209230046, 2088756233, -1112519942, 1838887827, 2089220185,
 -1038426865, 1559654, 661269738, 1249641291, -562808439, 2089119044, -950605540, 232541846, 1902150423, 610594834,
 1742858821, -1692092061, 1197722353, -1653610785, -1003101162, 73150978, -364276773, 1747860060, 217054496, -1172880888,
 -982868895, -1754328661, 2088768174, 1407849601, 341264740, -473231227, 414271061, 1970620162, -299073821, 668340586,
 918017266, 589024330, -308493966, 1560626573, -1651712258, -1474216667, 2088700868, -242952594, 1368792793, -1233862121,
 877391623, -1987252782, -1491020529, 1634644178, 213980201, 221593045, -532364698, -1546085035, -1808951275, -1636791398,
 1628824816, -1528588006, -282466291, -293416361, -1239429820, -45024205, 232482510, -1905349385, 1350942505, 1525879671,
 2088975307, 331125719, -1389631745, 1729003237, 228995505, -556759569, -1085750272, -1721339951, 1578827808, 207141564,
 -1651495887, 724047682, 739791286, 799592057, 231200806, 231494820, 614425677, 224789290, -1023751320, -1259703809,
 209226171, -2077928978, 1343873334, 852719213, -1781106351, 1766681142, -1867884059, -1862166856, -307680589, 224374758,
 -693571402, 940594687, 2088812698, -2012580576, 2089410810, 2018752137, -68014534, -1795433035, -1857676845, -125690787,
 -1427167699, -1497740138, 1078173489, -220988918, 4851627, 1672362343, -890252897, 226149834, -1553258183, -711934489,
 -704452040, 213362081, 1370535585, -1340246901, 219379987, -1151121713, 613606562, -1075208861, 1339529096, 565794671,
 -715404132, -1193810478, -1711994341, 211355747, 356621640, -1650634372, 1274312823, -724993151, -1166446834, -855685261,
 207646975, 1405816185, 214034165, 1367136148, -1613081808, 219048337, -598095156, 1762328900, 1540535223, 897836300,
 -1663745853, -1647793584, 592935583, -243641240, 1328581949, -2017663850, 292613693, 228123759, 2070528594, 2122143089,
 -722814195, -1419940688, 1856944030, 1152927652, -1485687141, -112791448, 1564851186, -1377847631, -1757174381, 1767121811,
 71542625, -1537384372, 951173270, -1973441113, 211420621, 139805516, -948120660, 228996730, -943800255, -1795384590,
 2088793325, 616113658, 2116728779, 830261579, -1080591223, -330855991, 329768093, -1898834820, 581274452, -1627110390,
 554310976, 498941964, 1617689943, -1160770081, -435222470, 2088633222, -596082398, 231203198, 2123130671, 231566531,
 900000009, 1762193870, -100020550, 1192831006, 2075573025, -1076723494, -511671216, -740790437, 1245876384, 2089423723,
 1564768443, -970173718, -1891665908, -2077139656, 224531362, 1704987853, -2130712260, 1253057977, 2076224082, -957356032,
 -1806314892, -1622215584, 2088977910, 583610842, -1830560196, -2089875789, -1245459017, -1240482375, 78778726, -774038763,
 -379929837, -1558190242, 1299241154, 213729327, 779309541, -1033830696, 964003745, 231865986, -1853234325, -824520727,
 193450247, 977962416, 202326486, -891419078, -323241322, 948699206, 1360232859, -1428949083, 2088815048, 1503371273,
 -2040354826, -1171891559, 1810674462, 2089439131, -2116711305, 1014661091, -1011858283, -1737025676, 208225068, 2088642874,
 725558909, -267404611, -1932933326, -1621987052, 1854278892, 1298935263, -564792866, 1900002319, -522223347, 2088696777,
 -1426320992, 252388712, -282466649, 2088985773, 2088985890, -590262292, 110411578, 649031837, 16291132, -967982710,
 -794642363, 1530019109, 633405794, 897836300, -104102317, 213702825, -279897851, -366258014, 1431502669, 548457983,
 -1112959395, 865974677, 411017286, -187531958, -1505132457, 213965272, 207514242, 2088700716, 208154180, -1882483585,
 -1253996654, 216981685, 131072774, -32600845, -885985316, -138244531, 2004026738, 2089461436, -1164198137, 475530412,
 2089313847, 210215550, -923518886, 296976450, -1644703264, -1774093708, -2099283673, -740742811, 2089161374, 1698337866,
 592573033, 942092579, -1617210493, 211357064, 126645973, -761519373, -82192644, 90000869, 276592702, 23492684,
 808528075, -1120783463, 1953347587, -678300463, 2088773078, 210036018, 1505017518, -1569467994, 210255720, -2074053818,
 1902494788, -1548997913, 100825183, 2036865696, -1075893850, -232522010, 35134467, 765791916, 797520952, -1650907125,
 221748211, 221592542, -2001651344, -402756865, 2088802401, 1546396349, -1828168637, -950669317, 231400990, -700510673,
 -602601225, -1403703418, -690731886, 1865928734, -1788012583, 229586887, -1324610836, -537215781, -1963449298, -1530656737,
 2088700815, 193445523, 3839455, 1573511612, -827684275, -82833278, -1776394270, -124212863, -908896901, -740036973,
 -733446041, 1785370589, -1651664140, -455678433, -1123881070, 704024861, 758986007, 1274536227, 2119268577, -1621984738,
 202054751, 231059177, -1694510401, -1225784138, 1606176395, 2089342709, -505266685, 910686173, 193462348, 2037003647,
 1660432465, 2141857768, -485704793, 552353073, 1252303120, -1741929060, -1850778886, 1261666423, 473852951, 1743081403,
 -2095187093, 73200204, 1339709653, -1743180676, -1599451612, -526581260, -948643791, 2089347676, -304512585, -1937817261,
 835893184, 1822685905, 71854917, 1175329018, 431892355, 806665131, -1737111944, 2085703092, 1777542194, 1602579394,
 -1463679047, -1300872236, -774532466, 669249858, -1056116135, 1014946482, -264227628, -673227023, -821226604, -1549166773,
 542211723, 436392145, -15351987, -1133118936, -1472100510, 81631741, -1388107894, 1343093960, 2089212584, -1412631105,
 2049820511, -1653320775, -1602686049,
 -1622790142};

// Some locality names also begin with 'THE'.
// THE_LOCALITIES lists the indices into THE_XXXs of those names which occur
// both as a locality and as a street name in Australia.
#define N_THE_LOCALITIES 27
const int THE_LOCALITIES[N_THE_LOCALITIES] = {
   20,  36,  68,  71,  91,  92, 130, 151, 186,
  207, 241, 255, 264, 270, 334, 353, 426, 448,
  463, 492, 502, 550, 560, 584, 599, 620, 629
};

// XXXPostcode: one record describing a difficult postcode, namely a postcode
// which contains localities beginning with 'THE ' and has streets on the XXX
// list.  The same postcode may appear in several records, one for each
// (locality, street) pairing.
// Field order matters: the records in THE_XXX_COMPLX are initialised
// positionally as {postcode, m_locality, m_street}.
// NOTE(review): the indices are presumably 1-based positions in THE_XXXs, and
// m_locality == 0 (e.g. {800, 0, 31}) presumably means "no matching
// locality" -- confirm against the code that consumes this table.
typedef struct {
  int postcode;
  int m_locality; // the 1-index of the localities in this postcode that match an XXX
  int m_street; // the 1-index of the streets in this postcode that match an XXX
} XXXPostcode;

// Table of the "difficult" postcodes described by XXXPostcode.
// memoize_trie_postcodes() and C_do_the_xxx() give every postcode listed here
// the value 2 ("THE in STREET NAME and LOCALITY"); those two uses read only
// the .postcode member.
// NOTE(review): several rows are exact duplicates (e.g. {2261, 187, 2} occurs
// more than once). That is harmless for the uses above; presumably it is an
// artefact of how the table was generated -- confirm.
#define N_XXXPOSTCODE 436
const XXXPostcode THE_XXX_COMPLX[N_XXXPOSTCODE] =
  {{800, 0, 31}, {870, 621, 30}, {870, 621, 111}, {2000, 0, 11}, {2087, 503, 138},
   {2170, 0, 10}, {2261, 187, 2}, {2261, 187, 3}, {2261, 187, 7}, {2261, 187, 37},
   {2261, 187, 8}, {2261, 187, 24}, {2261, 187, 21}, {2261, 187, 2}, {2261, 187, 3},
   {2261, 187, 7}, {2261, 187, 37}, {2261, 187, 8}, {2261, 187, 24}, {2261, 187, 21},
   {2291, 0, 15}, {2291, 0, 250}, {2300, 265, 482}, {2300, 265, 12}, {2300, 0, 482},
   {2300, 0, 12}, {2300, 265, 482}, {2300, 265, 12}, {2423, 0, 17}, {2444, 600, 34},
   {2444, 600, 282}, {2444, 600, 346}, {2444, 600, 249}, {2444, 600, 224}, {2444, 600, 252},
   {2444, 600, 295}, {2444, 600, 30}, {2444, 600, 20}, {2444, 600, 458}, {2444, 600, 15},
   {2444, 600, 108}, {2444, 600, 373}, {2444, 600, 123}, {2444, 600, 157}, {2444, 600, 394},
   {2444, 600, 24}, {2444, 600, 93}, {2444, 600, 144}, {2444, 600, 76}, {2444, 600, 477},
   {2444, 600, 11}, {2444, 600, 106}, {2444, 600, 300}, {2444, 600, 237}, {2444, 600, 488},
   {2444, 600, 135}, {2444, 600, 248}, {2460, 0, 507}, {2460, 0, 597}, {2460, 0, 628},
   {2460, 0, 68}, {2474, 0, 68}, {2480, 0, 274}, {2480, 0, 274}, {2480, 0, 274},
   {2480, 0, 274}, {2480, 0, 274}, {2482, 0, 104}, {2483, 0, 1}, {2483, 0, 104},
   {2483, 0, 306}, {2483, 0, 12}, {2483, 0, 541}, {2483, 0, 368}, {2483, 0, 702},
   {2570, 152, 40}, {2570, 152, 403}, {2570, 152, 20}, {2570, 152, 177}, {2570, 152, 271},
   {2570, 152, 36}, {2570, 152, 16}, {2570, 152, 199}, {2570, 152, 38}, {2570, 152, 132},
   {2570, 152, 96}, {2570, 152, 40}, {2570, 152, 403}, {2570, 152, 20}, {2570, 152, 177},
   {2570, 152, 271}, {2570, 152, 36}, {2570, 152, 16}, {2570, 152, 199}, {2570, 152, 38},
   {2570, 152, 132}, {2570, 152, 96}, {2570, 152, 40}, {2570, 152, 403}, {2570, 152, 20},
   {2570, 152, 177}, {2570, 152, 271}, {2570, 152, 36}, {2570, 152, 16}, {2570, 152, 199},
   {2570, 152, 38}, {2570, 152, 132}, {2570, 152, 96}, {2571, 152, 216}, {2571, 152, 20},
   {2571, 152, 332}, {2571, 152, 233}, {2571, 152, 464}, {2571, 152, 288}, {2620, 21, 3},
   {2620, 21, 3}, {2630, 0, 167}, {2630, 0, 91}, {2630, 0, 167}, {2630, 0, 91},
   {2630, 0, 167}, {2630, 0, 91}, {2650, 621, 4}, {2650, 621, 1}, {2650, 621, 95},
   {2650, 621, 639}, {2650, 621, 640}, {2650, 621, 404}, {2650, 621, 693}, {2650, 621, 90},
   {2650, 0, 4}, {2650, 0, 1}, {2650, 0, 95}, {2650, 0, 639}, {2650, 0, 640},
   {2650, 0, 404}, {2650, 0, 693}, {2650, 0, 90}, {2652, 0, 321}, {2655, 0, 404},
   {2655, 0, 321}, {2655, 0, 90}, {2656, 0, 452}, {2656, 0, 442}, {2787, 0, 450},
   {2795, 0, 130}, {2795, 0, 21}, {2795, 0, 130}, {2795, 0, 21}, {2830, 630, 318},
   {2830, 630, 47}, {2830, 93, 318}, {2830, 93, 47}, {3787, 585, 3}, {3791, 0, 3},
   {4406, 256, 644}, {4406, 256, 644}, {4421, 256, 635}, {4520, 621, 420}, {4670, 0, 4},
   {4670, 0, 45}, {4670, 0, 29}, {4670, 0, 460}, {4670, 0, 6}, {4670, 0, 463},
   {4670, 0, 341}, {4694, 0, 183}, {4695, 0, 183}, {4700, 427, 215}, {4700, 427, 215},
   {4702, 0, 113}, {4702, 0, 1}, {4702, 0, 155}, {4702, 0, 113}, {4702, 0, 1},
   {4702, 0, 155}, {4702, 0, 113}, {4702, 0, 1}, {4702, 0, 155}, {4702, 0, 113},
   {4702, 0, 1}, {4702, 0, 155}, {4702, 0, 113}, {4702, 0, 1}, {4702, 0, 155},
   {4740, 0, 289}, {4740, 0, 3}, {4740, 0, 1}, {4740, 0, 323}, {4740, 0, 44},
   {4740, 0, 611}, {4740, 0, 289}, {4740, 0, 3}, {4740, 0, 1}, {4740, 0, 323},
   {4740, 0, 44}, {4740, 0, 611}, {4740, 0, 289}, {4740, 0, 3}, {4740, 0, 1},
   {4740, 0, 323}, {4740, 0, 44}, {4740, 0, 611}, {4740, 0, 289}, {4740, 0, 3},
   {4740, 0, 1}, {4740, 0, 323}, {4740, 0, 44}, {4740, 0, 611}, {4740, 0, 289},
   {4740, 0, 3}, {4740, 0, 1}, {4740, 0, 323}, {4740, 0, 44}, {4740, 0, 611},
   {4816, 0, 1}, {4816, 0, 406}, {5172, 427, 15}, {5575, 335, 1}, {5575, 335, 599},
   {5575, 335, 67}, {6010, 0, 371}, {6010, 0, 278}, {6010, 0, 474}, {6020, 0, 347},
   {6025, 0, 121}, {6025, 0, 303}, {6025, 0, 503}, {6027, 0, 72}, {6027, 0, 163},
   {6027, 0, 621}, {6027, 0, 41}, {6027, 0, 20}, {6027, 0, 551}, {6027, 0, 278},
   {6027, 0, 67}, {6027, 0, 72}, {6027, 0, 163}, {6027, 0, 621}, {6027, 0, 41},
   {6027, 0, 20}, {6027, 0, 551}, {6027, 0, 278}, {6027, 0, 67}, {6027, 0, 72},
   {6027, 0, 163}, {6027, 0, 621}, {6027, 0, 41}, {6027, 0, 20}, {6027, 0, 551},
   {6027, 0, 278}, {6027, 0, 67}, {6030, 0, 2}, {6030, 0, 58}, {6030, 0, 43},
   {6030, 0, 366}, {6030, 0, 12}, {6030, 0, 96}, {6055, 0, 654}, {6055, 0, 163},
   {6055, 0, 654}, {6055, 0, 163}, {6056, 0, 2}, {6056, 0, 50}, {6056, 0, 3},
   {6056, 0, 492}, {6056, 0, 501}, {6056, 0, 21}, {6058, 0, 124}, {6058, 0, 115},
   {6064, 0, 2}, {6064, 0, 31}, {6065, 0, 146}, {6065, 0, 32}, {6065, 0, 235},
   {6065, 0, 146}, {6065, 0, 32}, {6065, 0, 235}, {6065, 0, 146}, {6065, 0, 32},
   {6065, 0, 235}, {6065, 0, 146}, {6065, 0, 32}, {6065, 0, 235}, {6066, 0, 262},
   {6066, 0, 72}, {6066, 0, 172}, {6066, 0, 1}, {6066, 0, 140}, {6066, 0, 15},
   {6066, 0, 117}, {6066, 0, 49}, {6066, 0, 551}, {6066, 0, 44}, {6066, 0, 43},
   {6069, 0, 45}, {6069, 0, 173}, {6069, 0, 20}, {6069, 0, 152}, {6069, 0, 10},
   {6069, 0, 11}, {6069, 464, 45}, {6069, 464, 173}, {6069, 464, 20}, {6069, 464, 152},
   {6069, 464, 10}, {6069, 464, 11}, {6069, 0, 45}, {6069, 0, 173}, {6069, 0, 20},
   {6069, 0, 152}, {6069, 0, 10}, {6069, 0, 11}, {6107, 0, 11}, {6109, 0, 3},
   {6112, 0, 211}, {6112, 0, 15}, {6112, 0, 211}, {6112, 0, 15}, {6122, 0, 66},
   {6122, 0, 66}, {6122, 0, 66}, {6155, 0, 357}, {6155, 0, 82}, {6155, 0, 72},
   {6155, 0, 28}, {6155, 0, 389}, {6155, 0, 619}, {6155, 0, 296}, {6155, 0, 520},
   {6155, 0, 253}, {6155, 0, 59}, {6155, 0, 255}, {6155, 0, 117}, {6155, 0, 49},
   {6155, 0, 497}, {6155, 0, 196}, {6155, 0, 36}, {6155, 0, 97}, {6155, 0, 93},
   {6155, 0, 44}, {6155, 0, 58}, {6155, 0, 478}, {6155, 0, 648}, {6155, 0, 165},
   {6155, 0, 212}, {6155, 0, 357}, {6155, 0, 82}, {6155, 0, 72}, {6155, 0, 28},
   {6155, 0, 389}, {6155, 0, 619}, {6155, 0, 296}, {6155, 0, 520}, {6155, 0, 253},
   {6155, 0, 59}, {6155, 0, 255}, {6155, 0, 117}, {6155, 0, 49}, {6155, 0, 497},
   {6155, 0, 196}, {6155, 0, 36}, {6155, 0, 97}, {6155, 0, 93}, {6155, 0, 44},
   {6155, 0, 58}, {6155, 0, 478}, {6155, 0, 648}, {6155, 0, 165}, {6155, 0, 212},
   {6163, 0, 36}, {6163, 0, 36}, {6163, 0, 36}, {6164, 0, 28}, {6164, 0, 30},
   {6164, 0, 20}, {6164, 0, 46}, {6164, 0, 570}, {6164, 0, 293}, {6164, 0, 19},
   {6164, 0, 242}, {6164, 0, 28}, {6164, 0, 30}, {6164, 0, 20}, {6164, 0, 46},
   {6164, 0, 570}, {6164, 0, 293}, {6164, 0, 19}, {6164, 0, 242}, {6164, 0, 28},
   {6164, 0, 30}, {6164, 0, 20}, {6164, 0, 46}, {6164, 0, 570}, {6164, 0, 293},
   {6164, 0, 19}, {6164, 0, 242}, {6164, 0, 28}, {6164, 0, 30}, {6164, 0, 20},
   {6164, 0, 46}, {6164, 0, 570}, {6164, 0, 293}, {6164, 0, 19}, {6164, 0, 242},
   {6167, 0, 319}, {6167, 0, 58}, {6167, 0, 319}, {6167, 0, 58}, {6170, 0, 6},
   {6170, 0, 6}, {6170, 0, 6}, {6171, 0, 238}, {6171, 0, 238}, {6171, 0, 238},
   {6171, 0, 238}, {6171, 0, 238}, {6171, 0, 238}, {6210, 0, 37}, {6210, 0, 320},
   {6210, 0, 57}, {6210, 0, 19}, {6210, 0, 37}, {6210, 0, 320}, {6210, 0, 57},
   {6210, 0, 19}, {6230, 0, 6}, {6233, 354, 5}, {6233, 354, 11}, {6233, 354, 43},
   {6330, 0, 1}, {6330, 0, 38}, {6330, 0, 559}, {7140, 0, 2}, {7140, 0, 33},
   {7216, 0, 218}};

// Postcodes with 'THE' in the localities therein, e.g. THE SPIT at 2087.
// The values are in strictly ascending order.
// The array is dimensioned by its own entry count: the initializer below has
// exactly N_POSTCODES_WITH_THE_IN_NAME (112) values, so sizing it with the
// unrelated N_POSTCODES would either zero-pad the tail (breaking the ascending
// order) or fail to compile, depending on that macro's value.
#define N_POSTCODES_WITH_THE_IN_NAME 112
const uint16_t THE_THE_POSTCODES[N_POSTCODES_WITH_THE_IN_NAME] =
  {800, 820, 870, 872,
   2000, 2087, 2170, 2261, 2291, 2300, 2354, 2372, 2423, 2444, 2460, 2474, 2480,
   2482, 2483, 2570, 2571, 2620, 2630, 2650, 2652, 2655, 2656, 2757, 2763, 2787,
   2795, 2799, 2824, 2830, 2831, 3265, 3561, 3787, 3791, 3793, 3945, 4055, 4066,
   4069, 4355, 4370, 4373, 4376, 4377, 4378, 4406, 4421, 4520, 4570, 4670, 4694,
   4695, 4700, 4702, 4704, 4720, 4740, 4741, 4816, 4820, 4825, 5171, 5172, 5271,
   5575, 6010, 6020, 6025, 6027, 6030, 6035, 6036, 6037, 6055, 6056, 6058, 6061,
   6064, 6065, 6066, 6069, 6103, 6107, 6109, 6110, 6112, 6122, 6124, 6155, 6163,
   6164, 6167, 6168, 6170, 6171, 6176, 6208, 6210, 6230, 6233, 6309, 6330, 6525,
   6558, 6728, 7140, 7216};

// Postcodes that memoize_trie_postcodes() and C_do_the_xxx() mark with the
// value 1 ("THE to search for in the postcode"). A postcode that also appears
// in THE_XXX_COMPLX is subsequently re-marked with 2 by those functions.
// Presumably: every postcode that contains at least one street whose name
// begins with 'THE' -- TODO confirm against the script that generated the list.
#define N_POSTCODES_WITH_THE_STREET_NAME 906
const uint16_t THE_POSTCODES_W_STREET_NAME[N_POSTCODES_WITH_THE_STREET_NAME] =
  {800, 830, 870, 2000, 2010, 2019, 2025, 2029, 2030, 2031, 2033, 2035, 2036,
   2037, 2038, 2040, 2041, 2046, 2047, 2049, 2060, 2062, 2063, 2066, 2067,
   2068, 2069, 2071, 2073, 2074, 2075, 2076, 2077, 2081, 2084, 2085, 2086,
   2087, 2088, 2094, 2095, 2096, 2097, 2099, 2101, 2103, 2105, 2106, 2107,
   2110, 2111, 2117, 2119, 2120, 2121, 2125, 2126, 2127, 2131, 2132, 2135,
   2136, 2138, 2140, 2141, 2142, 2144, 2145, 2146, 2148, 2150, 2151, 2153,
   2155, 2158, 2159, 2161, 2163, 2164, 2165, 2166, 2170, 2173, 2175, 2176,
   2178, 2192, 2193, 2195, 2196, 2199, 2200, 2203, 2205, 2206, 2207, 2208,
   2211, 2212, 2213, 2216, 2217, 2219, 2220, 2221, 2222, 2224, 2226, 2227,
   2228, 2229, 2230, 2232, 2233, 2234, 2250, 2251, 2256, 2257, 2259, 2260,
   2261, 2262, 2263, 2265, 2267, 2278, 2280, 2281, 2282, 2283, 2284, 2285,
   2287, 2290, 2291, 2293, 2296, 2299, 2300, 2303, 2315, 2316, 2317, 2318,
   2319, 2320, 2323, 2324, 2325, 2330, 2333, 2334, 2340, 2341, 2347, 2350,
   2355, 2358, 2360, 2361, 2380, 2381, 2382, 2390, 2397, 2403, 2408, 2415,
   2422, 2423, 2425, 2428, 2429, 2430, 2431, 2439, 2440, 2443, 2444, 2446,
   2448, 2450, 2452, 2455, 2456, 2460, 2463, 2464, 2469, 2470, 2472, 2474,
   2477, 2478, 2480, 2481, 2482, 2483, 2484, 2485, 2486, 2487, 2488, 2500,
   2508, 2515, 2517, 2518, 2519, 2525, 2526, 2527, 2528, 2529, 2530, 2533,
   2535, 2536, 2537, 2538, 2539, 2540, 2541, 2546, 2548, 2550, 2551, 2555,
   2556, 2557, 2560, 2567, 2570, 2571, 2572, 2573, 2575, 2576, 2577, 2580,
   2581, 2582, 2601, 2604, 2606, 2609, 2620, 2621, 2622, 2627, 2628, 2629,
   2630, 2631, 2632, 2640, 2646, 2650, 2652, 2653, 2655, 2656, 2659, 2665,
   2669, 2671, 2675, 2680, 2700, 2702, 2706, 2712, 2714, 2715, 2720, 2731,
   2738, 2745, 2747, 2748, 2749, 2750, 2753, 2756, 2759, 2760, 2765, 2769,
   2770, 2774, 2777, 2778, 2780, 2782, 2785, 2786, 2787, 2790, 2795, 2800,
   2806, 2817, 2818, 2820, 2821, 2825, 2830, 2835, 2836, 2840, 2843, 2846,
   2850, 2864, 2866, 2867, 2869, 2870, 2871, 2873, 2874, 2875, 2876, 2877,
   2880, 2899, 2912, 3000, 3008, 3011, 3013, 3015, 3016, 3020, 3023, 3024,
   3025, 3028, 3029, 3030, 3031, 3032, 3033, 3034, 3037, 3039, 3040, 3041,
   3042, 3043, 3044, 3046, 3047, 3049, 3052, 3058, 3061, 3063, 3064, 3068,
   3070, 3072, 3074, 3076, 3078, 3079, 3081, 3082, 3083, 3084, 3085, 3087,
   3088, 3089, 3090, 3095, 3099, 3101, 3104, 3105, 3106, 3107, 3108, 3109,
   3111, 3113, 3115, 3116, 3121, 3122, 3124, 3126, 3127, 3130, 3132, 3133,
   3134, 3135, 3136, 3138, 3140, 3141, 3142, 3143, 3145, 3149, 3150, 3152,
   3153, 3155, 3156, 3158, 3160, 3163, 3166, 3168, 3169, 3171, 3173, 3175,
   3177, 3178, 3179, 3180, 3181, 3182, 3183, 3188, 3190, 3191, 3193, 3195,
   3196, 3197, 3199, 3201, 3204, 3207, 3212, 3214, 3215, 3216, 3218, 3219,
   3220, 3222, 3223, 3224, 3225, 3226, 3227, 3228, 3233, 3234, 3237, 3240,
   3250, 3264, 3269, 3280, 3281, 3285, 3305, 3311, 3312, 3319, 3331, 3337,
   3350, 3352, 3356, 3358, 3363, 3371, 3373, 3377, 3385, 3393, 3419, 3429,
   3435, 3437, 3444, 3448, 3450, 3461, 3488, 3496, 3500, 3550, 3555, 3556,
   3564, 3608, 3620, 3630, 3658, 3660, 3672, 3677, 3690, 3711, 3713, 3717,
   3722, 3723, 3741, 3750, 3752, 3753, 3754, 3756, 3757, 3758, 3763, 3764,
   3765, 3767, 3775, 3777, 3779, 3783, 3787, 3788, 3789, 3791, 3792, 3796,
   3799, 3802, 3803, 3804, 3805, 3806, 3810, 3840, 3844, 3850, 3851, 3860,
   3870, 3875, 3878, 3880, 3882, 3888, 3902, 3904, 3909, 3910, 3912, 3913,
   3915, 3918, 3919, 3921, 3922, 3925, 3927, 3929, 3930, 3931, 3934, 3936,
   3938, 3939, 3940, 3941, 3942, 3943, 3944, 3959, 3976, 3977, 3979, 3984,
   3988, 3995, 3996, 4008, 4011, 4025, 4032, 4035, 4051, 4060, 4065, 4067,
   4078, 4108, 4109, 4110, 4116, 4118, 4119, 4125, 4151, 4152, 4164, 4165,
   4170, 4178, 4179, 4184, 4207, 4208, 4209, 4211, 4212, 4213, 4214, 4215,
   4216, 4217, 4218, 4220, 4221, 4223, 4226, 4272, 4285, 4300, 4301, 4305,
   4352, 4357, 4361, 4380, 4406, 4413, 4420, 4421, 4454, 4455, 4487, 4503,
   4505, 4507, 4509, 4510, 4516, 4519, 4520, 4551, 4553, 4554, 4558, 4560,
   4566, 4567, 4572, 4573, 4575, 4606, 4612, 4613, 4615, 4625, 4659, 4670,
   4677, 4678, 4680, 4694, 4695, 4700, 4702, 4703, 4710, 4719, 4722, 4723,
   4727, 4740, 4800, 4802, 4805, 4806, 4807, 4809, 4810, 4811, 4812, 4816,
   4817, 4818, 4819, 4850, 4852, 4860, 4861, 4870, 4871, 4875, 4879, 4883,
   4888, 4895, 5011, 5012, 5016, 5018, 5020, 5021, 5023, 5039, 5041, 5042,
   5043, 5046, 5047, 5048, 5049, 5050, 5051, 5052, 5062, 5064, 5065, 5066,
   5067, 5068, 5072, 5075, 5076, 5081, 5084, 5085, 5086, 5087, 5088, 5089,
   5091, 5092, 5095, 5098, 5107, 5108, 5109, 5114, 5115, 5118, 5125, 5127,
   5136, 5152, 5154, 5158, 5159, 5161, 5162, 5163, 5169, 5172, 5204, 5211,
   5212, 5214, 5222, 5223, 5231, 5233, 5237, 5244, 5253, 5254, 5259, 5260,
   5267, 5272, 5273, 5280, 5291, 5311, 5330, 5333, 5344, 5351, 5355, 5356,
   5357, 5371, 5373, 5416, 5417, 5419, 5422, 5434, 5453, 5455, 5501, 5521,
   5523, 5540, 5555, 5570, 5571, 5575, 5576, 5602, 5606, 5607, 5632, 5660,
   5680, 5723, 5730, 5731, 5732, 5733, 6000, 6007, 6009, 6010, 6011, 6012,
   6014, 6015, 6016, 6018, 6019, 6020, 6024, 6025, 6026, 6027, 6030, 6044,
   6051, 6052, 6053, 6054, 6055, 6056, 6058, 6059, 6064, 6065, 6066, 6069,
   6076, 6077, 6083, 6100, 6102, 6104, 6107, 6109, 6112, 6122, 6126, 6152,
   6153, 6154, 6155, 6159, 6160, 6163, 6164, 6166, 6167, 6169, 6170, 6171,
   6172, 6210, 6211, 6224, 6229, 6230, 6233, 6236, 6281, 6282, 6285, 6324,
   6326, 6327, 6330, 6356, 6369, 6430, 6442, 6450, 6532, 6639, 6713, 6721,
   6722, 7019, 7052, 7109, 7140, 7172, 7209, 7215, 7216, 7249, 7252, 7253,
   7256, 7307, 7310, 7315, 7320};


// Exposes the compiled-in tables to R.
//   x  a single TRUE/FALSE (validated by errifNotTF).
// Returns the THE_XXXs names as a character vector when x is TRUE, otherwise
// the THE_LOCALITIES indices, each incremented by one, as an integer vector.
SEXP C_getTHEXXX(SEXP x) {
  errifNotTF(x, "x");
  const bool want_names = asLogical(x);

  if (!want_names) {
    SEXP idx = PROTECT(allocVector(INTSXP, N_THE_LOCALITIES));
    int k = 0;
    while (k < N_THE_LOCALITIES) {
      // stored values are shifted by one on the way out
      INTEGER(idx)[k] = 1 + THE_LOCALITIES[k];
      ++k;
    }
    UNPROTECT(1);
    return idx;
  }

  SEXP names = PROTECT(allocVector(STRSXP, N_THE_XXXS));
  int k = 0;
  while (k < N_THE_XXXS) {
    SET_STRING_ELT(names, k, mkCharCE(THE_XXXs[k], CE_UTF8));
    ++k;
  }
  UNPROTECT(1);
  return names;
}


TrieNode *getNode(void) {
  TrieNode *pNode = (TrieNode *)malloc(sizeof(TrieNode));

  if (pNode) {
    pNode->isEndOfWord = false;
    for (int i = 0; i < ALPHABET_SIZE; i++) {
      pNode->children[i] = NULL;
    }
  }

  return pNode;
}

// Releases a node and, recursively, everything reachable through its
// children. Passing NULL is allowed and does nothing.
void freeTrie(TrieNode *root) {
  if (root != NULL) {
    int slot = 0;
    while (slot < ALPHABET_SIZE) {
      // children are released before the node that points to them
      freeTrie(root->children[slot]);
      ++slot;
    }
    free(root);
  }
}

// Adds `key` to the trie rooted at `root` and attaches `code` to its final
// node, so that search(root, key) subsequently returns `code`.
//   root  existing root node (must not be NULL)
//   key   NUL-terminated string; each byte is mapped to a child slot by
//         enc_alphabet()
//   code  value stored at the node that ends the word
// If a node cannot be allocated the insertion is abandoned: nodes already
// created for the key's prefix stay in the trie (and are released by
// freeTrie), but the key is not marked as a word, so search() reports it as
// absent. Previously the NULL returned by getNode() was dereferenced on the
// next step.
void insert(TrieNode *root, const char *key, int code) {
  TrieNode *pCrawl = root;

  for (int i = 0; key[i] != '\0'; i++) {
    unsigned int index = enc_alphabet(key[i]);
    if (!pCrawl->children[index]) {
      pCrawl->children[index] = getNode();
      if (!pCrawl->children[index]) {
        return; // out of memory: leave the trie consistent, key not inserted
      }
    }
    pCrawl = pCrawl->children[index];
  }

  pCrawl->isEndOfWord = true;
  pCrawl->code = code;  // Set the code for the end node
}

// Looks `key` up in the trie rooted at `root`.
// Returns the code stored by insert() when `key` is a complete word in the
// trie, and -1 when it is absent or only a prefix of a stored word.
int search(TrieNode *root, const char *key) {
  TrieNode *node = root;
  const char *p = key;

  while (*p != '\0') {
    TrieNode *next = node->children[enc_alphabet(*p)];
    if (next == NULL) {
      return -1;  // no path for this character
    }
    node = next;
    ++p;
  }

  // Reaching a node is not enough: it must terminate an inserted word.
  if (node == NULL || !node->isEndOfWord) {
    return -1;
  }
  return node->code;
}

// Loads every THE_XXXs entry into the trie; the code attached to an entry is
// its position in THE_XXXs counted from one.
void insert_all(TrieNode *root) {
  int code = 0;
  while (code < N_THE_XXXS) {
    const char *name = THE_XXXs[code];
    ++code;  // codes are 1-based
    insert(root, name, code);
  }
}

// Finds the code of a 'THE <name>' street in a tokenised address.
//   root        trie populated with the known names (see insert_all)
//   wd          word boundaries (lhs/rhs offsets into wd->x) of the address
//   p_postcode  0 = postcode known to have no THE street (nothing is searched),
//               2 = a match may actually be a locality, so additionally ask
//                   followed_by_STE_POSTCODE() before accepting it,
//               any other value = search without the locality check
// Returns the trie code of the longest accepted name following the first word
// that is exactly 'THE', or 0 when there is none.
// Only the first 'THE' is examined: once one is found the function returns,
// whether or not a name matched.
int THE_xxx3(TrieNode *root, WordData * wd, unsigned char p_postcode /* problem postcode */) {
  if (p_postcode == 0) {
    return 0; // nothing to check, postcode known to be free of any THE street
  }
  const char *x = wd->x;
  int n_words = wd->n_words;

  for (int w = 0; w < n_words - 1; ++w) {
    int lhs = wd->lhs[w];
    int rhs = wd->rhs[w];

    // Check if the current word is 'THE'
    if (rhs - lhs != 3 || strncmp(x + lhs, "THE", 3) != 0) {
      continue;
    }
    char streetName[MAX_STREET_NAME_LEN] = {0}; // Buffer for street name
    int currentLength = 0; // Current length of streetName

    // Iterate over the following words to construct potential street names.
    // Want the longest match, so remember the most recent one. (A fixed
    // 3-slot array was used before; its counter kept advancing after the
    // array was full, so a fourth match would have been read out of bounds.)
    int last_code = 0; // code of the longest match so far, 0 = none yet
    for (int i = w + 1; i < n_words; ++i) {
      int wordStart = wd->lhs[i];
      int wordEnd = wd->rhs[i];
      int wordLength = wordEnd - wordStart;

      // Check if adding the next word exceeds the buffer size
      if (currentLength + wordLength >= MAX_STREET_NAME_LEN - 1) {
        break; // Buffer would overflow, stop appending
      }
      // Append a space before the word if it's not the first word
      if (currentLength > 0) {
        // ++ accounts for the space in current length
        streetName[currentLength++] = ' ';
      }

      // Append the word to streetName
      strncat(streetName, x + wordStart, wordLength);
      currentLength += wordLength;

      // Use trie to check if this is a known street name
      int trie_code = search(root, streetName);
      if (trie_code == -1) {
        continue;
      }
      // need to investigate whether the 'THE' has occurred because of
      // the street name or the locality. Note that 'THE' won't be triggered
      // on locality names that are not also street names.
      if (p_postcode == 2 && followed_by_STE_POSTCODE(i, wd)) {
        // likely not a correct code, but the locality we've picked up:
        // fall back to the previous (shorter) match, if there was one
        return last_code;
      }
      // we rely on the forward movement of the address. That is, our
      // algorithm will pick up street names before localities if
      // both appear, because streets appear to the left in Australian
      // addresses
      last_code = trie_code;
    }
    return last_code;
  }

  return 0; // No match found
}

// Fills M_POSTCODE[].THE_code for every postcode slot:
//   0  default,
//   1  postcode listed in THE_POSTCODES_W_STREET_NAME,
//   2  postcode listed in THE_XXX_COMPLX (applied last, so it wins over 1).
void memoize_trie_postcodes(void) {
  int k = 0;
  while (k < SUP_POSTCODES) {
    M_POSTCODE[k].THE_code = 0;
    ++k;
  }

  k = 0;
  while (k < N_POSTCODES_WITH_THE_STREET_NAME) {
    M_POSTCODE[THE_POSTCODES_W_STREET_NAME[k]].THE_code = 1;
    ++k;
  }

  // Must run after the pass above: shared postcodes end up as 2.
  k = 0;
  while (k < N_XXXPOSTCODE) {
    M_POSTCODE[THE_XXX_COMPLX[k].postcode].THE_code = 2;
    ++k;
  }
}

// .Call entry point: for each address string, the code of the 'THE <name>'
// street it contains.
//   x         character vector of addresses (checked by errIfNotStr)
//   Postcode  integer vector of the same length as x, or any non-integer
//             object to indicate that postcodes are unavailable
//   Hash      logical; when true, positive codes are replaced by the
//             corresponding H_THE_XXX value
// Returns an integer vector as long as x: NA for NA strings, 0 for strings of
// length 4 or less and for strings without a match, otherwise the 1-based
// code assigned by insert_all() (or its hash).
SEXP C_do_the_xxx(SEXP x, SEXP Postcode, SEXP Hash) {
  errIfNotStr(x, "x");
  const bool do_hash = asLogical(Hash);
  R_xlen_t N = xlength(x);
  const SEXP * xp = STRING_PTR(x);
  bool postcode_was_null = !isInteger(Postcode);
  const int * postcodep = postcode_was_null ? NULL : INTEGER(Postcode);
  if (!postcode_was_null) {
    errIfNotLen(Postcode, "Postcode", N);
  }

  // Allocate the R result before taking any malloc/calloc memory: if
  // allocVector fails it does not return, and memory obtained from the C
  // allocator beforehand would never be released.
  SEXP ans = PROTECT(allocVector(INTSXP, N));
  int * restrict ansp = INTEGER(ans);

  TrieNode * root = getNode();
  if (root == NULL) {
    error("Unable to allocate TrieNode * root == NULL"); // # nocov
  }
  insert_all(root);

  // This memoization will make THE easier:
  // 0 = no THE to search for
  // 1 = THE to search for in the postcode, but no risk of mistaking for locality
  // 2 = THE in STREET NAME and LOCALITY
  unsigned char * problem_postcodes = calloc(SUP_POSTCODES, sizeof(char));
  if (problem_postcodes == NULL) {
    freeTrie(root);  // # nocov
    error("Unable to calloc problem postcodes."); // # nocov
  }
  for (int p = 0; p < N_POSTCODES_WITH_THE_STREET_NAME; ++p) {
    problem_postcodes[THE_POSTCODES_W_STREET_NAME[p]] = 1;
  }
  for (int p = 0; p < N_XXXPOSTCODE; ++p) {
    problem_postcodes[THE_XXX_COMPLX[p].postcode] = 2;
  }

  for (R_xlen_t i = 0; i < N; ++i) {
    if (xp[i] == NA_STRING) {
      ansp[i] = NA_INTEGER;
      continue;
    }
    const char * xi = CHAR(xp[i]);
    int ni = length(xp[i]);
    if (ni <= 4) {
      ansp[i] = 0;
      continue;
    }

    WordData wd = word_data(xi, ni); // 9M/s
    if (postcode_was_null) {
      ansp[i] = THE_xxx3(root, &wd, 2); // pessimistic postcode
      continue;
    }
    unsigned int postcode = postcodep[i];
    // possibly NA (which becomes a very large unsigned value) or otherwise
    // out of range. The second test guards the table's own bound, because the
    // relationship between SUP_POSTCODE_ and SUP_POSTCODES is not visible here.
    if ((postcode - 800u) > SUP_POSTCODE_ || postcode >= (unsigned int)SUP_POSTCODES) {
      ansp[i] = THE_xxx3(root, &wd, 2);
      continue;
    }
    ansp[i] = THE_xxx3(root, &wd, problem_postcodes[postcode]);
  }
  freeTrie(root);
  free(problem_postcodes);
  if (do_hash) {
    for (R_xlen_t i = 0; i < N; ++i) {
      int anspi = ansp[i];
      // Codes are 1-based (insert_all stores i + 1), so the valid range is
      // 1..N_THE_XXXS inclusive; `<` left the last name unhashed.
      // NOTE(review): assumes H_THE_XXX has one entry per THE_XXXs element.
      if (anspi > 0 && anspi <= N_THE_XXXS) {
        ansp[i] = H_THE_XXX[anspi - 1];
      }
    }
  }
  UNPROTECT(1);
  return ans;
}

void prepend_THE(char SN[MAX_STREET_NAME_LEN], const char * suffix) {
  SN[0] = 'T';
  SN[1] = 'H';
  SN[2] = 'E';
  SN[3] = ' ';
  int j = 0;
  while (1) {
    SN[j + 4] = suffix[j];
    if (suffix[j] == '\0') {
      break;
    }
    ++j;
  }
}


