PowerShell: Extract Root Domain from URL

# getRootDomainFromUrl.ps1
# This solves the problem of extracting the root domain from a URL
# The challenge is to take multi-label suffixes (e.g. '.co.uk') from the table of known TLDs into account during extraction

# Sample input URL; it is passed to getRootDomain by the call at the bottom of this script.
$url='https://intranet.kimconnect.co.uk'
# Lookup table of known domain suffixes.
#   Key   = the suffix, including its leading dot (e.g. '.com', '.co.uk').
#   Value = a human-readable description of the suffix.
# getRootDomain reads only the keys of this table; keys that contain two dots
# (such as '.co.uk' or '.ac.uk') are the ones it treats as multi-label suffixes.
# The description strings are informational and are not read by getRootDomain.
$domainsDictionary=@{
    '.ac'='Ascension Island'
    '.ac.uk'='Second-level domain for United Kingdom (.uk) and most often used for academic sites.'
    '.ad'='Andorra'
    '.ae'='United Arab Emirates'
    '.aero'='Air Transportation Industry'
    '.af'='Afghanistan'
    '.ag'='Antigua and Barbuda'
    '.ai'='Anguilla'
    '.al'='Albania'
    '.am'='Armenia'
    '.an'='Netherlands Antilles'
    '.ao'='Angola'
    '.aq'='Antarctica'
    '.ar'='Argentina'
    '.arpa'='Internet infrastructure TLD'
    '.as'='American Somoa'
    '.asia'='Asian countries'
    '.at'='Austria'
    '.au'='Australia'
    '.aw'='Aruba'
    '.ax'='Aland Islands - part of Finland'
    '.az'='Azerbaijan'
    '.ba'='Bosnia and Herzegovinia'
    '.bb'='Barbados'
    '.bd'='Bangladesh'
    '.be'='Belgium'
    '.bf'='Burkina Faso'
    '.bg'='Bulgaria'
    '.bh'='Bahrain'
    '.bi'='Burundi'
    '.biz'='United States business site.'
    '.bj'='Benin'
    '.bm'='Bermuda'
    '.bn'='Brunei Darussalam'
    '.bo'='Bolivia'
    '.br'='Brazil'
    '.bs'='Bahamas'
    '.bt'='Bhutan'
    '.bv'='Bouvet Island'
    '.bw'='Botswana'
    '.by'='Belarus and Byelorussia'
    '.bz'='Belize'
    '.ca'='Canada'
    '.cat'='Catalan'
    '.cc'='Cocos Islands - Keelings'
    '.cd'='Democratic Republic of the Congo'
    '.cf'='Central African Republic'
    '.cg'='Congo'
    '.ch'='Switzerland'
    '.ci'='Cote dIvoire'
    '.ck'='Cook Islands'
    '.cl'='Chile'
    '.cm'='Cameroon'
    '.cn'='China'
    '.co'='Colombia'
    '.co.uk'='Second-level domain for United Kingdom (.uk) and most often used for commercial sites.'
    '.com'='United States commercial website.'
    '.coop'='Business cooperatives and organizations.'
    '.cr'='Costa Rica'
    '.cs'='Former Czechoslovakia'
    '.cu'='Cuba'
    '.cv'='Cape Verde'
    '.cw'='Curaçao'
    '.cx'='Christmas Island'
    '.cy'='Cyprus'
    '.cz'='Czech Republic'
    '.dd'='East Germany'
    '.de'='Germany'
    '.dj'='Djibouti'
    '.dk'='Denmark'
    '.dm'='Dominica'
    '.do'='Dominican Republic'
    '.dz'='Algeria'
    '.ec'='Ecuador'
    '.edu'='United States education site.'
    '.ee'='Estonia'
    '.eg'='Egypt'
    '.eh'='Western Sahara'
    '.er'='Eritrea'
    '.es'='Spain'
    '.et'='Ethiopia'
    '.eu'='European Union'
    '.fi'='Finland'
    '.firm'='Internet site for business or firm.'
    '.fj'='Fiji'
    '.fk'='Falkland Islands and Malvinas'
    '.fm'='Micronesia'
    '.fo'='Faroe Islands'
    '.fr'='France'
    '.fx'='Metropolitan France'
    '.ga'='Gabon'
    '.gb'='Great Britain'
    '.gd'='Grenada'
    '.ge'='Georgia'
    '.gf'='French Guiana'
    '.gg'='Guernsey'
    '.gh'='Ghana'
    '.gi'='Gibraltar'
    '.gl'='Greenland'
    '.gm'='Gambia'
    '.gn'='Guinea'
    '.gov'='United States Government site.'
    '.gov.uk'='Second-level domain for United Kingdom (.uk) and most often used for government sites.'
    '.gp'='Guadeloupe'
    '.gq'='Equatorial Guinea'
    '.gr'='Greece'
    '.gs'='South Georgia and South Sandwich Islands.'
    '.gt'='Guatemala'
    '.gu'='Guam'
    '.gw'='Guinea-Bissau'
    '.gy'='Guyana'
    '.hk'='Hong Kong'
    '.hm'='Heard and McDonald Islands'
    '.hn'='Honduras'
    '.hr'='Croatia/Hrvatska'
    '.ht'='Haiti'
    '.hu'='Hungary'
    '.id'='Indonesia'
    '.ie'='Ireland'
    '.il'='Israel'
    '.im'='Isle of Man'
    '.in'='India'
    '.info'='United States information site with no restrictions.'
    '.int'='International institute site.'
    '.io'='British Indian Ocean Territory'
    '.iq'='Iraq'
    '.ir'='Iran'
    '.is'='Iceland'
    '.it'='Italy'
    '.je'='Jersey - Channel Islands a UK dependency'
    '.jm'='Jamaica'
    '.jo'='Jordan'
    '.jobs'='Job related sites.'
    '.jp'='Japan'
    '.ke'='Kenya'
    '.kg'='Kyrgyzstan'
    '.kh'='Cambodia'
    '.ki'='Kiribati'
    '.km'='Comoros'
    '.kn'='Saint Kitts and Nevis'
    '.kp'='North Korea'
    '.kr'='South Korea'
    '.kw'='Kuwait'
    '.ky'='Cayman Islands'
    '.kz'='Kazakhstan'
    '.la'='Laos'
    '.lb'='Lebanon'
    '.lc'='Saint Lucia'
    '.li'='Liechtenstein'
    '.lk'='Sri Lanka'
    '.lr'='Liberia'
    '.ls'='Lesotho'
    '.lt'='Lithuania'
    '.ltd.uk'='Second-level domain for United Kingdom (.uk) and most often used for limited company sites.'
    '.lu'='Luxembourg'
    '.lv'='Latvia'
    '.ly'='Libya'
    '.ma'='Morocco'
    '.mc'='Monaco'
    '.md'='Moldova'
    '.me'='Montenegro'
    '.me.uk'='Second-level domain for United Kingdom (.uk) and most often used for personal sites.'
    '.mg'='Madagascar'
    '.mh'='Marshall Islands'
    '.mil'='United States Military site.'
    '.mk'='Macedonia'
    '.ml'='Mali'
    '.mm'='Myanmar'
    '.mn'='Mongolia'
    '.mo'='Macau'
    '.mobi'='Mobile devices'
    '.mod.uk'='Second-level domain for United Kingdom (.uk) and most often used for military of defence sites.'
    '.mp'='Northern Mariana Islands'
    '.mq'='Martinique'
    '.mr'='Mauritania'
    '.ms'='Montserrat'
    '.mt'='Malta'
    '.mu'='Mauritius'
    '.museum'='Worldwide museums'
    '.mv'='Maldives'
    '.mw'='Malawi'
    '.mx'='Mexico'
    '.my'='Malaysia'
    '.mz'='Mozambique'
    '.na'='Namibia'
    '.name'='Individual and family names'
    '.nato'='NATO site.'
    '.nc'='New Caledonia'
    '.ne'='Niger'
    '.net'='United States Internet administrative site. See the .net definition for alternative definitions.'
    '.net.uk'='Second-level domain for United Kingdom (.uk) and most often used for network company sites.'
    '.nf'='Norfolk Island'
    '.ng'='Nigeria'
    '.nhs.uk'='Second-level domain for United Kingdom (.uk) and most often used for national health service institutions'
    '.ni'='Nicaragua'
    '.nl'='Netherlands'
    '.no'='Norway'
    '.nom'='Personal site'
    '.np'='Nepal'
    '.nr'='Nauru'
    '.nt'='Neutral Zone'
    '.nu'='Niue'
    '.nz'='New Zealand'
    '.om'='Oman'
    '.org'='Organization (non-profit) sites.'
    '.org.uk'='Second-level domain for United Kingdom (.uk) and most often used for non-profit sites.'
    '.pa'='Panama'
    '.pe'='Peru'
    '.pf'='French Polynesia'
    '.pg'='Papua New Guinea'
    '.ph'='Philippines'
    '.pk'='Pakistan'
    '.pl'='Poland'
    '.plc.uk'='Second-level domain for United Kingdom (.uk) and most often used for public limited company sites.'
    '.pm'='St. Pierre and Miquelon'
    '.pn'='Pitcairn'
    '.post'='sTLD (sponsored top-level domain) available exclusively for the postal sector.'
    '.pr'='Puerto Rico'
    '.pro'='United States professional site for accountants'
    '.ps'='Palestinian territories'
    '.pt'='Portugal'
    '.pw'='Palau'
    '.py'='Paraguay'
    '.qa'='Qatar'
    '.re'='Reunion'
    '.ro'='Romania'
    '.rs'='Republic of Serbia'
    '.ru'='Russian Federation'
    '.rw'='Rwanda'
    '.sa'='Saudi Arabia'
    '.sb'='Solomon Islands'
    '.sc'='Seychelles'
    '.sch.uk'='Second-level domain for United Kingdom (.uk) and most often used for school sites.'
    '.sd'='Sudan'
    '.se'='Sweden'
    '.sg'='Singapore'
    '.sh'='Saint Helena'
    '.si'='Slovenia'
    '.sj'='Svalbard and Jan Mayen Islands'
    '.sk'='Slovakia'
    '.sl'='Sierra Leone'
    '.sm'='San Marino'
    '.sn'='Senegal'
    '.so'='Somalia'
    '.sr'='Suriname'
    '.ss'='South Sudan'
    '.st'='Sao Tome and Principe'
    '.store'='United States domain for retail business site.'
    '.su'='Former USSR'
    '.sv'='El Salvador'
    '.sy'='Syria'
    '.sz'='Swaziland'
    '.tc'='Turks and Caicos Islands'
    '.td'='Chad'
    '.tel'='Internet communication services'
    '.tf'='French Southern Territory and Antarctic Lands.'
    '.tg'='Togo'
    '.th'='Thailand'
    '.tj'='Tajikistan'
    '.tk'='Tokelau'
    '.tl'='East Timor'
    '.tm'='Turkmenistan'
    '.tn'='Tunisia'
    '.to'='Tonga'
    '.tp'='East Timor'
    '.tr'='Turkey'
    '.travel'='Travel related sites.'
    '.tt'='Trinidad and Tobago'
    '.tv'='Tuvalu'
    '.tw'='Taiwan'
    '.tz'='Tanzania'
    '.ua'='Ukraine'
    '.ug'='Uganda'
    '.uk'='United Kingdom'
    '.um'='United States minor outlying islands.'
    '.us'='United States'
    '.uy'='Uruguay'
    '.uz'='Uzbekistan'
    '.va'='Vatican City State'
    '.vc'='Saint Vincent and the Grenadines'
    '.ve'='Venezuela'
    '.vg'='British Virgin Islands'
    '.vi'='United States Virgin Islands'
    '.vn'='Vietnam'
    '.vu'='Vanuatu'
    '.web'='Internet site about the World Wide Web.'
    '.wf'='Wallis and Futuna Islands'
    '.ws'='Samoa'
    '.xxx'='Adult entertainment domain'
    '.ye'='Yemen'
    '.yt'='Mayotte'
    '.yu'='Yugoslavia'
    '.za'='South Africa'
    '.zm'='Zambia'
    '.zr'='Zaire'
    '.zw'='Zimbabwe'
}

# getRootDomain
#   Returns the root (registrable) domain of a URL, e.g.
#   'https://intranet.kimconnect.co.uk' -> 'kimconnect.co.uk'.
#
# Parameters:
#   $url - an absolute URL, or a bare host / host-and-path without a scheme.
#
# Reads:
#   $domainsDictionary (from the caller's scope) - only its keys are used. Keys that
#   contain two dots (e.g. '.co.uk') mark suffixes that span two labels, so three
#   labels of the host are kept instead of two.
#
# Returns:
#   The root domain string; the host unchanged when it is an IP literal or has no
#   more labels than would be kept; $null when no host can be extracted.
function getRootDomain($url){
    $text="$url".Trim()
    if(!$text){return $null}

    # A bare 'host/path' has no scheme and would not parse as an absolute URI,
    # so give it one before parsing.
    $candidate=if($text -match '^[a-z][a-z0-9+.\-]*://'){$text}else{"http://$text"}
    $parsed=$null
    if(!([uri]::TryCreate($candidate,[System.UriKind]::Absolute,[ref]$parsed))){return $null}

    $domain=$parsed.Host
    if(!$domain){return $null}

    # IP literals have no root domain; slicing their "labels" would yield garbage such as '1.1'.
    if($parsed.HostNameType -eq 'IPv4' -or $parsed.HostNameType -eq 'IPv6'){return $domain}

    # Compare suffixes literally. Interpolating a key into a regex lets every '.' match
    # any character, so e.g. 'www.acme.uk' would wrongly be treated as ending in '.me.uk'.
    $hasTwoLabelSuffix=$false
    foreach($suffix in $domainsDictionary.Keys){
        if($suffix -and $suffix -match '\.\w+\.' -and
           $domain.EndsWith($suffix,[System.StringComparison]::OrdinalIgnoreCase)){
            $hasTwoLabelSuffix=$true
            break
        }
    }

    $labels=$domain.Split('.')
    $keep=if($hasTwoLabelSuffix){3}else{2}
    # Hosts that are already as short as (or shorter than) the slice are returned whole.
    if($labels.Count -le $keep){return $domain}
    return ($labels[(-$keep)..-1] -join '.')
}

# Demo: print the root domain of the sample URL defined at the top of the script.
getRootDomain -url $url

Leave a Reply

Your email address will not be published. Required fields are marked *