Skip to content

Instantly share code, notes, and snippets.

@marirs
Last active January 2, 2020 05:31
Show Gist options
  • Save marirs/3e519e59d4c97986c40cfd3d84397014 to your computer and use it in GitHub Desktop.
Save marirs/3e519e59d4c97986c40cfd3d84397014 to your computer and use it in GitHub Desktop.
Extract the domain name from a URL and report whether the host is an IP address or a domain name
#!/usr/bin/env python3
from collections import namedtuple
from itertools import groupby
from fastnumbers import isint
from ipaddress import ip_address
# Result record returned by is_ip(); built once instead of on every call.
_Ip = namedtuple('Ip', ['ip', 'port', 'version'])


def _is_valid_port(port):
    """Return True if *port* is an ASCII decimal string in the range 0-65535."""
    return (
        isinstance(port, str)
        and port.isascii()
        and port.isdecimal()
        and len(port) <= 5  # bounds the int() conversion below
        and int(port) <= 65535
    )


def is_ip(test_string):
    """Check whether a string is an IP address, optionally followed by ``:port``.

    Surrounding whitespace and square brackets (``[::1]``, ``[::1]:80``) are
    ignored. The returned ``ip`` is the cleaned-up host text, without
    brackets, whitespace or port.

    :param test_string: The string to test for IP
    :return: ``Ip(ip=<str>, port=<str>, version=<4 or 6>)`` when the string is
        an IP address, otherwise ``False``. ``port`` is ``""`` when no port
        is present or when the text after the last ``:`` is not a decimal
        number in the range 0-65535.
    """
    if not isinstance(test_string, str):
        return False

    candidate = test_string.strip()

    # First attempt: the whole string is an address (IPv4, or IPv6 which
    # legitimately contains ':'), possibly wrapped in brackets.
    bare = candidate.strip('[').strip(']').strip()
    try:
        parsed = ip_address(bare)
    except ValueError:
        pass
    else:
        return _Ip(ip=bare, port="", version=parsed.version)

    # Second attempt: treat whatever follows the last ':' as a port.
    host, sep, port = candidate.rpartition(':')
    if not sep:
        return False
    host = host.strip('[').strip(']').strip()  # IPv6 with a port is bracketed
    try:
        parsed = ip_address(host)
    except ValueError:
        # ain't an ip
        return False

    port = port.strip()
    if not _is_valid_port(port):
        # There was a ':' with something after it, but not a usable port;
        # the address itself is still reported.
        port = ""
    return _Ip(ip=host, port=port, version=parsed.version)
# Result record returned by extract_domain(); built once instead of per call.
_Result = namedtuple('Result', ['url_type', 'result'])


def extract_domain(url):
    """Extract the host (domain name or IP address) from the given URL.

    The scheme (``http://``, ``//``) is dropped, the host is cut at the first
    ``/``, ``?`` or ``#``, wrapping ``[] <> ()`` characters are removed and a
    leading ``www.`` / ``www<digits>.`` label is stripped.

    :param url: the url to extract the domain from - string
    :return: ``Result(url_type='domain', result='domain.tld')`` or
        ``Result(url_type='ip', result='ip')`` / ``'ip:port'`` /
        ``'[ipv6]:port'``; ``None`` when *url* is not a string or contains
        no host part.
    """
    if not isinstance(url, str):
        return None

    text = url.strip()

    # Drop the scheme. Only a '//' that is leading or directly follows
    # 'scheme:' counts, so a '//' further along in the path or query string
    # is not mistaken for the scheme separator.
    prefix, sep, remainder = text.partition('//')
    if (
        sep
        and (not prefix or prefix.endswith(':'))
        and not any(ch in prefix for ch in '/?#')
    ):
        text = remainder

    # The host is the first run of characters that are not path, query or
    # fragment delimiters.
    host = text.lstrip('/?#')
    for delimiter in '/?#':
        host = host.partition(delimiter)[0]

    # just in case it is wrapped in brackets of some kind
    host = host.strip().lstrip('[<(').rstrip(']>)').strip()
    if not host:
        return None

    # Strip a leading 'www' / 'www2' style label only when it is a whole
    # label, so 'wwwidgets.com' is left untouched.
    label, dot, rest = host.partition('.')
    is_www_label = label[:3].lower() == 'www' and (
        len(label) == 3 or label[3:].isdigit()
    )
    if dot and is_www_label:
        host = rest.strip()

    ip_info = is_ip(host)
    if not ip_info:
        # assuming web-address
        return _Result(url_type='domain', result=host)

    # its an ip-address
    if not ip_info.port:
        address = ip_info.ip
    elif ip_info.version == 6:
        # IPv6 needs brackets to separate the address from the port
        address = f'[{ip_info.ip}]:{ip_info.port}'
    else:
        address = f'{ip_info.ip}:{ip_info.port}'
    return _Result(url_type='ip', result=address)
@marirs
Copy link
Author

marirs commented Dec 21, 2019

>>> extract_domain('2605:2700:0:3::4713:93e3/something')
Result(url_type='ip', result='2605:2700:0:3::4713:93e3')
>>> extract_domain('[2605:2700:0:3::4713:93e3]:80/something')
Result(url_type='ip', result='[2605:2700:0:3::4713:93e3]:80')
>>> extract_domain('https://[2605:2700:0:3::4713:93e3]:80/something')
Result(url_type='ip', result='[2605:2700:0:3::4713:93e3]:80')
>>> extract_domain('https://www.name.com')
Result(url_type='domain', result='name.com')
>>> extract_domain('name.com')
Result(url_type='domain', result='name.com')

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment