English | 简体中文 | 繁體中文 | Русский язык | Français | Español | Português | Deutsch | 日本語 | 한국어 | Italiano | بالعربية

Collection of regular expressions for matching website URL

DNS specifications state that each label in a domain name consists of English letters and digits, is at most 63 characters long, and is not case-sensitive. Apart from the hyphen (-), labels may not contain any other punctuation. The lowest-level label is written on the leftmost side, while the top-level domain is written on the rightmost side. A complete domain name made up of multiple labels may not exceed 255 characters in total. Therefore, regular expressions for validating a URL can be written as follows:

Method one:

/**
 * Warns the user, via alert(), when a non-empty value does not contain an
 * http://, ftp:// or https:// URL.
 *
 * The pattern is not anchored, so a URL found anywhere inside the value is
 * enough to pass. An empty value is skipped without any message.
 *
 * @param {string} urlString - Value to check.
 * @returns {undefined} Nothing; the only effect is the alert box.
 */
function checkUrl(urlString){
  // Nothing to validate: stay silent for an empty value.
  if(urlString==""){
    return;
  }
  var urlPattern=/(http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?/;
  if(urlPattern.test(urlString)){
    return;
  }
  alert("Is this not a correct URL? Please check it carefully");
}

Method two: recommended

/**
 * Checks whether a string looks like a URL or a bare host name.
 *
 * The whole string must consist of, in order: an optional scheme
 * (https, http, ftp, rtsp or mms followed by "://"), optional
 * user[:password]@ credentials, a dotted IPv4-style address or a domain
 * name, an optional port and an optional path. Matching is case-insensitive.
 * The octets of an IP-style host are not range-checked.
 *
 * @param {string} str_url - Text to validate.
 * @returns {boolean} true when the entire string matches, otherwise false.
 */
function IsURL(str_url){
  // The pattern is assembled from strings so every part keeps its own note.
  // Dots are written "\\." so RegExp receives a literal dot; a single "\."
  // inside a string literal collapses to "." and would match any character.
  var strRegex = "^((https|http|ftp|rtsp|mms)?://)?" // optional scheme
    + "(([0-9a-z_!~*()'&=+$%-]+:)?[0-9a-z_!~*()'&=+$%-]+@)?" // ftp user:password@
    + "(([0-9]{1,3}\\.){3}[0-9]{1,3}" // URL in IP format - 199.194.52.184
    + "|" // allow IP and DOMAIN (domain name)
    + "([0-9a-z_!~*'()-]+\\.)*" // domain name - www.
    + "([0-9a-z][0-9a-z-]{0,61})?[0-9a-z]\\." // second level domain
    + "[a-z]{2,6})" // first level domain - .com or .museum
    + "(:[0-9]{1,5})?" // port - :80 (port numbers have up to five digits)
    + "((/?)|" // a slash isn't required if there is no file name
    + "(/[0-9a-z_!~*'().;?:@&=+$,%#-]+)+/?)$";
  // "i": scheme and host names are not case-sensitive.
  var re = new RegExp(strRegex, "i");
  return re.test(str_url);
}
// Demo: validate one sample address and show the boolean result in an alert box.
var testUrl="http://harveyzeng.iteye.com/blog/1776991";
// Alternative sample:
//var testUrl="https://www.oldtoolbag.com/article/1.htm";
alert(IsURL(testUrl));

Here is a handy multi-function test harness that compares several validation functions against the same inputs:

<script>
/**
 * Regular expression to determine if the URL is valid
 */
(function(){
  "use strict";
  // Sample inputs passed to every validator; the trailing note describes each one.
  var urlDict=[
    // Bad cases: inputs without a protocol header, or not URLs at all
    'www.baidu.com',           // conventional address without a protocol header
    'w.baidu.com',            // conventional address, short subdomain
    'baidu.com',             // conventional address, only the main domain name
    'test.com',              // plain domain without a protocol header
    '1.2',                // digits separated by a dot, not a URL
    ' WWWW ',              // invalid string (padded with spaces)
    '111test',              // invalid string
    // Correct cases: inputs with a protocol header
    'http://baidu.com',          // conventional URL, only the main domain name
    'http://www.baidu.com',        // conventional URL, with subdomain
    'https://www.baidu.com/',       // conventional URL, https protocol header, with root directory
    'http://www.baidu.com/api',      // conventional URL, a resource under a first-level directory
    'http://www.subdomain.baidu.com/index/subdir',   // conventional URL, multi-level subdomain, multi-level directory
    'http://www.www.subdomain.baidu.com/index/subdir/',// conventional URL, multi-level subdomain, multi-level directory, trailing slash
    'http://io.io'            // unconventional URL, two-letter name under a two-letter top-level domain
  ];
  // suggested regular expression
  /**
   * Tests whether a string ends with something shaped like a web address.
   *
   * Two forms are accepted: one starting at the beginning of the string with
   * "http:" or "https:" (the "//" is optional), or one introduced by "www"
   * plus one character or by "user@". Either may be followed by a path, a
   * query string and a fragment. A bare host such as "baidu.com" is rejected.
   *
   * @param {string} str - Text to test.
   * @returns {boolean} true when the pattern matches up to the end of str.
   */
  function isURL(str){
    // No "g" flag: only a yes/no answer is needed.
    return /(((^https?:(?:\/\/)?)(?:[-;:&=+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)$/.test(str);
  }
  // simple version of a terrible regular expression written by who knows who
  /**
   * Deliberately simplistic URL check, kept only for comparison.
   *
   * Looks for "http://", "https://" or "ftp://", then a host part containing
   * a dot, and requires the string to end in a word character. As a result a
   * valid URL with a trailing slash is rejected, and because the pattern is
   * not anchored at the start, leading junk is accepted.
   *
   * @param {string} str - Text to test.
   * @returns {boolean} true when the pattern matches.
   */
  function badRegFn(str){
    return /(http[s]?|ftp):\/\/[^\/]+?\..+\w$/.test(str);
  }
	//jb51
	/**
	 * Checks whether a string looks like a URL or a bare host name.
	 *
	 * The whole string must consist of, in order: an optional scheme
	 * (https, http, ftp, rtsp or mms followed by "://"), optional
	 * user[:password]@ credentials, a dotted IPv4-style address or a domain
	 * name, an optional port and an optional path. Matching is
	 * case-insensitive. The octets of an IP-style host are not range-checked.
	 *
	 * @param {string} str_url - Text to validate.
	 * @returns {boolean} true when the entire string matches, otherwise false.
	 */
	function IsURL(str_url){
	  // The pattern is assembled from strings so every part keeps its own note.
	  // Dots are written "\\." so RegExp receives a literal dot; a single "\."
	  // inside a string literal collapses to "." and would match any character.
	  var strRegex = "^((https|http|ftp|rtsp|mms)?://)?" // optional scheme
	    + "(([0-9a-z_!~*()'&=+$%-]+:)?[0-9a-z_!~*()'&=+$%-]+@)?" // ftp user:password@
	    + "(([0-9]{1,3}\\.){3}[0-9]{1,3}" // URL in IP format - 199.194.52.184
	    + "|" // allow IP and DOMAIN (domain name)
	    + "([0-9a-z_!~*'()-]+\\.)*" // domain name - www.
	    + "([0-9a-z][0-9a-z-]{0,61})?[0-9a-z]\\." // second level domain
	    + "[a-z]{2,6})" // first level domain - .com or .museum
	    + "(:[0-9]{1,5})?" // port - :80 (port numbers have up to five digits)
	    + "((/?)|" // a slash isn't required if there is no file name
	    + "(/[0-9a-z_!~*'().;?:@&=+$,%#-]+)+/?)$";
	  // "i": scheme and host names are not case-sensitive.
	  var re = new RegExp(strRegex, "i");
	  return re.test(str_url);
	}
  // Test case coverage
  (function(){
    // Maps each sample input to an object of { validatorName: result }.
    var ret={};
    // Validators under comparison; results are keyed by each function's name.
    var fnList=[isURL,badRegFn,IsURL];
    // Runs every validator against one input and gathers the results.
    var collect=function(link){
      var obj={};
      for(var i=0,j=fnList.length;i<j;i++){
        var fn=fnList[i];
        obj[fn.name]=fn.call(null,link);
      }
      return obj;
    };
    for(var i=0,j=urlDict.length;i<j;i++){
      ret[urlDict[i]]=collect(urlDict[i]);
    }
    // Print the raw object first, then the same data as a table.
    console.log(ret);
    console.table(ret);
  }());
}());
</script>

After running it, press F12 to open the browser's developer tools and view the results in the console.

The section above mainly covers how to write JavaScript validation functions and how to judge their results. Below are some regular expressions for validating website addresses and file paths, compiled by the editor for your reference.

Regular Expression (http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?
Match http://regxlib.com/Default.aspx | http://electronics.cnet.com/electronics/0-6342366-8-8994967-1.html
Mismatch www.yahoo.com

Regular Expression ^\\{2}[\w-]+\\(([\w-][\w-\s]*[\w-]+[$$]?$)|([\w-][$$]?$))
Match \\server\service | \\server\my service | \\serv_001\service$
Mismatch \\my server\service | \\server\ service | \\server$\service

Regular Expression ^(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)?((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.[a-zA-Z]{2,4})(\:[0-9]+)?(/[^/][a-zA-Z0-9.\,\?\'\\/\+&%\$#\=~_\-@]*)*$
Match http://www.sysrage.net | https://64.81.85.161/site/file.php?cow=moo's | ftp://user:pass@host.com:123
Mismatch sysrage.net

Regular Expression ^([a-zA-Z]\:|\\\\[^\/\\:*?"<>|]+\\[^\/\\:*?"<>|]+)(\\[^\/\\:*?"<>|]+)+(\.[^\/\\:*?"<>|]+)$
Match c:\Test.txt | \\server\shared\Test.txt | \\server\shared\Test.t
Mismatch c:\Test | \\server\shared | \\server\shared\Test.?

Regular Expression ^(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9.\,\?\'\\\+&%\$#\=~_\-]+))*$
Match http://site.com/dir/file.php?var=moo | https://localhost | ftp://user:pass@site.com:21/file/dir
Mismatch site.com | http://site.com/dir//

Regular Expression ^([a-zA-Z]\:)(\\[^\\/:*?<>"|]*(?<![ ]))*(\.[a-zA-Z]{2,6})$
Match C:\di___r\fi_sysle.txt | c:\dir\filename.txt
Mismatch c:\dir\file?name.txt

Regular Expression ^([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}$
Match regexlib.com | this.is.a.museum | 3com.com
Mismatch notadomain-.com | helloworld.c | .oops.org

Regular Expression ^(((ht|f)tp(s?))\://)?(www.|[a-zA-Z].)[a-zA-Z0-9\-\.]+\.(com|edu|gov|mil|net|org|biz|info|name|museum|us|ca|uk)(\:[0-9]+)*(/($|[a-zA-Z0-9.\,\;\?\'\\\+&%\$#\=~_\-]+))*$
Match www.blah.com:8103 | www.blah.com/blah.asp?sort=ASC |www.blah.com/blah.htm#blah
Mismatch www.state.ga | http://www.jb51.ru

Regular Expression \b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))
Match http://jb51.net/blah_blah | http://jb51.net/blah_blah/ | (Something like http://jb51.net/blah_blah) | http://jb51.net/blah_blah_(wikipedia) | (Something like http://jb51.net/blah_blah_(wikipedia)) | http://jb51.net/blah_blah. | http://jb51.net/blah_blah/. | <http://jb51.net/blah_blah> | <http://jb51.net/blah_blah/> | http://jb51.net/blah_blah, | http://www.example.com/wpstyle/?p=364. | http://✪df.ws/123 | rdar://1234 | rdar:/1234 | http://userid:password@example.com:8080 | http://userid@example.com | http://userid@example.com:8080 | http://userid:password@example.com
Mismatch no_ws.example.com | no_proto_or_ws.com | /relative_resource.php

You May Also Like