ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

jsoup connect

2021-11-01 09:00:17  阅读:194  来源: 互联网

标签:String url URL jsoup handler connect spec


jsoup connect

 

/**
 * Opens a new {@link Connection} aimed at the given URL, for fetching and parsing an HTML page.
 * <p>
 * Typical usage:
 * <ul>
 *  <li><code>Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").data("name", "jsoup").get();</code></li>
 *  <li><code>Document doc = Jsoup.connect("http://example.com").cookie("auth", "token").post();</code></li>
 * </ul>
 * @param url the address to connect to; its protocol must be {@code http} or {@code https}.
 * @return the new connection, on which data, cookies, headers, user-agent, referrer and method
 *         can be configured before it is executed.
 */
public static Connection connect(String url) {
    // Pure delegation: the HTTP implementation builds the connection.
    final Connection connection = HttpConnection.connect(url);
    return connection;
}

 

 

/**
 * Builds a fresh {@code HttpConnection} and points it at the supplied URL.
 *
 * @param url the URL string, forwarded unchanged to {@code url(String)}
 * @return the newly created connection with its URL set
 */
public static Connection connect(String url) {
    final Connection connection = new HttpConnection();
    // url(String) is fluent and hands back the same connection it was called on.
    return connection.url(url);
}

 

 

/**
 * Sets the request URL from its string form.
 * <p>
 * The string is run through {@code encodeUrl}, parsed into a {@link URL}, and
 * stored on the underlying request.
 *
 * @param url the URL to use; checked with {@code Validate.notEmpty}
 * @return this connection, for chaining
 * @throws IllegalArgumentException if the string cannot be parsed as a URL
 *         (the original {@link MalformedURLException} is kept as the cause)
 */
public Connection url(String url) {
    Validate.notEmpty(url, "Must supply a valid URL");
    try {
        final String encoded = encodeUrl(url);
        final URL target = new URL(encoded);
        req.url(target);
        return this;
    } catch (MalformedURLException malformed) {
        // Surface parse failures as an unchecked argument error, keeping the caller's input in the message.
        throw new IllegalArgumentException("Malformed URL: " + url, malformed);
    }
}

 

 

/**
 * Stores the given URL on this object.
 *
 * @param url the URL to store; checked with {@code Validate.notNull}
 * @return this object, cast to {@code T}, for chaining
 */
public T url(URL url) {
    Validate.notNull(url, "URL must not be null");
    this.url = url;
    @SuppressWarnings("unchecked")
    final T self = (T) this;
    return self;
}

 

/**
* Creates a URL by parsing the given spec with the specified handler
* within a specified context. If the handler is null, the parsing
* occurs as with the two argument constructor.
* <p>
* Parsing proceeds in this order:
* <ol>
*   <li>characters less than or equal to {@code ' '} are trimmed from both
*       ends of {@code spec};</li>
*   <li>an optional, case-insensitive {@code url:} prefix is skipped;</li>
*   <li>unless the remainder starts with {@code '#'}, the text before the
*       first {@code ':'} (searched only up to the first {@code '/'}) is
*       taken as the protocol if it is a valid protocol name;</li>
*   <li>if a context is given and the protocols are compatible, the
*       context's components are inherited and the spec is treated as
*       relative;</li>
*   <li>the fragment after {@code '#'} is split off into the ref;</li>
*   <li>the rest is handed to the handler's {@code parseURL}.</li>
* </ol>
*
* @param      context   the context in which to parse the specification.
* @param      spec      the {@code String} to parse as a URL.
* @param      handler   the stream handler for the URL.
* @exception  MalformedURLException  if no protocol is specified, or an
*               unknown protocol is found, or {@code spec} is {@code null}.
*               Any other exception raised while parsing is also wrapped
*               in a {@code MalformedURLException} with the original
*               exception as its cause.
* @exception  SecurityException
*        if a security manager exists and its
*        {@code checkPermission} method doesn't allow
*        specifying a stream handler.
* @see        java.net.URL#URL(java.lang.String, java.lang.String,
*                  int, java.lang.String)
* @see        java.net.URLStreamHandler
* @see        java.net.URLStreamHandler#parseURL(java.net.URL,
*                  java.lang.String, int, int)
*/
public URL(URL context, String spec, URLStreamHandler handler)
throws MalformedURLException
{
    // Kept untouched so the "no protocol" error message can show the caller's input.
    String original = spec;
    // i: scan index, limit: exclusive end of the region still to parse, c: current char.
    int i, limit, c;
    // Inclusive start of the region still to parse; advanced as prefixes are consumed.
    int start = 0;
    String newProtocol = null;
    // True when the spec begins with '#', i.e. it is only a fragment reference.
    boolean aRef=false;
    // True once the context's components have been copied into this URL.
    boolean isRelative = false;

    // Check for permission to specify a handler
    if (handler != null) {
        SecurityManager sm = System.getSecurityManager();
        if (sm != null) {
            checkSpecifyHandler(sm);
        }
    }

    try {
        // A null spec throws NullPointerException here; the generic catch
        // below converts it into a MalformedURLException.
        limit = spec.length();
        while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
            limit--;        //eliminate trailing whitespace
        }
        while ((start < limit) && (spec.charAt(start) <= ' ')) {
            start++;        // eliminate leading whitespace
        }

        // Skip an optional "url:" prefix (matched case-insensitively).
        if (spec.regionMatches(true, start, "url:", 0, 4)) {
            start += 4;
        }
        if (start < spec.length() && spec.charAt(start) == '#') {
            /* we're assuming this is a ref relative to the context URL.
            * This means protocols cannot start w/ '#', but we must parse
            * ref URL's like: "hello:there" w/ a ':' in them.
            */
            aRef=true;
        }
        // Look for "protocol:" — the scan gives up at the first '/' and is
        // skipped entirely for fragment-only specs.
        for (i = start ; !aRef && (i < limit) &&((c = spec.charAt(i)) != '/') ; i++) {
            if (c == ':') {

                String s = spec.substring(start, i).toLowerCase();
                if (isValidProtocol(s)) {
                    newProtocol = s;
                    // Continue parsing just past the ':'.
                    start = i + 1;
                }
                // Only the first ':' is considered, whether or not it yielded a protocol.
                break;
            }
        }

        // Only use our context if the protocols match.
        protocol = newProtocol;
        if ((context != null) && ((newProtocol == null) ||
        newProtocol.equalsIgnoreCase(context.protocol))) {
            // inherit the protocol handler from the context
            // if not specified to the constructor
            if (handler == null) {
                handler = context.handler;
            }

            // If the context is a hierarchical URL scheme and the spec
            // contains a matching scheme then maintain backwards
            // compatibility and treat it as if the spec didn't contain
            // the scheme; see 5.2.3 of RFC2396
            if (context.path != null && context.path.startsWith("/"))
            {
                newProtocol = null;
            }

            // No usable protocol of its own: start from a copy of the
            // context's components and resolve the spec relative to them.
            if (newProtocol == null) {
                protocol = context.protocol;
                authority = context.authority;
                userInfo = context.userInfo;
                host = context.host;
                port = context.port;
                file = context.file;
                path = context.path;
                isRelative = true;
            }
        }

        // Neither the spec nor the context supplied a protocol.
        if (protocol == null) {
            throw new MalformedURLException("no protocol: "+original);
        }

        // Get the protocol handler if not specified or the protocol
        // of the context could not be used
        if (handler == null &&(handler = getURLStreamHandler(protocol)) == null) {
            throw new MalformedURLException("unknown protocol: "+protocol);
        }

        this.handler = handler;

        // Split off the fragment: everything after the first '#' becomes the
        // ref, and the region passed to the handler ends at the '#'.
        i = spec.indexOf('#', start);
        if (i >= 0) {
            ref = spec.substring(i + 1, limit);
            limit = i;
        }

        /*
        * Handle special case inheritance of query and fragment
        * implied by RFC2396 section 5.2.2.
        */
        // start == limit means nothing is left for the handler to parse, so
        // the context's query (and its ref, if the spec had none) is reused.
        if (isRelative && start == limit) {
        query = context.query;
            if (ref == null) {
                ref = context.ref;
            }
        }

        // Delegate parsing of the remaining region [start, limit) to the handler.
        handler.parseURL(this, spec, start, limit);

    } catch(MalformedURLException e) {
        // Already the declared exception type: propagate unchanged.
        throw e;
    } catch(Exception e) {
        // Report any other failure as a MalformedURLException that carries
        // the original message and keeps the original exception as its cause.
        MalformedURLException exception = new MalformedURLException(e.getMessage());
        exception.initCause(e);
        throw exception;
    }
}

 

 

####################################

标签:String,url,URL,jsoup,handler,connect,spec
来源: https://www.cnblogs.com/herd/p/15491963.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有