자바(Java)/어플리케이션

웹페이지에서 이메일 주소를 추출하는 자바 프로그램

xemaker 2020. 7. 7. 17:55
package getemail;

import java.io.IOException;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Command-line utility that downloads a web page with jsoup and prints every
 * distinct e-mail-like string found in the page's serialized HTML.
 */
public class GetEmailAddr {

	/** Page scanned by {@link #main(String[])} when no URL is given on the command line. */
	static String url="https://newsis.com/view/?id=NISX20200623_0001068975&cID=10101&pID=10100";

	/**
	 * E-mail matcher, compiled once because {@link Pattern} instances are
	 * immutable and reusable.
	 *
	 * <ul>
	 *   <li>Local part: starts and ends with a word character; {@code .},
	 *       {@code +} and {@code -} are allowed in between. A single
	 *       character is enough, so short addresses such as {@code a@b.co}
	 *       are found.</li>
	 *   <li>Domain: one or more dot-terminated labels of word characters
	 *       or hyphens.</li>
	 *   <li>TLD: two or more ASCII letters with no upper bound, so long TLDs
	 *       such as {@code .online} are not cut short.</li>
	 * </ul>
	 *
	 * This is a pragmatic matcher, not an RFC 5322 validator; asset names of
	 * the form {@code logo@2x.png} also satisfy it.
	 */
	private static final Pattern EMAIL_PATTERN = Pattern.compile(
			"\\w(?:[\\w.+\\-]*\\w)?@(?:[\\w\\-]+\\.)+[A-Za-z]{2,}");

	/**
	 * Adds every e-mail-like substring of {@code line} to {@code container}.
	 *
	 * @param line      text to scan; {@code null} or empty text adds nothing
	 * @param container set receiving the matches (duplicates collapse because
	 *                  it is a set); must not be {@code null}
	 */
	public static void fillEmailsHashSet(String line,HashSet<String> container){
		if (line == null || line.isEmpty()) {
			return;
		}
		Matcher matcher = EMAIL_PATTERN.matcher(line);
		while (matcher.find()) {
			container.add(matcher.group());
		}
	}

	/**
	 * Fetches a page with an HTTP GET and prints each distinct address found
	 * in it, one per line, in the set's iteration order.
	 *
	 * @param args optional; when {@code args[0]} is present and not blank it
	 *             is used as the URL to scan, otherwise {@link #url} is used
	 * @throws IOException propagated from the jsoup request or from parsing
	 *                     the response
	 */
	public static void main(String[] args) throws IOException {
		String target = url;
		if (args != null && args.length > 0 && !args[0].trim().isEmpty()) {
			target = args[0].trim();
		}

		Connection.Response response = Jsoup.connect(target)
				.method(Connection.Method.GET)
				.execute();
		Document document = response.parse();

		// Scan the serialized markup rather than only the visible text, so
		// addresses that appear solely in attributes (e.g. mailto: links)
		// are picked up too.
		String html = document.toString();

		HashSet<String> emails = new HashSet<>();
		fillEmailsHashSet(html, emails);
		for (String email : emails) {
			System.out.println(email);
		}
	}
}