자바(Java)/어플리케이션
웹페이지에서 이메일주소 추출하는 자바 프로그램
xemaker
2020. 7. 7. 17:55
package getemail;
import java.io.IOException;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Command-line utility that downloads a web page and prints every e-mail
 * address found in it.
 *
 * <p>The page is fetched and parsed with jsoup, the parsed document is turned
 * back into a string, and that string is scanned with a regular expression.
 * Each distinct address is printed once, on its own line. The print order is
 * whatever the {@link HashSet} iteration yields and is therefore unspecified.
 */
public class GetEmailAddr {

    /** Page that is scanned when no URL is supplied on the command line. */
    static String url = "https://newsis.com/view/?id=NISX20200623_0001068975&cID=10101&pID=10100";

    /**
     * Pragmatic (not RFC-complete) matcher for e-mail-like tokens.
     *
     * <ul>
     *   <li>Local part: starts with a word character or hyphen, may contain
     *       word characters, dots, plus signs and hyphens, and ends with a
     *       word character. A one-character local part is allowed.</li>
     *   <li>Domain: one or more dot-terminated labels of word characters and
     *       hyphens, followed by an alphabetic top-level domain of at least
     *       two letters. No upper bound is applied, so long TLDs such as
     *       {@code .museum} are not truncated.</li>
     * </ul>
     *
     * Compiled once because {@link Pattern} is immutable and compilation is
     * comparatively expensive.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w\\-](?:[\\w.+\\-]*\\w)?@(?:[\\w\\-]+\\.)+[A-Za-z]{2,}");

    /**
     * Finds every e-mail address in {@code line} and adds it to
     * {@code container}.
     *
     * @param line      text to scan; {@code null} or empty text is treated as
     *                  containing no addresses
     * @param container set that receives the addresses; existing entries are
     *                  kept and duplicates collapse naturally
     */
    public static void fillEmailsHashSet(String line, HashSet<String> container) {
        if (line == null || line.isEmpty()) {
            return; // nothing to scan
        }
        Matcher matcher = EMAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            container.add(matcher.group());
        }
    }

    /**
     * Fetches a page and prints the distinct e-mail addresses found in it.
     *
     * @param args optional; {@code args[0]} is the URL to scan. When absent,
     *             the default {@link #url} is used.
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        String target = (args != null && args.length > 0) ? args[0] : url;

        Connection.Response response = Jsoup.connect(target)
                .method(Connection.Method.GET)
                .execute();
        Document page = response.parse();

        // Scan the string form of the parsed document rather than only its
        // visible text, so addresses inside attributes (e.g. mailto: links)
        // are picked up as well.
        HashSet<String> addresses = new HashSet<>();
        fillEmailsHashSet(page.toString(), addresses);

        for (String address : addresses) {
            System.out.println(address);
        }
    }
}