티스토리 뷰

navernews_filesystem.zip
0.95MB
navernews.zip
0.95MB
Jdbc.java
0.01MB
NaverNews.java
0.00MB
jsoup-1.16.1.jar
0.42MB
json-simple-1.1.1.jar
0.02MB
mariadb-java-client-2.4.0.jar
0.56MB

자바로 네이버 뉴스를 파싱한 후 라이믹스 게시판에 넣어 보겠습니다.

우선 간단하게 1건만 라이믹스 게시판에 insert 하는 코드 입니다.

 

 

 

 

package naver_news;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;


/**
 * Small JDBC helper that inserts a post into the {@code rx_documents} table
 * of a Rhymix board running on MariaDB.
 *
 * <p>Every public method opens its own connection and closes it before
 * returning. Failures are reported on standard output and never propagate to
 * the caller, so a caller looping over many posts keeps running.
 */
public class Jdbc {
	
	/** {@code module_srl} written to every inserted document row. */
	final static int MODULE_SRL = 52;
	final String DRIVER = "org.mariadb.jdbc.Driver";
	final String DB_IP = "localhost";
	final String DB_PORT = "3306";
	final String DB_NAME = "rx";
	final String DB_USER_ID = "rx";
	final String DB_PW = "YOUR_PASSWORD";
	final String DB_URL = 
			"jdbc:mariadb://" + DB_IP + ":" + DB_PORT + "/" + DB_NAME;
	
	/** Format used for the {@code regdate} / {@code last_update} columns. */
	private static final DateTimeFormatter STAMP_FORMAT =
			DateTimeFormatter.ofPattern("yyyyMMddHHmmss");
	
	/** Shared generator used to pick a nickname; created once instead of per insert. */
	private final Random random = new Random();
	
	/** Nicknames from which one is picked at random for each inserted post. */
	List<String> listNick=null;
	
	/** Creates the helper and fills the nickname pool. */
	public Jdbc() {
		listNick=new ArrayList<>();
		listNick.add("홍길동");
		listNick.add("성춘향");
		listNick.add("이몽룡");
	}
	
	/**
	 * Loads the JDBC driver and opens a new connection to {@link #DB_URL}.
	 *
	 * @return an open connection, or {@code null} when the driver class is
	 *         missing or the connection attempt fails (the error is printed)
	 */
	public Connection getConn(){
		Connection conn = null;
		try {
			Class.forName(DRIVER);
			conn = DriverManager.getConnection(DB_URL, DB_USER_ID, DB_PW);
			if (conn != null) {
				System.out.println("DB연결 성공");
			}
		} catch (ClassNotFoundException e) {
			System.out.println("클래스 없음 오류");
			e.printStackTrace();
		} catch (SQLException e) {
			System.out.println("DB SQL 에러");
			e.printStackTrace();
		}
		return conn;
	}
	
	/**
	 * Allocates a new sequence number by inserting a row into
	 * {@code rx_sequence} and reading back the generated key.
	 *
	 * @return the generated key, or {@code 0} when no connection could be
	 *         opened or the insert failed
	 */
	public int getSeq() {
		int seq = 0;
		String sql = "insert into rx_sequence values(?)";
		
		// A null resource is permitted in try-with-resources; it is simply not closed.
		try (Connection conn = getConn()) {
			if (conn == null) {
				System.out.println("error: no DB connection, sequence not allocated");
				return seq;
			}
			try (PreparedStatement pstmt =
					conn.prepareStatement(sql, Statement.RETURN_GENERATED_KEYS)) {
				pstmt.setString(1, "0");
				pstmt.executeUpdate();
				try (ResultSet rs = pstmt.getGeneratedKeys()) {
					while (rs.next()) {
						seq = rs.getInt(1);
					}
				}
			}
		} catch (SQLException e) {
			System.out.println("error: " + e);
		}
		return seq;
	}
	
	/**
	 * Inserts one public post into {@code rx_documents}.
	 *
	 * <p>The post gets a fresh {@code document_srl} from {@link #getSeq()}, the
	 * current local time as registration/update time, and a nickname picked at
	 * random from {@link #listNick}. When no sequence number or no connection
	 * is available the insert is skipped and a message is printed.
	 *
	 * @param tit  value for the {@code title} column
	 * @param cont value for the {@code content} column
	 */
	public void insertDb(String tit, String cont) {
		int documentSrl = getSeq();
		System.out.println("document_srl="+documentSrl);
		if (documentSrl <= 0) {
			// getSeq() returns 0 on failure; never write a row with a bogus key.
			System.out.println("error: document_srl not allocated, insert skipped");
			return;
		}
		
		String date = LocalDateTime.now().format(STAMP_FORMAT);
		String nick = listNick.get(random.nextInt(listNick.size()));
		
		String sql = "insert into rx_documents (document_srl, module_srl, lang_code, is_notice, title"
				+ ", title_bold, title_color, content, user_name, nick_name"
				+ ", member_srl, email_address, homepage, ipaddress, list_order"
				+ ", regdate, last_update, update_order, allow_trackback, notify_message"
				+ ", status, comment_status)"
				+ " values("
				+ "?,?,?,?,?"
				+ ",?,?,?,?,?"
				+ ",?,?,?,?,?"
				+ ",?,?,?,?,?"
				+ ",?,?"
				+ ")";
		
		try (Connection conn = getConn()) {
			if (conn == null) {
				System.out.println("error: no DB connection, insert skipped");
				return;
			}
			try (PreparedStatement pstmt = conn.prepareStatement(sql)) {
				pstmt.setInt(1, documentSrl);                // document_srl
				pstmt.setInt(2, MODULE_SRL);                 // module_srl
				pstmt.setString(3, "ko");                    // lang_code
				pstmt.setString(4, "N");                     // is_notice
				pstmt.setString(5, tit);                     // title
				
				pstmt.setString(6, "N");                     // title_bold
				pstmt.setString(7, "N");                     // title_color
				pstmt.setString(8, cont);                    // content
				pstmt.setString(9, nick);                    // user_name
				pstmt.setString(10, nick);                   // nick_name
				
				pstmt.setInt(11, 4);                         // member_srl
				pstmt.setString(12, "jung945@hanmail.net");  // email_address
				pstmt.setString(13, "");                     // homepage
				pstmt.setString(14, "127.0.0.1");            // ipaddress
				pstmt.setInt(15, -documentSrl);              // list_order
				
				pstmt.setString(16, date);                   // regdate
				pstmt.setString(17, date);                   // last_update
				pstmt.setInt(18, -documentSrl);              // update_order
				pstmt.setString(19, "N");                    // allow_trackback
				pstmt.setString(20, "N");                    // notify_message
				
				pstmt.setString(21, "PUBLIC");               // status
				pstmt.setString(22, "ALLOW");                // comment_status
				
				pstmt.executeUpdate();
			}
		} catch (SQLException e) {
			System.out.println("error: " + e);
		}
	}

	/** Inserts a single sample post so the database setup can be checked by hand. */
	public static void main(String[] args) throws Exception{
		System.out.println("시작");
		Jdbc jdbc=new Jdbc();
		
		jdbc.insertDb("제목","본문");
		System.out.println("끝");
		
	}

}

위 소스를 실행하면 1건이 게시판에 insert 됩니다.

자, 그럼 이제 네이버 뉴스 파싱 소스입니다.

package naver_news;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

/**
 * Crawls the Naver news front page and stores articles through {@link Jdbc}.
 *
 * <p>For every matched title element on the front page it follows the link,
 * then follows the first link inside the first {@code press_edit_news_list}
 * element of that page, and inserts that article's title and body HTML.
 */
public class NaverNews {
	
	Jdbc jdbc = new Jdbc();

	/**
	 * Entry point: fetches the front page and processes each matched link.
	 * A failure on one link is reported and the crawl moves on to the next.
	 */
	public static void main(String[] args) throws Exception {
		
		String url = "https://news.naver.com/";
		Document doc = Jsoup.connect(url).get();
		// NOTE(review): a space-separated pair of class names is passed here;
		// confirm it still matches the intended elements if the markup changes.
		Elements es=doc.getElementsByClass("cjs_ctitle _item_title");

		NaverNews nn=new NaverNews();
		for (Element e : es) {
			Element eh=e.getElementsByTag("a").first();
			if (eh == null) {
				continue; // title element without a link
			}
			// absUrl resolves relative links against the page URL; "" if it cannot.
			String h=eh.absUrl("href");
			if (h.isEmpty()) {
				continue;
			}
			try {
				nn.parseListbyUrl(h);
			} catch (IOException ex) {
				// One unreachable page must not abort the remaining links.
				System.out.println("error: " + h + " : " + ex);
			}
		}
	}
	
	/**
	 * Fetches a list page and hands its first article link to
	 * {@link #parseContByUrl(String)}. Does nothing (apart from a message)
	 * when the expected list element or link is missing.
	 *
	 * @param url absolute URL of the list page
	 * @throws IOException if a page cannot be fetched
	 */
	public void parseListbyUrl(String url) throws IOException{
		Document doc = Jsoup.connect(url).get();
		Elements es=doc.getElementsByClass("press_edit_news_list");
		if (es.isEmpty()) {
			System.out.println("skip(no press_edit_news_list): " + url);
			return;
		}
		
		Element eh=es.get(0).getElementsByTag("a").first();
		if (eh == null) {
			System.out.println("skip(no article link): " + url);
			return;
		}
		String href=eh.absUrl("href");
		if (href.isEmpty()) {
			System.out.println("skip(no article link): " + url);
			return;
		}
		System.out.println("href="+href);
		parseContByUrl(href);
	}
	
	/**
	 * Fetches an article page, extracts title text and body HTML, and inserts
	 * them via {@link Jdbc#insertDb(String, String)}. Articles whose body
	 * element is not found are skipped instead of being stored empty.
	 *
	 * @param url absolute URL of the article page
	 * @throws IOException if the page cannot be fetched
	 */
	public void parseContByUrl(String url) throws IOException{
		Document doc = Jsoup.connect(url).get();
		
		// NOTE(review): absolute XPath is tied to the current page layout.
		Elements etitle =doc.selectXpath("/html/body/div[1]/div[2]/div/div[1]/div[1]/div[1]/div[2]/h2/span");
		String title = etitle.text();
		
		Elements esCont=doc.getElementsByClass("newsct_article _article_body");
		System.out.println("esCont="+esCont);
		if (esCont.isEmpty()) {
			System.out.println("skip(no article body): " + url);
			return;
		}
		String cont = esCont.toString();
		
		System.out.println("title="+title);
		System.out.println("content="+cont);
		System.out.println();
		
		jdbc.insertDb(title, cont);
		
	}

}

위 소스를 실행하면 네이버 뉴스를 파싱한 후 아래처럼 게시판에 insert가 됩니다.

결과를 보니 이미지가 안 나왔다. 소스를 보니

<img id="img1" data-src=" https://imgnews.pstatic.net/image/029/2023/06/20/0002807896_001_20230620091303472.jpg?type=w647 " class="_LAZY_LOADING"> 

이미지가 위와같이 되어 있었다. 흠.. 속도를 위해 저렇게 LAZY_LOADING 으로 한건가.. 예전에는 그냥 <img src=""> 이렇게 되어 있어서 그냥 나왔는데..

그래서 id="img1" data- 이 부분을 빼 버리면 <img src=" https://imgnews.pstatic.net/image/029/2023/06/20/0002807896_001_20230620091303472.jpg?type=w647 " class="_LAZY_LOADING">

이렇게 이미지 소스로 바뀌게 되니 나올 것 같았다.

/**
 * Turns lazy-loading image tags into directly loadable ones by removing the
 * {@code id="imgN" data-} prefix, so that {@code data-src="..."} becomes
 * {@code src="..."}.
 *
 * <p>Any numeric image id is handled ({@code img1}, {@code img2}, ...), so
 * articles with several images are fully rewritten.
 *
 * @param cont article body HTML; may be {@code null}
 * @return the rewritten HTML, or {@code null} when {@code cont} is {@code null}
 */
public String getImg(String cont) {
    if (cont == null) {
        return null;
    }
    return cont.replaceAll("id=\"img\\d+\" data-", "");
}

위의 메소드를 추가하고 cont=getImg(cont); 본문 추출하는 부분에서 호출하였다. 전체 소스는 아래와 같다.

package naver_news;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

/**
 * Crawls the Naver news front page and stores articles through {@link Jdbc}.
 *
 * <p>For every matched title element on the front page it follows the link,
 * then follows the first link inside the first {@code press_edit_news_list}
 * element of that page, and inserts that article's title and body HTML after
 * rewriting lazy-loading image tags with {@link #getImg(String)}.
 */
public class NaverNews {
	
	Jdbc jdbc = new Jdbc();

	/**
	 * Entry point: fetches the front page and processes each matched link.
	 * A failure on one link is reported and the crawl moves on to the next.
	 */
	public static void main(String[] args) throws Exception {
		
		String url = "https://news.naver.com/";
		Document doc = Jsoup.connect(url).get();
		// NOTE(review): a space-separated pair of class names is passed here;
		// confirm it still matches the intended elements if the markup changes.
		Elements es=doc.getElementsByClass("cjs_ctitle _item_title");

		NaverNews nn=new NaverNews();
		for (Element e : es) {
			Element eh=e.getElementsByTag("a").first();
			if (eh == null) {
				continue; // title element without a link
			}
			// absUrl resolves relative links against the page URL; "" if it cannot.
			String h=eh.absUrl("href");
			if (h.isEmpty()) {
				continue;
			}
			try {
				nn.parseListbyUrl(h);
			} catch (IOException ex) {
				// One unreachable page must not abort the remaining links.
				System.out.println("error: " + h + " : " + ex);
			}
		}
	}
	
	/**
	 * Fetches a list page and hands its first article link to
	 * {@link #parseContByUrl(String)}. Does nothing (apart from a message)
	 * when the expected list element or link is missing.
	 *
	 * @param url absolute URL of the list page
	 * @throws IOException if a page cannot be fetched
	 */
	public void parseListbyUrl(String url) throws IOException{
		Document doc = Jsoup.connect(url).get();
		Elements es=doc.getElementsByClass("press_edit_news_list");
		if (es.isEmpty()) {
			System.out.println("skip(no press_edit_news_list): " + url);
			return;
		}
		
		Element eh=es.get(0).getElementsByTag("a").first();
		if (eh == null) {
			System.out.println("skip(no article link): " + url);
			return;
		}
		String href=eh.absUrl("href");
		if (href.isEmpty()) {
			System.out.println("skip(no article link): " + url);
			return;
		}
		System.out.println("href="+href);
		parseContByUrl(href);
	}
	
	/**
	 * Fetches an article page, extracts title text and body HTML, rewrites
	 * lazy-loading images, and inserts the result via
	 * {@link Jdbc#insertDb(String, String)}. Articles whose body element is
	 * not found are skipped instead of being stored empty.
	 *
	 * @param url absolute URL of the article page
	 * @throws IOException if the page cannot be fetched
	 */
	public void parseContByUrl(String url) throws IOException{
		Document doc = Jsoup.connect(url).get();
		
		// NOTE(review): absolute XPath is tied to the current page layout.
		Elements etitle =doc.selectXpath("/html/body/div[1]/div[2]/div/div[1]/div[1]/div[1]/div[2]/h2/span");
		String title = etitle.text();
		
		Elements esCont=doc.getElementsByClass("newsct_article _article_body");
		System.out.println("esCont="+esCont);
		if (esCont.isEmpty()) {
			System.out.println("skip(no article body): " + url);
			return;
		}
		String cont = getImg(esCont.toString());
		
		System.out.println("title="+title);
		System.out.println("content="+cont);
		System.out.println();
		
		jdbc.insertDb(title, cont);
		
	}
	
	/**
	 * Turns lazy-loading image tags into directly loadable ones by removing
	 * the {@code id="imgN" data-} prefix, so that {@code data-src="..."}
	 * becomes {@code src="..."}. Any numeric image id is handled, so articles
	 * with several images are fully rewritten.
	 *
	 * @param cont article body HTML; may be {@code null}
	 * @return the rewritten HTML, or {@code null} when {@code cont} is {@code null}
	 */
	public String getImg(String cont) {
		if (cont == null) {
			return null;
		}
		return cont.replaceAll("id=\"img\\d+\" data-", "");
	}

}

본문의 이미지도 잘 나온다.

댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2024/05   »
1 2 3 4
5 6 7 8 9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30 31
글 보관함