vb6.0/vba [vb6.0/vba] 서울교차로(newspaper.seoulkcr.com)구인 크롤링
페이지 정보
본문
서울교차로(newspaper.seoulkcr.com)구인 크롤링을 해보자
지식인 답변내용입니다.
Private Sub program1472_com()
Dim elem As Object
Dim oHTML As Object
Set oHTML = CreateObject("htmlfile")
Dim URL As String, Cookie As String, T As String, P As Integer
Cells.Clear
Do
P = P + 1
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", URL
.SetRequestHeader "Accept", "text/html, application/xhtml+xml, */*"
.SetRequestHeader "Accept-Language", "ko-KR"
.SetRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
.SetRequestHeader "Host", "newspaper.seoulkcr.com"
.SetRequestHeader "Connection", "Keep-Alive"
If Len(Cookie) Then .SetRequestHeader "Cookie", Cookie
.Send
.WaitForResponse: DoEvents
T = .ResponseText
'T = StrConv(.ResponseBody, vbUnicode)
End With
oHTML.body.innerHTML = T
Dim O As Object, i As Integer
Set O = oHTML.getElementById("container")
Set elem = getXPathElement("/div/div/ul", O)
Dim V(2) As Variant
For i = 1 To 20
Set O = getXPathElement("/li[" & i & "]", elem)
If O Is Nothing Then GoTo ExitSub
V(0) = (P - 1) * 20 + i
V(1) = getXPathElement("/ul/li[1]", O).innerTEXT
V(2) = getXPathElement("/ul/li[2]", O).innerTEXT
Cells(Rows.Count, "A").End(3)(2).Resize(, 3).Value = V
Next
Loop
ExitSub:
MsgBox "완료!!"
End Sub
Public Function getXPathElement(sXPath As String, objElement As Object) As Object
On Error GoTo ErrPass
Dim sXPathArray() As String
Dim sNodeName As String, sNodeNameIndex As String
Dim sRestOfXPath As String
Dim lNodeIndex As Long, lCount As Long
sXPathArray = Split(sXPath, "/")
sNodeNameIndex = sXPathArray(1)
If Not InStr(sNodeNameIndex, "[") > 0 Then
sNodeName = sNodeNameIndex
lNodeIndex = 1
Else
sXPathArray = Split(sNodeNameIndex, "[")
sNodeName = sXPathArray(0)
lNodeIndex = CLng(Left(sXPathArray(1), Len(sXPathArray(1)) - 1))
End If
sRestOfXPath = Right(sXPath, Len(sXPath) - (Len(sNodeNameIndex) + 1))
Set getXPathElement = Nothing
For lCount = 0 To objElement.ChildNodes().Length - 1
If UCase(objElement.ChildNodes().Item(lCount).nodeName) = UCase(sNodeName) Then
If lNodeIndex = 1 Then
If sRestOfXPath = "" Then
Set getXPathElement = objElement.ChildNodes().Item(lCount)
Else
Set getXPathElement = getXPathElement(sRestOfXPath, objElement.ChildNodes().Item(lCount))
End If
End If
lNodeIndex = lNodeIndex - 1
End If
Next lCount
ErrPass:
End Function
아래와 같은 결과를 뽑아냈다.
무엇보다 크롤링에는 많은 경험과 노하우가 필요한 분야중 하나라고 생각한다.
이와 같은 작업에 앞서 많은 사람들은 간단히 작업이 되는 줄 안다.
아무튼 도움이 되었길 바라면서....
- 이전글[Excel vba] 프로시져 실행방법 6가지 정리 20.07.18
- 다음글[vb6.0/vba] WinHttp로 사이트 파싱시 한글깨질경우 엔코딩 찿는 방법 20.07.14
댓글목록
등록된 댓글이 없습니다.