[vb6.0/vba] [vb6.0/vba] 서울교차로(newspaper.seoulkcr.com)구인 크롤링
하나를하더라도최선을
2020-07-16 18:26
4,296
0
본문
서울교차로(newspaper.seoulkcr.com)구인 크롤링을 해보자
지식인 답변내용입니다.
Private Sub program1472_com()
Dim elem As Object
Dim oHTML As Object
Set oHTML = CreateObject("htmlfile")
Dim URL As String, Cookie As String, T As String, P As Integer
Cells.Clear
Do
P = P + 1
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", URL
.SetRequestHeader "Accept", "text/html, application/xhtml+xml, */*"
.SetRequestHeader "Accept-Language", "ko-KR"
.SetRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
.SetRequestHeader "Host", "newspaper.seoulkcr.com"
.SetRequestHeader "Connection", "Keep-Alive"
If Len(Cookie) Then .SetRequestHeader "Cookie", Cookie
.Send
.WaitForResponse: DoEvents
T = .ResponseText
'T = StrConv(.ResponseBody, vbUnicode)
End With
oHTML.body.innerHTML = T
Dim O As Object, i As Integer
Set O = oHTML.getElementById("container")
Set elem = getXPathElement("/div/div/ul", O)
Dim V(2) As Variant
For i = 1 To 20
Set O = getXPathElement("/li[" & i & "]", elem)
If O Is Nothing Then GoTo ExitSub
V(0) = (P - 1) * 20 + i
V(1) = getXPathElement("/ul/li[1]", O).innerTEXT
V(2) = getXPathElement("/ul/li[2]", O).innerTEXT
Cells(Rows.Count, "A").End(3)(2).Resize(, 3).Value = V
Next
Loop
ExitSub:
MsgBox "완료!!"
End Sub
Public Function getXPathElement(sXPath As String, objElement As Object) As Object
On Error GoTo ErrPass
Dim sXPathArray() As String
Dim sNodeName As String, sNodeNameIndex As String
Dim sRestOfXPath As String
Dim lNodeIndex As Long, lCount As Long
sXPathArray = Split(sXPath, "/")
sNodeNameIndex = sXPathArray(1)
If Not InStr(sNodeNameIndex, "[") > 0 Then
sNodeName = sNodeNameIndex
lNodeIndex = 1
Else
sXPathArray = Split(sNodeNameIndex, "[")
sNodeName = sXPathArray(0)
lNodeIndex = CLng(Left(sXPathArray(1), Len(sXPathArray(1)) - 1))
End If
sRestOfXPath = Right(sXPath, Len(sXPath) - (Len(sNodeNameIndex) + 1))
Set getXPathElement = Nothing
For lCount = 0 To objElement.ChildNodes().Length - 1
If UCase(objElement.ChildNodes().Item(lCount).nodeName) = UCase(sNodeName) Then
If lNodeIndex = 1 Then
If sRestOfXPath = "" Then
Set getXPathElement = objElement.ChildNodes().Item(lCount)
Else
Set getXPathElement = getXPathElement(sRestOfXPath, objElement.ChildNodes().Item(lCount))
End If
End If
lNodeIndex = lNodeIndex - 1
End If
Next lCount
ErrPass:
End Function
아래와 같은 결과를 뽑아냈다.
무엇보다 크롤링에는 많은 경험과 노하우가 필요한 분야중 하나라고 생각한다.
이와 같은 작업에 앞서 많은 사람들은 간단히 작업이 되는 줄 안다.
아무튼 도움이 되었길 바라면서....
댓글목록0