Skip to content

Commit a2c77a9

Browse files
authored Jun 14, 2017
Create scrapper.py
1 parent 603e7c2 commit a2c77a9

File tree

1 file changed

+103
-0
lines changed

1 file changed

+103
-0
lines changed
 

‎scrapper.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from config_parser import Configuration_Parser
2+
import urllib.request
3+
import ssl
4+
from bs4 import BeautifulSoup
5+
6+
7+
class Apartment():
    """Plain record holding the details of one apartment listing.

    Every field starts out as an empty string. Each field can be read and
    written through its ``get_*`` / ``set_*`` accessor pair; the setters store
    whatever value they are given without validation.
    """

    def __init__(self):
        """Create a listing with every field set to the empty string."""
        self.property_name = ''
        self.url = ''
        self.rent = ''
        self.distance = ''
        self.contact = ''
        self.address = ''
        self.description = ''

    def get_property_name(self):
        """Return the stored property name."""
        return self.property_name

    def set_property_name(self, property_name):
        """Store ``property_name`` as the property name."""
        self.property_name = property_name

    def get_url(self):
        """Return the stored listing URL."""
        return self.url

    def set_url(self, url):
        """Store ``url`` as the listing URL."""
        self.url = url

    def get_rent(self):
        """Return the stored rent value."""
        return self.rent

    def set_rent(self, rent):
        """Store ``rent`` as the rent value."""
        self.rent = rent

    def get_distance(self):
        """Return the stored distance value."""
        return self.distance

    def set_distance(self, distance):
        """Store ``distance`` as the distance value."""
        self.distance = distance

    def get_address(self):
        """Return the stored address."""
        return self.address

    def set_address(self, address):
        """Store ``address`` as the address."""
        self.address = address

    def get_description(self):
        """Return the stored description."""
        return self.description

    def set_description(self, description):
        """Store ``description`` as the description."""
        self.description = description

    def get_contact(self):
        """Return the stored contact value."""
        return self.contact

    def set_contact(self, contact):
        """Store ``contact`` as the contact value."""
        self.contact = contact

    def __repr__(self):
        """Return a debugging representation showing the name, rent and URL."""
        return '{}(property_name={!r}, rent={!r}, url={!r})'.format(
            type(self).__name__, self.property_name, self.rent, self.url)

    def __str__(self):
        """Return a separator line followed by the property name and rent.

        The fields are formatted rather than concatenated with ``+`` so that a
        non-string value (for example a numeric rent) is converted to text
        instead of raising ``TypeError``.
        """
        return '==============\n{}\n{}\n'.format(self.property_name, self.rent)
62+
63+
class Scrapper():
    """Download a listing page and extract one ``Apartment`` per placard.

    The page URL comes from ``config.get_apartment_url()``. The page is
    fetched and parsed once, in the constructor; ``get_apartments_info()``
    then walks the parsed ``placardContainer`` element.
    """

    def __init__(self, config, verify_ssl=False):
        """Fetch and parse the listing page named by ``config``.

        Args:
            config: object exposing ``get_apartment_url()``.
            verify_ssl: when true, verify the server certificate using
                ``ssl.create_default_context()``. Defaults to ``False`` to
                keep the historical behaviour of this class.

        Raises:
            urllib.error.URLError: if the page cannot be fetched.
        """
        self.config = config
        self.url = config.get_apartment_url()
        self.apartments = []

        if verify_ssl:
            context = ssl.create_default_context()
        else:
            # NOTE(review): certificate verification is disabled here (via a
            # private ssl helper), which exposes the request to
            # man-in-the-middle tampering. Kept as the default only for
            # backward compatibility; pass verify_ssl=True where possible.
            context = ssl._create_unverified_context()

        request = urllib.request.Request(self.url)
        # The context manager closes the HTTP response once the body is read.
        with urllib.request.urlopen(request, context=context) as response:
            page = response.read()

        document = BeautifulSoup(page, 'html.parser')
        # ``find`` returns None when the container is missing; that case is
        # handled in get_apartments_info() instead of crashing here.
        self.soup = document.find('div', class_="placardContainer")

    @staticmethod
    def _parse_placard(item):
        """Build an ``Apartment`` from a single ``article.placard`` element."""
        apartment = Apartment()

        # Look the title link up once; it carries both the name and the URL.
        title_link = item.find('a', class_='placardTitle')
        if title_link is not None:
            property_name = title_link.get('title')
            if property_name is not None:
                apartment.set_property_name(property_name)

            url = title_link.get('href')
            if url is not None:
                apartment.set_url(url)

        rent = item.find('span', class_="altRentDisplay")
        if rent is not None:
            apartment.set_rent(rent.getText().strip())

        contact = item.find('div', class_='phone')
        if contact is not None:
            # Store the element's text, like rent, rather than the Tag itself.
            apartment.set_contact(contact.getText().strip())

        return apartment

    def get_apartments_info(self):
        """Return the list of apartments found on the parsed page.

        The list is rebuilt on every call, so calling this method repeatedly
        does not accumulate duplicate entries. An empty list is returned when
        the page had no ``placardContainer`` element.
        """
        apartments = []
        if self.soup is not None:
            for item in self.soup.find_all('article', class_='placard'):
                apartments.append(self._parse_placard(item))

        self.apartments = apartments
        return self.apartments
101+
102+
103+

0 commit comments

Comments
 (0)
Please sign in to comment.