Skip to content

Commit 63f023e

Browse files
authored
Merge pull request #149 from hugovk/update-script
Update for congress.gov changes
2 parents 0066ed5 + 3bc05fd commit 63f023e

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

scripts/gpo_member_photos.py

+7 -4
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,14 @@
2626
# pip install -r requirements.txt
2727
import mechanicalsoup
2828

29+
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
30+
'(KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36')
2931

3032
regex1 = re.compile(
31-
'<h2><a href="https://www.congress.gov/member/[^/]+/(\w+)\?'
32-
'resultIndex=\d+">[^<]+</a></h2>\s*<div class="memberImage">'
33-
'<img src="/img/member/([^"]+)\"')
33+
'<a href="https://www.congress.gov/member/[^/]+/(\w+)[^<]+</a></span>'
34+
'[^<]*<div[^<]+<div class="member-image"><img src="/img/member/([^\"]+)"')
3435

35-
regex2 = re.compile('<a class="next" id="pagebottom_next" href="([^"]+)">')
36+
regex2 = re.compile('<a class="next" href="([^"]+)">')
3637

3738

3839
def pause(last, delay):
@@ -170,6 +171,8 @@ def resize_photos():
170171
args = parser.parse_args()
171172

172173
br = mechanicalsoup.Browser()
174+
br.set_user_agent(USER_AGENT)
175+
173176
photo_list = get_photo_list(br, args.congress, args.delay)
174177

175178
number = download_photos(br, photo_list, args.outdir, args.delay)

0 commit comments

Comments
 (0)