Copy of Basic Web Scraping

Print Links

import requests
from bs4 import BeautifulSoup
# Download the Google home page and print every anchor (<a>) tag found in it.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get("http://www.google.com", timeout=10)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
# find_all returns a list-like collection of every matching tag in the document.
links = soup.find_all("a")
print(links)
# Emit blank lines after the (potentially long) list of links.
print("\n")

Find Links w/ Particular Style Tag

import requests
from bs4 import BeautifulSoup
# Collect the href of the first link inside every <h2> heading on the page.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
urls = []
for h2_tag in soup.find_all("h2"):
    # find() returns None when the heading contains no <a> tag.
    a_tag = h2_tag.find('a')
    # Skip headings without a link, and links without an href attribute,
    # instead of failing with AttributeError / KeyError.
    if a_tag is not None and a_tag.has_attr('href'):
        urls.append(a_tag['href'])
print(urls)

Find First Occurrence of Style Tag

import requests
from bs4 import BeautifulSoup
# Print the first <b> tag in the page.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
# Attribute access on the soup returns the first tag with that name,
# or None when the document has no such tag.
print(soup.b)

Replace A Style Tag With Another

import requests
from bs4 import BeautifulSoup
# Rename the first <b> tag in the parsed page to <blockquote> and print it.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
tag = soup.b
# soup.b is None when the page has no <b> tag; only rename when one exists
# so the assignment does not fail with AttributeError.
if tag is not None:
    # Assigning to .name changes the tag in the parsed tree.
    tag.name = "blockquote"
print(tag)

Access Particular HTML Attribute Inside Tag

import requests
from bs4 import BeautifulSoup
# Print the third <h2> on the page and the value of its id attribute.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
h2_tags = soup.find_all('h2')
# Index 2 is the third heading; fall back to None when the page has fewer
# headings rather than raising IndexError.
tag = h2_tags[2] if len(h2_tags) > 2 else None
print(tag)
if tag is not None:
    # .get() yields None when the tag has no id, where tag['id'] would raise KeyError.
    print(tag.get('id'))

Find All Attributes Inside HTML Tag

import requests
from bs4 import BeautifulSoup
# Print the fourth <h2> on the page and a dict of all of its HTML attributes.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
h2_tags = soup.find_all('h2')
# Index 3 is the fourth heading; fall back to None when the page has fewer
# headings rather than raising IndexError.
tag = h2_tags[3] if len(h2_tags) > 3 else None
print(tag)
if tag is not None:
    # .attrs is the dictionary of attribute name -> value for this tag.
    print(tag.attrs)

Find Text Contents Inside HTML Tag

import requests
from bs4 import BeautifulSoup
# Print the tenth <h2> on the page and the text it contains.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
h2_tags = soup.find_all('h2')
# Index 9 is the tenth heading; fall back to None when the page has fewer
# headings rather than raising IndexError.
tag = h2_tags[9] if len(h2_tags) > 9 else None
print(tag)
if tag is not None:
    # .string is the tag's text when it has a single string child (directly or
    # through a single nested tag); it is None when the tag has several children.
    print(tag.string)

Find Links w/ Particular Word

import requests
from bs4 import BeautifulSoup
# Print every link on the Google home page whose visible text contains "About",
# followed by that link's href.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get("http://www.google.com", timeout=10)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
links = soup.find_all("a")
for link in links:
    # The match is a case-sensitive substring test on the link's text.
    if "About" in link.text:
        print(link)
        # .get() yields None for anchors without an href instead of raising KeyError.
        print(link.get('href'))

Print HTML of WebPage

import requests
from bs4 import BeautifulSoup
# Print the page's HTML, re-indented by BeautifulSoup for readability.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
# prettify() returns the parsed tree as a string with one tag per line.
print(soup.prettify())

Find All Occurrences of Style Tag

import requests
from bs4 import BeautifulSoup
# Print every <h2> tag found on the page.
# A timeout is passed so a stalled connection cannot hang the script forever.
result = requests.get(
    "https://www.whitehouse.gov/briefings-statements/",
    timeout=10,
)
src = result.content  # response body as bytes
soup = BeautifulSoup(src, 'lxml')
# find_all returns all matches; the result is empty when there are none.
print(soup.find_all('h2'))