# Source code for slidedeckai.helpers.image_search
"""
Search photos using Pexels API.
"""
import logging
import os
import random
import warnings
from io import BytesIO
from typing import Union, Literal
from urllib.parse import urlparse, parse_qs
import requests
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment
load_dotenv()

# Warn at import time when PEXEL_API_KEY is missing or empty
if not os.getenv('PEXEL_API_KEY'):
    warnings.warn(
        'PEXEL_API_KEY environment variable is not set. Image search functionality will not work without it.',
        stacklevel=2
    )

# Endpoint queried by `search_pexels`
PEXELS_URL = 'https://api.pexels.com/v1/search'

# Headers sent with the HTTP requests made by this module. The API key is read once, here,
# at import time; the User-Agent is a Firefox string (see the note in `search_pexels`).
REQUEST_HEADER = {
    'Authorization': os.getenv('PEXEL_API_KEY'),
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
}

# Value passed as `timeout` to the `requests.get` calls in this module
REQUEST_TIMEOUT = 12

# Default number of results requested per search page
MAX_PHOTOS = 3

# Only show errors from urllib3
logging.getLogger('urllib3').setLevel(logging.ERROR)
# Disable all child loggers of urllib3, e.g. urllib3.connectionpool
# logging.getLogger('urllib3').propagate = True
# [docs]
def search_pexels(
        query: str,
        size: Literal['small', 'medium', 'large'] = 'medium',
        per_page: int = MAX_PHOTOS
) -> dict:
    """
    Search for images on Pexels using the provided query.

    This function sends a GET request to the Pexels API with the specified search query
    and an authorization header containing the API key. It returns the JSON response from
    the API.

    [2024-08-31] Note:
    `curl` succeeds but the API call via Python `requests` fails. Apparently, this could be due
    to Cloudflare (or others) blocking the requests, perhaps identifying them as Web-scraping.
    So, the user-agent is changed to Firefox.
    https://stackoverflow.com/a/74674276/147021
    https://stackoverflow.com/a/51268523/147021
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent/Firefox#linux

    Args:
        query: The search query for finding images.
        size: The size of the images: small, medium, or large.
        per_page: No. of results to be displayed per page.

    Returns:
        The JSON response from the Pexels API containing search results. Empty dict if the
        API key is not set.

    Raises:
        requests.exceptions.RequestException: If the request to the Pexels API fails.
    """

    api_key = os.getenv('PEXEL_API_KEY')
    if not api_key:
        return {}

    # `REQUEST_HEADER` captured the key when the module was imported. Overlay the value that
    # just passed the check above so the request never goes out with a missing or stale key
    # when the environment variable was set or changed after import.
    headers = {**REQUEST_HEADER, 'Authorization': api_key}
    params = {
        'query': query,
        'size': size,
        'page': 1,
        'per_page': per_page,
    }

    response = requests.get(
        PEXELS_URL,
        headers=headers,
        params=params,
        timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()  # Ensure the request was successful

    return response.json()
# [docs]
def get_photo_url_from_api_response(
        json_response: dict
) -> tuple[Union[str, None], Union[str, None]]:
    """
    Return a randomly chosen photo from a Pexels search API response. In addition, also return
    the original URL of the page on Pexels.

    Args:
        json_response: The JSON response.

    Returns:
        A `(photo_url, page_url)` tuple. Either element is `None` when the chosen photo lacks
        the corresponding field. Both are `None` when the response has no photos or the API
        key is not set.
    """

    if not os.getenv('PEXEL_API_KEY'):
        return None, None

    photos = json_response.get('photos')
    if not photos:
        return None, None

    # Choose among the photos actually returned. Indexing with a fixed range would raise
    # IndexError whenever the API returns fewer results than were requested.
    photo = random.choice(photos)

    page_url = photo.get('url')
    photo_url = None
    sources = photo.get('src', {})

    # Prefer the 'large' rendition; fall back to 'original' only when 'large' is absent
    if 'large' in sources:
        photo_url = sources['large']
    elif 'original' in sources:
        photo_url = sources['original']

    return photo_url, page_url
# [docs]
def get_image_from_url(url: str) -> BytesIO:
    """
    Fetch an image from the specified URL and return it as a BytesIO object.

    This function sends a GET request to the provided URL, retrieves the image data,
    and wraps it in a BytesIO object, which can be used like a file.

    The `Authorization` header (which carries the Pexels API key) is only sent when the URL
    points at `pexels.com` or one of its subdomains; other hosts receive the remaining
    headers only.

    Args:
        url: The URL of the image to be fetched.

    Returns:
        A BytesIO object containing the image data.

    Raises:
        requests.exceptions.RequestException: If the request to the URL fails.
    """

    headers = dict(REQUEST_HEADER)
    host = (urlparse(url).hostname or '').lower()

    if host != 'pexels.com' and not host.endswith('.pexels.com'):
        # Never hand the API key to a host that is not Pexels
        headers.pop('Authorization', None)

    # The context manager closes the streamed response even when `raise_for_status` raises
    with requests.get(
            url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT
    ) as response:
        response.raise_for_status()
        return BytesIO(response.content)
if __name__ == '__main__':
    # Manual check: run one search and print the response returned by `search_pexels`
    search_results = search_pexels(query='people')
    print(search_results)