Hello Hacker Gentlemen.
Below are the 3 pieces of code we have at the moment. Your task is to edit the allrecipes.py code to make it more useful. Currently, it creates an enormous list of completely unsorted recipes, which is functionally equivalent to throwing a box with thousands of recipe cards into a pile on the floor. You are now going to add some sorting to make that list more useful. In total, your changes will probably be no more than 2-3 lines of code in allrecipes.py. Use regular expressions to, for example, create a separate text file for recipes that contain the word “pie” in the title. That is just one possibility; the sorting criteria are entirely up to you! If you would like to review/finish the lessons on the regex website, click here.
"""Regular-expression warm-up: find "ai" in a sample sentence."""
import re  # re stands for regular expressions

sentence = "The rain in Spain ComPLaiN, 8ain ain't 5ai9t ai"
#           01234567891111111
#                     0123456
# (index ruler: read the two rows vertically to get positions 0-16)

# re.search returns a match object for the FIRST occurrence only;
# .span() gives its (start, end) indices within the sentence.
x = re.search("ai", sentence)
print(x.span())

# Cheat sheet:
# .      is the metacharacter for any character (except for newline)
# [a-z]  represents any lower case alpha character
# [A-Z]  represents any upper case alpha character
# [0-9]  represents any numeric character
# \w     represents "word" characters: a-z, A-Z, 0-9, _ underscore
# \W     represents anything that is NOT a "word" character
# \s     represents a whitespace character
#        (space, tab, newline, carriage return, form feed)
# \S     represents anything that is not a whitespace character
# *      represents any number (zero or more) of the thing it follows

# Every run of word characters that contains "ai".  The pattern is a raw
# string (r'...') so that Python hands the backslash in \w to the regex
# engine untouched instead of treating it as a string escape sequence.
x = re.findall(r'[\w]*ai[\w]*', sentence)
print(x)
"""Crawl NYT Cooking recipe ids and append each title + URL to a text file."""
import re
import time

import requests
from recipe_scrapers import scrape_me

# Resume point: the last line of the checkpoint file is the most recent
# recipe id that main() started working on.
with open("last_recipe_checked.txt", 'rt') as f:
    content = f.readlines()
start = int(content[-1])
currentrecipe = start


def main():
    """Probe recipe ids from ``start`` upward and log every one that exists.

    For each id the checkpoint file is overwritten first, so a crash or
    interruption resumes at the same id on the next run.  Ids whose URL
    answers with HTTP 200 are scraped; the recipe title and URL are
    printed and appended as one line to ``nytimesrecipes.txt``.

    Raises:
        requests.RequestException: if a request fails or exceeds the
            timeout.  It is deliberately not swallowed; rerunning the
            script picks up at the id that failed.
    """
    # Without this, the assignment below would only create a local name
    # and the module-level variable would never change.
    global currentrecipe
    validrecipes = []  # ids that resolved; collected but not used further here
    for i in range(start, 9999999):
        with open("last_recipe_checked.txt", 'w') as f:
            f.write(str(i) + '\n')
        url = 'https://cooking.nytimes.com/recipes/' + str(i)
        time.sleep(0.25)  # pause between requests
        # A timeout keeps one stalled connection from hanging the crawl.
        if requests.get(url, timeout=30).status_code == 200:
            currentrecipe = i
            recipe = scrape_me(url, wild_mode=True)
            title = recipe.title()
            print(title, i)
            with open("nytimesrecipes.txt", "at") as f:
                f.write(title + " " + url + '\n')
            validrecipes.append(i)


if __name__ == "__main__":
    main()
"""Download one recipe's image and lay the recipe out on a one-page PDF."""
import re
import shutil
import time

import requests
from fpdf import FPDF
from recipe_scrapers import scrape_me

url = 'https://cooking.nytimes.com/recipes/103'
recipe = scrape_me(url, wild_mode=True)
image_url = recipe.image()

# The title becomes part of a file name, so characters that file systems
# reject (path separators, ':', '?', quotes, ...) are replaced with '_'.
file_name = re.sub(r'[\\/:*?"<>|]', '_', recipe.title()) + ' image.jpg'

# Remember whether the image actually reached disk, so the PDF step below
# does not try to embed a file that was never written.
image_saved = False
# The context manager releases the streamed connection when done; the
# timeout keeps a stalled server from hanging the script.
with requests.get(image_url, stream=True, timeout=30) as res:
    if res.status_code == 200:
        with open(file_name, 'wb') as f:
            shutil.copyfileobj(res.raw, f)
        image_saved = True
        print('Image sucessfully Downloaded: ', file_name)
    else:
        print('Image Couldn\'t be retrieved')

##################################################
# PDF layout: all measurements are in inches on a portrait letter page.
pdf = FPDF(orientation='P', unit='in', format='letter')
pdf.add_page()

# Title: bold, centred across the full 8.5in page width at the very top.
pdf.set_xy(0.0, 0.0)
pdf.set_font('Arial', 'B', 16)
pdf.set_text_color(0, 0, 0)
pdf.cell(w=8.5, h=1.0, align='C', txt=recipe.title(), border=0)

# Image: 3in square, horizontally centred, directly under the title.
if image_saved:
    pdf.image(file_name, x=2.75, y=1, w=3, h=3, type='jpg', link='')

pdf.set_font('Arial', '', 12)
pdf.set_text_color(0, 0, 0)

# NOTE: despite its name, x is the VERTICAL offset of the next text line.
# It starts at 4in, which is where the image area (y=1 plus h=3) ends.
x = 4
for ingredient in recipe.ingredients():
    # Position the cursor BEFORE drawing.  Otherwise the first ingredient
    # is drawn wherever the title cell left the cursor rather than in the
    # ingredient column.
    pdf.set_xy(1.0, x)
    pdf.cell(w=3.5, h=.25, align='L', txt=ingredient, border=0)
    x += 0.25

# Gap after the ingredient list, then the instructions as wrapped text.
x += 1
pdf.set_xy(1.0, x)
pdf.multi_cell(w=7, h=0.25, align='L', txt=recipe.instructions(), border=0)
pdf.output(file_name + '.pdf', 'F')