#
# *** ddg.py ***
#
# This is a Python library for fetching search results from DuckDuckGo.
# It gets search results directly from links.duckduckgo.com.

from urllib.request import urlopen, Request
import re, json

# Request headers attached to every page fetch made by loadPage(); the
# User-Agent string identifies this library to the server.
headers = {
    "User-Agent": "ddg.py",
}

def loadPage(url):
    """Ask the user for confirmation, then download *url* and return its text.

    The function blocks on a console prompt before every request, so each
    fetch is an explicit user decision. The request is sent with the
    module-level ``headers`` and the response body is decoded as UTF-8.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The response body as a ``str``.

    Raises:
        KeyboardInterrupt: If the user cancels at the prompt with CTRL+C.
        UnicodeDecodeError: If the body is not valid UTF-8.
        Any exception raised by ``urlopen`` is propagated unchanged.
    """
    #
    # WARNING: The following line of code is necessary to make this program
    # a user agent rather than a robot. The user decides when and if
    # they want to load more pages. You are strongly encouraged not
    # to remove or "comment out" the following line.
    #
    input("\n[???] ENTER to fetch web page, CTRL+C to cancel ")
    # Use the response as a context manager so the underlying connection is
    # closed deterministically instead of being left to garbage collection.
    with urlopen(Request(url, headers=headers)) as response:
        return response.read().decode("utf-8")

def getVQD(page):
    """Return the VQD token found in the text of a search page.

    The first substring of the form ``,vqd="<digit>-<digits>"`` is located
    in *page* and the value between the double quotes is returned.

    Raises:
        TypeError: If *page* contains no such substring (``re.search``
            returns ``None``, which cannot be subscripted).
    """
    token = re.search(',vqd="[0-9]-[0-9]*"', page)[0]
    # Peel off the leading key and then every remaining double quote,
    # leaving only the bare token value.
    for wrapper in (',vqd="', '"'):
        token = token.replace(wrapper, "")
    return token
def fetchAll(search):
    """Collect the results of every result page for a search term.

    The function first loads the human readable search page to obtain the
    VQD token for the query, then repeatedly loads the JSON-style result
    pages from links.duckduckgo.com, following the next-page link stored
    under the ``'n'`` key of the last entry of each page.

    Paging stops when any of the following happens:
      * a page contains no ``load('d', [...]);`` payload,
      * a page's results are identical to those of the previous page,
      * the last entry of a page has no usable ``'n'`` (next page) link.

    Every page load goes through ``loadPage``, which prompts the user first.

    Args:
        search: The search term as typed by the user.

    Returns:
        A list with the result entries of all pages, in the order fetched.
        Each entry is whatever object the page's JSON payload contained
        (the ``__main__`` example below indexes them as dicts).

    Raises:
        TypeError: From ``getVQD`` if the first page contains no VQD token.
        json.JSONDecodeError: If a result payload is not valid JSON.
        Exceptions raised by ``loadPage`` are propagated unchanged.
    """
    # Imported locally so this function does not depend on edits elsewhere
    # in the file.
    from urllib.parse import quote_plus

    resultsList = []

    # Percent-encode the term so characters such as '&', '#' or '+' cannot
    # corrupt the query string; spaces still become '+'.
    searchTerm = quote_plus(search)
    print("[DDG] Search term is: " + searchTerm)

    # Get the VQD of this search from the first human readable page
    print("[DDG] Fetching first human readable page...")
    currentUrl = "https://duckduckgo.com/?q=" + searchTerm + "&ia=web"
    currentPage = loadPage(currentUrl)
    print("[DDG] Extracting VQD number...")
    VQD = getVQD(currentPage)
    print("[DDG] VQD number is: " + VQD)

    # Use the VQD to access the links subdomain
    print("[DDG] Getting JSON format SERP from links.duckduckgo.com...")
    currentUrl = ("https://links.duckduckgo.com/d.js?q=" + searchTerm
                  + "&s=0&vqd=" + VQD)

    # Captures the JSON array passed as second argument of load('d', ...);
    payloadPattern = re.compile(r"load\('d',(.*}])\);")

    # Seeded with one dummy entry so that its [0:-1] slice is an empty list.
    resultsFromLastPage = ['']

    while True:
        currentPage = loadPage(currentUrl)

        # Extract the results in JSON format
        payload = payloadPattern.search(currentPage)
        if payload is None:
            # No result payload on this page: nothing more to collect.
            break

        # Add the current page of JSON results to the results list.
        # The last entry of a page is not a result (it is used for paging
        # below), so it is excluded from the collected results.
        resultsFromCurrentPage = json.loads(payload.group(1).replace("\t", ""))
        pageResults = resultsFromCurrentPage[0:-1]
        if pageResults == resultsFromLastPage[0:-1]:
            print("[DDG] Current page identical to last, assuming end reached")
            break
        resultsFromLastPage = resultsFromCurrentPage
        resultsList += pageResults
        print("[DDG] Got " + str(len(pageResults)) + " results from current page")

        # Move to next page
        print("[DDG] Moving to next page...")
        try:
            currentUrl = ("https://links.duckduckgo.com/"
                          + resultsFromCurrentPage[-1]['n'])
        except (KeyError, IndexError, TypeError):
            # Last entry is missing, is not a mapping, or has no usable
            # 'n' link: there is no further page to request.
            print("[DDG] End of results")
            break

    return resultsList


if __name__ == "__main__":
    # If running as main program, get search term from user and tell user how
    # to use the results object.
    # NOTE(review): the message refers to "the below console", so this is
    # presumably meant to be run with `python -i` - confirm.
    results = fetchAll(input("\n[???] Search term: "))
    print(
        "\n"
        "[***] To look at the results, browse the list called 'results' using the below\n"
        "[***] console. For example, try typing: results[0]['a']\n"
        "[***] This will show you the description of the first result."
    )

# NOTE: what follows is the source of a separate script, ddg_analysis.py,
# kept here as comments only.
#
# *** ddg_analysis.py ***
#
# This is a script that uses the ddg library to show the user a detailed
# listing of the search results.
#
# import ddg
#
# while True:
#     # Fetch the search results for a user specified search term
#     results = ddg.fetchAll(input("\n[???] Search term: "))
#     print("")
#     i = 1
#
#     # Print out a list of data
#     for result in results:
#         title = result["t"][0:24]
#         if len(title) < 24:
#             title += (" " * (27 - len(title)))
#         else:
#             title += "..."
#         try:
#             source = result["s"]
#         except:
#             source = "Not Available"
#         try:
#             timestamp = result["e"]
#         except:
#             timestamp = "****** Not Available ******"
#         print(" " + str(i) + ":" + (" " * (5 - len(str(i))))
#               + "'s': " + source + " "
#               + "Title: " + title + " "
#               # Uncomment the following line to also print out the timestamp
#               #+ "Timestamp: " + timestamp + " "
#               + "URL: " + result["u"])
#         i += 1