How do I index a file system folder through the API?

It's easy. The following Python 2 script (it relies on urllib2) walks a directory tree and submits every file it finds for indexing through the API:

import os
import urllib2
from xml.sax.saxutils import quoteattr
 
def get_filepaths(directory):
    """Return the full path of every file found beneath ``directory``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.  Each returned entry is the containing
    directory joined with the file name.
    """
    collected = []

    # Sub-directory names are not needed here: os.walk descends into them
    # by itself, so only the per-directory file names are consumed.
    for parent, _subdirs, names in os.walk(directory):
        collected.extend(os.path.join(parent, name) for name in names)

    return collected
 
dirpath = "/Applications/SearchbloxServer/webapps/searchblox"  # folder to index
full_file_paths = get_filepaths(dirpath)

posturl = "http://localhost:8080/searchblox/api/rest/docadd"  # SearchBlox REST endpoint
apikey = "XXXXXXXXXXXXXXXXXXXX"  # SearchBlox API Key
collection = "FS"  # SearchBlox collection name (Filesystem Collection only)

# Send one request per file found under dirpath.
for line in full_file_paths:
    # quoteattr() escapes &, < and > and picks/escapes the surrounding
    # quotes itself, so a path such as 'R&D/notes "v2".txt' still produces
    # well-formed XML instead of a broken attribute value.
    xml = """<?xml version="1.0" encoding="utf-8"?>
        <searchblox apikey=%s>
        <document colname=%s location=%s>
        </document> </searchblox>""" % (
        quoteattr(apikey), quoteattr(collection), quoteattr(line))

    req = urllib2.Request(posturl)
    req.add_header('Content-Type', 'application/xml')
    res = urllib2.urlopen(req, xml)  # passing a body makes this a POST
    try:
        response = res.readlines()
    finally:
        # Always release the connection, even if reading the reply fails.
        res.close()

    # Parenthesised form prints the same list under Python 2 and is also
    # valid syntax under Python 3.
    print(response)
 
 
Have more questions? Submit a request

Comments