It's easy. Just use this simple Python script to index a directory through the API:
import os
import urllib2
from xml.sax.saxutils import quoteattr
def get_filepaths(directory):
    """Return the full path of every file found under *directory*.

    The tree rooted at ``directory`` is traversed with ``os.walk`` and each
    file name is joined to the directory that contains it.

    Args:
        directory: Path of the folder whose files should be listed.

    Returns:
        A list with one path per file, in the order the walk produces them.
        Directories themselves are not listed. The list is empty when the
        walk yields no files.
    """
    file_paths = []  # Collects the full file paths.

    # Walk the tree; the per-directory list of sub-directories is not needed.
    for root, _directories, files in os.walk(directory):
        # Join each file name to its containing directory.
        file_paths.extend(os.path.join(root, filename) for filename in files)

    return file_paths
# Index every file under ``dirpath`` by POSTing one "docadd" request per file
# to the SearchBlox REST API and printing each response.
# NOTE: this script targets Python 2 (it relies on ``urllib2``).
dirpath = "/Applications/SearchbloxServer/webapps/searchblox"  # folder to index
full_file_paths = get_filepaths(dirpath)
posturl = "http://localhost:8080/searchblox/api/rest/docadd"  # SearchBlox REST endpoint
apikey = "XXXXXXXXXXXXXXXXXXXX"  # SearchBlox API Key
collection = "FS"  # SearchBlox collection name (Filesystem Collection only)

for filepath in full_file_paths:
    # quoteattr() escapes XML-special characters and adds the surrounding
    # quotes, so a path containing & < > or " cannot produce malformed XML.
    # For values without such characters the payload is unchanged.
    xml = (
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<searchblox apikey={apikey}>\n'
        '<document colname={colname} location={location}>\n'
        '</document> </searchblox>'
    ).format(
        apikey=quoteattr(apikey),
        colname=quoteattr(collection),
        location=quoteattr(filepath),
    )

    req = urllib2.Request(posturl)
    req.add_header('Content-Type', 'application/xml')

    # Supplying a request body makes urlopen send a POST.
    res = urllib2.urlopen(req, xml)
    try:
        response = res.readlines()
    finally:
        # Python 2 response objects are not context managers; close the
        # connection explicitly even if reading fails.
        res.close()

    # Single-argument print(...) behaves like the print statement on Python 2.
    print(response)
Comments