# Spaces:
# Sleeping
# Sleeping
import json
import os
import sys
import warnings

from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer
# Sentence-embedding model; its encode() output is stored as the k-NN vector
# of each listing description.
model = SentenceTransformer("all-MiniLM-L6-v2")

host = "localhost"
port = 9200

# Prefer the OPENSEARCH_ADMIN_PASSWORD environment variable. The built-in
# fallback is kept only so existing local setups keep working; because it is
# a credential embedded in source, warn whenever it is actually used.
_DEFAULT_ADMIN_PASSWORD = "yw7L5u9nLs3a"
OPENSEARCH_ADMIN_PASSWORD = os.getenv("OPENSEARCH_ADMIN_PASSWORD")
if OPENSEARCH_ADMIN_PASSWORD is None:
    warnings.warn(
        "OPENSEARCH_ADMIN_PASSWORD is not set; falling back to the default "
        "password embedded in this script. Set the environment variable for "
        "anything other than a throwaway local cluster."
    )
    OPENSEARCH_ADMIN_PASSWORD = _DEFAULT_ADMIN_PASSWORD

auth = ("admin", OPENSEARCH_ADMIN_PASSWORD)

# Create the client with SSL/TLS enabled. NOTE: certificate validation
# (verify_certs) and hostname checking (ssl_assert_hostname) are both turned
# off and the resulting warnings are suppressed, so the server's identity is
# NOT verified. Presumably this targets a local cluster with a self-signed
# certificate -- do not reuse these settings against a remote host.
client = OpenSearch(
    hosts=[{"host": host, "port": port}],
    http_compress=True,  # enables gzip compression for request bodies
    http_auth=auth,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)
_INDEX_NAME = "datafiniti_props"


def _build_document(record):
    """Turn one raw listing record into ``(doc_id, document)`` for indexing.

    The document id is the normalized address string. The description with
    the greatest ``dateSeen`` value is used as the public description and is
    also embedded with the module-level ``model``.

    Raises:
        KeyError, IndexError, TypeError, ValueError: if a required field is
            missing, empty, or not of the expected shape.
    """
    bathrooms = int(record["numBathroom"])
    address = ", ".join(
        [
            record["address"],
            record["city"],
            record["province"],
            record["postalCode"][:5],  # keep only the first five characters
        ]
    )
    # max() returns the first entry with the greatest dateSeen, which is the
    # same element a stable descending sort would place at index 0.
    latest = max(record["descriptions"], key=lambda entry: entry["dateSeen"])
    description = latest["value"]
    document = {
        "bathrooms": bathrooms,
        "bedrooms": record["numBedroom"],
        "listingPrice": record["mostRecentPriceAmount"],
        "squareFootage": record["floorSizeValue"],
        "address": address,
        "publicDescription": description,
        "publicDescriptionKnn": model.encode(description).tolist(),
    }
    return address, document


# The input is read as JSON Lines: one listing object per line.
bulk_body = []
with open("datafiniti_properties_sunnyvale_400.json", "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        try:
            record = json.loads(line)
            print(f'indexing {record["address"]}')
            doc_id, row = _build_document(record)
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            # Best effort: an incomplete or malformed listing is skipped, but
            # reported so that data problems do not go unnoticed.
            # json.JSONDecodeError is a ValueError subclass.
            print(f"skipping line {line_no}: {exc!r}", file=sys.stderr)
            continue
        # The bulk API expects an action line followed by the document source.
        bulk_body.append({"create": {"_index": _INDEX_NAME, "_id": doc_id}})
        bulk_body.append(row)

if bulk_body:
    response = client.bulk(
        index=_INDEX_NAME,
        body=bulk_body,
    )
    # A bulk call can succeed as a whole while individual items fail (for
    # example a "create" on an id that already exists), so check per item.
    if response.get("errors"):
        items = response.get("items", [])
        failed = [item for item in items if "error" in item.get("create", {})]
        print(
            f"{len(failed)} of {len(items)} documents were not indexed",
            file=sys.stderr,
        )
else:
    print("no valid records found; nothing to index", file=sys.stderr)