Elasticsearch

  

Find duplicate `metadata.ExternalId` values within one tenant (each bucket's top hit is the most recently updated document)

{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "tenantId.keyword": {
              "value": "1234"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "duplicateCount": {
      "terms": {
        "field": "metadata.ExternalId.keyword",
        "min_doc_count": 2,
        "size": 100
      },
      "aggs": {
        "duplicateExternalIds": {
          "top_hits": {
            "sort": [
              {
                "updated": {
                  "order": "desc"
                }
              }
            ],
            "_source": {
              "includes": [
                "metadata.ExternalId"
              ]
            },
            "size": 1
          }
        }
      }
    }
  }
}

_delete_by_query

Delete the duplicates for the listed ExternalIds, keeping the documents whose `_id` is excluded under `must_not`

{
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "tenantId.keyword": {
              "value": "1234"
            }
          }
        },
        {
          "terms": {
            "metadata.ExternalId.keyword": [
              "10241776",
              "10374100",
              "10470184"
            ]
          }
        }
      ],
      "must_not": [
        {
          "ids": {
            "values": [
              "263fbfcf-4cba-4cfe-b048-a2a5dcff5a58"
            ]
          }
        }
      ]
    }
  }
}