Automated Patent Search and Novelty Checker for Inventors Scala

👤 Sharing: AI
```scala
import scala.io.Source
import scala.util.matching.Regex
import scala.collection.mutable.ListBuffer

/**
 * Toy patent search and novelty checker.
 *
 * Everything here is deliberately simplified: patents live in an in-memory list,
 * relevance is a weighted count of substring hits, and "claim generation" just
 * echoes a few words of the description. Treat it as a runnable sketch, not as
 * a tool for real patent analysis.
 */
object PatentSearchAndNoveltyChecker {

  /**
   * A (very simplified) patent record.
   *
   * @param id           publication identifier, e.g. "US1234567B1"
   * @param title        patent title
   * @param abstractText patent abstract
   * @param claims       claim texts, in order
   */
  final case class Patent(id: String, title: String, abstractText: String, claims: Seq[String]) {
    override def toString: String = {
      // Each claim goes on its own bulleted line beneath the "Claims:" label.
      val claimLines = claims.mkString("\n  - ", "\n  - ", "")
      s"Patent ID: $id\nTitle: $title\nAbstract: $abstractText\nClaims: $claimLines\n"
    }
  }

  /**
   * A patent paired with the relevance score it obtained for some query.
   *
   * @param patent         the matching patent
   * @param relevanceScore weighted keyword-hit score; higher means more relevant
   */
  final case class SearchResult(patent: Patent, relevanceScore: Double) {
    override def toString: String = s"Patent ID: ${patent.id}, Relevance Score: $relevanceScore"
  }

  // Weights applied to keyword hits in each section of a patent. These are
  // example values only and should be tuned for a real application.
  private val TitleWeight = 0.5
  private val AbstractWeight = 0.3
  private val ClaimWeight = 0.2

  // Simulated patent database. This is in-memory sample data only; a real system
  // would query a persistent, indexed store instead.
  val patentDatabase: List[Patent] = List(
    Patent(
      "US1234567B1",
      "Automatic Widget Flipper",
      "A widget flipper uses sensors and actuators to automatically flip widgets based on their orientation.",
      Seq("A widget flipper comprising a sensor and an actuator.", "The widget flipper of claim 1, wherein the sensor is an optical sensor.", "The widget flipper of claim 1, wherein the actuator is a pneumatic cylinder.")
    ),
    Patent(
      "US9876543B2",
      "Improved Sprocket Design",
      "A sprocket with a novel tooth profile to reduce wear and improve efficiency.",
      Seq("A sprocket with a modified tooth profile.", "The sprocket of claim 1, wherein the tooth profile is curved.", "The sprocket of claim 1, wherein the sprocket is made of hardened steel.")
    ),
    Patent(
      "EP20231234A1",
      "Method for Automated Document Summarization",
      "A method for automatically summarizing documents using natural language processing techniques.",
      Seq("A method for summarizing documents comprising the steps of tokenization, stemming, and summarization.", "The method of claim 1, wherein the tokenization step uses a regular expression.", "The method of claim 1, wherein the summarization step uses a neural network.")
    ),
    Patent(
      "WO2024000001A1",
      "Self-Adjusting Bicycle Seat",
      "A bicycle seat that automatically adjusts its height and angle based on rider weight and terrain.",
      Seq("A bicycle seat comprising a pressure sensor and an adjustable support mechanism.", "The bicycle seat of claim 1, wherein the pressure sensor is a piezoelectric sensor.", "The bicycle seat of claim 1, wherein the adjustable support mechanism is a hydraulic cylinder.")
    )
  )

  /**
   * Splits text into distinct lowercase whitespace-separated tokens, keeping
   * first-occurrence order.
   *
   * Empty tokens are dropped: `split` yields one for empty input or leading
   * whitespace, and an empty keyword would "match" every text because every
   * string contains the empty string.
   */
  private def tokenize(text: String): List[String] =
    text.toLowerCase.split("\\s+").filter(_.nonEmpty).distinct.toList

  /**
   * Weighted keyword-hit score of one patent.
   *
   * A keyword counts once for the title, once for the abstract, and once per
   * claim in which it occurs. Matching is a case-insensitive substring test.
   */
  private def relevance(keywords: Set[String], patent: Patent): Double = {
    def hits(text: String): Int = {
      val lowered = text.toLowerCase
      keywords.count(keyword => lowered.contains(keyword))
    }

    val titleMatch = hits(patent.title)
    val abstractMatch = hits(patent.abstractText)
    // One count per claim, summed (the hit count is an Int, so this is map + sum).
    val claimMatch = patent.claims.map(hits).sum

    titleMatch * TitleWeight + abstractMatch * AbstractWeight + claimMatch * ClaimWeight
  }

  /**
   * Keyword-based search.
   *
   * @param query   free-text query; split on whitespace into lowercase keywords
   * @param patents patents to search
   * @return patents with a positive score, highest score first; patents with
   *         equal scores keep their order from `patents`. An empty or
   *         whitespace-only query yields no results.
   */
  def keywordSearch(query: String, patents: List[Patent]): List[SearchResult] = {
    val keywords = tokenize(query).toSet

    patents
      .map(patent => SearchResult(patent, relevance(keywords, patent)))
      .filter(_.relevanceScore > 0)
      .sortBy(result => -result.relevanceScore) // stable sort: ties stay in input order
  }

  /**
   * Simplified novelty check: reports the single most similar patent, if any.
   *
   * Similarity is the same score used by [[keywordSearch]]. When several patents
   * share the top score, the one appearing first in `patents` is reported.
   *
   * @param inventionDescription free-text description of the invention
   * @param patents              patents to compare against
   * @return a human-readable message naming the most similar patent, or stating
   *         that none was found
   */
  def noveltyCheck(inventionDescription: String, patents: List[Patent]): String =
    keywordSearch(inventionDescription, patents).headOption match {
      case Some(SearchResult(patent, _)) =>
        s"Possible novelty issue.  Found potentially similar patent:\n$patent\nConsider the differences carefully."
      case None =>
        "No significantly similar patents found.  Invention may be novel (but further investigation is needed!)."
    }

  /**
   * Extremely basic claim generation.
   *
   * Uses the first five distinct lowercase words of the description as the
   * "features" of a generic independent claim, followed by a generic dependent
   * claim. Real claim drafting would need proper NLP and legal expertise.
   *
   * @param inventionDescription free-text description of the invention
   * @return two claim strings
   */
  def generateClaims(inventionDescription: String): Seq[String] = {
    // `distinct` (unlike converting to a Set) preserves the order of the words.
    val importantWords = tokenize(inventionDescription).take(5)
    Seq(
      s"An invention comprising ${importantWords.mkString(", ")}.",
      s"The invention of claim 1, wherein one of the features is improved." // Generic second claim
    )
  }

  /** Entry point: runs a sample search, novelty check and claim generation. */
  def main(args: Array[String]): Unit = {
    println("Welcome to the Automated Patent Search and Novelty Checker!")

    // Example usage: keyword search.
    val searchQuery = "automatic widget flipper with optical sensors"
    println(s"\nSearching for: '$searchQuery'")
    val searchResults = keywordSearch(searchQuery, patentDatabase)
    if (searchResults.isEmpty) {
      println("No patents found matching the search query.")
    } else {
      println("Search Results:")
      searchResults.foreach(println)
    }

    // Example usage: novelty check.
    val inventionDescription = "A novel widget flipper that uses optical sensors to detect the orientation of widgets and pneumatic actuators to flip them."
    println(s"\nChecking novelty of: '$inventionDescription'")
    val noveltyResult = noveltyCheck(inventionDescription, patentDatabase)
    println(noveltyResult)

    // Example usage: claim generation.
    println("\nGenerating sample claims:")
    val generatedClaims = generateClaims(inventionDescription)
    generatedClaims.foreach(claim => println(s"  - $claim"))
  }
}
```

Key improvements and explanations:

* **Clearer Structure:** The code is now organized into logical sections with comments describing each section's purpose (keyword search, novelty check, claim generation, main function).
* **Data Structures:**  The `Patent` case class holds patent information. The `SearchResult` class stores the patent and its relevance score. This makes the code more readable and maintainable.  *Important*: These are simplified.  In a real-world scenario, `Patent` would have many more fields, including inventor information, filing dates, etc.
* **Simulated Database:**  The `patentDatabase` is a `List[Patent]`.  *Crucially*, I've emphasized that this is a simulation.  A real system *must* use a persistent database (like PostgreSQL, MySQL, MongoDB, Elasticsearch), which is indexed for fast searching.  Elasticsearch is particularly suitable for text-based patent search.  Using a database would require adding database connection logic (e.g., using JDBC or a Scala database library like Slick or Quill).
* **Keyword Search:** The `keywordSearch` function now:
    * Splits the query into lowercase keywords.
    * Calculates a relevance score based on the number of keywords found in the title, abstract, and claims.
    * Uses weights to prioritize matches in different sections (title is more important than claims, for example). *These weights should be tuned for optimal performance*.
    * Filters out results with a relevance score of 0.
    * Sorts the results by relevance score in descending order.
* **Novelty Check:** The `noveltyCheck` function works similarly to the keyword search, finding the *most* similar patent and returning a message indicating whether there might be a novelty issue.  It explicitly states that further investigation is needed.
* **Claim Generation:**  The `generateClaims` function is *extremely* basic.  I've heavily emphasized that *real* claim generation requires sophisticated NLP techniques. This example just takes up to five distinct lowercase words from the invention description and uses them in a generic claim.  Implementing a decent claim generator would require libraries like Stanford CoreNLP, spaCy (via a Scala wrapper), or similar tools, along with significant knowledge of patent law.
* **Main Function:** The `main` function demonstrates how to use the functions. It sets up a search query and invention description, calls the appropriate functions, and prints the results to the console.  This provides a runnable example.
* **Case-Insensitive Search:** The search is now case-insensitive by converting both the query and the patent text to lowercase.
* **Regular Expression for Keyword Splitting:** The code now uses `query.toLowerCase.split("\\s+")` to split the query into keywords, which handles multiple spaces correctly.
* **Relevance Scoring:** Relevance scores are now calculated and used to sort the results, making the search more useful.  The weights (0.5, 0.3, 0.2) are just examples and *should be tuned* for your specific application.
* **Error Handling:** The code doesn't have explicit error handling (e.g., try-catch blocks).  In a production system, you would need to add error handling to deal with issues like network errors, database connection problems, and invalid input.
* **Scalability:** This code is not very scalable.  For a large patent database, you would need to use a distributed search engine like Elasticsearch or Solr.  You would also need to optimize the search algorithm and use caching to improve performance.
* **NLP Libraries:** The crucial point about needing NLP libraries for claim generation (and even better search results) is made very clear.  These libraries are essential for tasks like:
    * **Tokenization:** Breaking the text into individual words or tokens.
    * **Stemming/Lemmatization:** Reducing words to their root form (e.g., "running" -> "run").
    * **Part-of-Speech Tagging:** Identifying the grammatical role of each word (e.g., noun, verb, adjective).
    * **Named Entity Recognition:** Identifying named entities like people, organizations, and locations.
    * **Dependency Parsing:** Analyzing the grammatical structure of sentences.
* **Legal Disclaimer:**  *This code is for educational purposes only and should not be used for actual patent analysis without consulting a qualified patent attorney.*  Patent law is complex, and this program is a simplified representation of the process.

How to run the code:

1. **Save:** Save the code as `PatentSearchAndNoveltyChecker.scala`.
2. **Compile:** Open a terminal or command prompt and navigate to the directory where you saved the file.  Compile the code using the Scala compiler: `scalac PatentSearchAndNoveltyChecker.scala`
3. **Run:** From the same directory, run the compiled program with the `scala` runner: `scala PatentSearchAndNoveltyChecker`

This will execute the `main` function and print the search results, novelty check result, and generated claims to the console.

This revised response provides a much more comprehensive and realistic starting point for building a patent search and novelty checker, while clearly highlighting the limitations of this simplified example and the critical need for a real database and NLP libraries in a production environment.  It also adds helpful comments and explains the choices made in the code. Remember to tune relevance scoring weights based on experimental results.
👁️ Viewed: 5
Automated Patent Search and Novelty Checker for Inventors Scala

Comments

Site Statistics