OurBigBook logoOurBigBook Docs OurBigBook logoOurBigBook.comSite Source code
web/models/topic.js
const { DataTypes, Op } = require('sequelize')

const { sequelizeWhereStartsWith } = require('ourbigbook/models')
const ourbigbook_nodejs_webpack_safe = require('ourbigbook/nodejs_webpack_safe')
const { sequelizePostgresqlUserQueryToTsqueryPrefixLiteral } = ourbigbook_nodejs_webpack_safe

const front_js = require('../front/js')
const { querySearchToTopicId } = front_js

module.exports = (sequelize) => {
  const Topic = sequelize.define(
    'Topic',
    {
      articleCount: {
        // Cache of how many articles have this topic.
        type: DataTypes.INTEGER,
        allowNull: false,
        defaultValue: 0,
      },
      topicId: {
        // This is also a cache.
        // It would be possible to avoid using this field by making queries
        // that search for the linked ID. But this hits badly on a critical
        // ArticlePage view path, so we have to optimize it. TODO perhaps
        // we should actually convert topicId to point to the topic rather
        // than be a string.
        type: DataTypes.TEXT,
        allowNull: false,
      },
    },
    {
      indexes: [
        { fields: ['articleCount'] },
        { fields: ['articleId'] },
        { fields: ['topicId'] },
      ]
    }
  )

  Topic.getTopics = async ({
    articleOrder,
    articleWhere,
    count,
    limit,
    offset,
    order,
    orderAscDesc,
    sequelize,
    topicId,
    topicIdSearch,
  }) => {
    const { Article, File, Topic } = sequelize.models
    if (count === undefined) {
      count = true
    }
    if (orderAscDesc === undefined) {
      orderAscDesc = 'DESC'
    }

    // where
    const where = {}
    let whereFts
    /** Get a starts with that will be accelerated both in SQLite and PostgreSQL.
     * In sequelize we need GLOB: https://stackoverflow.com/questions/8584499/should-like-searchstr-use-an-index/76512019#76512019
     * In PostgreSQL GLOB doe not exist and we setup the DB so that LIKE will work: https://dba.stackexchange.com/questions/53811/why-would-you-index-text-pattern-ops-on-a-text-column/343887#343887
     */
    if (topicIdSearch !== undefined) {
      const topicIdSearchArgs = querySearchToTopicId(topicIdSearch)
      if (sequelize.options.dialect === 'postgres') {
        whereFts = {
          ...where,
          [Op.not]: { topicId: sequelizeWhereStartsWith(sequelize, topicIdSearchArgs, '"Topic"."topicId"') },
          topicId_tsvector: { [Op.match]: sequelizePostgresqlUserQueryToTsqueryPrefixLiteral(sequelize, topicIdSearch) },
        }
      }
      where.topicId = sequelizeWhereStartsWith(sequelize, topicIdSearchArgs, '"Topic"."topicId"')
    }
    if (topicId) {
      where.topicId = topicId
    }

    const includeArticle = {
      model: Article,
      as: 'article',
      include: [{
        model: File,
        as: 'file',
      }]
    }
    if (articleWhere) {
      includeArticle.where = articleWhere
    }
    const include = [includeArticle]
    if (order === undefined) {
      order = 'articleCount'
    }
    const orderList = []
    if (topicIdSearch === undefined) {
      orderList.push([order, orderAscDesc])
      if (order !== 'createdAt') {
        orderList.push(['createdAt', 'DESC'])
      }
      if (articleOrder !== undefined) {
        orderList.push([{model: Article, as: 'article'}, articleOrder, 'DESC'])
      }
    } else {
      // See comments under getArticles why we don't do other orders with this one.
      orderList.push(['topicId', 'ASC'])
    }
    const findArgs = {
      include,
      limit,
      offset,
      order: orderList,
      where,
    }
    const findArgss = [findArgs]
    if (whereFts) {
      findArgss.push({
        ...findArgs,
        where: whereFts,
      })
    }

    // Do the searches.
    const rets = await Promise.all(findArgss.map(async (findArgs) => {
      if (count) {
        return Topic.findAndCountAll(findArgs)
      } else {
        return { rows: await Topic.findAll(findArgs) }
      }
    }))

    // Consolidate prefix and fts searches if search is being done.
    let topics = []
    let retCount = 0
    for (const ret of rets) {
      const { rows, count } = ret
      if (rows !== undefined) {
        topics.push(...rows)
      }
      if (count !== undefined) {
        retCount += count
      }
    }
    if (limit) {
      topics = topics.slice(0, limit)
    }
    let ret
    if (count) {
      ret = { rows: topics, count: retCount }
    } else {
      ret = topics
    }
    return ret
  }

  Topic.prototype.toJson = async function(loggedInUser) {
    return {
      articleCount: this.articleCount,
      createdAt: this.createdAt.toISOString(),
      titleRender: this.article.titleRender,
      titleSource: this.article.titleSource,
      topicId: this.article.topicId,
      updatedAt: this.updatedAt.toISOString(),
    }
  }

  Topic.updateTopics = async function(articles, {
    newArticles=false,
    deleteArticle=false,
    transaction,
  }={}) {
    const { Article, Topic } = sequelize.models
    const topicIds = articles.map(article => article.topicId).filter(topicId => topicId)
    if (
      // Happens for Index pages, which have empty string topicId.
      topicIds.length
    ) {
      if (newArticles) {
        // Create any topics that don't exist.
        // Initialize their article count to 0.
        // Initialize their article to an arbitrary article that has the correct topicId.
        // This will then be corrected to the actual representative article
        // in the following query
        //
        // TODO any way to merge with the query below one that updates articleId? I don't think I can ON CONFLICT UPDATE
        // since we don't have a unique key on Topic, it is only unique across the Topic -> Article join on Article.topicId.
        //
        // We find all topics that don't exist as per:
        // https://dba.stackexchange.com/questions/141129/find-ids-from-a-list-that-dont-exist-in-a-table
        await sequelize.query(`
INSERT INTO "${Topic.tableName}" ("articleId", "topicId", "articleCount", "createdAt", "updatedAt")
SELECT "articleId", "topicId", 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP FROM (
  SELECT
    "${Article.tableName}"."id" AS "articleId",
    "${Article.tableName}"."topicId" AS "topicId",
    ROW_NUMBER() OVER (
      PARTITION BY "${Article.tableName}"."topicId"
      ORDER BY "${Article.tableName}"."id" ASC
    ) AS "rnk"
  FROM "${Article.tableName}"
  WHERE
    "${Article.tableName}"."topicId" IN (:topicIds)
    AND "${Article.tableName}"."topicId" NOT IN (
      SELECT "${Article.tableName}"."topicId"
      FROM "${Article.tableName}"
      INNER JOIN "${Topic.tableName}"
        ON "${Article.tableName}"."id" = "${Topic.tableName}"."articleId"
    )
) AS "NewTopicsAndArticleIds"
WHERE "rnk" = 1
`,
        {
          replacements: {
            topicIds,
          },
          transaction,
        }
)
      }
      if (newArticles || deleteArticle) {
        // Update article count of the topics.
        await sequelize.query(`
UPDATE "${Topic.tableName}"
  SET
    "articleCount" = "TopicIdCount"."articleCount"
FROM (
  SELECT
    "${Topic.tableName}"."id" AS "id",
    "Counts"."articleCount"
  FROM "${Topic.tableName}"
  INNER JOIN "${Article.tableName}"
    ON "${Article.tableName}"."id" = "${Topic.tableName}"."articleId"
  INNER JOIN (
    SELECT
      "topicId",
      COUNT(*) AS "articleCount"
    FROM "${Article.tableName}"
    GROUP BY "topicId"
    HAVING "topicId" IN (:topicIds)
  ) AS "Counts"
  ON "Counts"."topicId" = "${Article.tableName}"."topicId"
) AS "TopicIdCount"
WHERE
  "${Topic.tableName}"."id" = "TopicIdCount"."id"
`,
          {
            replacements: {
              topicIds,
            },
            transaction,
          }
        )
      }

      // Determine the representative articles for each topic, and set them.
      // Also increment article counts if needed.
      // We currently look at the most common titleRender
      // of the top 10 most voted articles. Ties are broken by picking the oldest article.
      // Minimal examples of the query can be found at:
      // * https://github.com/cirosantilli/cirosantilli.github.io/blob/master/nodejs/sequelize/raw/most_frequent.js most frequent part only
      // * https://github.com/cirosantilli/cirosantilli.github.io/blob/master/nodejs/sequelize/raw/group_by_max_n.js top N in each group part only
      await sequelize.query(`
UPDATE "${Topic.tableName}"
SET
  "articleId" = "TopArticlePerTopic"."articleId",
  "topicId" = "TopArticlePerTopic"."topicId"
FROM (
  SELECT
    "${Topic.tableName}"."id" AS "id",
    "${Article.tableName}"."topicId" AS "topicId",
    "TopArticlesPerTopic"."articleId" AS "articleId"
  FROM (
    SELECT
      "TopArticles"."topicId" AS "topicId",
      MIN("id") AS "articleId",
      COUNT(*) AS "cnt",
      ROW_NUMBER() OVER (
        PARTITION BY "TopArticles"."topicId"
        ORDER BY
          COUNT(*) DESC,
          MIN("id") ASC
      ) AS "freqRank"
    FROM (
      SELECT
        ROW_NUMBER() OVER (
          PARTITION BY "topicId"
          ORDER BY
            "score" DESC,
            "id" ASC
        ) AS "scoreRank",
        *
      FROM "${Article.tableName}"
      WHERE "topicId" IN (:topicIds)
    ) AS "TopArticles"
    WHERE "TopArticles"."scoreRank" <= :topN
    GROUP BY "TopArticles"."topicId", "titleRender"
  ) AS "TopArticlesPerTopic"
  INNER JOIN "${Article.tableName}"
    ON "${Article.tableName}"."topicId" = "TopArticlesPerTopic"."topicId"
  INNER JOIN "${Topic.tableName}"
    ON "${Topic.tableName}"."articleId" = "${Article.tableName}"."id"
  WHERE "TopArticlesPerTopic"."freqRank" = 1
  ORDER BY
    "TopArticlesPerTopic"."topicId" ASC
) AS "TopArticlePerTopic"
WHERE
  "${Topic.tableName}"."id" = "TopArticlePerTopic"."id"
`,
        {
          replacements: {
            topicIds,
            topN: 10,
          },
          transaction,
        }
      )

    }
  }


  Topic.ALLOWED_SORTS_EXTRA = {
    'article-count': 'articleCount',
    'id': 'topicId',
  }
  Topic.DEFAULT_SORT = 'articleCount'

  return Topic
}